美团App热门商圈团购采集(1)

环境:

针对美团App 5.4版本

美团App热门商圈团购采集(1) - 图1

在tutorial项目下

新建一个spider

  1. scrapy genspider -t basic Meituan_City meituan.com

编辑items.py

  class MeituanCity(Item):
      """Scrapy item with a single ``data`` field.

      The spider stores one dict per business area in ``data``.
      """

      # Free-form payload; the spider assigns a plain dict here.
      data = Field()

编辑 Meituan_City.py

  1. # -*- coding: utf-8 -*-
  2. import scrapy
  3. import json
  4. from tutorial.items import MeituanCity
  class MeituanCitySpider(scrapy.Spider):
      """Collect Meituan business areas, with coordinates, for a city.

      ``parse`` reads the city-list response and requests the area list of a
      city; ``Parse_Geo`` turns that area list into one ``MeituanCity`` item
      per business area.
      """

      name = "Meituan_City"
      allowed_domains = ["meituan.com"]
      start_urls = (
          'http://api.mobile.meituan.com/group/v1/city/list?show=all',
      )

      def parse(self, response):
          """Request the area list of the first city in the city-list response.

          The city record is handed to ``Parse_Geo`` through ``meta['item']``.
          """
          data = json.loads(response.body)
          for item in data['data']:
              cityId = item['id']
              # e.g. http://api.mobile.meituan.com/group/v2/area/list?cityId=42&spatialFields=center
              url = 'http://api.meituan.com/group/v2/area/list?cityId=%s&spatialFields=center' % cityId
              # Single-argument print(...) behaves the same on Python 2 and 3.
              print(url)
              yield scrapy.Request(
                  url,
                  callback=self.Parse_Geo,
                  meta={'item': item}
              )
              # Only the first city is requested; remove this ``break`` to
              # walk every city in the list.
              break

      @staticmethod
      def _parse_center(center):
          """Convert a ``'POINT(lng lat)'`` string to scaled integer strings.

          Returns a ``(longitude, latitude)`` tuple. Each value is the
          coordinate multiplied by 1000000, truncated to an int and rendered
          as a string, or ``None`` when it cannot be parsed.
          """
          try:
              parts = center.replace('POINT(', '').replace(')', '').split()
          except AttributeError:
              # ``center`` is missing or not a string.
              return None, None

          longitude = None
          latitude = None
          if len(parts) > 1:
              try:
                  longitude = str(int(float(parts[0]) * 1000000))
                  latitude = str(int(float(parts[1]) * 1000000))
              except (ValueError, OverflowError):
                  # Non-numeric, NaN or infinite coordinate: keep whatever was
                  # converted so far and leave the rest as None.
                  pass
          return longitude, latitude

      def Parse_Geo(self, response):
          """Yield one ``MeituanCity`` item per business area in the response.

          The city record is read from ``response.meta['item']``. Each item's
          ``data`` dict holds the city, administrative district and business
          area identifiers and names, plus the area's longitude and latitude.
          Coordinates are computed per area, so an area without a usable
          centre gets ``None`` instead of a previous area's values.
          """
          print(response.url)
          data = json.loads(response.body)
          city = response.meta['item']

          payload = data.get('data') if isinstance(data, dict) else None
          if not isinstance(payload, dict):
              # No usable 'data' section: nothing to emit.
              return

          # Business-area records indexed by their id.
          subareas_by_id = {
              sub['id']: sub for sub in payload.get('subareasinfo') or ()
          }

          for district in payload.get('areasinfo') or ():
              # Administrative district.
              districtName = district['name']
              districtId = district['id']
              for sub_id in district['subareas']:
                  # Business-area record referenced by the district.
                  area = subareas_by_id.get(sub_id)
                  if area is None:
                      # Skip a dangling reference instead of aborting the
                      # whole callback with a KeyError.
                      self.log('business area %r not found in subareasinfo' % (sub_id,))
                      continue

                  longitude, latitude = self._parse_center(area.get('center'))

                  geo_item = MeituanCity()
                  geo_item['data'] = {
                      # City.
                      'cityid': city['id'],
                      'cityname': city['name'],
                      # Administrative district.
                      'districtId': districtId,
                      'districtName': districtName,
                      # Business area.
                      'SubAreaId': area['id'],
                      'secondArea': area['name'],
                      # Longitude / latitude.
                      'longitude': longitude,
                      'latitude': latitude,
                  }
                  yield geo_item

此时运行:

  1. scrapy runspider tutorial/spiders/Meituan_City.py