资源简介
meituan_spider.rar
代码片段和文件信息
'''
author: Andy丶Tao
CSDN blog: https://blog.csdn.net/tao15716645708
'''
import base64
import json
import random
import re
import zlib
from datetime import datetime

import jsonpath
import pymysql
import requests
import xlrd
from bs4 import BeautifulSoup
def getTime():
    """Return the current Unix time in milliseconds.

    The previous implementation subtracted a naive ``datetime(1970, 1, 1)``
    from the naive *local* ``datetime.now()``, which shifts the result by the
    machine's UTC offset. ``timestamp()`` interprets the naive value as local
    time and converts it to a real POSIX timestamp, so the result no longer
    depends on the time zone of the host.

    :return: milliseconds elapsed since 1970-01-01 00:00:00 UTC, as ``int``
    """
    return int(datetime.now().timestamp() * 1000)
def url_encode(data, stringify=False):
    """Encode a token: (optionally JSON-serialise) -> zlib -> base64.

    :param data: when ``stringify`` is true, a ``str`` that is encoded as-is;
        otherwise any JSON-serialisable object, which is dumped to JSON first
    :param stringify: ``True`` if ``data`` is already the final string and
        must not be JSON-serialised again (default ``False``)
    :return: the base64 encoding of the zlib-compressed text, as ``bytes``
    """
    if not stringify:
        # Every space is removed from the JSON text (including spaces inside
        # string values) before it is compressed.
        data = json.dumps(data).replace(' ', '')
    return base64.b64encode(zlib.compress(data.encode()))
def url_decode(data):
    """Decode a token produced by ``url_encode``: base64 -> zlib inflate.

    :param data: base64 text as ``str``, base64 text as ``bytes`` (the type
        ``url_encode`` returns), or already base64-decoded zlib ``bytes``
    :return: the decompressed payload as ``bytes``
    :raises zlib.error: if a ``bytes`` input is neither a zlib stream nor the
        base64 encoding of one
    """
    if isinstance(data, str):
        return zlib.decompress(base64.b64decode(data))
    try:
        # Raw zlib bytes were the only non-str input supported originally.
        return zlib.decompress(data)
    except zlib.error as raw_error:
        # Base64 bytes, e.g. the direct output of url_encode(), are not a
        # zlib stream themselves; undo the base64 layer and retry.
        try:
            return zlib.decompress(base64.b64decode(data))
        except (ValueError, zlib.error):
            # Neither interpretation worked: report the original failure.
            raise raw_error from None
def get_taken(url):
    """Fetch a hotel page and collect the plain request parameters from it.

    The page is requested with the module-level ``headers`` (defined outside
    this function) and a 2 second timeout. The hotel name is read from the
    HTML; the numeric ids and dates are read from the JSON embedded in the
    response text.

    :param url: hotel page url
    :return: dict with the keys ``name``, ``cityId``, ``poiId``, ``start``,
        ``end``, ``?type``, ``&utm_medium``, ``version_name`` and ``uuid``
    :raises IndexError: if the name element or one of the embedded fields is
        missing from the page
    """
    # Spare uuid values for the fallback described next to ``taken["uuid"]``.
    cookies_iuuid = [
        '93AB5D4FEB3D1BFFF9B7727E5ECE71CF13A51383CD6ADB169C43832A6BB41843',
        '8A8E20A923D42E033BC3505E3460BCC25AEA4D933CE3F233B19679BB0EEC89D4',
        'C68174784AF5C11CC2F127774CC8BA60FB5E766509A7DCA8F4ECDFF59B45076F',
        '850C1A14A798DC5834EEF2177EAAA430A8958DBE0813C5FAE858B61834D1F95D',
    ]
    response = requests.get(url, headers=headers, timeout=2.0)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'lxml')

    taken = {}
    taken["name"] = soup.select('.fs26.fc3.pull-left.bold')[0].text
    # (result key, field name in the embedded JSON). A capture group returns
    # just the digits; the former hand-counted slice for cityId was one
    # character short and left a leading ':' in the value.
    embedded_fields = (
        ("cityId", "cityId"),
        ("poiId", "poiId"),
        ("start", "queryStart"),
        ("end", "queryEnd"),
    )
    for key, field in embedded_fields:
        taken[key] = re.findall(r'"%s":([0-9]*)' % field, response.text)[0]
    taken["?type"] = "1"
    taken["&utm_medium"] = "PC"
    taken["version_name"] = "7.3.0"
    # If the fixed uuid below stops working, comment it out and enable this
    # fallback instead:
    # taken["uuid"] = cookies_iuuid[random.randint(0, 3)]
    taken["uuid"] = '7B20F54E2E3033B75A6B3775DDFDDF7D8EB12B67BA73BF1FA0FAB35619FDE640'
    return taken
def get_tokon(taken):
    """Build the ``sign`` value and return the ``_token`` dictionary.

    :param taken: plain parameters as returned by ``get_taken``; the keys
        ``end``, ``poiId``, ``start``, ``uuid`` and ``version_name`` are read
    :return: the ``_token`` dict, whose ``sign`` entry is the
        ``url_encode``-d parameter string decoded to ``str``
    """
    # The literal double quotes are part of the signed text.
    sign = '"end=%s&poiId=%s&start=%s&type=1&utm_medium=PC&uuid=%s&version_name=%s"' % (
        taken['end'], taken['poiId'], taken['start'], taken['uuid'], taken['version_name'])
    # Read the clock once so that cts is exactly ts + 356.
    ts = getTime()
    _tokon = {
        "rId": 100051,
        "ts": ts,
        "cts": ts + 356,
        "brVD": [1536, 222],
        "brR": [[1536, 864], [1536, 824], 24, 24],
        # NOTE(review): ``url`` is not a parameter or local of this function;
        # it must exist at module level when this runs - confirm against the
        # rest of the file.
        "bI": ["%s" % url, ""],
        "mT": [],
        "kT": [],
        "aT": [],
        "tT": [],
        "sign": url_encode(sign).decode(),
    }
    return _tokon
def get_url(_url):
‘‘‘
由于url连接各式各样,这里提取id,并拼接为
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 7408 2019-03-21 11:01 meituan_spider.py
----------- --------- ---------- ----- ----
7408 1
- 上一篇:扫雷游戏设计思路
- 下一篇:iFIX 数据库参考
相关资源
- pb做监控系统.zip
- 盗号木马源码.rar
- text.hex
- Intouch2014R2-2099.rar
- 老虎机程序v1.1.rar
- ITSS通用标准.pdf
- MUI全套视频教程地址.txt
- ccnbie.doc
- supplyair_2755453.zip
- 中国菜刀.txt
- TCP_Socket.zip
- QQctangyunxiangc.rar
- 东华-表结构资料.zip
- 敏感词词库.txt
- sunnyman2008_10945697.ap14
- Alamouti.zip
- sRGBColorCheckerBoundary.jpg
- 深入性能测试:LoadRunner性能测试、流
- Zookeeper学习.txt
- s.exe
- 编译好的opencv_conrib库.txt
- dr.com5.2.zip
- 新建文本文档.rar
- z6x36z.doc
- tech_video.zip
- 基于UDP的局域网聊天系统.doc
- office密码破解工具OPRTbox绿色注册版
- dhe0ug.rar
- 88npkv.doc
- 前端跳槽面试必备技巧.docx
评论
共有 条评论