资源简介
python 抓取1688店铺产品详情,爬虫
代码片段和文件信息
#coding=utf8
import urllib2
import re
import MySQLdb
import requests
from compiler.pycodegen import EXCEPT
import random
import time
import datetime
from lxml import etree
import sys
# Python 2 only: make implicit str <-> unicode conversions use UTF-8 instead of
# ASCII. reload(sys) is needed because sys.setdefaultencoding is removed from
# the sys module at interpreter start-up. The reload builtin does not exist on
# Python 3, so the whole hack is skipped there.
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf8')
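# Prerequisites: create a database named 1688database, create the urltable table in it, then use SQLyog to load all shop URLs into urltable.
# Reads the shop URLs stored in the urltable table into the url_list list.
# URL format: https://shop1470760677060.1688.com/page/offerlist.htm?spm=a261y.7663282.0.0.Su0VBS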
def get_url_list():
    """Return every shop URL stored in the ``urltable`` table.

    Runs ``SELECT url FROM urltable;`` on the module-level cursor ``cur``
    (defined outside this function; presumably a MySQLdb cursor -- confirm
    against the connection setup), prints the value returned by
    ``cur.execute`` as the number of shop addresses, then prints and collects
    the first column of every fetched row.

    Returns:
        list: the ``url`` column values, in the order ``fetchall`` yields them.
    """
    get_url_sql = 'SELECT url FROM urltable;'
    count = cur.execute(get_url_sql)
    # A single parenthesised argument prints identically under Python 2's
    # print statement and Python 3's print function.
    print(u' 有 %s 个店铺地址 ' % count)
    url_list = []
    for row in cur.fetchall():
        # Each row is a sequence with one element: the selected url column.
        print(row[0])
        url_list.append(row[0])
    return url_list
def get_all_goods_url(page):
begin = datetime.datetime.now()
# 如果出现异常,尝试次数5次,还是错误,则判断,页码超出范围,停止采集。
page = page
print u‘.................第%s页...........‘ %page
count = 0
conut_net = 0
DD = True
while DD:
print ‘conut_net-->‘conut_net
try:
proxyHost = “proxy.abuyun.com“
proxyPort = “9020“
proxyUser = “H4073W6H9EJ29Z4D“
proxyPass = “32D3D1294745B2B2“
proxymeta = “http://%(user)s:%(pass)s@%(host)s:%(port)s“ % {
“host“ : proxyHost
“port“ : proxyPort
“user“ : proxyUser
“pass“ : proxyPass
}
proxies = {
“http“ : proxymeta
“https“ : proxymeta
}
headers = {
#‘:authority‘:‘bertoys.1688.com‘
#‘:method‘:‘GET‘
#‘:path‘:‘/page/offerlist.htm?spm=a2615.7691456.0.0.0MNLge&tradenumFilter=false&sampleFilter=false&mixFilter=false&privateFilter=false&mobileOfferFilter=%24mobileOfferFilter&groupFilter=false&sortType=tradenumdown&pageNum=6‘
#‘:scheme‘:‘https‘
‘accept‘:‘text/htmlapplication/xhtml+xmlapplication/xml;q=0.9image/webp*/*;q=0.8‘
‘accept-encoding‘:‘gzip deflate sdch br‘
‘accept-language‘:‘zh-CNzh;q=0.8‘
‘referer‘:url
‘upgrade-insecure-requests‘:‘1‘
‘user-agent‘:useragent
}
shop_url = url.split(‘?‘)[0]
spm = url.split(‘?‘)[1]
parameter = {
‘spm‘:spm
‘tradenumFilter‘:‘false‘
‘sampleFilter‘:‘false‘
‘mixFilter‘:‘false‘
‘privateFilter‘:‘false‘
‘mobileOfferFilter‘:‘$mobileOfferFilter‘
‘groupFilter‘:‘false‘
‘sortType‘:‘tradenumdown‘
‘pageNum‘:str(page)
}
#测试
相关资源
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
- python模拟鼠标点击屏幕
- windows鼠标自动点击py脚本
- 鱼c小甲鱼零基础学python全套课后题和
- Python 练习题100道
- Practical Programming 2nd Edition
- wxPython Application Development Cookbook
- python 3.6
- Python 3.5.2 中文文档 互联网唯一CHM版本
- python3.5.2.chm官方文档
评论
共有 条评论