资源简介
压缩包里有两个文件:一个是 Python 代码文件,另一个是测试数据。程序还不够健壮;由于 Amazon 有防机器人机制,本程序采用延时的方法访问页面,所以耗时比较长。但对于初学者来说,这仍不失为一个好的实例:程序涉及 csv 文件的读写、BeautifulSoup 的使用以及请求报头的伪装。
代码片段和文件信息
from urllib import request
from urllib.request import HTTPError
from urllib.request import URLError
from bs4 import BeautifulSoup
from datetime import datetime
from time import sleep
from tkinter import *
from tkinter import filedialog
import csv
class Application(Frame):
    """Tk frame that loads ASINs from a CSV file and writes their ranks back.

    The window shows a status label and two buttons: one opens a file
    dialog and reads the ``ASIN`` column of the chosen CSV file, the other
    looks up the rank of every loaded ASIN with ``Check.check_rank`` and
    appends the results to the same file through ``write_in``.
    """

    def __init__(self, master=None):
        """Create the frame inside *master* and build its widgets."""
        Frame.__init__(self, master)
        # Nothing is selected yet; funcCheckWrite relies on these defaults
        # instead of failing with AttributeError.
        self.filename = ""
        self.asin_list = []
        self.pack()
        self.createWidgets()

    def createWidgets(self):
        """Create and pack the status label and the two buttons."""
        self.helloLabel = Label(self, text="您好,请选择文件!")
        self.helloLabel.pack()
        self.selectButton = Button(self, text="选择", command=self.funcOpenRead)
        self.selectButton.pack()
        self.sureButton = Button(self, text="确定", command=self.funcCheckWrite)
        self.sureButton.pack()

    def funcOpenRead(self):
        """Ask for a CSV file and load its ``ASIN`` column.

        Returns:
            The list of ASIN strings now held in ``self.asin_list``. When
            the dialog is cancelled the previous selection is kept and
            returned unchanged.

        Raises:
            KeyError: if the chosen file has no ``ASIN`` column.
        """
        filename = filedialog.askopenfilename(title="选择文件")
        if not filename:
            # The dialog returns an empty value on cancel; opening it
            # would raise, so keep whatever was loaded before.
            return self.asin_list
        with open(filename, "r", newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            asin_list = [row["ASIN"] for row in reader]
        # Commit the selection only after the file was read successfully.
        self.filename = filename
        self.asin_list = asin_list
        return self.asin_list

    def funcCheckWrite(self):
        """Look up the rank of every loaded ASIN and write them to the file."""
        if not self.filename:
            # No file chosen yet: show the prompt again instead of crashing.
            self.helloLabel["text"] = "您好,请选择文件!"
            return
        self.helloLabel["text"] = "进行中……"
        # The lookup below blocks the event loop for a long time; force a
        # redraw now so the progress text is actually shown.
        self.update_idletasks()
        check = Check()
        rank_list = check.check_rank(self.asin_list)
        write_in(self.filename, rank_list)
        self.helloLabel["text"] = "已完成"
class Check():
    """Fetches Amazon product pages and extracts their sales rank."""

    def check_rank(self, asin_list, request_delay=10, failure_delay=150):
        """Return one rank string per ASIN, in the order given.

        Args:
            asin_list: iterable of ASIN strings; each is appended to
                ``https://www.amazon.com/dp/`` to form the page URL.
            request_delay: seconds to sleep before every request.
            failure_delay: extra seconds to sleep after a connection
                failure (``URLError``).

        Returns:
            A list with exactly one entry per ASIN: the text of the last
            ``span.zg_hrsr_rank`` element on the page, ``"null"`` when the
            page has no such element (or it has no single text child), or
            a fixed message when the request itself failed.
        """
        head = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0; JuziBrowser) like Gecko",
        }
        rank_list = []
        for asin in asin_list:
            url = "https://www.amazon.com/dp/" + asin
            req = request.Request(url, headers=head)
            # Pause before each request so the requests are spaced out.
            sleep(request_delay)
            try:
                # The context manager closes the response even on error.
                with request.urlopen(req) as web:
                    page = web.read()
            except HTTPError as e:
                # HTTPError is a subclass of URLError, so it must be
                # handled first.
                rank_want = "网页找不到,下架?拼写正确?服务器?UPC?"
                print(e.code)
            except URLError:
                rank_want = "网络连接失败"
                sleep(failure_delay)
            else:
                rank_want = self._extract_rank(page)
            # Every branch above assigns rank_want, so the result list
            # stays aligned with asin_list.
            rank_list.append(rank_want)
        return rank_list

    @staticmethod
    def _extract_rank(page):
        """Return the text of the last ``span.zg_hrsr_rank`` in *page*, or ``"null"``."""
        soup = BeautifulSoup(page, "html.parser")
        spans = soup.find_all("span", class_="zg_hrsr_rank")
        if not spans:
            return "null"
        text = spans[-1].string
        # .string is None when the span has more than one child node.
        return "null" if text is None else str(text)
def write_in(filename, rank_list):
    """Append a timestamped column of ranks to the CSV file *filename*.

    The current local time (``%Y-%m-%d %H:%M``) is appended to the first
    row of the file and the entries of *rank_list* are appended, in order,
    to the rows that follow. The file is then rewritten in place.

    Args:
        filename: path of an existing CSV file.
        rank_list: values to append, one per row after the first.

    Rows beyond the supplied values are written back unchanged; values
    beyond the last row are dropped.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    write_list = [stamp] + list(rank_list)
    # newline="" lets the csv module do its own newline handling.
    with open(filename, "r", newline="") as csvfile:
        lines = list(csv.reader(csvfile))
    # zip stops at the shorter sequence, which gives the truncation
    # behaviour described in the docstring.
    for item, element in zip(lines, write_list):
        item.append(element)
    with open(filename, "w", newline="") as csvfile:
        csv.writer(csvfile).writerows(lines)
# Script entry point: build the window, set its title and run the Tk
# event loop until the window is closed.
if __name__ == "__main__":
    app = Application()
    app.master.title("排名查询")
    app.mainloop()
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 3086 2017-09-28 10:58 checkrank.py
文件 213 2018-01-13 11:21 测试数据.csv
评论
共有 条评论