资源简介
若要自行修改本爬虫,需要具备一定的编程基础,可参考 http://blog.csdn.net/gcs1024/article/details/78559488
代码片段和文件信息
import os
import random
import time as t
import urllib.parse
import urllib.request
from time import localtime, strftime, time

import pytesseract
import requests
from PIL import Image
qid=str(16454455)
rnqian=str(2063096382)
def download(qid, header, i):
    """Fetch the anti-spam image for a questionnaire and save it as a GIF.

    The image is requested from ``AntiSpamImageGen.aspx`` with the
    questionnaire id and the current time in milliseconds as query
    parameters, then written to ``<i>.gif`` relative to the current
    working directory.

    Args:
        qid: Questionnaire id as a string (it is concatenated into the URL).
        header: Mapping of HTTP request headers to send.
        i: Integer used to name the output file (formatted with ``%d``).
    """
    url = ('https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid
           + '&t=' + str(int(time() * 1000)))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the HTTP response and the file even if the
    # read or the write raises.
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
    with open('%d.gif' % i, 'wb') as pic:
        pic.write(data)
def binarizing(img, threshold=30):
    """Binarize a gray image in place and return it.

    Every pixel strictly greater than ``threshold`` is set to 255; every
    other pixel is set to 0.

    Args:
        img: Gray image exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` as ``(width, height)``.
        threshold: Cut-off value; defaults to 30.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            pixdata[x, y] = 255 if pixdata[x, y] > threshold else 0
    return img
def depoint(img):
    """Whiten interior pixels that are mostly surrounded by white, in place.

    For every pixel that is not on the image border, the four direct
    neighbours (up, down, left, right) are inspected; if more than two of
    them are brighter than 245 the pixel is set to 255.

    Neighbours are read from the live pixel data, so pixels whitened
    earlier in the scan influence pixels visited later.

    Args:
        img: Gray image exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            count = sum(1 for value in neighbours if value > 245)
            if count > 2:
                pixdata[x, y] = 255
    return img
def shibie(img):
    """Run OCR on an image and return the recognised text.

    The image is converted to mode ``'L'``, mapped through a lookup table
    (values below 140 become 0, all others 1) into mode ``'1'``, and
    passed to ``pytesseract.image_to_string``. The stripped result is
    printed and returned.

    The original author's note says this is suitable for simple codes.

    Args:
        img: Image object exposing ``convert()``.

    Returns:
        The recognised text with surrounding whitespace removed.
    """
    imgry = img.convert('L')
    threshold = 140
    table = [0 if level < threshold else 1 for level in range(256)]
    out = imgry.point(table, '1')
    # Run OCR once and reuse the result for both the print and the return.
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text
def post(qidrnqiani):
timeg=str(int(time() * 1000))
t.sleep(10)
timep=str(int(time() * 1000))
ip=str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))
rnhou=str(random.randint(1000000099999999))
headerget={
‘Host‘: ‘www.wjx.cn‘
‘Connection‘: ‘keep-alive‘
‘X-Forwarded-For‘: ip
‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4‘
‘Accept‘: ‘image/webpimage/apngimage/**/*;q=0.8‘
‘Referer‘: ‘https://www.wjx.cn/jq/‘+qid+‘.aspx‘
‘Accept-Encoding‘: ‘gzip deflate br‘
‘Accept-Language‘: ‘zh-CNzh;q=0.9‘
‘Cookie‘: ‘.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; LastActivityJoin=16276361101135441472; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314151065385915106588821510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b=‘+timeg
‘RA-Ver‘: ‘2.4‘
‘RA-Sid‘: ‘6d8a2f10c62d11e7gqpxa53987ed19aa47e3‘
- 上一篇:python LDA学习
- 下一篇:python实现的k-means算法
相关资源
- python+ selenium教程
- PycURL(Windows7/Win32)Python2.7安装包 P
- 英文原版-Scientific Computing with Python
- 7.图像风格迁移 基于深度学习 pyt
- 基于Python的学生管理系统
- A Byte of Python(简明Python教程)(第
- Python实例174946
- Python 人脸识别
- Python 人事管理系统
- 基于python-flask的个人博客系统
- 计算机视觉应用开发流程
- python 调用sftp断点续传文件
- python socket游戏
- 基于Python爬虫爬取天气预报信息
- python函数编程和讲解
- Python开发的个人博客
- 基于python的三层神经网络模型搭建
- python实现自动操作windows应用
- python人脸识别(opencv)
- python 绘图(方形、线条、圆形)
- python疫情卡UN管控
- python 连连看小游戏源码
- 基于PyQt5的视频播放器设计
- 一个简单的python爬虫
- csv文件行列转换python实现代码
- Python操作Mysql教程手册
- Python Machine Learning Case Studies
- python获取硬件信息
- 量化交易(附python常见函数的使用方
- python 名字用字排行
评论
共有 条评论