资源简介
若要自行修改本爬虫,需要具备一定的编程基础,可参考:http://blog.csdn.net/gcs1024/article/details/78559488
代码片段和文件信息
import os
import random
import time as t
import urllib.parse
import urllib.request
from time import localtime, strftime, time

import pytesseract
import requests
from PIL import Image
# Questionnaire id, kept as a string because it is concatenated into URLs.
qid = '16454455'
# Numeric prefix kept as a string; NOTE(review): its use is not visible in this
# excerpt (it is only named as a parameter of post) - confirm against the full script.
rnqian = '2063096382'
def download(qid, header, i):
    """Fetch the anti-spam image for a questionnaire and save it as ``<i>.gif``.

    Args:
        qid: Questionnaire id as a string; sent as the ``q`` query parameter.
        header: Mapping of HTTP headers passed to ``urllib.request.Request``.
        i: Integer used to name the output file (``'%d.gif' % i``), which is
            written relative to the current working directory.

    Returns:
        None. The image bytes are written to disk.
    """
    # The ``t`` query parameter is the current time in milliseconds.
    url = ('https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid
           + '&t=' + str(int(time() * 1000)))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the response and the file even if a read/write fails.
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
    with open('%d.gif' % i, 'wb') as pic:
        pic.write(data)
def binarizing(img, threshold=30):
    """Binarize a gray image in place.

    Every pixel strictly greater than ``threshold`` becomes 255; all other
    pixels become 0.

    Args:
        img: Gray image exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` as ``(width, height)``.
        threshold: Cut-off value; defaults to 30, the value that was
            previously hard-coded.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    for y in range(h):
        for x in range(w):
            pixdata[x, y] = 255 if pixdata[x, y] > threshold else 0
    return img
def depoint(img):
    """Whiten interior pixels that are mostly surrounded by near-white pixels.

    For every pixel that is not on the image border, the four direct
    neighbours (above, below, left, right) are inspected. If more than two
    of them are brighter than 245, the pixel is set to 255.

    The scan runs row by row and writes into the same pixel buffer it reads
    from, so a pixel whitened earlier in the scan counts as a white
    neighbour for pixels visited later.

    Args:
        img: Gray image exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` as ``(width, height)``.

    Returns:
        The same ``img`` object, modified in place.
    """
    pixdata = img.load()
    w, h = img.size
    # Start at 1 and stop one short so that every neighbour index is valid.
    for y in range(1, h - 1):
        for x in range(1, w - 1):
            neighbours = (
                pixdata[x, y - 1],
                pixdata[x, y + 1],
                pixdata[x - 1, y],
                pixdata[x + 1, y],
            )
            count = sum(1 for value in neighbours if value > 245)
            if count > 2:
                pixdata[x, y] = 255
    return img
def shibie(img):
    """Run OCR on an image and return the recognised text.

    The image is converted to mode ``'L'``, mapped through a 256-entry
    lookup table (levels below 140 -> 0, all others -> 1) into mode ``'1'``,
    and passed to ``pytesseract.image_to_string``. The stripped result is
    printed and returned.

    The original author's note says this is suitable for simple codes only.

    Args:
        img: Image object exposing ``convert()``.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    threshold = 140
    gray = img.convert('L')
    table = [0 if level < threshold else 1 for level in range(256)]
    out = gray.point(table, '1')
    # Run OCR once and reuse the result for both the printout and the return
    # value, instead of invoking tesseract twice on the same image.
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text
def post(qidrnqiani):
timeg=str(int(time() * 1000))
t.sleep(10)
timep=str(int(time() * 1000))
ip=str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))+‘.‘+str(random.randint(14))
rnhou=str(random.randint(1000000099999999))
headerget={
‘Host‘: ‘www.wjx.cn‘
‘Connection‘: ‘keep-alive‘
‘X-Forwarded-For‘: ip
‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4‘
‘Accept‘: ‘image/webpimage/apngimage/**/*;q=0.8‘
‘Referer‘: ‘https://www.wjx.cn/jq/‘+qid+‘.aspx‘
‘Accept-Encoding‘: ‘gzip deflate br‘
‘Accept-Language‘: ‘zh-CNzh;q=0.9‘
‘Cookie‘: ‘.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; LastActivityJoin=16276361101135441472; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314151065385915106588821510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b=‘+timeg
‘RA-Ver‘: ‘2.4‘
‘RA-Sid‘: ‘6d8a2f10c62d11e7gqpxa53987ed19aa47e3‘
- 上一篇:python LDA学习
- 下一篇:python实现的k-means算法
相关资源
- python实现的k-means算法
- python LDA学习
- 随机森林的代码实现和相应的数据集
- Python scripts For ABAQUS: Learn By Example
- 小甲鱼零基础入门学习Python视频教程
- 基于tensorflow的二分类的python实现注释
- 《PyTorch生成对抗网络编程》思维导图
- 基于Python实现LFM种子传播算法
- 密码学重合指数计算python实现
- python新手算法函数思想入门项目,包
- 已知空间坐标和对应的属性,利用p
- 小甲鱼pythons视频+课件+源代码(96天)
- 找出最长的句子最长的单词
- 如何封装一个带传参的python程序成可
- 疯狂的python学习笔记
- wxPython写的类似qq截图的小程序
- Python3.6.4+Django2.0.2 单表的增删改查和
- python三边定位模块
- Python帮助手册CHM版
- MIC数据关联性挖掘算法Python源码
- 船舶AIS数据轨迹可视化python代码.rar
- python mysql 简单银行存取款转账系统
- 麦子学院Python全套视频.txt
- python多线程批量端口扫描
- [麻省理工-计算机科学及编程导论][
- python视频教程 老男孩全栈工程师教程
- 读取ros包中rgb和depth图,python代码
- python爬取亚马逊排名
- Python数据分析与机器学习-Python库分析
- 20newsgroup python分类聚类
评论
共有 条评论