资源简介
python
实现股吧评论抓取及分析
代码片段和文件信息
import codecs
import random
import re
import time

import jieba
import jieba.analyse
import requests
import tushare
from lxml import html
# Optional proxy mapping passed to every requests call,
# e.g. {"http": "123.53.86.133:61234"}. None passes no explicit proxy.
proxies = None

# Browser-like headers sent with every request to the forum host.
headers = {
    'Host': 'guba.eastmoney.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/49.0.2623.221 Safari/537.36 SE 2.X metaSr 1.0'
    ),
}
def get_url(stocknum, page):
    """Return the post links found on one list page of a stock's forum.

    Args:
        stocknum: Stock code inserted into the list-page URL.
        page: Page number of the list to fetch.

    Returns:
        A list of href strings taken from the "articleh normal_post" rows of
        the page, or an empty list when the download or parsing fails.
    """
    # The comma after "list" is a literal part of the URL path.
    url = 'http://guba.eastmoney.com/list,' + str(stocknum) + '_' + str(page) + '.html'
    try:
        # Retries only take effect when configured on an adapter that is
        # mounted before the request is sent; the context manager closes the
        # session (and its pooled connections) once the page is downloaded.
        with requests.Session() as session:
            session.mount('http://', requests.adapters.HTTPAdapter(max_retries=5))
            response = session.get(url, headers=headers, proxies=proxies, timeout=20)
        tree = html.fromstring(response.text)
        urls = tree.xpath(
            '//div[@id="articlelistnew"]'
            '//div[@class="articleh normal_post"]/span[3]/a/@href'
        )
    except Exception as exc:
        # Best-effort scraping: report the failure, back off for 1-4 seconds
        # and hand back an empty result instead of aborting the crawl.
        print('get_url failed for ' + url + ': ' + repr(exc))
        time.sleep(random.random() + random.randint(1, 3))
        urls = []
    return urls
def get_comments(urls):
    """Download every post in *urls* and save its comments keyed by date.

    Each entry of *urls* is a site-relative link (as returned by ``get_url``)
    that is appended to ``http://guba.eastmoney.com``. For every post the
    timestamps and comment texts are extracted, paired up in order and passed
    to ``save_to_file`` as a dict. Posts without any non-empty comment text
    are skipped. A failure on one post is reported and the loop continues
    with the next one.

    Args:
        urls: Iterable of site-relative post links.
    """
    # Compiled once instead of once per timestamp.
    prefix_pattern = re.compile('发表于| ')
    for newurl in urls:
        post_url = 'http://guba.eastmoney.com' + newurl
        try:
            # Retries only take effect when configured on an adapter that is
            # mounted before the request is sent; the context manager closes
            # the session once the page is downloaded.
            with requests.Session() as session:
                session.mount('http://', requests.adapters.HTTPAdapter(max_retries=5))
                response = session.get(post_url, headers=headers, proxies=proxies, timeout=20)
            tree = html.fromstring(response.text)
            raw_times = tree.xpath(
                '//div[@class="zwfbtime"]/text()'
                '|//div[@class="zwli clearfix"]/div[4]/div/div[2]/text()'
            )
            # Strip the "发表于" prefix and spaces, then keep the first 10
            # characters of what remains.
            times = [prefix_pattern.sub('', x)[:10] for x in raw_times]
            raw_comments = tree.xpath(
                '//div[@class="stockcodec .xeditor"]/text()'
                '|//div[@class="zwli clearfix"]/div[4]/div/div[3]/div/text()'
            )
            # Build the list directly: joining on '!' and splitting again
            # would break any comment that itself contains '!' into several
            # pieces and shift it out of step with its timestamp.
            comments = [w.strip() for w in raw_comments]
            if not any(comments):
                continue
            # NOTE(review): keys are 10-character timestamp prefixes, so
            # entries sharing a prefix overwrite one another in the dict -
            # confirm this is what save_to_file expects.
            save_to_file(dict(zip(times, comments)))
        except Exception:
            # Best-effort scraping: report, back off for 0-4 seconds and go
            # on with the next post. KeyboardInterrupt is no longer swallowed.
            print('error!!!!')
            time.sleep(random.random() + random.randint(0, 3))
# if times and comments:
# dic.append({‘time‘:times‘comment‘:comments})
# re
相关资源
- 机器学习通用代码XGboost、LightGBM、C
- Python-TheElementsofStatisticalLearningESL的中
- 《量化投资:以python为工具》课后习
- 数据结构与算法 Python语言描述-裘宗燕
- Python-基于Tensorflow和Keras实现端到端的
- 《Python QT GUI快速编程 编程指南》书
- Python神经网络编程.pdf(英文版)+代码
- 真实世界的Python仪器监控 数据采集与
- python信号处理
- 22个python项目
- PYTHON数据可视化编程实战书籍+代码
- Python程序设计教程 江红
- 我的python世界--玩《Minecraft我的世界》
- OpenCV 3计算机视觉 Python语言实现第二
- 《Python Spark2.0 Hadoop机器学习与大数据
- laview_and_python_face_recognition.zip
- ABAQUS PYTHON二次开发攻略(完整版
- Python从入门到项目实践全彩版PDF+源码
- python项目学习
- 《python机器学习》
- 黑马程序员python原版教材
- 传智播客&黑马程序员PYTHON教程课件汇
- Python源码剖析
- Python大战机器学习(华校专)2017出版
- Python学习手册.pdf
- 按课时分类_小甲鱼零基础入门学习
- 基于arcgis的python编程秘籍第二版数据
- numpy-python-3.7版本安装包
- fer2013数据集和提取出的数据集图片以
- python.zip
评论
共有 条评论