资源简介
学习python过程中写的一个小说爬虫程序,仅供学习交流
代码片段和文件信息
# E-xiaoshuo (www.zwda.com) novel downloader.
# Standard library first, then third-party, per PEP 8.
import os
import string
import sys
import threading

import requests
from bs4 import BeautifulSoup
class exiaoshuo(object):
    """Downloader for novels from www.zwda.com (E小说网).

    Workflow: ``get_name`` searches the site for the title/author entered
    in ``__init__``, ``panduan`` lets the user pick one result,
    ``get_urls`` collects that novel's chapter list, ``get_txt`` fetches
    one chapter's text, and ``writer`` appends it to the output file.
    """

    def __init__(self):
        # Novel title or author name to search for, read interactively.
        self.exiao = str(input('请输入小说名字或作者的名字:'))
        self.server = 'https://www.zwda.com'
        self.args = 'https://www.zwda.com/search.php?keyword='
        self.names = []   # chapter titles
        self.urls = []    # chapter URLs
        self.nums = 0     # number of chapters
        self.xiao = []    # URLs of matching novels from the search page
        self.shuo = []    # titles of matching novels
        self.author = []  # author names of matching novels

    def get_name(self):
        """Search the site and fill self.shuo / self.xiao / self.author."""
        req = requests.get(url=self.args + self.exiao)
        soup = BeautifulSoup(req.text, 'lxml')
        # Each search hit is an <h3> holding an <a> with title + href.
        hits = soup.find_all('h3', class_='result-item-title result-game-item-title')
        for anchor in BeautifulSoup(str(hits), 'lxml').find_all('a'):
            self.shuo.append(anchor.get('title'))
            self.xiao.append(anchor.get('href'))
        # Author name sits in a tag block; strip layout whitespace.
        for info in soup.find_all(class_="result-game-item-info"):
            tag = BeautifulSoup(str(info), 'lxml').find(class_="result-game-item-info-tag")
            text = tag.text.replace('\r', '').replace('\n', '').replace(' ', '')
            self.author.append(text)

    def get_urls(self, target):
        """Collect chapter names and URLs from a novel's index page.

        :param target: URL of the novel's chapter-list page.
        """
        req = requests.get(url=target)
        req.encoding = 'gbk'  # the site serves GBK-encoded pages
        soup = BeautifulSoup(req.text, 'lxml')
        div = soup.find_all(id='list')
        a = BeautifulSoup(str(div[0]), 'lxml').find_all('a')
        self.nums = len(a)  # chapter count
        for each in a:
            self.names.append(each.string)
            # hrefs on the index page are site-relative.
            self.urls.append(self.server + each.get('href'))

    def panduan(self):
        """Show the search results, let the user pick one, return its URL."""
        global p
        print('寻找如下')
        for i in range(len(self.shuo)):
            print(i + 1, '《' + self.shuo[i] + '》', self.author[i])
        p = int(input('输入下载小说序列:'))
        # Re-prompt until the selection is a valid 1-based index
        # (original only re-asked once and accepted 0/negative values).
        while not 1 <= p <= len(self.shuo):
            print('输入有误,请重新输入')
            p = int(input('输入下载小说序列:'))
        return self.xiao[p - 1]

    def get_txt(self, target):
        """Fetch one chapter page and return its cleaned body text.

        :param target: URL of a single chapter page.
        :return: chapter text with paragraph breaks restored.
        """
        try:
            req = requests.get(url=target, timeout=10)
            req.encoding = 'gbk'
            bf = BeautifulSoup(req.text, 'lxml')
            texts = bf.find_all(id="content")
            # Paragraphs are indented with runs of 4 non-breaking spaces.
            return texts[0].text.replace('\xa0' * 4, '\n')
        except requests.RequestException:
            # Retry on network failure. The original recursed without
            # `return` (and without `self.`), so the fetched text was lost.
            return self.get_txt(target)

    def writer(self, name, path, text):
        """Append one chapter (title then body) to the output file.

        :param name: chapter title.
        :param path: output file path (opened in append mode).
        :param text: chapter body text.
        """
        with open(path, 'a', encoding='utf-8') as f:
            f.write(name + '\n')
            f.writelines(text)
            f.write('\n\n')
class xianc1(exiaoshuo):
def xianc(selfqqwwhh):
- 上一篇:图像处理python
- 下一篇:梯度下降python程序实现+可视化
相关资源
- 一个多线程智能爬虫,爬取网站小说
- 基于Python爬虫爬取天气预报信息
- 顶点小说单本书爬虫.py
- 一个简单的python爬虫
- 豆瓣爬虫;Scrapy框架
- 中国城市经纬度爬虫.ipynb
- Python爬虫数据分析可视化
- 网站列表信息爬虫
- 百度图片爬虫(python版)
- python爬取小说59868
- 彼岸花网壁纸爬虫
- Python 爬虫小说.ipynb
- 爬虫爬取网易云音乐
- 北邮python爬虫学堂在线
- python简单爬虫
- 爬取58同城二手房信息.py
- 知网爬虫软件(python)
- python爬虫爬取微博热搜
- python爬虫爬取旅游信息(附源码,c
- python爬虫爬取豆瓣电影信息
- 爬取上百张妹子图源码可直接运行
- Python爬虫实战入门教程
- 网络爬虫(pachong_anjuke.py)
- Python-京东抢购助手包含登录查询商品
- python网络爬虫获取景点信息源码
- python爬取维基百科程序语言消息盒(
- python新浪微博爬虫
- 12306爬虫实现
- 中国裁判文书网爬虫
- Python爬虫相关书籍.zip
评论
共有 条评论