资源简介

运用了 requests、os、re、lxml、threading、queue、prettytable、bs4 等库,实现了小说的智能选择与爬取。

资源截图

代码片段和文件信息

import requests
import os
import re
import time
from lxml import etree
from threading import Thread
from queue import Queue
from prettytable import PrettyTable
import prettytable as pt
from bs4 import BeautifulSoup


def request():
    """Prompt for a novel title and send a search request for it.

    The entered title is stored in the module-level ``input_name`` so that
    other functions in this file can read it. The HTTP response is fetched
    but its body is not inspected or returned by this function.
    """
    global input_name
    input_name = input("请输入要查找的小说:\n")
    # NOTE(review): the canonical Chrome UA reads "(KHTML, like Gecko)"; the
    # string is kept exactly as found in case the missing comma is intentional.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) '
                      'Chrome/73.0.3683.86 Safari/537.36 '
    }
    root = "https://www.xsbiquge.com/search.php?keyword=" + input_name
    response = requests.get(root, headers=headers)
    response.encoding = 'utf-8'


# NOTE(review): presumably the value callers pass as find_details(information);
# the call site is not visible here, and find_details itself only forwards the
# argument to its own retries -- confirm whether it is still needed.
information = 0


def find_details(information):
    """Search biquger.com for ``input_name`` and let the user confirm the hit.

    The search page is fetched and parsed. When the text of the
    ``#wrapper`` table is at most one character long, the listing text is
    printed and the user is asked to choose:

    * ``1`` -- store the href of the link whose text equals ``input_name``
      in the module-level ``information_first`` and return ``0``;
    * ``2`` -- call ``request()`` for a new title and search again;
    * anything else -- print a hint, wait three seconds and ask again.

    Args:
        information: Not used by this function except to forward it,
            unchanged, to its own retries.

    Returns:
        ``0`` once ``information_first`` has been set (also after retries);
        ``None`` when the ``#wrapper`` table text is longer than one
        character, a case this function does not handle.

    Raises:
        ValueError: If no link text on the page equals ``input_name``.
    """
    global information_first

    # NOTE(review): the canonical Chrome UA reads "(KHTML, like Gecko)"; the
    # string is kept exactly as found in case the missing comma is intentional.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML like Gecko) '
                      'Chrome/73.0.3683.86 Safari/537.36 '
    }
    root = "http://www.biquger.com/modules/article/search.php?searchkey=" + input_name
    response = requests.get(root, headers=headers)
    # Parse the raw bytes so lxml picks the charset from the document itself.
    page_content = etree.HTML(response.content)

    find_first = page_content.xpath('string(//*[@id="wrapper"]/table)')
    print(find_first)
    print(len(find_first))
    find_first_show = page_content.xpath(
        "normalize-space(string(//div[@class='novelslist2']/ul/li[position()>1]))")

    if len(find_first) > 1:
        return None

    print(find_first_show)
    input_name_find = input("你所查找的小说仅此一个按1开始下载,按2返回查找页面\n")

    # Digits with a decimal point get their own hint, as before.
    # isdecimal() (rather than isdigit()) guarantees that int() below succeeds.
    if "." in input_name_find and input_name_find.replace(".", "").isdecimal():
        print("请不要输入带点的小数,只能输入1或2 3秒后返回")
        time.sleep(3)
        return find_details(information)

    choice = int(input_name_find) if input_name_find.isdecimal() else None

    if choice == 1:
        link_xpath = "//div[@class='novelslist2']/ul/li[position()>1]/span[@class='s2']/a"
        find_first_a = page_content.xpath(link_xpath + "/text()")
        print(find_first_a)
        index_first = find_first_a.index(input_name)
        find_first_href = page_content.xpath(link_xpath + "/@href")
        information_first = find_first_href[index_first]
        return 0

    if choice == 2:
        request()
        return find_details(information)

    # Non-numeric input, or a number other than 1/2 (previously ignored
    # silently): show the hint, count down three seconds and ask again.
    print("请输入1或2 不得输入其他字符 3秒后将返回当前的小说页面")
    for remaining in ("3", "2", "1"):
        print(remaining)
        time.sleep(1)
    return find_details(information)


class MyThread(Thread):
    """Worker thread that keeps a reference to a shared queue-like object."""

    def __init__(self, q):
        """Initialise the thread and remember ``q`` as ``self.q``.

        Args:
            q: Object stored on the instance for use by ``run``.
        """
        super().__init__()
        self.q = q

    def run(self):
        # NOTE(review): only the ``global index`` declaration is visible in
        # this excerpt; the remainder of the thread body is not shown here.
        global index

评论

共有 条评论