• 大小: 63.63MB
    文件类型: .zip
    金币: 2
    下载: 1 次
    发布日期: 2022-12-27
  • 语言: Python
  • 标签:

资源简介

Python 爬虫:爬取站长之家(sc.chinaz.com)的简历模板。毕业了需要简历模板的,可以看一看。

资源截图

代码片段和文件信息

# -*- coding: UTF-8 -*-
import os
import re
from urllib.parse import urljoin

import requests


class Resume(object):
    """Crawl resume-template pages on sc.chinaz.com and download the archives.

    The workflow, driven by ``main``, is: fetch listing pages and collect
    detail-page URLs, extract a download link and a title from every detail
    page, then save each archive under ``OUTPUT_DIR``.

    Attributes:
        headers: HTTP headers sent with every request.
        next_url_list: Detail-page URLs collected by ``get_page``.
        Download_list: ``[download_url, name]`` pairs collected by
            ``parse_page``.
    """

    # NOTE(review): the two link patterns below are reconstructions. In the
    # copy of this script that was reviewed, the HTML inside the regex
    # literals had been stripped (only ``\s`` and the anchor text
    # "福建电信下载" survived), so the exact original expressions are unknown.
    # Confirm both against the live page markup before relying on them.
    DETAIL_LINK_PATTERN = (
        r'href="((?:https?:)?//sc\.chinaz\.com/jianli/\d+\.htm)"'
    )
    DOWNLOAD_LINK_PATTERN = r'<a\s[^>]*?href="([^"]+)"[^>]*>\s*福建电信下载'
    # This pattern survived intact in the reviewed copy.
    TITLE_PATTERN = r'title": "(.*?)"'

    # Directory that downloaded archives are written to.
    OUTPUT_DIR = "test"
    # Seconds to wait for a response; without a timeout a stalled server
    # would block the crawl forever.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/67.0.3396.99 Safari/537.36"
            )
        }
        self.next_url_list = []
        self.Download_list = []

    @classmethod
    def _find_detail_urls(cls, html, base_url):
        """Return the unique, absolute detail-page URLs found in *html*."""
        urls = []
        for href in re.findall(cls.DETAIL_LINK_PATTERN, html):
            # Links may be protocol-relative ("//host/..."); resolve them
            # against the page they were found on.
            absolute = urljoin(base_url, href)
            if absolute not in urls:
                urls.append(absolute)
        return urls

    @classmethod
    def _find_download_info(cls, html, base_url):
        """Return ``(download_links, titles)`` extracted from a detail page."""
        links = [
            urljoin(base_url, href)
            for href in re.findall(cls.DOWNLOAD_LINK_PATTERN, html)
        ]
        titles = re.findall(cls.TITLE_PATTERN, html)
        return links, titles

    @staticmethod
    def _safe_filename(name):
        """Replace characters that are unsafe in file names with ``_``."""
        cleaned = re.sub(r'[\\/:*?"<>|]', "_", name).strip()
        return cleaned or "unnamed"

    def get_page(self, url):
        """Fetch listing page *url* and queue the detail pages it links to.

        New URLs are appended to ``self.next_url_list``; duplicates are
        skipped. A non-200 response is reported and otherwise ignored.
        """
        response = requests.get(
            url=url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            print("请求失败: %s (%s)" % (url, response.status_code))
            return
        print("请求成功")
        for next_url in self._find_detail_urls(response.text, url):
            if next_url not in self.next_url_list:
                self.next_url_list.append(next_url)

    def parse_page(self, next_url):
        """Fetch detail page *next_url* and record its download link and title.

        On success ``[download_url, name]`` is appended to
        ``self.Download_list``. Pages without a download link or a title are
        reported and skipped instead of raising ``IndexError``.
        """
        response = requests.get(
            url=next_url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
        )
        response.encoding = "UTF-8"  # decode the page body as UTF-8
        if response.status_code != 200:
            print("请求失败: %s (%s)" % (next_url, response.status_code))
            return
        print("请求成功")
        Download_url, name = self._find_download_info(response.text, next_url)
        print('----------------------------------------------------')
        print(Download_url)
        print(name)
        print('----------------------------------------------------')
        if not Download_url or not name:
            print("未找到下载链接或标题,跳过: %s" % next_url)
            return
        self.Download_list.append([Download_url[0], name[0]])
        print(Download_url, name)

    def Download(self, download_url, name):
        """Download *download_url* and save it as ``<OUTPUT_DIR>/<name>.rar``.

        The output directory is created when missing, and *name* is sanitised
        because it comes from scraped page content. A non-200 response is
        reported and nothing is written.
        """
        response = requests.get(
            url=download_url,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print("请求失败: %s (%s)" % (download_url, response.status_code))
            return
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)
        target = os.path.join(
            self.OUTPUT_DIR, "%s.rar" % self._safe_filename(name)
        )
        with open(target, 'wb') as f:
            f.write(response.content)

    def main(self, start_page=2, end_page=3):
        """Run the crawl for listing pages ``start_page`` to ``end_page - 1``.

        The defaults reproduce the range hard-coded in the original script
        (page 2 only). Page 1 uses a URL without a numeric suffix.
        """
        for page in range(start_page, end_page):
            if page == 1:
                url = "http://sc.chinaz.com/jianli/free.html"
            else:
                url = "http://sc.chinaz.com/jianli/free_%s.html" % page
            print(url)
            self.get_page(url)
        for next_url in self.next_url_list:
            self.parse_page(next_url)
        for Download_info in self.Download_list:
            self.Download(Download_info[0], Download_info[1])


if __name__ == "__main__":
    resume = Resume()
    resume.main()

     属性            大小     日期    时间   名称
    ----------- ---------  ---------- -----  ----

         文件     240993  2016-01-13 14:44  jianli8782\1.jpg

    ----------- ---------  ---------- -----  ----

                   240993                    1


    评论

    共有 条评论

    相关资源