资源简介
获取58同城租房信息,同时破解字体加密,使用 Python 3.7 开发
代码片段和文件信息
# -*- coding: utf-8 -*-
import base64
import re
from io import BytesIO

import scrapy
from fontTools.ttLib import TTFont
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from rentHouse.items import RenthouseItem
class ZhuFangSpider(CrawlSpider):
    """Crawl spider that starts at https://wh.58.com/zufang/ and stays on wh.58.com."""

    name = 'zhu_fang'
    allowed_domains = ['wh.58.com']
    start_urls = ['https://wh.58.com/zufang/']
    rules = (
        # Pagination links (zufang/pn<N>/) are followed but not parsed.
        Rule(LinkExtractor(allow=r'zufang/pn\d+/'), follow=True),
        # Detail pages (zufang/<digits>x.shtml) are dispatched to parse_item.
        Rule(LinkExtractor(allow=r'zufang/\d+x\.shtml'), callback='parse_item'),
    )
def parse_item(self, response):
    """Extract one rental listing from a detail page and yield it as an item.

    The page source is first searched for the text between ``base64,`` and
    the closing ``')``; the list of matches is handed to the getters that
    accept a ``result`` argument.

    Args:
        response: The page response to extract fields from.

    Yields:
        RenthouseItem: One item populated with the fields read from the page
        plus the page URL.
    """
    # re.S lets the captured payload span line breaks.
    # NOTE(review): the "," after "base64" is reconstructed; confirm against
    # the inline font declaration in the live page source.
    # response.text is used in place of the legacy body_as_unicode() alias.
    result = re.findall(r"base64,(.*?)'\)", response.text, re.S)

    # Keyword arguments are evaluated left to right, so the getters run in
    # the same order as before.
    yield RenthouseItem(
        rent_money=self.get_rent_money(response, result),    # rent
        ya_jin=self.get_ya_jin(response),                    # deposit
        rent_way=self.get_rent_way(response),                # rental mode
        house_style=self.get_house_style(response, result),  # house type
        house_toward=self.get_house_toward(response),        # orientation
        house_foor=self.get_house_foor(response, result),    # floor
        xiao_qu=self.get_xiao_qu(response),                  # residential community
        house_area=self.get_house_area(response),            # district
        detail_area=self.get_detail_area(response),          # detailed area
        url=response.url,                                    # link to the listing
    )
def get_rent_money(self, response, result):
    """Return the rent text from the page, decoded through ``self.jiema`` when needed.

    Args:
        response: Page response; queried with XPath for the
            ``b.f36.strongbox`` text nodes.
        result: Sequence of payload strings found in the page source; may be
            empty.

    Returns:
        ``None`` when the XPath matches nothing. The first matched text
        unchanged when ``result`` is empty. Otherwise the value returned by
        ``self.jiema(first_text, result[0])``.
    """
    matches = response.xpath('//b[@class="f36 strongbox"]/text()').extract()
    if not matches:
        return None
    if not result:
        # No payload on the page: the text is returned as-is.
        return matches[0]
    # Only the first payload is used for decoding.
    return self.jiema(matches[0], result[0])
def get_ya_jin(self, response):
    """Return the deposit text from the page.

    Args:
        response: Page response; queried with XPath for ``span.c_333`` text
            nodes.

    Returns:
        The first matched text, or ``None`` when nothing matches.
    """
    texts = response.xpath('//span[@class="c_333"]/text()').extract()
    return texts[0] if texts else None
def get_rent_way(self, response):
    """Return the rental-mode text from the page.

    Args:
        response: Page response; queried with XPath for the second ``span``
            of the first ``li`` inside ``ul.f14``.

    Returns:
        The first matched text, or ``None`` when nothing matches.
    """
    texts = response.xpath('//ul[@class="f14"]/li[1]/span[2]/text()').extract()
    return texts[0] if texts else None
def get_house_style(selfresponseresult):
house_style = response.xpath(‘//ul[@class=“f14“]/li[2]/span[2]/text()‘).extract()
if len(house_style) == 0:
return None
else:
if len(result) == 0:
return house_style[0].replace(“\xa0“““).replace(“ “““)
else:
code_str = result[0]
相关资源
- 基于Python爬虫爬取天气预报信息
- 一个简单的python爬虫
- Python爬虫数据分析可视化
- 北邮python爬虫学堂在线
- python爬虫爬取微博热搜
- python爬虫爬取旅游信息(附源码,c
- python爬虫爬取豆瓣电影信息
- Python爬虫实战入门教程
- Python爬虫相关书籍.zip
- 疫情数据爬虫并绘制柱状图.py
- python新浪微博爬虫,爬取微博和用户
- 一套最新价值1680元的python爬虫实战全
- 11-Python爬虫工程师-App抓取进阶
- 法律判决文书python爬虫、以及数据处
- Python爬虫、Flask框架与ECharts实现数据
- Python爬虫入门到实战 (二花) PDF版
- 学习python爬虫看一篇就足够了之爬取
- 基于Python智联招聘牌爬虫+本科毕业论
- Python爬虫开源项目代码
- 《Python爬虫-开发与项目实战》源码
- Python爬虫爬取智联招聘
- Python爬虫入门:如何爬取招聘网站并
- 基于selenium模拟天眼查登录并爬取企业
- python爬虫爬取杭州市幼儿园信息
- 《零基础:21天搞定Python分布爬虫》课
- python爬虫爬取豆瓣评分数据
- Python爬虫教学PPT
- Python爬虫开发与项目实战.mobi
- (一)python爬虫验证码识别去除干扰
- 全套从零开始学Python网络爬虫教学以
评论
共有 条评论