资源简介
使用 Scrapy 爬虫框架爬取京东、天猫、滚雪球等主流网站的数据,并存入 MongoDB 数据库中。
代码片段和文件信息
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
# Column/field names, one string per entry.
# NOTE(review): these look like the financial-indicator keys used by one of
# the stock spiders -- confirm against the consumer of this list.
# The names must stay separate, comma-delimited literals: adjacent string
# literals without commas are silently concatenated into a single string.
itemlist = [
    'report_date',
    'report_name',
    'avg_roe',
    'np_per_share',
    'operate_cash_flow_ps',
    'basic_eps',
    'capital_reserve',
    'undistri_profit_ps',
    'net_interest_of_total_assets',
    'net_selling_rate',
    'gross_selling_rate',
    'total_revenue',
    'operating_income_yoy',
    'net_profit_atsopc',
    'net_profit_atsopc_yoy',
    'net_profit_after_nrgal_atsolc',
    'np_atsopc_nrgal_yoy',
    'ore_dlt',
    'rop',
    'asset_liab_ratio',
    'current_ratio',
    'quick_ratio',
    'equity_multiplier',
    'equity_ratio',
    'holder_equity',
    'ncf_from_oa_to_total_liab',
    'inventory_turnover_days',
    'receivable_turnover_days',
    'accounts_payable_turnover_days',
    'cash_cycle',
    'operating_cycle',
    'total_capital_turnover',
    'inventory_turnover',
    'account_receivable_turnover',
    'accounts_payable_turnover',
    'current_asset_turnover_rate',
    'fixed_asset_turnover_ratio',
]
class JingdongItem(scrapy.Item):
    """Scrapy item declaring the fields stored for one Jingdong (JD) record.

    Each attribute is a bare ``scrapy.Field()`` with no extra metadata, so
    the class only fixes the set of keys an item instance may carry.
    """

    # A hard-coded collection name was disabled in the original source:
    # collection = 'jd'
    id = scrapy.Field()
    price = scrapy.Field()
    keyword = scrapy.Field()
    title = scrapy.Field()
    promo_words = scrapy.Field()
    sale_num = scrapy.Field()
    href = scrapy.Field()
    shop = scrapy.Field()
    img = scrapy.Field()
    img_small = scrapy.Field()
class TmailItem(scrapy.Item):
    """Scrapy item declaring the fields stored for one "Tmail" record.

    NOTE(review): the name presumably refers to Tmall -- confirm with the
    spider that populates it.  The declared field set is the same as the
    one on ``JingdongItem``.  Each attribute is a bare ``scrapy.Field()``
    with no extra metadata.
    """

    id = scrapy.Field()
    price = scrapy.Field()
    keyword = scrapy.Field()
    title = scrapy.Field()
    promo_words = scrapy.Field()
    sale_num = scrapy.Field()
    href = scrapy.Field()
    shop = scrapy.Field()
    img = scrapy.Field()
    img_small = scrapy.Field()
"""
class JingdongItem(scrapy.Item):
    # define the fields for your item here like:
    collection = 'jd'
    id = scrapy.Field()
    price = scrapy.Field()
    keyword = scrapy.Field()
    title = scrapy.Field()
    promo_words = scrapy.Field()
    comment_num = scrapy.Field()
    href = scrapy.Field()
    shop = scrapy.Field()
    img = scrapy.Field()
    pass
"""
class GSJJItem(scrapy.Item):
    """Scrapy item declaring the fields stored for one company record.

    NOTE(review): "GSJJ" presumably abbreviates the pinyin for "company
    profile" -- confirm with the spider that fills this item.  Each
    attribute is a bare ``scrapy.Field()`` with no extra metadata.
    """

    id = scrapy.Field()

    # Naming / classification fields.
    org_name_cn = scrapy.Field()
    pre_name_cn = scrapy.Field()
    provincial_name = scrapy.Field()
    actual_controller = scrapy.Field()
    classi_name = scrapy.Field()
    main_operation_business = scrapy.Field()
    org_cn_introduction = scrapy.Field()

    # People.
    chairman = scrapy.Field()
    general_manager = scrapy.Field()
    secretary = scrapy.Field()

    # Registration and headcount.
    established_date = scrapy.Field()
    reg_asset = scrapy.Field()
    staff_num = scrapy.Field()
    executives_nums = scrapy.Field()

    # Listing / issuance figures.
    listed_date = scrapy.Field()
    actual_issue_vol = scrapy.Field()
    issue_price = scrapy.Field()
    actual_rc_net_amt = scrapy.Field()
    pe_after_issuing = scrapy.Field()
    online_success_rate_of_issue = scrapy.Field()

    # Contact details.
    telephone = scrapy.Field()
    postcode = scrapy.Field()
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 0 2018-07-12 05:14 __init__.py
目录 0 2018-12-23 18:18 __pycache__\
文件 134 2018-11-25 17:32 __pycache__\__init__.cpython-36.pyc
文件 3182 2018-12-23 17:18 __pycache__\items.cpython-36.pyc
文件 3558 2018-12-16 16:29 __pycache__\middlewares.cpython-36.pyc
文件 1347 2018-12-23 17:11 __pycache__\pipelines.cpython-36.pyc
文件 1909 2018-12-23 18:18 __pycache__\settings.cpython-36.pyc
文件 4716 2018-12-23 17:17 items.py
文件 4360 2018-12-16 16:29 middlewares.py
文件 1506 2018-12-23 17:08 pipelines.py
文件 5944 2018-12-23 18:15 settings.py
目录 0 2019-01-04 08:13 spiders\
文件 161 2018-07-12 05:14 spiders\__init__.py
目录 0 2018-12-23 18:18 spiders\__pycache__\
文件 142 2018-11-25 17:32 spiders\__pycache__\__init__.cpython-36.pyc
文件 852 2018-12-23 18:01 spiders\__pycache__\csv_item_exporter.cpython-36.pyc
文件 2725 2018-12-22 18:13 spiders\__pycache__\gunxq.cpython-36.pyc
文件 3328 2018-12-23 18:18 spiders\__pycache__\gunxqzyzb.cpython-36.pyc
文件 1923 2018-12-08 19:27 spiders\__pycache__\jingdong_.cpython-36.pyc
文件 3106 2018-12-21 21:28 spiders\__pycache__\tmail.cpython-36.pyc
文件 614 2018-12-23 17:38 spiders\csv_item_exporter.py
文件 2821 2018-12-22 18:13 spiders\gunxq.py
文件 3559 2019-02-13 20:52 spiders\gunxqzyzb.py
文件 2275 2018-12-08 19:26 spiders\jingdong_.py
文件 3852 2018-12-21 21:27 spiders\tmail.py
文件 37900 2018-12-22 22:46 stocklist.csv
- 上一篇:WInCE截图小工具
- 下一篇:arduino pid循迹代码.zip
评论
共有 条评论