• 大小: 31KB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2021-05-12
  • 语言: 其他
  • 标签: scrapy  

资源简介

使用 Scrapy 爬虫框架爬取京东、天猫、滚雪球等主流网站,并将抓取到的数据存入 MongoDB 数据库中

资源截图

代码片段和文件信息

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html

import scrapy
# Names of the financial-report indicator columns, in their original order.
# NOTE(review): presumably consumed by a spider/exporter elsewhere in the
# project to build item fields or CSV headers — confirm against the callers.
#
# Every entry must be a separate, comma-terminated ASCII-quoted literal:
# without the commas, adjacent string literals are concatenated by the
# parser into one single string instead of forming a 37-element list.
itemlist = [
    'report_date',
    'report_name',
    'avg_roe',
    'np_per_share',
    'operate_cash_flow_ps',
    'basic_eps',
    'capital_reserve',
    'undistri_profit_ps',
    'net_interest_of_total_assets',
    'net_selling_rate',
    'gross_selling_rate',
    'total_revenue',
    'operating_income_yoy',
    'net_profit_atsopc',
    'net_profit_atsopc_yoy',
    'net_profit_after_nrgal_atsolc',
    'np_atsopc_nrgal_yoy',
    'ore_dlt',
    'rop',
    'asset_liab_ratio',
    'current_ratio',
    'quick_ratio',
    'equity_multiplier',
    'equity_ratio',
    'holder_equity',
    'ncf_from_oa_to_total_liab',
    'inventory_turnover_days',
    'receivable_turnover_days',
    'accounts_payable_turnover_days',
    'cash_cycle',
    'operating_cycle',
    'total_capital_turnover',
    'inventory_turnover',
    'account_receivable_turnover',
    'accounts_payable_turnover',
    'current_asset_turnover_rate',
    'fixed_asset_turnover_ratio',
]

class JingdongItem(scrapy.Item):
    """Scrapy item holding one product record scraped from JD (Jingdong).

    Each attribute is a plain ``scrapy.Field()`` with no metadata. Field
    meanings are inferred from their names — confirm against the spider
    that populates them.
    """

    # NOTE: a class attribute ``collection = 'jd'`` used to be declared here
    # and is currently disabled.

    # Identification and search context.
    id = scrapy.Field()
    keyword = scrapy.Field()

    # Listing details.
    title = scrapy.Field()
    price = scrapy.Field()
    promo_words = scrapy.Field()
    sale_num = scrapy.Field()
    shop = scrapy.Field()

    # Links and images.
    href = scrapy.Field()
    img = scrapy.Field()
    img_small = scrapy.Field()

class TmailItem(scrapy.Item):
    """Scrapy item holding one product record (presumably from Tmall).

    Declares the same ten fields as ``JingdongItem``; every attribute is a
    plain ``scrapy.Field()`` with no metadata. Field meanings are inferred
    from their names — confirm against the spider that populates them.
    """

    # Identification and search context.
    id = scrapy.Field()
    keyword = scrapy.Field()

    # Listing details.
    title = scrapy.Field()
    price = scrapy.Field()
    promo_words = scrapy.Field()
    sale_num = scrapy.Field()
    shop = scrapy.Field()

    # Links and images.
    href = scrapy.Field()
    img = scrapy.Field()
    img_small = scrapy.Field()
"""
class JingdongItem(scrapy.Item):
    # define the fields for your item here like:
    collection = 'jd'
    id = scrapy.Field()
    price = scrapy.Field()
    keyword = scrapy.Field()
    title = scrapy.Field()
    promo_words = scrapy.Field()
    comment_num = scrapy.Field()
    href = scrapy.Field()
    shop = scrapy.Field()
    img = scrapy.Field()
    pass
"""
class GSJJItem(scrapy.Item):
    """Scrapy item holding one company-profile record.

    Every attribute is a plain ``scrapy.Field()`` with no metadata. The
    grouping comments below are inferred from the field names only —
    confirm against the spider that fills this item.
    """

    # Company identity and description.
    id = scrapy.Field()
    org_name_cn = scrapy.Field()
    pre_name_cn = scrapy.Field()
    provincial_name = scrapy.Field()
    actual_controller = scrapy.Field()
    classi_name = scrapy.Field()
    main_operation_business = scrapy.Field()
    org_cn_introduction = scrapy.Field()

    # Management.
    chairman = scrapy.Field()
    general_manager = scrapy.Field()
    secretary = scrapy.Field()

    # Registration and staffing.
    established_date = scrapy.Field()
    reg_asset = scrapy.Field()
    staff_num = scrapy.Field()
    executives_nums = scrapy.Field()

    # Listing / share-issue figures.
    listed_date = scrapy.Field()
    actual_issue_vol = scrapy.Field()
    issue_price = scrapy.Field()
    actual_rc_net_amt = scrapy.Field()
    pe_after_issuing = scrapy.Field()
    online_success_rate_of_issue = scrapy.Field()

    # Contact details.
    telephone = scrapy.Field()
    postcode = scrapy.Field()
 

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     文件           0  2018-07-12 05:14  __init__.py
     目录           0  2018-12-23 18:18  __pycache__\
     文件         134  2018-11-25 17:32  __pycache__\__init__.cpython-36.pyc
     文件        3182  2018-12-23 17:18  __pycache__\items.cpython-36.pyc
     文件        3558  2018-12-16 16:29  __pycache__\middlewares.cpython-36.pyc
     文件        1347  2018-12-23 17:11  __pycache__\pipelines.cpython-36.pyc
     文件        1909  2018-12-23 18:18  __pycache__\settings.cpython-36.pyc
     文件        4716  2018-12-23 17:17  items.py
     文件        4360  2018-12-16 16:29  middlewares.py
     文件        1506  2018-12-23 17:08  pipelines.py
     文件        5944  2018-12-23 18:15  settings.py
     目录           0  2019-01-04 08:13  spiders\
     文件         161  2018-07-12 05:14  spiders\__init__.py
     目录           0  2018-12-23 18:18  spiders\__pycache__\
     文件         142  2018-11-25 17:32  spiders\__pycache__\__init__.cpython-36.pyc
     文件         852  2018-12-23 18:01  spiders\__pycache__\csv_item_exporter.cpython-36.pyc
     文件        2725  2018-12-22 18:13  spiders\__pycache__\gunxq.cpython-36.pyc
     文件        3328  2018-12-23 18:18  spiders\__pycache__\gunxqzyzb.cpython-36.pyc
     文件        1923  2018-12-08 19:27  spiders\__pycache__\jingdong_.cpython-36.pyc
     文件        3106  2018-12-21 21:28  spiders\__pycache__\tmail.cpython-36.pyc
     文件         614  2018-12-23 17:38  spiders\csv_item_exporter.py
     文件        2821  2018-12-22 18:13  spiders\gunxq.py
     文件        3559  2019-02-13 20:52  spiders\gunxqzyzb.py
     文件        2275  2018-12-08 19:26  spiders\jingdong_.py
     文件        3852  2018-12-21 21:27  spiders\tmail.py
     文件       37900  2018-12-22 22:46  stocklist.csv

评论

共有 条评论