• 大小: 212KB
    文件类型: .zip
    金币: 2
    下载: 2 次
    发布日期: 2021-07-07
  • 语言: Python
  • 标签: python  

资源简介

Python 爬取摩拜单车 API 数据并做可视化分析（源码）

资源截图

代码片段和文件信息

import datetime
import os
import os.path
import random
import sqlite3
import threading
import time
import ujson
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import requests
from retrying import retry

from modules.ProxyProvider import ProxyProvider


class Crawler:
    def __init__(self):
        self.start_time = datetime.datetime.now()
        self.csv_path = “./db/“ + datetime.datetime.now().strftime(“%Y%m%d“)
        os.makedirs(self.csv_path exist_ok=True)
        self.csv_name = self.csv_path + “/“ + datetime.datetime.now().strftime(“%Y%m%d-%H%M%S“) + ‘.csv‘
        self.db_name = “./temp.db“
        self.lock = threading.Lock()
        self.proxyProvider = ProxyProvider()
        self.total = 0
        self.done = 0

    def get_nearby_bikes(self args):
        try:
            url = “https://mwx.mobike.com/mobike-api/rent/nearbyBikesInfo.do“

            payload = “latitude=%s&longitude=%s&errMsg=getMapCenterLocation“ % (args[0] args[1])

            headers = {
                ‘charset‘: “utf-8“
                ‘platform‘: “4“
                “referer“:“https://servicewechat.com/wx40f112341ae33edb/1/“
                ‘content-type‘: “application/x-www-form-urlencoded“
                ‘user-agent‘: “MicroMessenger/6.5.4.1000 NetType/WIFI Language/zh_CN“
                ‘host‘: “mwx.mobike.com“
                ‘connection‘: “Keep-Alive“
                ‘accept-encoding‘: “gzip“
                ‘cache-control‘: “no-cache“
            }

            self.request(headers payload args url)
        except Exception as ex:
            print(ex)

    def request(self headers payload args url):
        while True:
            proxy = self.proxyProvider.pick()
            try:
                response = requests.request(
                    “POST“ url data=payload headers=headers
                    proxies={“https“: proxy.url}
                    timeout=5verify=False
                )

                with self.lock:
                    with sqlite3.connect(self.db_name) as c:
                        try:
                            print(response.text)
                            decoded = ujson.decode(response.text)[‘object‘]
                            self.done += 1
                            for x in decoded:
                                c.execute(“INSERT INTO mobike VALUES (%d‘%s‘%d%d%s%s%f%f)“ % (
                                    int(time.time()) * 1000 x[‘bikeIds‘] int(x[‘biketype‘]) int(x[‘distId‘])
                                    x[‘distNum‘] x[‘type‘] x[‘distX‘]
                                    x[‘distY‘]))

                            timespend = datetime.datetime.now() - self.start_time
                            percent = self.done / self.total
                            total = timespend / percent
                            print(args self.done percent * 100 self.done / timespend.total_seconds() * 60 total
                                  total - 

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-04-17 15:39  mobike-crawler-master\
     文件        1513  2017-04-17 15:39  mobike-crawler-master\README.md
     目录           0  2017-04-17 15:39  mobike-crawler-master\analysis\
     文件      190964  2017-04-17 15:39  mobike-crawler-master\analysis\analysis.ipynb
     文件        4794  2017-04-17 15:39  mobike-crawler-master\crawler.py
     文件        2718  2017-04-17 15:39  mobike-crawler-master\importToDb.py
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\
     文件         353  2017-04-17 15:39  mobike-crawler-master\influx-importer\build.gradle
     文件          38  2017-04-17 15:39  mobike-crawler-master\influx-importer\settings.gradle
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\main\
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\main\java\
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\main\java\com\
     目录           0  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\main\java\com\april1985\
     文件        3868  2017-04-17 15:39  mobike-crawler-master\influx-importer\src\main\java\com\april1985\Application.java
     目录           0  2017-04-17 15:39  mobike-crawler-master\modules\
     文件         436  2017-04-17 15:39  mobike-crawler-master\modules\Proxy.py
     文件        1178  2017-04-17 15:39  mobike-crawler-master\modules\ProxyProvider.py
     文件           0  2017-04-17 15:39  mobike-crawler-master\modules\__init__.py
     文件         228  2017-04-17 15:39  mobike-crawler-master\sql.sql
     文件         113  2017-04-17 15:39  mobike-crawler-master\start.sh
     目录           0  2017-04-17 15:39  mobike-crawler-master\web\
     文件        5992  2017-04-17 15:39  mobike-crawler-master\web\server.py
     目录           0  2017-04-17 15:39  mobike-crawler-master\web\static\
     文件         180  2017-04-17 15:39  mobike-crawler-master\web\static\.gitignore
     文件       57790  2017-04-17 15:39  mobike-crawler-master\web\static\README.md
     文件         539  2017-04-17 15:39  mobike-crawler-master\web\static\package.json
     目录           0  2017-04-17 15:39  mobike-crawler-master\web\static\public\
     文件        6722  2017-04-17 15:39  mobike-crawler-master\web\static\public\icon.png
     文件        1332  2017-04-17 15:39  mobike-crawler-master\web\static\public\index.html
     文件        2418  2017-04-17 15:39  mobike-crawler-master\web\static\public\select.png
............此处省略11个文件信息

评论

共有 条评论