资源简介

实现对中国大学MOOC上的视频、文档、附件进行爬取的Python源码，无GUI、未打包exe，支持多进程、断点续传，文件结构同网页中显示结构。PS：此处为1.5.6版本，欢迎大家加我交流或者提建议（可直接获取最新版本）

资源截图

代码片段和文件信息

import requests
import json
import re
import os
import random
import time
import hashlib
import platform
import multiprocessing

from bs4 import BeautifulSoup
from multiprocessing import Pool

from siguretools.network_file import Networkfile
from siguretools.config import Config


def getterminfos(cid):
    """Fetch a course page and extract the course name and its term list.

    The page for ``DUT-<cid>`` is downloaded (the ``DUT-`` school prefix is
    hard-coded in the URL) and the inline ``<script>`` that mentions
    ``termId`` is scanned with regular expressions for the
    ``window.courseDto`` and ``window.termInfoList`` assignments.

    Args:
        cid: Course id, substituted into the course URL.

    Returns:
        A ``(coursename, termInfoList)`` tuple. ``coursename`` is ``''`` and
        ``termInfoList`` is ``[]`` when the corresponding data cannot be
        found in the page.
    """
    # Local import: only this function needs it.
    import ast

    response = requests.get(
        'https://www.icourse163.org/course/DUT-{}#/info'.format(cid),
        timeout=30,  # same limit get_courseinfo uses; avoids hanging forever
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    scripts = soup.find_all('script', string=re.compile('termId'))
    # When several scripts match, the last one is used.
    script = scripts[-1].string if scripts else ''

    coursename = ''
    cinfo = re.search(r'window.courseDto[\s\S]*window.chiefLector', script)
    if cinfo:
        name_match = re.search(
            r'name:[\s\S]*?(?P<coursename>"[\s\S]*?")[\s\S]',
            cinfo.group(0),
        )
        if name_match:
            # The captured text is a double-quoted string literal taken from
            # a remote page.  The original code ran eval() on it; that would
            # execute arbitrary expressions from untrusted input, so the
            # literal is decoded with ast.literal_eval instead.
            coursename = ast.literal_eval(name_match.group('coursename'))

    termInfoList = []
    tinfo = re.search(
        r'window.termInfoList[\s\S]*(?P<termInfoList>\[[\s\S]*\])[\s\S]*window.categories',
        script,
    )
    if tinfo:
        raw = tinfo.group('termInfoList')
        # The page embeds a JavaScript object literal with unquoted keys;
        # quote the known keys (in this order) so json.loads accepts it.
        for key in ('id', 'courseId', 'startTime', 'endTime', 'duration', 'text'):
            raw = raw.replace(key, '"{}"'.format(key))
        termInfoList = json.loads(raw)

    return coursename, termInfoList

def get_courseinfo(tid, mob_token):
    """Request the course-learning data for one term from the mobile API.

    Args:
        tid: Term id, sent as the ``tid`` form field.
        mob_token: Mobile login token, sent as the ``mob-token`` form field.

    Returns:
        The decoded JSON response body (whatever ``json.loads`` produces for
        the UTF-8 response).
    """
    # NOTE(review): the original built a headers dict
    # ({'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 7.0; SM-G9300 Build/NRD90M)'})
    # but never passed it to requests.post.  The request is kept exactly as
    # it was sent before (no custom headers); confirm whether the
    # User-Agent was meant to be sent.
    data = {
        # A 'cid' field may be omitted according to the original author's note.
        'tid': tid,
        'mob-token': mob_token,
    }
    url = 'https://www.icourse163.org/mob/course/courseLearn/v1'
    r = requests.post(url, data=data, timeout=30).content
    return json.loads(r.decode('utf8'))

def rename(name):
    """Return ``name`` with characters unsuitable for file names removed.

    The removed characters are newline, carriage return, backspace, tab and
    ``\\ / : * ? " < > |``.  All other characters are kept unchanged.

    Args:
        name: The string to clean.

    Returns:
        The cleaned string (possibly empty).
    """
    # One translate pass deletes every listed character.
    return name.translate(str.maketrans('', '', '\n\r\b\t\\/:*?"<>|'))

def gettoken(username, passwd):
    """Log in through the mobile API and return the login token.

    Args:
        username: Account name, sent as the ``username`` form field.
        passwd: Password, sent as the ``passwd`` form field.

    Returns:
        ``[mob_token, 0]`` when the response status code is 0,
        ``[None, 100]`` when the status code is 100, and ``None`` for any
        other status code.
    """
    headers = {
        'edu-app-type': 'android',
        'edu-app-version': '2.6.1',
    }
    data = {
        'username': username,
        'passwd': passwd,
        'mob-token': '',
    }
    # The credentials were originally posted over plain http://; the other
    # requests in this file already talk to the same host over https://, so
    # the login uses https as well to avoid sending the password in clear
    # text.  A timeout keeps a stalled connection from blocking forever.
    r = requests.post(
        'https://www.icourse163.org/mob/logonByIcourse',
        headers=headers,
        data=data,
        timeout=30,
    ).content
    j = json.loads(r.decode('utf8'))
    code = j.get("status").get("code")
    if code == 0:
        return [j.get("results").get("mob-token"), code]
    if code == 100:
        return [None, code]
    # Any other status code: no result, as in the original implementation.
    return None

class Courseware():
    def __init__(selfcourseinfochapternumlessonnumunitnumrootmob_tokensharpness):
        courseDto=courseinfo.get(‘results‘).get(‘courseDto‘)

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-06-30 22:23  MOOC_Downloading1.5\
     文件       22878  2018-06-30 22:23  MOOC_Downloading1.5\Easyload.py
     文件        4179  2018-06-30 19:57  MOOC_Downloading1.5\Myload.py
     目录           0  2018-06-22 14:06  MOOC_Downloading1.5\References\
     文件         113  2018-06-22 14:06  MOOC_Downloading1.5\References\References.txt
     目录           0  2015-02-24 14:09  MOOC_Downloading1.5\References\py-wget-master\
     文件        6148  2015-02-24 14:09  MOOC_Downloading1.5\References\py-wget-master\.DS_Store
     文件         326  2015-02-24 14:09  MOOC_Downloading1.5\References\py-wget-master\README.md
     文件     1594539  2015-02-24 14:09  MOOC_Downloading1.5\References\py-wget-master\example.gif
     文件        3521  2018-06-22 14:03  MOOC_Downloading1.5\References\py-wget-master\py-wget.py
     文件         237  2015-02-24 14:09  MOOC_Downloading1.5\References\py-wget-master\test.py
     目录           0  2018-06-22 13:59  MOOC_Downloading1.5\References\python下载界面\
     目录           0  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\__pycache__\
     文件        1837  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\__pycache__\getfile.cpython-36.pyc
     文件        1848  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\getfile.py
     目录           0  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\
     文件         314  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\cancel.PNG
     文件         304  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\dir.PNG
     文件         275  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\file.png
     文件         193  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\pause.png
     文件         367  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\icon\start.PNG
     文件        7788  2018-06-20 20:21  MOOC_Downloading1.5\References\python下载界面\tkclass.py
     目录           0  2018-06-30 14:21  MOOC_Downloading1.5\__pycache__\
     文件       12196  2018-06-22 13:11  MOOC_Downloading1.5\__pycache__\Easyload.cpython-35.pyc
     文件       15687  2018-06-30 14:21  MOOC_Downloading1.5\__pycache__\Easyload.cpython-36.pyc
     文件       11965  2018-06-20 20:21  MOOC_Downloading1.5\__pycache__\Easyload2.cpython-35.pyc
     文件        1875  2018-06-23 22:25  MOOC_Downloading1.5\__pycache__\config.cpython-35.pyc
     文件        1678  2018-06-27 11:20  MOOC_Downloading1.5\__pycache__\config.cpython-36.pyc
     文件        1090  2018-06-22 13:11  MOOC_Downloading1.5\__pycache__\mul_process_package.cpython-35.pyc
     文件         997  2018-06-27 11:57  MOOC_Downloading1.5\__pycache__\mul_process_package.cpython-36.pyc
     文件        1422  2018-06-22 13:11  MOOC_Downloading1.5\__pycache__\network_file.cpython-35.pyc
............此处省略47个文件信息

评论

共有 条评论