资源简介
demo百度文库下载
python代码
class DownloadImg():
    """Download a single image over HTTP, sending a randomly chosen User-Agent."""

    def __init__(self):
        # Source of User-Agent strings for outgoing requests.
        self.ua = UserAgent()

    def download_one_img(self, img_url, saved_path):
        """Fetch ``img_url`` and write the response body to ``saved_path``.

        The file is written only when the server answers with status 200.
        ``saved_path`` is returned in every case, so the return value by
        itself does not tell the caller whether the download succeeded.

        Args:
            img_url: URL of the image to request.
            saved_path: Destination file path; opened in binary write mode.

        Returns:
            ``saved_path``, unchanged.
        """
        header = {
            "User-Agent": "{}".format(self.ua.random().strip()),
            'Connection': 'close'}
        # The request is made with stream=True, so the response keeps its
        # connection until the body is consumed or the response is closed.
        # The with-block closes it on every path, including non-200 replies
        # where the body is never read.
        with requests.get(img_url, headers=header, stream=True) as r:
            print("请求图片状态码 {}".format(r.status_code))  # report the HTTP status code
            if r.status_code == 200:  # write the image to disk
                with open(saved_path, mode="wb") as f:
                    f.write(r.content)
                print("download {} success!".format(saved_path))
        return saved_path
class StartChrome():
    """Start Chrome with mobile emulation and open the module-level ``url``.

    Attributes set by ``__init__``:
        brower: The ``webdriver.Chrome`` instance (attribute name kept as
            spelled in the original so existing references keep working).
        download_img: A ``DownloadImg`` helper instance.
    """

    def __init__(self):
        # Ask Chrome to emulate the "Galaxy S5" device profile.
        mobile_emulation = {"deviceName": "Galaxy S5"}
        # DesiredCapabilities.CHROME is a class-level dict shared by every
        # user of the class; work on a copy so adding the logging preference
        # here does not alter that shared object.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        options = webdriver.ChromeOptions()
        options.add_experimental_option("mobileEmulation", mobile_emulation)
        self.brower = webdriver.Chrome(desired_capabilities=capabilities,
                                       chrome_options=options)
        # Start the browser and open the page that is to be downloaded.
        self.brower.get(url)
        self.download_img = DownloadImg()
代码片段和文件信息
import os
import time
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from scrapy import Selector
import requests
from my_fake_useragent import UserAgent
import docx
from docx.shared import Inches
import cv2 # opencv 计算机视觉
from pptx import Presentation
from pptx.util import Inches
# chromedriver binary for Windows
chromedriver_path = "./chromedriver.exe"
# chromedriver binary for Ubuntu
# chromedriver_path = "./chromedriver"
# Directory paths for doc and ppt files.
doc_dir_path = "./doc"
ppt_dir_path = "./ppt"
# Alternative sample documents; the trailing note on each line is the
# original author's tag for that sample.
# url = "https://wenku.baidu.com/view/4410199cb0717fd5370cdc2e.html?fr=search"  # doc_txt p
# url = "https://wenku.baidu.com/view/4d18916f7c21af45b307e87101f69e314332fa36.html"  # doc_txt span
# url = "https://wenku.baidu.com/view/dea519c7e53a580216fcfefa.html?fr=search"  # doc_txt span br
# url = 'https://wk.baidu.com/view/062edabeb6360b4c2e3f5727a5e9856a5712262d?pcf=2&bfetype=new'  # doc_img
# url = "https://wenku.baidu.com/view/2af6de34a7e9856a561252d380eb6294dd88228d"  # VIP-only doc
# url = "https://wenku.baidu.com/view/3de365cc6aec0975f46527d3240c844769eaa0aa.html?fr=search"  # ppt
# url = "https://wenku.baidu.com/view/18a8bc08094e767f5acfa1c7aa00b52acec79c55"  # pdf
# url = "https://wenku.baidu.com/view/bbe27bf21b5f312b3169a45177232f60dccce772"
# url = "https://wenku.baidu.com/view/5cb11d096e1aff00bed5b9f3f90f76c660374c24.html?fr=search"
# url = "https://wenku.baidu.com/view/71f9818fef06eff9aef8941ea76e58fafab045a6.html"
# url = "https://wenku.baidu.com/view/ffc6b32a68eae009581b6bd97f1922791788be69.html"
# url = "https://wenku.baidu.com/view/d4d2e1e3122de2bd960590c69ec3d5bbfd0adaa6.html"
# Document page that the browser opens on start-up.
url = 'https://wenku.baidu.com/view/a277ab04ce84b9d528ea81c758f5f61fb73628ef.html'
class DownloadImg():
    """Download a single image over HTTP, sending a randomly chosen User-Agent."""

    def __init__(self):
        # Source of User-Agent strings for outgoing requests.
        self.ua = UserAgent()

    def download_one_img(self, img_url, saved_path):
        """Fetch ``img_url`` and write the response body to ``saved_path``.

        The file is written only when the server answers with status 200.
        ``saved_path`` is returned in every case, so the return value by
        itself does not tell the caller whether the download succeeded.

        Args:
            img_url: URL of the image to request.
            saved_path: Destination file path; opened in binary write mode.

        Returns:
            ``saved_path``, unchanged.
        """
        header = {
            "User-Agent": "{}".format(self.ua.random().strip()),
            'Connection': 'close'}
        # The request is made with stream=True, so the response keeps its
        # connection until the body is consumed or the response is closed.
        # The with-block closes it on every path, including non-200 replies
        # where the body is never read.
        with requests.get(img_url, headers=header, stream=True) as r:
            print("请求图片状态码 {}".format(r.status_code))  # report the HTTP status code
            if r.status_code == 200:  # write the image to disk
                with open(saved_path, mode="wb") as f:
                    f.write(r.content)
                print("download {} success!".format(saved_path))
        return saved_path
class StartChrome():
    """Start Chrome with mobile emulation and open the module-level ``url``.

    Attributes set by ``__init__``:
        brower: The ``webdriver.Chrome`` instance (attribute name kept as
            spelled in the original so existing references keep working).
        download_img: A ``DownloadImg`` helper instance.
    """

    def __init__(self):
        # Ask Chrome to emulate the "Galaxy S5" device profile.
        mobile_emulation = {"deviceName": "Galaxy S5"}
        # DesiredCapabilities.CHROME is a class-level dict shared by every
        # user of the class; work on a copy so adding the logging preference
        # here does not alter that shared object.
        capabilities = DesiredCapabilities.CHROME.copy()
        capabilities['loggingPrefs'] = {'browser': 'ALL'}
        options = webdriver.ChromeOptions()
        options.add_experimental_option("mobileEmulation", mobile_emulation)
        self.brower = webdriver.Chrome(desired_capabilities=capabilities,
                                       chrome_options=options)
        # Start the browser and open the page that is to be downloaded.
        self.brower.get(url)
        self.download_img = DownloadImg()
def click_ele(self click_xpath):
# 单击指定控件
相关资源
- 网络爬虫(pachong_anjuke.py)
- Python总结(精简).doc
- Supervised Learning with Python
- python实现一个简单的名片管理系统功
- Python源码剖析.pdf59505
- python语言实现的基于opencv的表针识别
- 基于Python的酒店管理系统
- 打砖块
- python数据结构
- python实现SGBM图像匹配算法
- python实现灰度直方图均衡化
- scrapy_qunar_one
- Python学习全系列教程永久可用
- python简明教程.chm
- 抽奖大转盘python的图形化界面
- 双边滤波器实验报告及代码python
- python +MYSQL+HTML实现21蛋糕网上商城
- Python-直播答题助手自动检测出题搜索
- OpenCV入门教程+OpenCV官方教程中文版
- Python 串口工具源码+.exe文件
- Python开发的全栈股票系统.zip
- Python操作Excel表格并将其中部分数据写
- python书籍 PDF
- 利用python绘制散点图
- python+labview+No1.vi
- 老男孩python项目实战
- python源码制作whl文件.rar
- python3.5可用的scipy
- PYTHON3 经典50案例.pptx
- 计算机科学导论-python.pdf
评论
共有 条评论