资源简介
20200901版裁判文书爬虫,需要设置登录账号,相关思路可以参阅我的博客:https://mp.csdn.net/console/article
代码片段和文件信息
"""程序说明"""
# -*- coding: utf-8 -*-
# Author: cao wang
# Datetime : 2020
# software: PyCharm
# 收获:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import math
import time
import logging
from selenium.webdriver.firefox.options import Options
import os
from crawler_tools import user_agent as u
from datetime import datetime
from selenium.common.exceptions import *
import pyautogui
import random
from selenium.webdriver import ActionChains
from retrying import retry
logging.disable(logging.INFO)
def start_logger():
    """Send root-logger output to a timestamped file in a ``log`` directory.

    The ``log`` directory is created next to this source file when it does
    not exist yet.  The log file is named
    ``daily_report_<MMDDYYYY_HHMMSS>.log`` using the time of the call.
    Records at WARNING level and above are written as
    ``<MM-DD HH:MM:SS> <message>``.

    Note that ``logging.basicConfig`` does nothing when the root logger
    already has handlers, so only the first configuration takes effect.
    """
    # __file__ may be a bare relative name, in which case dirname() is ''
    # and the old "\\log" concatenation pointed at the drive root.  Resolve
    # to an absolute path first and let os.path.join pick the separator.
    log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'log')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(log_dir, exist_ok=True)

    stamp = datetime.now().strftime('%m%d%Y_%H%M%S')
    logging.basicConfig(
        filename=os.path.join(log_dir, 'daily_report_%s.log' % stamp),
        level=logging.WARNING,
        format='%(asctime)s %(message)s',
        datefmt='%m-%d %H:%M:%S',
    )
start_logger()
class Selenium_firefox():
def __init__(self):
    """Launch headless Firefox and open the court-judgment list page.

    Builds a Firefox profile that saves downloads into a fixed local
    directory without prompting and does not load images, then starts the
    browser and navigates to the wenshu.court.gov.cn index page.

    Attributes set:
        browser: the ``webdriver.Firefox`` instance.
        wait: a ``WebDriverWait`` bound to ``browser`` with a 20 second
            timeout.
    """
    # Download target directory.  Raw string: "\F" and "\D" are not valid
    # escape sequences and trigger warnings in a normal string literal.
    path = r"E:\Firefox\Download"
    os.makedirs(path, exist_ok=True)

    profile = webdriver.FirefoxProfile()
    # Download without a popup.  The strip() removes U+202A characters from
    # the ends of the path (presumably left over from copy-pasting it).
    profile.set_preference('browser.download.dir', path.strip('\u202a'))
    # 2 = save into the directory named by browser.download.dir.
    profile.set_preference('browser.download.folderList', 2)
    profile.set_preference('browser.download.manager.showWhenStarting', False)
    # Comma-separated MIME types that are saved without asking.
    profile.set_preference('browser.helperApps.neverAsk.saveToDisk',
                           'application/zip,application/octet-stream')
    # No images.
    profile.set_preference('browser.migration.version', 9001)
    profile.set_preference('permissions.default.image', 2)
    # NOTE(review): Firefox's user-agent override preference is normally
    # 'general.useragent.override'; confirm that 'user-agent' has any
    # effect before relying on it.
    profile.set_preference('user-agent', u()['User-Agent'])

    ops = Options()
    ops.add_argument('--headless')
    # NOTE(review): 'disable-infobars' looks like a Chrome switch; confirm
    # that it is needed for Firefox.
    ops.add_argument('disable-infobars')

    # Fetch the page.
    self.browser = webdriver.Firefox(firefox_profile=profile, options=ops)
    self.wait = WebDriverWait(self.browser, 20)
    self.browser.get('https://wenshu.court.gov.cn/website/wenshu/181217BMTKHNT2W0/index.html?pageId=d176b4c9586ed2bea95d1fbab98bdd9d&s8=02')
@retry
def login(self):
“““登录“““
# 切换框架
wait = self.wait
self.browser.refresh()
frame = wait.until(EC.presence_of_element_located((By.XPATH ‘//*[@id=“contentiframe“]‘)))
self.browser.switch_to.frame(frame)
click = wait.until(EC.presence_of_element_located(
(By.XPATH ‘/html/body/app-root/div/app-login/div/div/form/div/div[1]/input‘)))
# actions.move_to_element(click).click().perform()
# click.click()
click.send_keys(“自己的手机号“)
time.sleep(1)
click1 = wait.until(E
- 上一篇:生物信息平台Galaxy文档
- 下一篇:美萍2010v5绿色免安装完美破解版
评论
共有 条评论