资源简介
简单的爬虫实例,爬取了大学排名的相关信息,适合爬虫入门
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 9 16:24:29 2018
@author: Administrator
"""
from bs4 import BeautifulSoup
import requests
def getHtmlUrl(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``None`` when the request fails (the
        error is printed rather than raised).
    """
    try:
        # A timeout keeps an unresponsive server from blocking forever.
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into exceptions so they are reported below.
        r.raise_for_status()
        # Decode with the encoding detected from the body instead of the
        # one declared in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as e:
        print(e)
        return None
def getData(html, limit=10):
    """Extract table columns from the ``<tr class="alt">`` rows of *html*.

    For each matching row the text of its first five ``<td>`` cells is
    collected: cell 0 -> ``range_num``, the ``<div>`` inside cell 1 ->
    ``title``, cell 2 -> ``province``, cell 3 -> ``rating_num`` and
    cell 4 -> ``souce_num``.

    Args:
        html: Page markup to parse. A falsy value (``None`` or ``""``)
            yields a result whose lists are all empty.
        limit: Maximum number of rows to read (non-negative). Defaults to
            10; ``None`` reads every matching row.

    Returns:
        A dict mapping each of the five keys above to a list of strings,
        one entry per row read.

    Raises:
        IndexError: If a matching row has fewer than five ``<td>`` cells.
        AttributeError: If the second cell of a row contains no ``<div>``.
    """
    # NOTE: the key 'souce_num' is misspelled, but it is kept unchanged
    # because other code may read the dict by this exact key.
    data = {
        'title': [],
        'rating_num': [],
        'range_num': [],
        'province': [],
        'souce_num': [],
    }
    if not html:
        # Nothing to parse, e.g. the download failed and produced None.
        return data

    soup = BeautifulSoup(html, 'html.parser')
    # Slicing (rather than indexing 0..9) tolerates pages with fewer rows.
    for row in soup.find_all('tr', class_='alt')[:limit]:
        cells = row.find_all('td')
        data['range_num'].append(cells[0].text)
        data['title'].append(cells[1].find('div').text)
        data['province'].append(cells[2].text)
        data['rating_num'].append(cells[3].text)
        data['souce_num'].append(cells[4].text)
    return data
def show(data):
f=open(“D://uni.html“‘w‘)
f.write(““)
f.write(““)
f.write(“
评论
共有 条评论