• 大小: 9KB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-12-23
  • 语言: 其他
  • 标签: 爬虫,NBA  

资源简介

NBA数据爬虫

资源截图

代码片段和文件信息

import urllib2
from bs4 import  BeautifulSoup
import re
import xlrd
import xdrlibsys
import xlwt
def transformCodec(re_data):  # convert ascii (gbk) bytes to unicode
    """Decode a GBK-encoded byte string to unicode, dropping bad bytes.

    The input is decoded with the ``gbk`` codec. Whenever decoding fails
    with a ``UnicodeDecodeError``, the first offending byte is removed and
    decoding is retried, until the remaining bytes decode cleanly.

    Args:
        re_data: the byte string to decode.

    Returns:
        The decoded text. If decoding fails for a reason other than a
        ``UnicodeDecodeError`` (for example the object cannot be decoded
        at all), the error is printed and ``re_data`` is returned as-is.
    """
    while True:
        try:
            return re_data.decode('gbk')
        except UnicodeDecodeError as error:
            print(error)
            print('delete illegal string,try again...')
            # Use the exception's own offset instead of parsing its message:
            # the message wording differs between single-byte and multi-byte
            # failures, so a regex on the text misses some of them.
            bad = error.start
            re_data = re_data[:bad] + re_data[bad + 1:]
        except Exception as error:
            # Best effort: report the problem and hand the input back.
            print(error)
            print('delete illegal string,try again...')
            return re_data

# Create the output workbook with a single sheet and a nine-column header
# row; scraped cells are written below it, wrapping after nine columns.
file = xlwt.Workbook()
table = file.add_sheet('shuju', cell_overwrite_ok=True)
for header_col, header in enumerate(
        ('team', 'W/L', 'Strk', 'Home', 'Away', 'Day', 'Night', 'Div', 'Conf')):
    table.write(0, header_col, header)
row = 1
col = 0
# NOTE(review): the page bounds are kept exactly as found (range(128)), but
# this listing lost its commas elsewhere, so the intended call may have been
# a two-argument range -- confirm against the original script.
for page in range(128):
    print(page)
    url = ("http://www.covers.com/pageLoader/pageLoader.aspx"
           "?page=/data/nba/matchups/g5_preview_" + str(page) + ".html")
    response = urllib2.urlopen(url)
    try:
        print(response.getcode())
        # The soup is built while the response is still open; the parsed
        # tree is used after the connection has been closed.
        soup = BeautifulSoup(response, 'html.parser', from_encoding='utf-8')
    finally:
        response.close()
    links2 = soup.find_all('div', class_="sdi-so", limit=2)
    # Only the second matching "sdi-so" block is exported.
    if len(links2) > 1:
        for q in links2[1].find_all('td', class_="sdi-datacell"):
            print(q.text)
            table.write(row, col, q.text)
            col = (col + 1) % 9
            if col == 0:
                row = row + 1
        row = row + 1
        # Save after every page so earlier pages are already on disk if a
        # later request fails.
        file.save('NBA.xls')

file.save('NBA.xls')

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2017-04-10 06:23  PythonApplication1\
     目录           0  2017-04-15 16:38  PythonApplication1\PythonApplication1\
     文件         859  2017-04-10 06:23  PythonApplication1\PythonApplication1.sln
     文件       18944  2017-05-03 03:01  PythonApplication1\PythonApplication1.v12.suo
     文件       13824  2017-04-24 00:04  PythonApplication1\PythonApplication1\NBA.xls
     文件        1859  2017-04-15 16:38  PythonApplication1\PythonApplication1\PythonApplication1.py
     文件        1953  2017-04-10 06:23  PythonApplication1\PythonApplication1\PythonApplication1.pyproj

评论

共有 条评论

相关资源