资源简介
5652华尔街见闻.py
代码片段和文件信息
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import re
import urllib urllib2
import requests
import pymongo
import datetime
import multiprocessing as mp
# Maps the category id (a string key) to its display name.
# The original literal had lost its separating commas and used typographic
# quotes, which made it a syntax error; keys and values are unchanged.
Category_Map = {
    "1": u"外汇",
    "2": u"股市",
    "3": u"商品",
    "4": u"债市",
    "5": u"央行",
    "9": u"中国",
    "10": u"美国",
    "11": u"欧元区",
    "12": u"日本",
    "13": u"英国",
    "14": u"澳洲",
    "15": u"加拿大",
    "16": u"瑞士",
    "17": u"其他地区",
}
def num2name(category_num):
    """Return the display name for a category id.

    Args:
        category_num: Key to look up in ``Category_Map`` (the map's keys
            are strings such as ``"1"``).

    Returns:
        The mapped name, or an empty string when the id is not a key of
        ``Category_Map``.
    """
    # dict.has_key() does not exist on Python 3; .get() with a default is
    # the portable equivalent and yields the same result on Python 2.
    return Category_Map.get(category_num, "")
class MongoDBIO(object):
    """Hold MongoDB connection settings and open a collection on demand."""

    def __init__(self, host, port, name, password, database, collection):
        """Store the connection settings; nothing is connected here.

        Args:
            host: Server host handed to pymongo.
            port: Server port handed to pymongo.
            name: User name used for authentication.
            password: Password used for authentication.
            database: Name of the database to select.
            collection: Name of the collection to select.
        """
        self.host = host
        self.port = port
        self.name = name
        self.password = password
        self.database = database
        self.collection = collection

    def Connection(self):
        """Connect to the server and return the configured collection.

        Authentication is attempted only when a user name or a password
        was supplied.

        Returns:
            The pymongo collection object for ``database[collection]``.
        """
        # pymongo.Connection was removed in PyMongo 3.0. Keep using it where
        # it still exists (unchanged behaviour on old installs) and fall back
        # to MongoClient, which accepts the same host/port keywords.
        client_factory = getattr(pymongo, "Connection", None) or pymongo.MongoClient
        client = client_factory(host=self.host, port=self.port)
        db = client[self.database]
        if self.name or self.password:
            # Verify user name and password against the selected database.
            db.authenticate(name=self.name, password=self.password)
        return db[self.collection]
def ResultSave(save_host, save_port, save_name, save_password,
               save_database, save_collection, save_content):
    """Save a single document into a MongoDB collection.

    A new ``MongoDBIO`` connection is opened on every call.

    Args:
        save_host: MongoDB host.
        save_port: MongoDB port.
        save_name: User name for authentication.
        save_password: Password for authentication.
        save_database: Target database name.
        save_collection: Target collection name.
        save_content: The document handed to the collection's ``save``.
    """
    posts = MongoDBIO(save_host, save_port, save_name, save_password,
                      save_database, save_collection).Connection()
    posts.save(save_content)
def Spider(url, data, timeout=None):
    """Send an HTTP GET request and return the raw response body.

    Args:
        url: Address to request.
        data: Query parameters, forwarded to requests as ``params``.
        timeout: Optional timeout in seconds forwarded to requests. The
            default ``None`` keeps the previous behaviour of waiting
            without a limit.

    Returns:
        The ``content`` attribute of the response.
    """
    response = requests.get(url=url, params=data, timeout=timeout)
    return response.content
def ContentSave(item):
# 保存配置
save_host = “localhost“
save_port = 27017
save_name = ““
save_password = ““
save_database = “textclassify“
save_collection = “WallstreetcnSave“
source = “wallstreetcn“
createdtime = datetime.datetime.now()
相关资源
- python+ selenium教程
- 英文原版-Scientific Computing with Python
- CpuMemSets在Linux操作系统中的实现
- Python学习全系列教程永久可用
- 蓝奏云批量上传工具.zip
- python书籍 PDF
- 老男孩python项目实战
- Python.rar99111
- decision_tree_v2.py
- Python绝技运用Python成为顶级黑客.pdf
- python小波包文档及论文.zip
- Python黑帽子(黑客与渗透测试编程之
- FlaskWeb开发:基于Python的Web应用开发实
- Python基础教程第3版中英文源码.rar
- python数据结构与算法中文版.zip
- Python-冲顶大会芝士超人西瓜视频头脑
- time_series_forecasting_with_python.zip
- Python基础教程第三版PDF高清可复制.
- python编程从入门到实践.zip237878
- FlaskWeb开发:Python基于Web应用开发实战
- pythonBCRMDSJ.mobi
- 量化交易之路用Python做股票量化分析
- PYTHON自然语言处理中文版.pdf
- Python基础教程(第3版).rar
- GRAYHATPYTHON高清.英文.书签版.pdf
- Python简明教程第四版.rar
- Python编程:从入门到实践带书签完整
- Python基础教程(第3版).pdf109608
- vamei-从Python开始学编程.pdf
- 利用Python进行数据分析.pdf
评论
共有 条评论