资源简介
5652华尔街见闻.py
代码片段和文件信息
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import sys
import re
import urllib urllib2
import requests
import pymongo
import datetime
import multiprocessing as mp
# Lookup table from category id (string key) to category name.
Category_Map = {
    "1": u"外汇",
    "2": u"股市",
    "3": u"商品",
    "4": u"债市",
    "5": u"央行",
    "9": u"中国",
    "10": u"美国",
    "11": u"欧元区",
    "12": u"日本",
    "13": u"英国",
    "14": u"澳洲",
    "15": u"加拿大",
    "16": u"瑞士",
    "17": u"其他地区",
}


def num2name(category_num):
    """Return the category name for a category id.

    Args:
        category_num: Key to look up in ``Category_Map``; the table's keys
            are strings such as ``"1"``.

    Returns:
        The mapped name, or an empty string when the key is not present.
    """
    # dict.get replaces dict.has_key(), which no longer exists in Python 3,
    # and does the lookup in a single step.
    return Category_Map.get(category_num, "")
class MongoDBIO:
    """Hold MongoDB connection settings and open a collection handle.

    The constructor only stores its arguments; no connection is made until
    ``Connection`` is called.
    """

    def __init__(self, host, port, name, password, database, collection):
        """Store the connection parameters.

        Args:
            host: Server host passed to the MongoDB client.
            port: Server port passed to the MongoDB client.
            name: User name; authentication is skipped when both ``name``
                and ``password`` are falsy.
            password: Password used together with ``name``.
            database: Name of the database to select.
            collection: Name of the collection to select in that database.
        """
        self.host = host
        self.port = port
        self.name = name
        self.password = password
        self.database = database
        self.collection = collection

    def Connection(self):
        """Connect to the server and return the configured collection.

        A new client is created on every call.

        Returns:
            The ``self.collection`` collection of ``self.database``.
        """
        # pymongo.Connection was removed in PyMongo 3.0; MongoClient is the
        # replacement and has been available since PyMongo 2.4.
        client = pymongo.MongoClient(host=self.host, port=self.port)
        db = client[self.database]
        # Verify the user name / password only when one was supplied.
        if self.name or self.password:
            db.authenticate(name=self.name, password=self.password)
        return db[self.collection]
# Save operation.
# NOTE: an earlier, disabled variant accepted a list of documents and saved
# each one in a loop; this version saves a single document per call.
def ResultSave(save_host, save_port, save_name, save_password,
               save_database, save_collection, save_content):
    """Save one document into a MongoDB collection.

    A ``MongoDBIO`` is built from the connection arguments and connected on
    every call, then ``save_content`` is handed to the collection's
    ``save`` method.

    Args:
        save_host: Server host forwarded to ``MongoDBIO``.
        save_port: Server port forwarded to ``MongoDBIO``.
        save_name: User name forwarded to ``MongoDBIO``.
        save_password: Password forwarded to ``MongoDBIO``.
        save_database: Database name forwarded to ``MongoDBIO``.
        save_collection: Collection name forwarded to ``MongoDBIO``.
        save_content: The document to save.
    """
    posts = MongoDBIO(save_host, save_port, save_name, save_password,
                      save_database, save_collection).Connection()
    posts.save(save_content)
def Spider(url, data, timeout=None):
    """Send a GET request and return the raw response body.

    Args:
        url: Address to request.
        data: Query parameters, passed to ``requests.get`` as ``params``.
        timeout: Optional timeout forwarded to ``requests.get``. The default
            ``None`` matches the library default of waiting indefinitely, so
            existing two-argument callers behave as before.

    Returns:
        The ``content`` attribute of the response.
    """
    response = requests.get(url=url, params=data, timeout=timeout)
    return response.content
def ContentSave(item):
# 保存配置
save_host = “localhost“
save_port = 27017
save_name = ““
save_password = ““
save_database = “textclassify“
save_collection = “WallstreetcnSave“
source = “wallstreetcn“
createdtime = datetime.datetime.now()
相关资源
- hulk.py
- 麦子学院Python视频.txt
- python核心基础.txt
- 仿真3D版本.py
- Python零基础10天进阶班.rar
- 用Python自动办公,做职场高手.txt
- 基于python实现的http接口自动化测试框架
- spider_LOL.py
- Python教程.rar
- 小甲鱼零基础入门学习Python视频教程
- 麦子学院Python全套视频.txt
- 《疯狂Python讲义》习题答案.rar
- publishHelper.py
- sendInfo.py
- Python零基础10天进阶班.docx
- Python数据科学指南_Code.zip
- 爬取URP教务网站学籍信息.py
- Anaconda历史版本Python3.6版本.zip
- car_detected.py
- Python-图像分割Keras在Keras中实现Segne
评论
共有 条评论