资源简介
微博情感分析,文本分类,毕业设计项目
代码片段和文件信息
import random
import re
import traceback
import jieba
import numpy as np
from sklearn.externals import joblib
from sklearn.naive_bayes import MultinomialNB
# Module-level setup: load the custom jieba dictionary and the stop-word list.
jieba.load_userdict("train/word.txt")
# Stop words, one per line; use a context manager so the handle is closed.
with open('ad/stop.txt', 'r', encoding='utf-8') as _stop_fp:
    stop = [line.strip() for line in _stop_fp]  # stop-word list
def build_key_word(path):  # build feature words by frequency
    """Build the keyword feature list from the corpus at *path*.

    Segments every line with jieba, counts words of length > 1 that
    contain at least one non-ASCII character (filters out pure
    ASCII/number tokens), keeps the top 20% most frequent words and
    removes stop words.

    :param path: UTF-8 text file, one document per line
    :return: list of keyword strings (set difference, order arbitrary)
    """
    freq = {}
    # Hoisted out of the loop; re.ASCII restricts \w to ASCII word chars,
    # matching the old Python-2 re.L behavior (re.L is illegal for str
    # patterns in Python 3).
    ascii_word = re.compile(r'\w', re.ASCII)
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            for word in jieba.cut(line.strip()):
                result = ascii_word.sub("", word)
                if not result or result == ' ':  # purely ASCII token or empty
                    continue
                if len(word) > 1:  # single characters are mostly meaningless
                    freq[word] = freq.get(word, 0) + 1
    kw_list = sorted(freq, key=lambda w: freq[w], reverse=True)
    size = int(len(kw_list) * 0.2)  # keep the top 20% most frequent words
    mood = set(kw_list[:size])
    return list(mood - set(stop))
def loadDataSet(path):  # return the segmented words and label of each weibo
    """Load a labeled weibo corpus.

    Each line starts with a two-character integer label followed by the
    weibo text.  The text is segmented with jieba; pure-ASCII tokens,
    stop words and several invisible/space characters are dropped.

    :param path: UTF-8 text file, one labeled weibo per line
    :return: (line_cut, label) — parallel lists of word lists and int labels
    """
    line_cut = []
    label = []
    # Hoisted; re.ASCII replaces the Python-2 re.L flag (invalid for str in Py3).
    ascii_word = re.compile(r'\w', re.ASCII)
    with open(path, encoding="utf-8") as fp:
        for line in fp:
            temp = line.strip()
            try:
                mark = int(temp[:2])  # parse the label first so a bad line is skipped whole
                sentence = str(temp[2:].lstrip()).replace('\u200b', '')  # the weibo text
                word_list = []
                for word in jieba.cut(sentence.strip()):
                    result = ascii_word.sub("", word)
                    if not result or result == ' ':  # purely ASCII token or empty
                        continue
                    word_list.append(word)
                word_list = list(set(word_list) - set(stop) - set('\u200b')
                                 - set(' ') - set('\u3000') - set('️'))
                # Append both together so line_cut and label never go out of sync
                # (the original appended the label before segmentation, so a later
                # failure left label one entry longer than line_cut).
                line_cut.append(word_list)
                label.append(mark)
            except ValueError:  # malformed label field — skip the line
                continue
    return line_cut, label  # segmented words and label of every weibo
def setOfWordsToVecTor(vocabularyList, moodWords):  # vectorize one weibo
    """Return a bag-of-words count vector for one weibo.

    :param vocabularyList: ordered feature vocabulary
    :param moodWords: iterable of words from one segmented weibo
    :return: np.ndarray of per-word counts aligned with vocabularyList
    """
    # Build the word->index map once: O(1) lookups instead of the original
    # list.index scan, which made the loop quadratic in vocabulary size.
    index_of = {word: i for i, word in enumerate(vocabularyList)}
    vocabMarked = [0] * len(vocabularyList)
    for smsWord in moodWords:
        i = index_of.get(smsWord)
        if i is not None:
            vocabMarked[i] += 1
    return np.array(vocabMarked)
def setOfWordsListToVecTor(vocabularyList, train_mood_array):  # vectorize all weibos
    """Vectorize every segmented weibo against the vocabulary.

    :param vocabularyList: ordered feature vocabulary
    :param train_mood_array: list of word lists, one per weibo
    :return: list of np.ndarray count vectors, one per weibo
    """
    # Comprehension over the items replaces the original index loop.
    return [setOfWordsToVecTor(vocabularyList, mood_words)
            for mood_words in train_mood_array]
def trainingNaiveBayes(train_mood_array label): # 计算先验概率
numTrainDoc = len(train_mood_array)
numWords = len(train_mood_array[0])
prior_Pos prior_Neg prior_Neutral = 0.0 0.0 0.0
for i in label:
if i == 1:
prior_Pos = prior_Pos + 1
elif i == 2:
prior_Neg = prior_Neg + 1
else:
prior_Neutral = prior_Neutral + 1
prior_Pos = prior_Pos / float(numTrainDoc)
prior_Neg = prior_Neg / float(numTrainDoc)
prior_Neutral = prior_Neutral / float(numTrainDoc)
wordsInPosNum = np.ones
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2017-12-17 09:54 weiboanalysis-master\
文件 6977 2017-12-17 09:54 weiboanalysis-master\Bayes.py
文件 11357 2017-12-17 09:54 weiboanalysis-master\LICENSE
文件 1765 2017-12-17 09:54 weiboanalysis-master\README.md
文件 3357 2017-12-17 09:54 weiboanalysis-master\SVM.py
目录 0 2017-12-17 09:54 weiboanalysis-master\ad\
文件 45522 2017-12-17 09:54 weiboanalysis-master\ad\advertise.txt
文件 66038 2017-12-17 09:54 weiboanalysis-master\ad\normal.txt
文件 13407 2017-12-17 09:54 weiboanalysis-master\ad\stop.txt
文件 79 2017-12-17 09:54 weiboanalysis-master\ad\train.txt
目录 0 2017-12-17 09:54 weiboanalysis-master\doc\
文件 1295601 2017-12-17 09:54 weiboanalysis-master\doc\基于AdaBoost算法的情感分析研究.docx
文件 2385 2017-12-17 09:54 weiboanalysis-master\draw_pic.py
文件 347 2017-12-17 09:54 weiboanalysis-master\from_databa
文件 749 2017-12-17 09:54 weiboanalysis-master\jiebatest.py
目录 0 2017-12-17 09:54 weiboanalysis-master\model\
文件 539 2017-12-17 09:54 weiboanalysis-master\model\gnb.model
文件 92 2017-12-17 09:54 weiboanalysis-master\model\gnb.model_01.npy
文件 104 2017-12-17 09:54 weiboanalysis-master\model\gnb.model_02.npy
文件 7352 2017-12-17 09:54 weiboanalysis-master\model\gnb.model_03.npy
文件 104 2017-12-17 09:54 weiboanalysis-master\model\gnb.model_04.npy
文件 7352 2017-12-17 09:54 weiboanalysis-master\model\gnb.model_05.npy
目录 0 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\
文件 7951 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\Bayes.py
目录 0 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\model\
文件 15554 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\model\gnb.model
文件 2635 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\multi_boost.py
文件 4171 2017-12-17 09:54 weiboanalysis-master\multi_AdaBoost\multi_test.py
目录 0 2017-12-17 09:54 weiboanalysis-master\ntusd\
文件 80818 2017-12-17 09:54 weiboanalysis-master\ntusd\ntusd-negative.txt
文件 26508 2017-12-17 09:54 weiboanalysis-master\ntusd\ntusd-positive.txt
............此处省略54个文件信息
- 上一篇:isoview7 插件
- 下一篇:LR1分析器代码实现
相关资源
- pip-10.0.1.tar.gz
- Data Science from Scratch 2nd Edition
- shape_predictor_68_face_landmarks.dat.bz2 68个标
- 爬取豆瓣电影TOP250程序,包含非常详
- 中文维基百科语料库百度网盘网址.
- MSCNN_dehaze.rar
- 爬取豆瓣排行榜电影数据(含GUI界面
- 字典文本资源
- Brainfuck / OoK 解码脚本
- 案例实战信用卡欺诈检测数据集
- 招商策略_抱团启示录那些年我们一起
- sip-4.19.zip
- 树莓派3b+学习使用教程
- numpy 中文学习手册
- pytorch-1.4.0-py3.7_cpu_0.tar.bz2
- 机器学习实战 高清完整版PDF
- 泰坦尼克号0.81准确率实验报告.docx
- abaqus scripting reference manual.pdf
- 网页版聊天程序--网络程序设计课程大
- Give Me Some Credit
- basemap安装出错时,正确的pyproj文件
- 微信头像拼接工具
- 统计思维:程序员数学之概率统计第
- 基于open cv的人脸识别
- Django web 开发中文 完整版本
- 影视领域可视化数据挖掘综述
- pyexiv2安装包
- dlib-19.4.0-cp35.whl Windows64位 不用boos
- PySpark Recipes-A Problem-Solution Approach wi
- 拆分CSV文件.zip
评论
共有 条评论