资源简介
基于机器学习的情感分析,简单实现,可显示准确率、精确率、召回率、F1值
代码片段和文件信息
import sys
import time

import numpy as np
from sklearn import metrics
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

from yelp_utils import numLines, loadData
# Text sentiment classification
def classify(technique, posneg, percentData):
    """Train a text classifier on the review file and print its evaluation.

    The first ``percentData`` percent of the lines in the review file are
    used; of those, the first 70% are passed to ``loadData`` as the training
    range and the remainder as the test range. The elapsed time and the
    metrics printed by ``get_metrics`` are written to stdout.

    Args:
        technique: Classifier key: 'nb' (MultinomialNB), 'svm'
            (SGDClassifier with hinge loss) or 'lr' (LogisticRegression).
        posneg: Forwarded unchanged to ``loadData``.
            NOTE(review): presumably toggles positive/negative labelling;
            confirm against yelp_utils.
        percentData: Percentage of the file's lines to use; any value
            accepted by ``float()``.

    Raises:
        ValueError: If ``technique`` is not one of the supported keys.
    """
    # Factories rather than instances so only the chosen estimator is built.
    classifier_factories = {
        'nb': MultinomialNB,
        # max_iter/tol=None reproduces the fixed 5 epochs that the removed
        # ``n_iter=5`` argument used to request.
        'svm': lambda: SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                                     max_iter=5, tol=None, random_state=42),
        'lr': LogisticRegression,
    }
    if technique not in classifier_factories:
        # Fail before reading any data instead of hitting an unbound
        # ``clf_obj`` later on.
        raise ValueError(
            "unknown technique %r; expected one of %s"
            % (technique, sorted(classifier_factories)))

    # Read the data (train/test split ratio can be adjusted here).
    filename = 'Phoenix_reviews_filtered.json'
    num_lines = numLines(filename)
    linesToRead = int(num_lines * (float(percentData) / 100.0))
    # Line positions must be integers; the original passed a float here.
    train_end = int(linesToRead * 0.7)
    # TODO confirm in yelp_utils whether loadData's bounds are inclusive, so
    # that no line is dropped or shared at the train/test boundary.
    train_data, train_labels = loadData(filename, 0, train_end, posneg)
    test_data, test_labels = loadData(filename, train_end + 1, linesToRead,
                                      posneg)

    clf_obj = classifier_factories[technique]()

    start_time = time.time()

    # Tokenize the raw reviews (dropping English stop words), weight the
    # counts with TF-IDF, then feed the vectors to the chosen classifier.
    text_clf = Pipeline([
        ('vect', CountVectorizer(stop_words='english')),
        ('tfidf', TfidfTransformer()),
        ('clf', clf_obj),
    ])

    # Fit on the training set.
    text_clf.fit(train_data, train_labels)

    # Predict on the test set.
    predicted = text_clf.predict(test_data)

    print("time: %s seconds" % (time.time() - start_time))

    # Print the evaluation metrics.
    get_metrics(true_labels=test_labels, predicted_labels=predicted)
def get_metrics(true_labels, predicted_labels):
    """Print accuracy, precision, recall and F1 score rounded to 2 decimals.

    Precision, recall and F1 are computed with ``average='weighted'``.

    Args:
        true_labels: Ground-truth labels of the evaluated samples.
        predicted_labels: Labels predicted by the classifier, in the same
            order as ``true_labels``.
    """
    scores = (
        ('Accuracy:',
         metrics.accuracy_score(true_labels, predicted_labels)),
        ('Precision:',
         metrics.precision_score(true_labels, predicted_labels,
                                 average='weighted')),
        ('Recall:',
         metrics.recall_score(true_labels, predicted_labels,
                              average='weighted')),
        ('F1 Score:',
         metrics.f1_score(true_labels, predicted_labels,
                          average='weighted')),
    )
    for label, score in scores:
        print(label, np.round(score, 2))
def print_usage():
    """Print the command-line usage and an example invocation to stdout."""
    # The argument placeholders were missing from the usage line (it ended
    # right after the script name); they are restored here using the
    # parameter names of classify().
    print("Usage: classify.py <technique> <posneg> <percentData>")
    print("e.g. classify.py nb True 85")
def valid_args(avail_techniques, technique, posneg, percentData):
    """Return True when the command-line arguments are usable.

    Args:
        avail_techniques: Container of accepted technique keys.
        technique: Technique key supplied by the user.
        posneg: Must be exactly the string 'True' or 'False'.
        percentData: Must parse as an integer between 0 and 100 inclusive.

    Returns:
        bool: True if every argument passes its check, otherwise False.
        A ``percentData`` that cannot be parsed as an integer is reported
        as invalid rather than raising.
    """
    if technique not in avail_techniques:
        return False
    if posneg not in ('True', 'False'):
        return False
    try:
        percent = int(percentData)
    except (TypeError, ValueError):
        # Non-numeric input is simply an invalid argument.
        return False
    return 0 <= percent <= 100
if __name__ == ‘__main__‘:
techniques = {‘nb‘: ‘Naive Bayes‘ ‘svm‘: ‘Support Vector Machines‘ ‘lr‘: ‘Logistic Regression‘}
try:
technique = sys.arg
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-12-15 11:54 Sentiment-Analysis-Logistic\
目录 0 2018-12-15 11:56 Sentiment-Analysis-Logistic\.idea\
文件 600 2018-12-15 10:25 Sentiment-Analysis-Logistic\.idea\Sentiment-Analysis-Logistic.iml
目录 0 2018-12-15 11:56 Sentiment-Analysis-Logistic\.idea\inspectionProfiles\
目录 0 2018-12-15 10:25 Sentiment-Analysis-Logistic\.idea\libraries\
文件 128 2018-12-15 10:25 Sentiment-Analysis-Logistic\.idea\libraries\R_User_Library.xml
文件 306 2018-12-15 10:21 Sentiment-Analysis-Logistic\.idea\modules.xml
文件 12842 2018-12-15 11:56 Sentiment-Analysis-Logistic\.idea\workspace.xml
文件 1081 2016-01-03 07:56 Sentiment-Analysis-Logistic\MIT-LICENSE
文件 70445752 2018-12-06 23:54 Sentiment-Analysis-Logistic\Phoenix_reviews_filtered.json
文件 2078 2016-01-03 07:56 Sentiment-Analysis-Logistic\README.md
目录 0 2018-12-15 10:28 Sentiment-Analysis-Logistic\__pycache__\
文件 898 2018-12-15 10:28 Sentiment-Analysis-Logistic\__pycache__\yelp_utils.cpython-35.pyc
文件 3855 2018-12-15 11:54 Sentiment-Analysis-Logistic\classify.py
文件 643 2018-12-15 10:28 Sentiment-Analysis-Logistic\yelp_utils.py
相关资源
- 基于ssm的小米商城
- Solr In Action(中文版)_高清
- 吴恩达deeplearning课程作业及需要的的
- 知网情感词典HOWNET
- 知网Hownet情感词典
- GLCM-SVM-master.zip
- 68人脸特征点Hog+SVM人脸表情识别
- 汉语语音情感语料库.rar
- 用户评论情感分析数据集细粒度收集
- 康奈尔影评数据集
- LR分析器 C 语言实现
- Opencv_SVM训练_识别几何体
- Haar人脸检测+SVM+PCA人脸识别
- SVM实现手写数字识别
- 基于LSTM长短期记忆的影评情感分析
- 细粒度用户评论情感分析数据集(2
- Movie review sentiment analysis
- antlr4权威指南中文版
- 电商产品评论数据情感分析 stoplist
- kaggle 文本情感分析数据集
- 喜悦、愤怒、厌恶、低落微博带标注
- LR实现语法分析
- WebRtc+SignalR 的demo
- 基于SVM的情感分析系统
- 构造LR(1)分析程序,利用它进行语
- SVM支持向量机代码解释
- Solr权威指南-上卷-高清-完整目录-20
- solr6.2.1项目包
- 一个更加完善的SignalR服务端,本人已
- 行人数据库正样本
评论
共有 条评论