• 大小: 24.15MB
    文件类型: .zip
    金币: 1
    下载: 0 次
    发布日期: 2023-06-13
  • 语言: 其他
  • 标签: 情感分析  LR  SVM  NB  

资源简介

基于机器学习的情感分析,简单实现,可显示准确率、精确率、召回率和 F1 值。

资源截图

代码片段和文件信息

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from yelp_utils import numLines loadData
import sys
import time
import numpy as np
from sklearn import metrics


# 文本情感分类
def classify(technique, posneg, percentData):
    """Train a text classifier on the review file and print its metrics.

    The first 70% of the selected lines are used for training and the
    remainder for testing. Elapsed fit + predict time is printed, followed
    by the scores reported by ``get_metrics``.

    Args:
        technique: Classifier key: 'nb' (MultinomialNB), 'svm'
            (SGDClassifier with hinge loss) or 'lr' (LogisticRegression).
        posneg: Forwarded unchanged to ``loadData``.
            NOTE(review): its meaning is defined in yelp_utils -- confirm.
        percentData: Percentage of the file's lines to use; any value
            accepted by ``float()``.

    Raises:
        ValueError: If ``technique`` is not one of the supported keys.
    """
    # Choose the classifier (naive Bayes, SVM, logistic regression) first so
    # an unsupported technique fails before any data is loaded.
    if technique == 'nb':
        clf_obj = MultinomialNB()
    elif technique == 'svm':
        # `n_iter` was removed from SGDClassifier; max_iter=5 with tol=None
        # keeps the original fixed five passes over the training data.
        clf_obj = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
                                max_iter=5, tol=None, random_state=42)
    elif technique == 'lr':
        clf_obj = LogisticRegression()
    else:
        raise ValueError(
            "unknown technique %r; expected 'nb', 'svm' or 'lr'" % (technique,))

    # Read the data, split into training and test ranges (ratio adjustable).
    filename = 'Phoenix_reviews_filtered.json'
    num_lines = numLines(filename)
    linesToRead = int(num_lines * (float(percentData) / 100.0))
    # Line positions must be whole numbers; a float boundary would leave a
    # gap between the training range and the test range.
    train_end = int(linesToRead * 0.7)

    train_data, train_labels = loadData(filename, 0, train_end, posneg)
    test_data, test_labels = loadData(filename, train_end + 1, linesToRead,
                                      posneg)

    start_time = time.time()
    # Tokenize the raw reviews and vectorize them with TF-IDF before the
    # classifier stage.
    text_clf = Pipeline([
        ('vect', CountVectorizer(stop_words='english')),
        ('tfidf', TfidfTransformer()),
        ('clf', clf_obj),
    ])
    # Train on the training set.
    text_clf = text_clf.fit(train_data, train_labels)
    # Predict on the test set.
    predicted = text_clf.predict(test_data)
    print("time: %s seconds" % (time.time() - start_time))
    # Show the evaluation of the predictions.
    get_metrics(true_labels=test_labels, predicted_labels=predicted)


def get_metrics(true_labels, predicted_labels):
    """Print accuracy, precision, recall and F1 score for the predictions.

    Each score is rounded to two decimals. Precision, recall and F1 are
    computed with ``average='weighted'``.

    Args:
        true_labels: Ground-truth labels.
        predicted_labels: Labels predicted by the classifier, in the same
            order as ``true_labels``.
    """
    print('Accuracy:', np.round(
        metrics.accuracy_score(true_labels,
                               predicted_labels),
        2))
    print('Precision:', np.round(
        metrics.precision_score(true_labels,
                                predicted_labels,
                                average='weighted'),
        2))
    print('Recall:', np.round(
        metrics.recall_score(true_labels,
                             predicted_labels,
                             average='weighted'),
        2))
    print('F1 Score:', np.round(
        metrics.f1_score(true_labels,
                         predicted_labels,
                         average='weighted'),
        2))


def print_usage():
    """Print the command-line usage line and an example invocation."""
    # The placeholders name the three arguments checked by valid_args; they
    # were missing from the usage line, leaving only blank space.
    print("Usage: classify.py <technique> <posneg> <percentData>")
    print("e.g. classify.py nb True 85")


def valid_args(avail_techniques, technique, posneg, percentData):
    """Return True when the command-line arguments are acceptable.

    Args:
        avail_techniques: Container of supported technique keys.
        technique: Requested technique key.
        posneg: Must be the string 'True' or 'False'.
        percentData: Value convertible to an int in the range 0..100.

    Returns:
        True if all three arguments are valid, otherwise False. A
        ``percentData`` that is not an integer yields False rather than
        raising.
    """
    if technique not in avail_techniques:
        return False
    if posneg not in ('True', 'False'):
        return False
    try:
        percent = int(percentData)
    except (TypeError, ValueError):
        # A validator should reject malformed input, not crash on it.
        return False
    return 0 <= percent <= 100


if __name__ == ‘__main__‘:
    techniques = {‘nb‘: ‘Naive Bayes‘ ‘svm‘: ‘Support Vector Machines‘ ‘lr‘: ‘Logistic Regression‘}
    
    try:
        technique = sys.arg

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----
     目录           0  2018-12-15 11:54  Sentiment-Analysis-Logistic\
     目录           0  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\
     文件         600  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\Sentiment-Analysis-Logistic.iml
     目录           0  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\inspectionProfiles\
     目录           0  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\libraries\
     文件         128  2018-12-15 10:25  Sentiment-Analysis-Logistic\.idea\libraries\R_User_Library.xml
     文件         306  2018-12-15 10:21  Sentiment-Analysis-Logistic\.idea\modules.xml
     文件       12842  2018-12-15 11:56  Sentiment-Analysis-Logistic\.idea\workspace.xml
     文件        1081  2016-01-03 07:56  Sentiment-Analysis-Logistic\MIT-LICENSE
     文件    70445752  2018-12-06 23:54  Sentiment-Analysis-Logistic\Phoenix_reviews_filtered.json
     文件        2078  2016-01-03 07:56  Sentiment-Analysis-Logistic\README.md
     目录           0  2018-12-15 10:28  Sentiment-Analysis-Logistic\__pycache__\
     文件         898  2018-12-15 10:28  Sentiment-Analysis-Logistic\__pycache__\yelp_utils.cpython-35.pyc
     文件        3855  2018-12-15 11:54  Sentiment-Analysis-Logistic\classify.py
     文件         643  2018-12-15 10:28  Sentiment-Analysis-Logistic\yelp_utils.py

评论

共有 条评论