基于情感词典的情感分析

大小: 14.11MB

文件类型: .rar

金币: 1

下载: 0 次

发布日期: 2023-07-30
语言: Python
标签: 情感分析

高速下载

资源简介

压缩包内包含情感词典，以及否定词、停用词、程度副词等词表，并附带 Python 代码；开发所用的 IDE 是 PyCharm。

资源截图

小图大图

代码片段和文件信息

from collections import defaultdict
import os
import re
import jieba
import codecs

def seg_word(sentence):
    """Segment a document with jieba and remove stop words.

    Args:
        sentence: The text to segment.

    Returns:
        A list of the tokens produced by ``jieba.cut``, in their original
        order, excluding every token that appears as a (whitespace-stripped)
        line of ``stopwords.txt``.

    Raises:
        OSError: If ``stopwords.txt`` cannot be opened. The path is relative
            to the current working directory.
    """
    seg_result = list(jieba.cut(sentence))
    # Kept from the original implementation: echoes the raw segmentation.
    print(seg_result)

    # Load the stop words; the context manager closes the file even when
    # reading or decoding fails.
    with open('stopwords.txt', 'r', encoding='utf-8') as fr:
        stopwords = {line.strip() for line in fr}

    # Drop the stop words while preserving token order.
    return [word for word in seg_result if word not in stopwords]


def _load_score_dict(path, encoding, sep=None):
    """Read a ``key<sep>value`` file into a dict of stripped strings.

    Lines that do not contain at least two fields are skipped. With
    ``sep=None`` fields are split on any run of whitespace.
    """
    scores = {}
    with open(path, 'r', encoding=encoding) as handle:
        for line in handle:
            parts = line.strip().split(sep)
            if len(parts) < 2 or not parts[0]:
                # Blank or malformed line: nothing to record.
                continue
            scores[parts[0].strip()] = parts[1].strip()
    return scores


def classify_words(word_dict):
    """Classify words into sentiment words, negation words and degree adverbs.

    Three lexicon files are read from the current working directory:
    ``BosonNLP_sentiment_score.txt`` (UTF-8, ``word score`` per line),
    ``notDic.txt`` (UTF-8, one negation word per line) and ``degree.txt``
    (GBK, ``word,value`` per line).

    Args:
        word_dict: Mapping of word -> position of that word in the document.

    Returns:
        A tuple ``(sen_word, not_word, degree_word)`` of dicts keyed by word
        position. ``sen_word`` maps to the sentiment score and ``degree_word``
        to the degree value, both as the strings read from the lexicon files
        (surrounding whitespace removed). ``not_word`` maps to ``-1``.

    Raises:
        OSError: If any of the lexicon files cannot be opened.
    """
    # Sentiment lexicon: key is the sentiment word, value is its score.
    sen_dict = _load_score_dict('BosonNLP_sentiment_score.txt', 'utf-8')

    # Negation words carry no score; a set gives O(1) membership tests.
    with open('notDic.txt', 'r', encoding='utf-8') as not_word_file:
        not_words = {line.strip() for line in not_word_file}
    not_words.discard('')

    # Degree adverbs: key is the adverb, value is its degree.
    # NOTE(review): the separator was lost in the original snippet (it read
    # split('') which raises ValueError); ',' is assumed - confirm against
    # the actual degree.txt format.
    degree_dic = _load_score_dict('degree.txt', 'gbk', sep=',')

    sen_word = {}
    not_word = {}
    degree_word = {}

    # Precedence: negation word > degree adverb > sentiment word. A word is
    # placed in exactly one category.
    for word, index in word_dict.items():
        if word in not_words:
            not_word[index] = -1
        elif word in degree_dic:
            degree_word[index] = degree_dic[word]
        elif word in sen_dict:
            sen_word[index] = sen_dict[word]

    return sen_word, not_word, degree_word


def list_to_dict(word_list):
    """Convert a list of segmented words into a word -> index dict.

    The index is the word's position in ``word_list``, i.e. where it occurs
    in the document. When a word occurs more than once, the index of its
    last occurrence is kept.

    Args:
        word_list: Sequence of words.

    Returns:
        A dict mapping each distinct word to its (last) index in the list.
    """
    data = {}
    for index, word in enumerate(word_list):
        data[word] = index
    return data

属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件       6276  2007-10-21 16:16  正面情感词语（中文）.txt

     文件      14190  2007-10-21 16:16  正面情感词语（英文）.txt

     文件      30409  2007-10-21 16:16  正面评价词语（中文）.txt

     文件      61667  2007-10-21 16:16  正面评价词语（英文）.txt

     文件       1480  2007-10-21 16:16  程度级别词语（中文）.txt

     文件       2114  2007-10-21 16:16  程度级别词语（英文）.txt

     文件       9952  2007-10-21 16:16  负面情感词语（中文）.txt

     文件      18511  2007-10-21 16:16  负面情感词语（英文）.txt

     文件      26101  2007-10-21 16:16  负面评价词语（中文）.txt

     文件      57704  2007-10-21 16:16  负面评价词语（英文）.txt

     文件        289  2007-10-21 16:16  主张词语（中文）.txt

     文件        451  2007-10-21 16:16  主张词语（英文）.txt

     文件        181  2018-12-22 19:41  sentiment\.idea\encodings.xml

     文件        300  2018-12-22 18:21  sentiment\.idea\misc.xml

     文件        270  2018-12-22 18:21  sentiment\.idea\modules.xml

     文件        466  2018-12-22 18:21  sentiment\.idea\sentiment.iml

     文件      21273  2019-01-16 21:27  sentiment\.idea\workspace.xml

     文件       6472  2019-01-16 17:21  sentiment\1_1.py

     文件        714  2019-01-14 22:26  sentiment\1_2.py

     文件    2528956  2019-01-15 15:57  sentiment\BosonNLP_sentiment_score.txt

     文件       2070  2019-01-15 15:29  sentiment\degree.txt

     文件        553  2017-09-03 15:38  sentiment\notDic.txt

     文件       9213  2019-01-15 16:42  sentiment\stopwords.txt

     文件         55  2018-12-22 18:21  sentiment\venv\Lib\site-packages\easy-install.pth

     文件       1403  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\analyzer.py

     文件    6200957  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\idf.txt

     文件       3772  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\textrank.py

     文件       4310  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\tfidf.py

     文件        501  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\analyse\__init__.py

     文件    5071852  2018-12-22 18:24  sentiment\venv\Lib\site-packages\jieba\dict.txt

............此处省略438个文件信息

上一篇：Python游戏编程快速上手.rar
下一篇：《Python编程快速上手》英文原版

共有条评论

基于情感词典的情感分析

资源简介

资源截图

代码片段和文件信息

评论

相关资源