资源简介
AI Challenger 细粒度情感分析数据集,包含 ai_challenger_sentiment_analysis_testa_20180816、ai_challenger_sentiment_analysis_trainingset_20180816、ai_challenger_sentiment_analysis_validationset_20180816 三个数据包。
代码片段和文件信息
import numpy as np
import pickle
def get_data(file_path, encoding="utf-8"):
    """Read a text file and return its lines with surrounding whitespace removed.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        A list with one string per line of the file, in file order, each
        passed through ``str.strip()``. Blank lines are kept as empty strings.
    """
    with open(file_path, encoding=encoding) as f:
        return [line.strip() for line in f]
# Load the text of each data split; get_data yields one list entry per line.
# The "cut_word_rst" file names suggest the text is already word-segmented
# -- TODO confirm against the preprocessing step that produces these files.
train_content_ori = get_data(
    '../data/ai_challenger_sentiment_analysis_trainingset_20180816/sentiment_analysis_trainingset_cut_word_rst.txt')
val_content_ori = get_data(
    '../data/ai_challenger_sentiment_analysis_validationset_20180816/sentiment_analysis_validationset_cut_word_rst.txt')
test_content_ori = get_data(
    '../data/ai_challenger_sentiment_analysis_testa_20180816/sentiment_analysis_testa_cut_word_rst.txt')
# Print the number of lines read for the train, validation and test splits.
print(len(train_content_ori), len(val_content_ori), len(test_content_ori))
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit the TF-IDF vocabulary and IDF weights on the training texts only, then
# apply that same fitted vectorizer to every split.
# NOTE(review): TfidfVectorizer's default token_pattern only keeps tokens of
# two or more word characters -- confirm that dropping single-character
# tokens is intended for this corpus.
vectorizer = TfidfVectorizer().fit(train_content_ori)
train_content, val_content, test_content = (
    vectorizer.transform(corpus)
    for corpus in (train_content_ori, val_content_ori, test_content_ori)
)
from sklearn.decomposition import TruncatedSVD
# Fit a truncated SVD with 80 components (20 * 4) on the training TF-IDF
# matrix, then project every split with the same fitted model.
# random_state is fixed so repeated runs give the same decomposition.
svd = TruncatedSVD(n_components=20 * 4, n_iter=7, random_state=2018)
svd.fit(train_content)
train_svd = svd.transform(train_content)
val_svd = svd.transform(val_content)
test_svd = svd.transform(test_content)

# Persist the reduced features and the fitted SVD model under a shared prefix.
prefix = 'svd_tfidf_withP_80'
# np.save appends ".npy" to a file name that does not already end with it.
np.save('../data/%s_train' % prefix, train_svd)
np.save('../data/%s_val' % prefix, val_svd)
np.save('../data/%s_test' % prefix, test_svd)
# Use a context manager so the pickle file is flushed and closed promptly.
with open('../data/%s.pk' % prefix, 'wb') as model_file:
    pickle.dump(svd, model_file)
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\
文件 1056 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.gitignore
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.idea\
文件 459 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.idea\Al_challenger_2018_sentiment_analysis.iml
文件 599 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.idea\misc.xml
文件 326 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.idea\modules.xml
文件 31566 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\.idea\workspace.xml
文件 3788 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\README.md
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\
文件 19447 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\GCAE_word_char.py
文件 25901 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\SynAtt_expand_model.py
文件 1463 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\feature.py
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\models\
文件 1 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\models\tmp.txt
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\tf_graph\
文件 1 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\tf_graph\tmp.txt
文件 14737 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\train.py
文件 10890 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\utils.py
文件 3730 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\code\word2vec.py
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\data\
文件 2077 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\data\哈工大停用标点表.txt
文件 5275 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\data\哈工大停用词表扩展.txt
目录 0 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\readme_resource\
文件 14338 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\readme_resource\GCAE.jpg
文件 34821 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\readme_resource\SynAtt.jpg
文件 266 2018-11-16 09:29 Al_challenger_2018_sentiment_analysis-master\run.sh
- 上一篇:JMS完全八个
- 下一篇:网站需求分析模板模板中有详细的说明
评论
共有 条评论