资源简介
淘宝商品评价及新闻评论的情感倾向分析,使用 Python 实现,模型包括 RNN 和 CNN。
代码片段和文件信息
from TextClassification import TextClassification, DataPreprocess
from sklearn.model_selection import train_test_split
from TextClassification import load_data
import numpy as np

# Load data (single-label data set)
# -----------------------------------
data = load_data(name='single')
x = data['evaluation']
# Wrap every label in its own one-element list.
y = [[i] for i in data['label']]

# Data preprocessing
# -----------------------------------
process = DataPreprocess()

# Cut texts
x_cut = process.cut_texts(texts=x, need_cut=True, word_len=2, savepath=None)

# Texts to sequences
# NOTE(review): `tokenizer`, `num_words` and `maxlen` are not defined anywhere
# in this snippet; they must be assigned before this call can run.
x_seq = process.text2seq(texts_cut=x_cut, tokenizer=tokenizer, tokenizer_savapah=None,
                         num_words=num_words, maxlen=maxlen, batchsize=10000)
# List to array
x_seq = np.array(x_seq)

# Texts to word vectors
# NOTE(review): `model` is not assigned at this point of the script, so this
# line raises NameError as written. Presumably the call belongs on an object
# created earlier -- confirm against the package before running.
x_word_vec = model.text2vec(texts_cut=x, sg=1, size=128, window=5, min_count=1)
# Text vectors: the mean of the entries of each item in x_word_vec
x_vec = np.array([sum(i) / len(i) for i in x_word_vec])
# Single target
# Train model
# ------------------------------------
# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
model = TextClassification()
model.fit(x=X_train, y=y_train, method='CNN', model=None,
          x_need_preprocess=True, y_need_preprocess=True,
          epochs=10, batchsize=128, output_type='single')
label_set = model.label_set
y_predict = model.predict(x=X_test, x_need_preprocess=True)
y_predict_label = model.label2toptag(predictions=y_predict, labelset=label_set)
# Fraction of test samples whose predicted label equals the expected label.
print(sum([y_predict_label[i] == y_test[i] for i in range(len(y_predict))]) / len(y_predict))
# Multiple target
# Load data (multi-label data set)
# -----------------------------------
data = load_data(name='multiple')
# Each record supplies its text under 'fact' and its labels under 'accusation'.
x = [i['fact'] for i in data]
y = [i['accusation'] for i in data]
# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
model = TextClassification()
model.fit(x=X_train, y=y_train, method='CNN', model=None,
          x_need_preprocess=True, y_need_preprocess=True,
          epochs=10, batchsize=128, output_type='multiple')
label_set = model.label_set
y_predict = model.predict(x=X_test, x_need_preprocess=True)
y_predict_label = model.label2tag(predictions=y_predict, labelset=label_set)
# Fraction of test samples whose predicted labels equal the expected labels.
print(sum([y_predict_label[i] == y_test[i] for i in range(len(y_predict))]) / len(y_predict))
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2018-07-06 08:28 Text-Classification-master\
文件 66 2018-07-06 08:28 Text-Classification-master\.gitattributes
文件 1045 2018-07-06 08:28 Text-Classification-master\.gitignore
文件 5155 2018-07-06 08:28 Text-Classification-master\README.md
目录 0 2018-07-06 08:28 Text-Classification-master\TextClassification\
文件 6138 2018-07-06 08:28 Text-Classification-master\TextClassification\DataPreprocess.py
文件 6770 2018-07-06 08:28 Text-Classification-master\TextClassification\TextClassification.py
文件 127 2018-07-06 08:28 Text-Classification-master\TextClassification\__init__.py
目录 0 2018-07-06 08:28 Text-Classification-master\TextClassification\data\
文件 21503783 2018-07-06 08:28 Text-Classification-master\TextClassification\data\data_multiple.json
文件 949081 2018-07-06 08:28 Text-Classification-master\TextClassification\data\data_single.csv
文件 413 2018-07-06 08:28 Text-Classification-master\TextClassification\load_data.py
目录 0 2018-07-06 08:28 Text-Classification-master\TextClassification\models\
文件 2072 2018-07-06 08:28 Text-Classification-master\TextClassification\models\CNN.py
文件 2048 2018-07-06 08:28 Text-Classification-master\TextClassification\models\RNN.py
文件 669 2018-07-06 08:28 Text-Classification-master\TextClassification\models\SklearnClf.py
文件 76 2018-07-06 08:28 Text-Classification-master\TextClassification\models\__init__.py
文件 2205 2018-07-06 08:28 Text-Classification-master\demo.py
目录 0 2018-07-06 08:28 Text-Classification-master\demo\
目录 0 2018-07-06 08:28 Text-Classification-master\demo\.idea\
文件 15324 2018-07-06 08:28 Text-Classification-master\demo\.idea\workspace.xml
文件 874 2018-07-06 08:28 Text-Classification-master\demo\demo_net_multiple.py
文件 2183 2018-07-06 08:28 Text-Classification-master\demo\demo_net_multiple_use_process.py
文件 862 2018-07-06 08:28 Text-Classification-master\demo\demo_net_single.py
文件 2161 2018-07-06 08:28 Text-Classification-master\demo\demo_net_single_use_process.py
文件 1309 2018-07-06 08:28 Text-Classification-master\demo\demo_sklearn.py
目录 0 2018-07-06 08:28 Text-Classification-master\picture\
文件 308331 2018-07-06 08:28 Text-Classification-master\picture\data_multiple.png
文件 61071 2018-07-06 08:28 Text-Classification-master\picture\data_single.png
- 上一篇:python的计量经济学
- 下一篇:python版植物大战僵尸源码
评论
共有 条评论