资源简介
基于lstm的语义相似度计算模型,使用百度qa的数据集进行实验。
代码片段和文件信息
# Standard library
import os
import json
import pickle

# Third-party
import h5py
import jieba
import numpy as np
import keras.preprocessing.text
from gensim.models import Word2Vec
from sklearn.metrics import roc_curve, auc
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Dense, Input, LSTM, Embedding, Dropout
from keras.layers.merge import concatenate
from keras.layers.wrappers import Bidirectional
from keras.models import Model, model_from_json, load_model
from keras.layers.normalization import BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Project-local
import utils
def get_tokenizer():
    """Load the pickled Keras tokenizer and return ``(nb_words, word_index)``.

    Returns:
        nb_words: vocabulary size used for the embedding matrix —
            ``len(word_index) + 1`` (index 0 is reserved in Keras),
            capped at 700,000.
        word_index: the tokenizer's word -> integer-id mapping.
    """
    # NOTE(review): pickle is unsafe on untrusted files — fine here only
    # because the tokenizer file is produced by this project itself.
    with open('tok_expend.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    word_index = tokenizer.word_index
    # +1 because Keras word indices start at 1; index 0 stays reserved.
    nb_words = min(700000, len(word_index) + 1)
    return nb_words, word_index
def get_data(data_dir='E:/2018泰迪杯/LSTM/data'):
    """Load the pre-tokenised train/test arrays saved as ``.npy`` files.

    Args:
        data_dir: directory holding the six arrays.  Defaults to the
            original hard-coded path so existing callers are unaffected;
            passing another directory makes the loader portable.

    Returns:
        ``(data_1, data_2, labels, data_t_1, data_t_2, labels_t)`` —
        the two training input sequences and their labels, followed by
        the corresponding test-set arrays.
    """
    def _load(name):
        # One place to build the path instead of six copied literals.
        return np.load(os.path.join(data_dir, name))

    data_1 = _load('data_1_s.npy')
    data_2 = _load('data_2_s.npy')
    labels = _load('labels_s.npy')
    data_t_1 = _load('data_t_1.npy')
    data_t_2 = _load('data_t_2.npy')
    labels_t = _load('labels_t.npy')
    return data_1, data_2, labels, data_t_1, data_t_2, labels_t
def train():
    # Builds a two-branch ("siamese"-style, shared LSTM) model for
    # question/answer semantic similarity on the Baidu QA data.
    # NOTE(review): this listing was scraped from a web page — commas and
    # ASCII quotes were stripped, indentation was lost, and the body is
    # truncated at `merg` below, so it cannot run as-is.  Only comments
    # and indentation were added here; all original (mangled) tokens are
    # preserved byte-for-byte.
    num_lstm = 175            # LSTM hidden units
    num_dense = 100           # presumably a dense-layer width used past the truncation — can't confirm here
    rate_drop_lstm = 0.15     # dropout applied inside the LSTM layer below
    rate_drop_dense = 0.15    # dropout applied after the merge below
    embedDING_DIM = 100       # mangled capitalisation; presumably EMBEDDING_DIM
    VALIDATION_SPLIT = 0.1    # presumably passed to model.fit — not visible in this chunk
    act = ‘relu‘              # presumably an activation for later dense layers — not visible here
    print(‘get_data‘)
    nb_wordsword_index=get_tokenizer()                           # mangled: nb_words, word_index = ...
    data_1data_2labelsdata_t_1data_t_2labels_t=get_data()        # mangled tuple unpack (six names)
    print(‘get_model‘)
    # Pre-trained word2vec vectors; matrix rows are indexed by tokenizer word id.
    word2vec = Word2Vec.load(‘E:/2018泰迪杯/数据/w2v_expend.mod‘)
    embedding_matrix = np.zeros((nb_words embedDING_DIM))        # mangled: (nb_words, embedDING_DIM)
    for word i in word_index.items():                            # mangled: for word, i in ...
        if word in word2vec.wv.vocab:
            try:
                embedding_matrix[i] = word2vec.wv.word_vec(word)
            except:
                # NOTE(review): bare except silently drops lookup failures;
                # the `if word in ...vocab` guard should already prevent most.
                pass
    # Two frozen embedding layers sharing the same pre-trained matrix,
    # differing only in input length (30-token vs 300-token sequences).
    # `embedding` is the mangled form of keras' Embedding layer.
    embedding_layer_1 = embedding(nb_words
    embedDING_DIM
    weights=[embedding_matrix]
    input_length=30
    trainable=False)
    embedding_layer_2 = embedding(nb_words
    embedDING_DIM
    weights=[embedding_matrix]
    input_length=300
    trainable=False)
    # A single LSTM instance applied to both branches => shared weights.
    lstm_layer = LSTM(num_lstm dropout=rate_drop_lstm recurrent_dropout=rate_drop_lstm)
    sequence_1_input = Input(shape=(30) dtype=‘int32‘)           # mangled: shape=(30,)
    embedded_sequences_1 = embedding_layer_1(sequence_1_input)
    y1 = lstm_layer(embedded_sequences_1)
    sequence_2_input = Input(shape=(300) dtype=‘int32‘)          # mangled: shape=(300,)
    embedded_sequences_2 = embedding_layer_2(sequence_2_input)
    y2 = lstm_layer(embedded_sequences_2)
    # Concatenate the two sequence encodings, then regularise.
    merged = concatenate([y1 y2])
    merged = Dropout(rate_drop_dense)(merged)
    merg
    # (listing truncated here by the scrape — the dense head, compile and
    # fit calls are missing from this chunk.)
- 上一篇:21天学通python.txt
- 下一篇:WordCloud
相关资源
- 微博用户评论情感分析python代码数据
- 运用LSTM对CPI数据进行预测.py
- Deep Learning for Natural Language Processing
- Python-神经网络模型能够从音频演讲中
- Long Short-Term Memory Networks With Python
- MLP/RNN/LSTM模型进行IMDb情感分析
- metadata.txt - 唐诗生成器自动生成
- Introduction to time series.pdf + Deep Time Se
- 时间序列预测讲义ARIMA&LSTM;及python代码
- Python-NLP之旅包含NLP文章代码集锦
- Python-一个非常简单的BiLSTMCRF模型用于
- LSTM模型学习
- Python-20182019校招春招秋招算法NLP深度
- LSTM数据集+python源码
- python snownlp-0.12.3.tar.gz
- NLP实战之fasttext进行THUCNews文本分类
- python 自然语言处理实战代码部分
- DNN判断句子的通顺程度.py
- PSO-LSTM.py
- 15.时间序列预测LSTM模型python代码实现
- LSTM预测股价代码
- nlp肯定句与否定句判断
- NLPIR.user
- LSTM股价预测(python).zip
- nltk语料库
- LSTM可以运行
- LSTM时间序列预测销量
- 使用LSTM进行时间序列预测
- lstm_attention文本分类代码
评论
共有 条评论