资源简介
基于lstm的语义相似度计算模型,使用百度qa的数据集进行实验。
代码片段和文件信息
import os
import json
import pickle

import h5py
import jieba
import numpy as np
import utils
import keras.preprocessing.text
from gensim.models import Word2Vec
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Dense, Input, LSTM, Embedding, Dropout
from keras.layers.merge import concatenate
from keras.layers.normalization import BatchNormalization
from keras.layers.wrappers import Bidirectional
from keras.models import Model, model_from_json, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.metrics import roc_curve, auc
def get_tokenizer():
    """Load the pickled, pre-fitted Keras tokenizer and return vocab info.

    Returns:
        tuple: ``(nb_words, word_index)`` where ``word_index`` maps each
        word to its integer id and ``nb_words`` is the vocabulary size
        capped at 700000 (``+1`` reserves index 0, which Keras uses for
        padding).

    Raises:
        FileNotFoundError: if ``tok_expend.pkl`` is not in the current
            working directory.
    """
    # NOTE(review): the tokenizer is pickled elsewhere; the path is
    # relative to the working directory — confirm against the caller.
    with open('tok_expend.pkl', 'rb') as f:
        tokenizer = pickle.load(f)
    word_index = tokenizer.word_index
    # +1 because Keras tokenizer ids start at 1; row 0 is the padding row.
    nb_words = min(700000, len(word_index) + 1)
    return nb_words, word_index
def get_data(data_dir='E:/2018泰迪杯/LSTM/data'):
    """Load the pre-tokenized train/test arrays from ``.npy`` files.

    Args:
        data_dir: Directory holding the six ``.npy`` files. Defaults to
            the original hard-coded experiment path so existing callers
            are unaffected.

    Returns:
        tuple: ``(data_1, data_2, labels, data_t_1, data_t_2, labels_t)``
        — the two padded id-sequence arrays and labels for the training
        split (``_s`` suffix) and the test split (``_t`` suffix).
    """
    # NOTE(review): array shapes/dtypes are fixed by the preprocessing
    # step that wrote these files — presumably int id matrices plus a
    # label vector; verify against the producer script.
    data_1 = np.load(os.path.join(data_dir, 'data_1_s.npy'))
    data_2 = np.load(os.path.join(data_dir, 'data_2_s.npy'))
    labels = np.load(os.path.join(data_dir, 'labels_s.npy'))
    data_t_1 = np.load(os.path.join(data_dir, 'data_t_1.npy'))
    data_t_2 = np.load(os.path.join(data_dir, 'data_t_2.npy'))
    labels_t = np.load(os.path.join(data_dir, 'labels_t.npy'))
    return data_1, data_2, labels, data_t_1, data_t_2, labels_t
def train():
# NOTE(review): this block was scraped from a web page and is damaged:
# commas were stripped (e.g. "nb_wordsword_index", "Input(shape=(30) ...)"),
# ASCII quotes became smart quotes, "embedding"/"embedDING_DIM" have broken
# casing (presumably Embedding / EMBEDDING_DIM), indentation was lost, and
# the function is truncated mid-statement at "merg" below. It will not parse
# as-is; the comments here describe apparent intent only — recover the
# original file before attempting to run or fix this.
# --- hyperparameters: LSTM width, dense width, dropout rates ---
num_lstm = 175
num_dense = 100
rate_drop_lstm = 0.15
rate_drop_dense = 0.15
embedDING_DIM = 100
VALIDATION_SPLIT = 0.1
act = ‘relu‘
print(‘get_data‘)
# Load vocab size/index and the train/test arrays defined above.
nb_wordsword_index=get_tokenizer()
data_1data_2labelsdata_t_1data_t_2labels_t=get_data()
print(‘get_model‘)
# Pretrained word2vec model provides the frozen embedding weights.
word2vec = Word2Vec.load(‘E:/2018泰迪杯/数据/w2v_expend.mod‘)
embedding_matrix = np.zeros((nb_words embedDING_DIM))
# Copy each in-vocabulary word's vector into its row of the matrix;
# out-of-vocabulary rows stay all-zero.
for word i in word_index.items():
if word in word2vec.wv.vocab:
try:
embedding_matrix[i] = word2vec.wv.word_vec(word)
except:
pass
# Two frozen embedding layers sharing the same weights but with
# different input lengths (30 vs 300) — presumably question vs answer.
embedding_layer_1 = embedding(nb_words
embedDING_DIM
weights=[embedding_matrix]
input_length=30
trainable=False)
embedding_layer_2 = embedding(nb_words
embedDING_DIM
weights=[embedding_matrix]
input_length=300
trainable=False)
# One shared LSTM encodes both sequences (Siamese-style weight sharing).
lstm_layer = LSTM(num_lstm dropout=rate_drop_lstm recurrent_dropout=rate_drop_lstm)
sequence_1_input = Input(shape=(30) dtype=‘int32‘)
embedded_sequences_1 = embedding_layer_1(sequence_1_input)
y1 = lstm_layer(embedded_sequences_1)
sequence_2_input = Input(shape=(300) dtype=‘int32‘)
embedded_sequences_2 = embedding_layer_2(sequence_2_input)
y2 = lstm_layer(embedded_sequences_2)
# The two encodings are concatenated and regularized with dropout;
# everything past this point was lost in the scrape.
merged = concatenate([y1 y2])
merged = Dropout(rate_drop_dense)(merged)
merg
- 上一篇:21天学通python.txt
- 下一篇:WordCloud
相关资源
- 使用LSTM进行时间序列预测
- lstm_attention文本分类代码
- textrank自动文摘抽取python代码
- 基于LSTM的RNN网络人体骨骼关节点检测
- 时间长短序列网络LSTM
- Tensorflow-BiLSTM分类
- RasaCore官方文档中文版
- lstm实现时间序列一维预测
- keras上LSTM长短期记忆网络金融时序预
- 基于LSTM的航班乘客预测
- lstm_tensorflow
- LSTM股票预测
- 2019传智播客python零基础入门视频教程
- lstm情感分析代码
- NLP分词
- tensorflow下用LSTM网络进行时间序列预测
- 股票预测 LSTM 时间序列rnn 代码程序数
- 基于SnowNLP的豆瓣评论情感分析及词云
- 正向最大匹配分词算法及KNN文本分类
- LSTMLong Short-Term Memory长短期记忆网络
- 卷积LSTM代码
- LSTM上证指数收盘价预测.zip
- Image Caption 看图说话python代码
- 非常简易的keras函数式Functional模型学
评论
共有 条评论