Resource Overview
A Python implementation of a character-level recurrent neural network (RNN).
Code Snippet and File Information
# -*- coding: utf-8 -*-
"""
Created on Thu t 08 17:36:23 2017
@author: Administrator
"""
import numpy as np
# import codecs

dataSet = open('D:\\test.txt', 'r').read()  # read the whole txt file into a single string
charSet = list(set(dataSet))                # de-duplicated set of characters
print(charSet)
# print the number of characters in the source file and the number of unique characters
dataSet_size, vocab_size = len(dataSet), len(charSet)
print('dataSet has %d characters, %d unique.' % (dataSet_size, vocab_size))
# build the character index tables (character -> integer and back)
char_to_ix = {ch: i for i, ch in enumerate(charSet)}
ix_to_char = {i: ch for i, ch in enumerate(charSet)}
print(char_to_ix)
hiddenSize = 100      # number of hidden-layer neurons
seq_length = 20       # length of the training subsequences (steps the RNN is unrolled for)
learning_rate = 1e-1  # learning rate
# network model
Input_Hidden = np.random.randn(hiddenSize, vocab_size) * 0.01   # input layer to hidden layer
Hidden_Hidden = np.random.randn(hiddenSize, hiddenSize) * 0.01  # hidden layer to hidden layer (recurrent)
Hidden_Output = np.random.randn(vocab_size, hiddenSize) * 0.01  # hidden layer to output layer; the output predicts a probability for each character
Hidden_Bias = np.zeros((hiddenSize, 1))  # hidden-layer bias
Output_Bias = np.zeros((vocab_size, 1))  # output-layer bias
# inputs:  the sequence at time t, i.e. the list of input character indices
# targets: the sequence at time t+1, i.e. the list of target character indices
# hprev:   hidden-layer neuron activations at time t-1
def lossFun(inputs, targets, hprev):
    xs, hs, ys, ps = {}, {}, {}, {}
    hs[-1] = np.copy(hprev)
    loss = 0
    # forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))  # one-hot encode the input: 0 means the character is inactive
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Input_Hidden, xs[t]) + np.dot(Hidden_Hidden, hs[t-1]) + Hidden_Bias)  # hidden-layer activations of the RNN (matrix products)
        ys[t] = np.dot(Hidden_Output, hs[t]) + Output_Bias  # RNN output scores
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))       # normalize to probabilities (softmax)
        loss += -np.log(ps[t][targets[t], 0])               # softmax (cross-entropy) loss
    # backward pass
    dInput_Hidden, dHidden_Hidden, dHidden_Output = np.zeros_like(Input_Hidden), np.zeros_like(Hidden_Hidden), np.zeros_like(Hidden_Output)
    dHidden_Bias, dOutput_Bias = np.zeros_like(Hidden_Bias), np.zeros_like(Output_Bias)
    dhnext = np.zeros_like(hs[0])
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1  # backprop into y
        dHidden_Output += np.dot(dy, hs[t].T)
        dOutput_Bias += dy
        dh = np.dot(Hidden_Output.T, dy) + dhnext  # backprop into h
        dhraw = (1 - hs[t] * hs[t]) * dh           # backprop through tanh nonlinearity
        dHidden_Bias += dhraw
        dInput_Hidden += np.dot(dhraw, xs[t].T)
        dHidden_Hidden += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Hidden_Hidden.T, dhraw)
    for dparam in [dInput_Hidden, dHidden_Hidden, dHidden_Output, dHidden_Bias, dOutput_Bias]:
        np.clip(dparam, -5, 5, out=dparam)  # clip to mitigate exploding gradients
    # the source snippet is cut off mid-return; the remaining gradients and the last
    # hidden state (needed as hprev for the next call) are reconstructed from the code above
    return loss, dInput_Hidden, dHidden_Hidden, dHidden_Output, dHidden_Bias, dOutput_Bias, hs[len(inputs)-1]
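
The snippet ends with lossFun, so the code that actually trains the network is missing from the file. The following is a minimal sketch of how lossFun could be driven; it is not part of the original file: the sample helper and the Adagrad update rule are assumptions, borrowed from the standard min-char-rnn pattern this code follows.

# --- minimal sketch (not in the original file): sampling and an Adagrad training loop ---
def sample(h, seed_ix, n):
    # sample n character indices from the model, starting from seed_ix with hidden state h
    x = np.zeros((vocab_size, 1))
    x[seed_ix] = 1
    ixes = []
    for _ in range(n):
        h = np.tanh(np.dot(Input_Hidden, x) + np.dot(Hidden_Hidden, h) + Hidden_Bias)
        y = np.dot(Hidden_Output, h) + Output_Bias
        prob = np.exp(y) / np.sum(np.exp(y))
        ix = np.random.choice(range(vocab_size), p=prob.ravel())
        x = np.zeros((vocab_size, 1))
        x[ix] = 1
        ixes.append(ix)
    return ixes

n, p = 0, 0  # iteration counter and data pointer
# Adagrad memory variables (assumed update rule, not confirmed by the truncated source)
mIH, mHH, mHO = np.zeros_like(Input_Hidden), np.zeros_like(Hidden_Hidden), np.zeros_like(Hidden_Output)
mHB, mOB = np.zeros_like(Hidden_Bias), np.zeros_like(Output_Bias)
smooth_loss = -np.log(1.0 / vocab_size) * seq_length  # loss at iteration 0
while n <= 10000:
    # reset the RNN state and data pointer at the start, and when the data runs out
    if p + seq_length + 1 >= len(dataSet) or n == 0:
        hprev = np.zeros((hiddenSize, 1))
        p = 0
    inputs = [char_to_ix[ch] for ch in dataSet[p:p + seq_length]]
    targets = [char_to_ix[ch] for ch in dataSet[p + 1:p + seq_length + 1]]
    loss, dIH, dHH, dHO, dHB, dOB, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001
    if n % 1000 == 0:
        txt = ''.join(ix_to_char[ix] for ix in sample(hprev, inputs[0], 200))
        print('iter %d, loss: %f\n%s' % (n, smooth_loss, txt))
    # Adagrad parameter update
    for param, dparam, mem in zip([Input_Hidden, Hidden_Hidden, Hidden_Output, Hidden_Bias, Output_Bias],
                                  [dIH, dHH, dHO, dHB, dOB],
                                  [mIH, mHH, mHO, mHB, mOB]):
        mem += dparam * dparam
        param += -learning_rate * dparam / np.sqrt(mem + 1e-8)
    p += seq_length
    n += 1

Because lossFun reads the weight matrices as module-level globals, the in-place += updates above modify the same arrays the next forward pass will use.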