资源简介
用 RNN 实现的一个字符级语言模型,可以自主生成歌词(训练语料为从网上收集的周杰伦歌词,前期预处理较粗糙)。也可以换用诗歌等其他语料训练,从而生成相应风格的文本。
代码片段和文件信息
# -*- coding: utf-8 -*-
"""
Created on Fri May 18 11:03:21 2018
@author: Administrator

Character-level RNN language model: loads a lyrics corpus (Jay Chou
lyrics) and builds the character <-> index vocabulary used for training.
"""
import random

from mxnet import nd

# Load the raw lyrics corpus.
# NOTE(review): no explicit encoding is given; on Windows this uses the
# locale default — confirm the file's encoding (likely UTF-8 or GBK).
with open(r'G:\研一\t.txt') as f:
    corpus_chars = f.read()
print(len(corpus_chars))

# Flatten line breaks to spaces and cap the corpus at 20k characters
# to keep training manageable.
corpus_chars = corpus_chars.replace('\n', ' ').replace('\r', ' ')
corpus_chars = corpus_chars[0:20000]

# Vocabulary: index -> char list and char -> index map.
idx_to_char = list(set(corpus_chars))
char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
vocab_size = len(char_to_idx)
print('vocab size:', vocab_size)

# Encode the whole corpus as integer indices.
corpus_indices = [char_to_idx[char] for char in corpus_chars]
sample = corpus_indices[:40]
def data_iter_random(corpus_indices, batch_size, num_steps, ctx=None):
    """Yield (data, label) minibatches sampled at random offsets.

    Parameters
    ----------
    corpus_indices : sequence of int
        The corpus encoded as vocabulary indices.
    batch_size : int
        Number of examples per batch.
    num_steps : int
        Time steps (characters) per example.
    ctx : mx.Context, optional
        Device on which to allocate the NDArrays.

    Yields
    ------
    (data, label) : NDArray pairs of shape (batch_size, num_steps);
        label is data shifted one position right, i.e. label[t] is the
        character that follows data[t].
    """
    # Subtract one because each label index is its data index plus one.
    num_examples = (len(corpus_indices) - 1) // num_steps
    epoch_size = num_examples // batch_size
    # Shuffle example order so every epoch sees batches in random order.
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)

    def _data(pos):
        # num_steps consecutive indices starting at pos.
        return corpus_indices[pos: pos + num_steps]

    for batch in range(epoch_size):
        # Pick batch_size random examples for this step (avoid reusing
        # the loop variable for the offset, which the original did).
        start = batch * batch_size
        batch_indices = example_indices[start: start + batch_size]
        data = nd.array(
            [_data(j * num_steps) for j in batch_indices], ctx=ctx)
        label = nd.array(
            [_data(j * num_steps + 1) for j in batch_indices], ctx=ctx)
        yield data, label
#my_seq = list(range(30))
#for data label in data_iter_random(my_seq batch_size=2 num_steps=3):
#print(‘data: ‘ data ‘\nlabel:‘ label ‘\n‘)
def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None):
    """Yield (data, label) minibatches that are consecutive in the corpus.

    Unlike ``data_iter_random``, successive batches continue where the
    previous one left off, so a hidden state carried across batches stays
    meaningful.

    Parameters
    ----------
    corpus_indices : sequence of int
        The corpus encoded as vocabulary indices.
    batch_size : int
        Number of parallel sequences per batch.
    num_steps : int
        Time steps (characters) per example.
    ctx : mx.Context, optional
        Device on which to allocate the NDArrays.

    Yields
    ------
    (data, label) : NDArray pairs of shape (batch_size, num_steps);
        label is data shifted one position to the right.
    """
    corpus_indices = nd.array(corpus_indices, ctx=ctx)
    data_len = len(corpus_indices)
    # Split the corpus into batch_size parallel streams.
    batch_len = data_len // batch_size
    indices = corpus_indices[0: batch_size * batch_len].reshape((
        batch_size, batch_len))
    # Subtract one because each label index is its data index plus one.
    epoch_size = (batch_len - 1) // num_steps
    for step in range(epoch_size):
        offset = step * num_steps
        data = indices[:, offset: offset + num_steps]
        label = indices[:, offset + 1: offset + num_steps + 1]
        yield data, label
#my_seq = list(range(30))
#for data label in data_iter_consecutive(my_seq batch_size=2 num_steps=3):
#print(‘data: ‘ data ‘\nlabel:‘ label ‘\n‘)
# Quick sanity check of one-hot encoding over the vocabulary.
nd.one_hot(nd.array([0, 2]), vocab_size)


def get_inputs(data):
    """One-hot encode a (batch_size, num_steps) integer batch.

    Returns a list of num_steps NDArrays, each of shape
    (batch_size, vocab_size) — one matrix per time step (data.T iterates
    over the time dimension).
    """
    return [nd.one_hot(X, vocab_size) for X in data.T]


# Demo: fetch one real batch so `data` is defined (the original relied on
# a commented-out loop and would raise NameError here).
data, label = next(data_iter_consecutive(corpus_indices, batch_size=2,
                                         num_steps=3))
inputs = get_inputs(data)
print('input length: ', len(inputs))
print('input[0] shape: ', inputs[0].shape)
import mxnet as mx
# Try to use a GPU, falling back to CPU if none is available.
import sys
sys.path.append('..')
import utilss  # project-local helpers; provides try_gpu()
ctx = utilss.try_gpu()
print('Will use', ctx)

# Model dimensions: input and output are one-hot over the vocabulary.
input_dim = vocab_size
# Hidden state size.
hidden_dim = 256
output_dim = vocab_size
# Std-dev for the normal initialization of the weights below.
std = .01
def get_params():
# 隐含层
W_xh = nd.random_normal(scale=std shape=(input_dim hidden_dim) ctx=ctx)
W_hh = nd
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
文件 10248 2018-05-18 22:13 555555.py
文件 167894 2018-05-18 14:10 t.txt
文件 16263 2018-05-18 10:05 utilss.py
评论
暂无评论