• 大小: 11.61MB
• 文件类型: .rar
• 金币: 1
• 下载: 0 次
• 发布日期: 2023-07-29
• 语言: 其他
• 标签: rnn, 深度学习

资源简介

用LSTM实现机器翻译,有教程,有任务,非常适合学习。

资源截图

代码片段和文件信息

import os
import pickle
import copy
import numpy as np


# Reserved vocabulary ids that every lookup table starts with.
# NOTE(review): the token spellings were lost when this snippet was extracted
# (all four keys had collapsed to the same empty string, which would leave a
# single entry in the dict). '<PAD>', '<EOS>', '<UNK>' and '<GO>' are the
# conventional names for ids 0-3 -- confirm against the training code.
CODES = {'<PAD>': 0, '<EOS>': 1, '<UNK>': 2, '<GO>': 3}


def load_data(path):
    """
    Read a whole text file and return its contents as one string.

    :param path: Path of the file to read.
    :return: The decoded file contents.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def preprocess_and_save_data(source_path, target_path, text_to_ids):
    """
    Preprocess the source/target text files and pickle the result.

    Both files are read with ``load_data``, lower-cased, and given their own
    vocabulary lookup tables via ``create_lookup_tables``. The caller-supplied
    ``text_to_ids`` then converts the two texts, and everything is written to
    ``preprocess.p`` in the current working directory.

    :param source_path: Path of the source-language text file.
    :param target_path: Path of the target-language text file.
    :param text_to_ids: Callable invoked as
        ``text_to_ids(source_text, target_text, source_vocab_to_int,
        target_vocab_to_int)``; it must return a ``(source, target)`` pair.
    """
    # Preprocess
    source_text = load_data(source_path).lower()
    target_text = load_data(target_path).lower()

    source_vocab_to_int, source_int_to_vocab = create_lookup_tables(source_text)
    target_vocab_to_int, target_int_to_vocab = create_lookup_tables(target_text)

    source_text, target_text = text_to_ids(
        source_text, target_text, source_vocab_to_int, target_vocab_to_int)

    # Save Data: one 3-tuple of (texts, vocab_to_int tables, int_to_vocab
    # tables), each itself a (source, target) pair.
    with open('preprocess.p', 'wb') as out_file:
        pickle.dump((
            (source_text, target_text),
            (source_vocab_to_int, target_vocab_to_int),
            (source_int_to_vocab, target_int_to_vocab)), out_file)


def load_preprocess():
    """
    Load the preprocessed data previously pickled to ``preprocess.p``.

    :return: Whatever object was pickled into ``preprocess.p`` (read from the
        current working directory).
    """
    # NOTE: unpickling can execute arbitrary code; only load a file that this
    # project produced itself.
    with open('preprocess.p', mode='rb') as in_file:
        return pickle.load(in_file)


def create_lookup_tables(text):
    """
    Create lookup tables for vocabulary.

    The reserved entries in ``CODES`` keep their ids; every distinct
    whitespace-separated word in ``text`` gets the next free id after them.

    :param text: Text whose words form the vocabulary.
    :return: Tuple ``(vocab_to_int, int_to_vocab)`` of two dicts that are
        inverse mappings of each other.
    """
    # Work on a copy so the module-level CODES table is never mutated.
    vocab_to_int = copy.copy(CODES)

    # Sort the vocabulary: set iteration order changes between interpreter
    # runs, which would otherwise assign different ids each time.
    vocab = sorted(set(text.split()))
    for v_i, v in enumerate(vocab, len(CODES)):
        vocab_to_int[v] = v_i

    int_to_vocab = {v_i: v for v, v_i in vocab_to_int.items()}

    return vocab_to_int, int_to_vocab


def save_params(params):
    """
    Save parameters to file.

    :param params: Any picklable object; it is written to ``params.p`` in the
        current working directory, replacing an existing file.
    """
    with open('params.p', 'wb') as out_file:
        pickle.dump(params, out_file)


def load_params():
    """
    Load parameters from file.

    :return: The object stored in ``params.p`` (read from the current working
        directory).
    """
    # NOTE: unpickling can execute arbitrary code; only load a file that this
    # project produced itself.
    with open('params.p', mode='rb') as in_file:
        return pickle.load(in_file)


def batch_data(source, target, batch_size):
    """
    Batch source and target together.

    Yields one ``(source_batch, target_batch)`` pair of NumPy arrays per full
    batch; each side is padded independently with ``pad_sentence_batch``.
    Only ``len(source) // batch_size`` batches are produced, so trailing
    sentences that do not fill a whole batch are skipped.

    :param source: Sequence of source sentences (each a list of ids).
    :param target: Sequence of target sentences, sliced with the same indices
        as ``source``.
    :param batch_size: Number of sentences per batch.
    """
    for batch_i in range(len(source) // batch_size):
        start_i = batch_i * batch_size
        source_batch = source[start_i:start_i + batch_size]
        target_batch = target[start_i:start_i + batch_size]
        yield (np.array(pad_sentence_batch(source_batch)),
               np.array(pad_sentence_batch(target_batch)))


def pad_sentence_batch(sentence_batch):
    """
    Pad every sentence with the <PAD> id to the longest length in the batch.

    :param sentence_batch: Sequence of sentences, each a list of ids.
    :return: New list of lists, all as long as the longest input sentence.
        The input lists are not modified. An empty batch yields ``[]``.
    """
    # default=0 keeps an empty batch from raising inside max().
    max_sentence = max((len(sentence) for sentence in sentence_batch), default=0)
    pad_id = CODES['<PAD>']
    return [sentence + [pad_id] * (max_sentence - len(sentence))
            for sentence in sentence_batch]

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        279  2018-05-19 14:21  dlnd_language_translation\.git\config

     文件         73  2018-05-19 14:20  dlnd_language_translation\.git\description

     文件         23  2018-05-19 14:21  dlnd_language_translation\.git\HEAD

     文件        478  2018-05-19 14:20  dlnd_language_translation\.git\hooks\applypatch-msg.sample

     文件        896  2018-05-19 14:20  dlnd_language_translation\.git\hooks\commit-msg.sample

     文件        189  2018-05-19 14:20  dlnd_language_translation\.git\hooks\post-update.sample

     文件        424  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-applypatch.sample

     文件       1642  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-commit.sample

     文件       1348  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-push.sample

     文件       4898  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-rebase.sample

     文件        544  2018-05-19 14:20  dlnd_language_translation\.git\hooks\pre-receive.sample

     文件       1239  2018-05-19 14:20  dlnd_language_translation\.git\hooks\prepare-commit-msg.sample

     文件       3610  2018-05-19 14:20  dlnd_language_translation\.git\hooks\update.sample

     文件        963  2018-05-19 14:21  dlnd_language_translation\.git\index

     文件        240  2018-05-19 14:20  dlnd_language_translation\.git\info\exclude

     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\HEAD

     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\refs\heads\master

     文件        194  2018-05-19 14:21  dlnd_language_translation\.git\logs\refs\remotes\origin\HEAD

     文件        118  2018-05-19 14:20  dlnd_language_translation\.git\objects\00\96fa1ce6019d85f7bdd820b3e9cb5af4027af1

     文件        648  2018-05-19 14:20  dlnd_language_translation\.git\objects\18\62e57475ebf4975a48badc737d0327b027f26c

     文件        277  2018-05-19 14:20  dlnd_language_translation\.git\objects\3f\e19635d7e75f9e303a11a8a369b7934a15e890

     文件        183  2018-05-19 14:20  dlnd_language_translation\.git\objects\45\14299be234da67d006c71079fb6d7b7143f852

     文件        185  2018-05-19 14:20  dlnd_language_translation\.git\objects\4a\613afc8805acb9cb56563e6ae63effc0f44893

     文件    1935397  2018-05-19 14:20  dlnd_language_translation\.git\objects\4d\4943e4c7bb1f11f85e9a98c368ff471b948268

     文件        802  2018-05-19 14:20  dlnd_language_translation\.git\objects\4d\eb96aeae29df9a733e9b52faccf585be77d55c

     文件        986  2018-05-19 14:21  dlnd_language_translation\.git\objects\4e\c7c43fd617d664cf4f2ecae84d68b0f2a5a886

     文件        246  2018-05-19 14:21  dlnd_language_translation\.git\objects\56\47a2bf69cbf73e7d658c11339de5ed83126ffb

     文件      19417  2018-05-19 14:21  dlnd_language_translation\.git\objects\70\09b8a1fe0bca4a1e20c945e1a6337fe02a3c8a

     文件         54  2018-05-19 14:20  dlnd_language_translation\.git\objects\7c\19400cb7ceece89ba9d15e6854871a4056ffd9

     文件        105  2018-05-19 14:21  dlnd_language_translation\.git\objects\81\cc754df1ea93962a7030b98895538db27a7091

............此处省略73个文件信息

评论

共有 条评论