Resource Description

This archive accompanies the paper "Attention Is All You Need": it contains both TensorFlow and Keras implementations of the attention code, together with the paper itself. It is well worth the download, and everyone is welcome to study it together!

Code Snippet and File Information

#! -*- coding: utf-8 -*-

from keras import backend as K
from keras.engine.topology import Layer

class Position_embedding(Layer):
    
    def __init__(self, size=None, mode='sum', **kwargs):
        self.size = size  # must be an even number
        self.mode = mode
        super(Position_embedding, self).__init__(**kwargs)
        
    def call(self, x):
        if (self.size is None) or (self.mode == 'sum'):
            self.size = int(x.shape[-1])
        batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
        # frequencies 1 / 10000^(2j / size), as in the paper's sinusoidal encoding
        position_j = 1. / K.pow(10000.,
                                2 * K.arange(self.size / 2, dtype='float32') / self.size)
        position_j = K.expand_dims(position_j, 0)
        # K.arange does not support variable length, so positions are generated via cumsum
        position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
        position_i = K.expand_dims(position_i, 2)
        position_ij = K.dot(position_i, position_j)
        position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
        if self.mode == 'sum':
            return position_ij + x
        elif self.mode == 'concat':
            return K.concatenate([position_ij, x], 2)
        
    def compute_output_shape(self, input_shape):
        if self.mode == 'sum':
            return input_shape
        elif self.mode == 'concat':
            return (input_shape[0], input_shape[1], input_shape[2] + self.size)


class Attention(Layer):

    def __init__(self, nb_head, size_per_head, **kwargs):
        self.nb_head = nb_head
        self.size_per_head = size_per_head
        self.output_dim = nb_head * size_per_head
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.WQ = self.add_weight(name='WQ',
                                  shape=(input_shape[0][-1], self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
        self.WK = self.add_weight(name='WK',
                                  shape=(input_shape[1][-1], self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
        self.WV = self.add_weight(name='WV',
                                  shape=(input_shape[2][-1], self.output_dim),
                                  initializer='glorot_uniform',
                                  trainable=True)
        super(Attention, self).build(input_shape)
        
    def Mask(self, inputs, seq_len, mode='mul'):
        if seq_len is None:
            return inputs
        else:
            # build a 0/1 mask that blanks out positions beyond each sequence length
            mask = K.one_hot(seq_len[:, 0], K.shape(inputs)[1])
            mask = 1 - K.cumsum(mask, 1)
            for _ in range(len(inputs.shape) - 2):
                mask = K.expand_dims(mask, 2)
            if mode == 'mul':
                return inputs * mask
            if mode == 'add':
                return inputs - (1 - mask) * 1e12
                
    def call(self, x):
        # If only Q_seq, K_seq, V_seq are passed in, no mask is applied
        # If you also pass in Q_se
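
For reference, the snippet below is a minimal sketch of how the two layers above can be wired into a Keras model. The vocabulary size, embedding width, head count and classification head are illustrative assumptions, not part of the archive; only Position_embedding and Attention come from the attention_keras.py shown above.

# Minimal usage sketch (assumed hyper-parameters, not taken from the archive):
# embed token ids, add sinusoidal positions, run 8-head self-attention with
# 16 dimensions per head, pool over time, and classify.
from keras.models import Model
from keras.layers import Input, Embedding, Dense, GlobalAveragePooling1D

vocab_size = 20000  # assumed vocabulary size

S_inputs = Input(shape=(None,), dtype='int32')
embeddings = Embedding(vocab_size, 128)(S_inputs)
embeddings = Position_embedding()(embeddings)   # mode='sum' keeps the last dimension at 128
# self-attention: Q, K and V are all the same sequence
O_seq = Attention(8, 16)([embeddings, embeddings, embeddings])
O_seq = GlobalAveragePooling1D()(O_seq)
outputs = Dense(1, activation='sigmoid')(O_seq)

model = Model(inputs=S_inputs, outputs=outputs)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Because only Q_seq, K_seq and V_seq are passed here, no masking is applied, as the comment in call notes.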

 Attribute       Size  Date        Time   Name
-----------  --------  ----------  -----  ----
     .......     4578  2018-03-28  02:27  Attention is all you need\Attention is all you need\attention_keras.py
     .......     3598  2018-03-28  02:27  Attention is all you need\Attention is all you need\attention_tf.py
     .......      135  2018-03-28  02:27  Attention is all you need\Attention is all you need\README.md
        File  2201700  2018-08-05  16:12  Attention is all you need\Attention is all you need.pdf
   Directory        0  2018-03-28  02:27  Attention is all you need\Attention is all you need
   Directory        0  2018-08-12  21:22  Attention is all you need
-----------  --------  ----------  -----  ----
       Total  2210011                     6 entries

