"""Definition of various recurrent neural network layers."""
from __future__ import print_function

from ... import ndarray
from ..nn import Block
from . import rnn_cell


class _RNNLayer(Block):
    """Implementation of recurrent layers."""
    def __init__(self, hidden_size, num_layers, layout,
                 dropout, bidirectional, input_size,
                 i2h_weight_initializer, h2h_weight_initializer,
                 i2h_bias_initializer, h2h_bias_initializer,
                 mode, **kwargs):
        super(_RNNLayer, self).__init__(**kwargs)
        assert layout == 'TNC' or layout == 'NTC', \
            "Invalid layout %s; must be one of ['TNC' or 'NTC']"%layout
        self._hidden_size = hidden_size
        self._num_layers = num_layers
        self._mode = mode
        self._layout = layout
        self._dropout = dropout
        self._dir = 2 if bidirectional else 1
        self._input_size = input_size
        self._i2h_weight_initializer = i2h_weight_initializer
        self._h2h_weight_initializer = h2h_weight_initializer
        self._i2h_bias_initializer = i2h_bias_initializer
        self._h2h_bias_initializer = h2h_bias_initializer

        # number of gates per cell, determined by the RNN mode
        self._gates = {'rnn_relu': 1, 'rnn_tanh': 1, 'lstm': 4, 'gru': 3}[mode]

        self.i2h_weight = []
        self.h2h_weight = []
        self.i2h_bias = []
        self.h2h_bias = []

        ng, ni, nh = self._gates, input_size, hidden_size
        for i in range(num_layers):
            for j in (['l', 'r'] if self._dir == 2 else ['l']):
                self.i2h_weight.append(
                    self.params.get('%s%d_i2h_weight'%(j, i), shape=(ng*nh, ni),
                                    init=i2h_weight_initializer,
                                    allow_deferred_init=True))
                self.h2h_weight.append(
                    self.params.get('%s%d_h2h_weight'%(j, i), shape=(ng*nh, nh),
                                    init=h2h_weight_initializer,
                                    allow_deferred_init=True))
                self.i2h_bias.append(
                    self.params.get('%s%d_i2h_bias'%(j, i), shape=(ng*nh,),
                                    init=i2h_bias_initializer,
                                    allow_deferred_init=True))
                self.h2h_bias.append(
                    self.params.get('%s%d_h2h_bias'%(j, i), shape=(ng*nh,),
                                    init=h2h_bias_initializer,
                                    allow_deferred_init=True))
            ni = nh * self._dir

        self._unfused = self._unfuse()

    def __repr__(self):
        s = '{name}({mapping}, {_layout}'
        if self._num_layers != 1:
            s += ', num_layers={_num_layers}'
        if self._dropout != 0:
            s += ', dropout={_dropout}'
        if self._dir == 2:
            s += ', bidirectional'
        s += ')'
        mapping = ('{_input_size} -> {_hidden_size}'.format(**self.__dict__)
                   if self._input_size else self._hidden_size)
        return s.format(name=self.__class__.__name__,
                        mapping=mapping,
                        **self.__dict__)

    def state_info(self, batch_size=0):
        raise NotImplementedError

    def _unfuse(self):
        """Unfuses the fused RNN into a stack of rnn cells."""
        get_cell = {'rnn_relu': lambda **kwargs: rnn_cell.RNNCell(self._hidden_size,
                                                                  activation='relu',
                                                                  **kwargs),
                    'rnn_tanh': lambda **kwargs: rnn_cell.RNNCell(self._hidden_size,
                                                                  activation='tanh',
                                                                  **kwargs),
                    'lstm': lambda **kwargs: rnn_cell.LSTMCell(self._hidden_size,
                                                               **kwargs),
                    'gru': lambda **kwargs: rnn_cell.GRUCell(self._hidden_size,
                                                             **kwargs)}[self._mode]

        stack = rnn_cell.SequentialRNNCell(prefix=self.prefix, params=self.params)
        with stack.name_scope():
            ni = self._input_size
            for i in range(self._num_layers):
                kwargs = {'input_size': ni,
                          'i2h_weight_initializer': self._i2h_weight_initializer,
                          'h2h_weight_initializer': self._h2h_weight_initializer,
                          'i2h_bias_initializer': self._i2h_bias_initializer,
                          'h2h_bias_initializer': self._h2h_bias_initializer}
                if self._dir == 2:
                    stack.add(rnn_cell.BidirectionalCell(
                        get_cell(prefix='l%d_'%i, **kwargs),
                        get_cell(prefix='r%d_'%i, **kwargs)))
                else:
                    stack.add(get_cell(prefix='l%d_'%i, **kwargs))

                if self._dropout > 0 and i != self._num_layers - 1:
                    stack.add(rnn_cell.DropoutCell(self._dropout))

                ni = self._hidden_size * self._dir

        return stack

    def begin_state(self, batch_size=0, func=ndarray.zeros, **kwargs):
        """Initial state for this cell.

        Parameters
        ----------
        batch_size: int
            Only required for the `NDArray` API. Size of the batch
            ('N' in layout) dimension of the input.
        func : callable, default `ndarray.zeros`
            Function for creating initial state.

            For the Symbol API, func can be `symbol.zeros`, `symbol.uniform`,
            `symbol.var`, etc. Use `symbol.var` if you want to directly
            feed the input as states.

            For the NDArray API, func can be `ndarray.zeros`, `ndarray.ones`, etc.
        **kwargs :
            Additional keyword arguments passed to func. For example
            `mean`, `std`, `dtype`, etc.

        Returns
        -------
        states : nested list of Symbol
            Starting states for the first RNN step.
        """
        states = []
        for i, info in enumerate(self.state_info(batch_size)):
            if info is not None:
                info.update(kwargs)
            else:
                info = kwargs
            states.append(func(name='%sh0_%d'%(self.prefix, i), **info))
        return states

    def forward(self, inputs, states=None):
        batch_size = inputs.shape[self._layout.find('N')]
        skip_states = states is None
        if skip_states:
            states = self.begin_state(batch_size)
        if isinstance(states, ndarray.NDArray):
            states = [states]
        for state, info in zip(states, self.state_info(batch_size)):
            if state.shape != info['shape']:
                raise ValueError(
                    "Invalid recurrent state shape. Expecting %s, got %s."%(
                        str(info['shape']), str(state.shape)))
        if self._input_size == 0:
            # input size was deferred; infer it from the input and finish
            # initializing the first layer's input-to-hidden weights
            for i in range(self._dir):
                self.i2h_weight[i].shape = (self._gates*self._hidden_size,
                                            inputs.shape[2])
                self.i2h_weight[i]._finish_deferred_init()
        if inputs.context.device_type == 'gpu':
            out = self._forward_gpu(inputs, states)
        else:
            out = self._forward_cpu(inputs, states)

        # out is (output, state); drop the state if none was provided
        return out[0] if skip_states else out

    def _forward_cpu(self, inputs, states):
        ns = len(states)
        axis = self._layout.find('T')
        states = sum(zip(*((j for j in i) for i in states)), ())
        outputs, states = self._unfused.unroll(
            inputs.shape[axis], inputs, states,
            layout=self._layout, merge_outputs=True)
        new_states = []
        for i in range(ns):
            state = ndarray.concat(*(j.reshape((1,)+j.shape) for j in states[i::ns]),
                                   dim=0)
            new_states.append(state)

        return outputs, new_states

    def _forward_gpu(self, inputs, states):
        if self._layout == 'NTC':
            inputs = ndarray.swapaxes(inputs, dim1=0, dim2=1)
        ctx = inputs.context
        params = sum(zip(self.i2h_weight, self.h2h_weight), ())
        params += sum(zip(self.i2h_bias, self.h2h_bias), ())
        params = (i.data(ctx).reshape((-1,)) for i in params)
        params = ndarray.concat(*params, dim=0)

        rnn = ndarray.RNN(inputs, params, *states, state_size=self._hidden_size,
                          num_layers=self._num_layers, bidirectional=self._dir == 2,
                          p=self._dropout, state_outputs=True, mode=self._mode)

        if self._mode == 'lstm':
            outputs, states = rnn[0], [rnn[1], rnn[2]]
        else:
            outputs, states = rnn[0], [rnn[1]]

        if self._layout == 'NTC':
            outputs = ndarray.swapaxes(outputs, dim1=0, dim2=1)

        return outputs, states


class RNN(_RNNLayer):
    r"""Applies a multi-layer Elman RNN with `tanh` or `ReLU` non-linearity
    to an input sequence.

    For each element in the input sequence, each layer computes the following
    function:

    .. math::
        h_t = \tanh(w_{ih} * x_t + b_{ih} + w_{hh} * h_{(t-1)} + b_{hh})

    where :math:`h_t` is the hidden state at time `t`, and :math:`x_t` is the hidden
    state of the previous layer at time `t` or :math:`input_t` for the first layer.
    If `activation` is 'relu', then `ReLU` is used instead of `tanh`.

    Parameters
    ----------
    hidden_size: int
        The number of features in the hidden state h.
    num_layers: int, default 1
        Number of recurrent layers.
    activation: {'relu' or 'tanh'}, default 'relu'
        The activation function to use.
    layout : str, default 'TNC'
        The format of input and output tensors. T, N and C stand for
        sequence length, batch size, and feature dimensions respectively.
    dropout: float, default 0
        If non-zero, introduces a dropout layer on the outputs of each
        RNN layer except the last layer.
    bidirectional: bool, default False
        If `True`, becomes a bidirectional RNN.
    i2h_weight_initializer : str or Initializer
        Initializer for the input weights matrix, used for the linear
        transformation of the inputs.
    h2h_weight_initializer : str or Initializer
        Initializer for the recurrent weights matrix, used for the linear
        transformation of the recurrent state.
    i2h_bias_initializer : str or Initializer
        Initializer for the bias vector.
    h2h_bias_initializer : str or Initializer
        Initializer for the bias vector.
    input_size: int, default 0
        The number of expected features in the input x.
        If not specified, it will be inferred from input.
    prefix : str or None
        Prefix of this `Block`.
    params : ParameterDict or None
        Shared Parameters for this `Block`.


    Input shapes:
        The input shape depends on `layout`.
        For `layout='TNC'`, the input has shape `(sequence_length, batch_size, input_size)`

    Output shape:
        The output shape depends on `layout`. For `layout='TNC'`, the
        output has shape `(sequence_length, batch_size, num_hidden)`.
        If `bidirectional` is True, output shape will instead be
        `(sequence_length, batch_size, 2*num_hidden)`

    Recurrent state:
        The recurrent state is an NDArray with shape `(num_layers, batch_size, num_hidden)`.
        If `bidirectional` is True, the recurrent state shape will instead be
        `(2*num_layers, batch_size, num_hidden)`
        If input recurrent state is None, zeros are used as default begin states,
        and the output recurrent state is omitted.


    Examples
    --------
    >>> layer = mx.gluon.rnn.RNN(100, 3)
    >>> layer.initialize()
    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
    >>> # by default zeros are used as begin state
    >>> output = layer(input)
    >>> # manually specify begin state.
    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
    >>> output, hn = layer(input, h0)
    """
    def __init__(self, hidden_size, num_layers=1, activation='relu',
                 layout='TNC', dropout=0, bidirectional=False,
                 i2h_weight_initializer=None, h2h_weight_initializer=None,
                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                 input_size=0, **kwargs):
        super(RNN, self).__init__(hidden_size, num_layers, layout,
                                  dropout, bidirectional, input_size,
                                  i2h_weight_initializer, h2h_weight_initializer,
                                  i2h_bias_initializer, h2h_bias_initializer,
                                  'rnn_'+activation, **kwargs)

    def state_info(self, batch_size=0):
        return [{'shape': (self._num_layers * self._dir, batch_size, self._hidden_size),
                 '__layout__': 'LNC'}]


class LSTM(_RNNLayer):
    r"""Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.

    For each element in the input sequence, each layer computes the following
    function:

    .. math::
        \begin{array}{ll}
        i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
        f_t = sigmoid(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\
        g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\
        o_t = sigmoid(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\
        c_t = f_t * c_{(t-1)} + i_t * g_t \\
        h_t = o_t * \tanh(c_t)
        \end{array}

    where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
    cell state at time `t`, :math:`x_t` is the hidden state of the previous
    layer at time `t` or :math:`input_t` for the first layer, and :math:`i_t`,
    :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, and
    out gates, respectively.

    Parameters
    ----------
    hidden_size: int
        The number of features in the hidden state h.
    num_layers: int, default 1
        Number of recurrent layers.
    layout : str, default 'TNC'
        The format of input and output tensors. T, N and C stand for
        sequence length, batch size, and feature dimensions respectively.
    dropout: float, default 0
        If non-zero, introduces a dropout layer on the outputs of each
        RNN layer except the last layer.
    bidirectional: bool, default False
        If `True`, becomes a bidirectional RNN.
    i2h_weight_initializer : str or Initializer
        Initializer for the input weights matrix, used for the linear
        transformation of the inputs.
    h2h_weight_initializer : str or Initializer
        Initializer for the recurrent weights matrix, used for the linear
        transformation of the recurrent state.
    i2h_bias_initializer : str or Initializer, default 'lstmbias'
        Initializer for the bias vector. By default, bias for the forget
        gate is initialized to 1 while all other biases are initialized to zero.
    h2h_bias_initializer : str or Initializer
        Initializer for the bias vector.
    input_size: int, default 0
        The number of expected features in the input x.
        If not specified, it will be inferred from input.
    prefix : str or None
        Prefix of this `Block`.
    params : `ParameterDict` or `None`
        Shared Parameters for this `Block`.


    Input shapes:
        The input shape depends on `layout`.
        For `layout='TNC'`, the input has shape `(sequence_length, batch_size, input_size)`

    Output shape:
        The output shape depends on `layout`. For `layout='TNC'`, the
        output has shape `(sequence_length, batch_size, num_hidden)`.
        If `bidirectional` is True, output shape will instead be
        `(sequence_length, batch_size, 2*num_hidden)`

    Recurrent state:
        The recurrent state is a list of two NDArrays. Both have shape
        `(num_layers, batch_size, num_hidden)`.
        If `bidirectional` is True, each recurrent state will instead have shape
        `(2*num_layers, batch_size, num_hidden)`.
        If input recurrent state is None, zeros are used as default begin states,
        and the output recurrent state is omitted.


    Examples
    --------
    >>> layer = mx.gluon.rnn.LSTM(100, 3)
    >>> layer.initialize()
    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
    >>> # by default zeros are used as begin state
    >>> output = layer(input)
    >>> # manually specify begin state.
    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
    >>> c0 = mx.nd.random_uniform(shape=(3, 3, 100))
    >>> output, hn = layer(input, [h0, c0])
    """
    def __init__(self, hidden_size, num_layers=1, layout='TNC',
                 dropout=0, bidirectional=False, input_size=0,
                 i2h_weight_initializer=None, h2h_weight_initializer=None,
                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                 **kwargs):
        super(LSTM, self).__init__(hidden_size, num_layers, layout,
                                   dropout, bidirectional, input_size,
                                   i2h_weight_initializer, h2h_weight_initializer,
                                   i2h_bias_initializer, h2h_bias_initializer,
                                   'lstm', **kwargs)

    def state_info(self, batch_size=0):
        return [{'shape': (self._num_layers * self._dir, batch_size, self._hidden_size),
                 '__layout__': 'LNC'},
                {'shape': (self._num_layers * self._dir, batch_size, self._hidden_size),
                 '__layout__': 'LNC'}]


class GRU(_RNNLayer):
    r"""Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.

    For each element in the input sequence, each layer computes the following
    function:

    .. math::
        \begin{array}{ll}
        r_t = sigmoid(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\
        i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\
        n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)} + b_{hn})) \\
        h_t = (1 - i_t) * n_t + i_t * h_{(t-1)} \\
        \end{array}

    where :math:`h_t` is the hidden state at time `t`, :math:`x_t` is the
    hidden state of the previous layer at time `t` or :math:`input_t` for
    the first layer, and :math:`r_t`, :math:`i_t`, :math:`n_t` are the
    reset, input, and new gates, respectively.

    Parameters
    ----------
    hidden_size: int
        The number of features in the hidden state h.
    num_layers: int, default 1
        Number of recurrent layers.
    layout : str, default 'TNC'
        The format of input and output tensors. T, N and C stand for
        sequence length, batch size, and feature dimensions respectively.
    dropout: float, default 0
        If non-zero, introduces a dropout layer on the outputs of each
        RNN layer except the last layer.
    bidirectional: bool, default False
        If True, becomes a bidirectional RNN.
    i2h_weight_initializer : str or Initializer
        Initializer for the input weights matrix, used for the linear
        transformation of the inputs.
    h2h_weight_initializer : str or Initializer
        Initializer for the recurrent weights matrix, used for the linear
        transformation of the recurrent state.
    i2h_bias_initializer : str or Initializer
        Initializer for the bias vector.
    h2h_bias_initializer : str or Initializer
        Initializer for the bias vector.
    input_size: int, default 0
        The number of expected features in the input x.
        If not specified, it will be inferred from input.
    prefix : str or None
        Prefix of this `Block`.
    params : ParameterDict or None
        Shared Parameters for this `Block`.


    Input shapes:
        The input shape depends on `layout`. For `layout='TNC'`, the
        input has shape `(sequence_length, batch_size, input_size)`

    Output shape:
        The output shape depends on `layout`. For `layout='TNC'`, the
        output has shape `(sequence_length, batch_size, num_hidden)`.
        If `bidirectional` is True, output shape will instead be
        `(sequence_length, batch_size, 2*num_hidden)`

    Recurrent state:
        The recurrent state is an NDArray with shape `(num_layers, batch_size, num_hidden)`.
        If `bidirectional` is True, the recurrent state shape will instead be
        `(2*num_layers, batch_size, num_hidden)`
        If input recurrent state is None, zeros are used as default begin states,
        and the output recurrent state is omitted.


    Examples
    --------
    >>> layer = mx.gluon.rnn.GRU(100, 3)
    >>> layer.initialize()
    >>> input = mx.nd.random_uniform(shape=(5, 3, 10))
    >>> # by default zeros are used as begin state
    >>> output = layer(input)
    >>> # manually specify begin state.
    >>> h0 = mx.nd.random_uniform(shape=(3, 3, 100))
    >>> output, hn = layer(input, h0)
    """
    def __init__(self, hidden_size, num_layers=1, layout='TNC',
                 dropout=0, bidirectional=False, input_size=0,
                 i2h_weight_initializer=None, h2h_weight_initializer=None,
                 i2h_bias_initializer='zeros', h2h_bias_initializer='zeros',
                 **kwargs):
        super(GRU, self).__init__(hidden_size, num_layers, layout,
                                  dropout, bidirectional, input_size,
                                  i2h_weight_initializer, h2h_weight_initializer,
                                  i2h_bias_initializer, h2h_bias_initializer,
                                  'gru', **kwargs)

    def state_info(self, batch_size=0):
        return [{'shape': (self._num_layers * self._dir, batch_size, self._hidden_size),
                 '__layout__': 'LNC'}]
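

# A minimal usage sketch, added for illustration (not part of the library API);
# it assumes a working `mxnet` installation. It shows the batch-major 'NTC'
# layout and the recurrent-state shapes of a bidirectional LSTM, which the
# docstring examples above do not cover.
if __name__ == '__main__':
    import mxnet as mx

    # Batch-major inputs: layout 'NTC' -> (batch_size, sequence_length, input_size)
    layer = mx.gluon.rnn.LSTM(100, num_layers=2, layout='NTC', bidirectional=True)
    layer.initialize()
    inputs = mx.nd.random_uniform(shape=(3, 5, 10))

    # For a bidirectional LSTM the recurrent state is [h0, c0], each of shape
    # (2*num_layers, batch_size, num_hidden).
    h0 = mx.nd.zeros((4, 3, 100))
    c0 = mx.nd.zeros((4, 3, 100))
    outputs, states = layer(inputs, [h0, c0])
    print(outputs.shape)               # (3, 5, 200) -- 2*num_hidden when bidirectional
    print([s.shape for s in states])   # [(4, 3, 100), (4, 3, 100)]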