v]c@sdZddlZddlZddlmZddlmZddlmZ ddl m Z dd l m Z mZmZmZdd l mZdd lmZmZdd lmZdd lmZddlmZmZmZdefdYZdS(svA `Module` implement the `BaseModule` API by wrapping a `Symbol` and one or more `Executor` for data parallelization. iNi(tcontext(t optimizer(tndarrayi(tDataParallelExecutorGroup(t_create_kvstoret_initialize_kvstoret_update_paramst_update_params_on_kvstore(tload_checkpoint(tUniformtInitDesc(tDataDesc(tzeros(t BaseModulet_check_input_namest_parse_data_desctModulec BseZdZd&d'eejd(d(d(d(d(d Zee dZ e e dZ dZ edZedZed Zed Zed Zed Zd Zedd(d(e e e dZe e e dZd(e e e d(ddZd(dZddd*e dZdZd(dZd(dZdZe dZ e dZ!e dZ"d(d(dZ#e d Z$d!Z%d"Z&d#Z'd$Z(d(d%Z)RS(+sModule is a basic module that wrap a `Symbol`. It is functionally the same as the `FeedForward` model, except under the module API. Parameters ---------- symbol : Symbol data_names : list of str Defaults to `('data')` for a typical model used in image classification. label_names : list of str Defaults to `('softmax_label')` for a typical model used in image classification. logger : Logger Defaults to `logging`. context : Context or list of Context Defaults to ``mx.cpu()``. work_load_list : list of number Default ``None``, indicating uniform workload. fixed_param_names: list of str Default ``None``, indicating no network parameters are fixed. state_names : list of str states are similar to data and label, but not provided by data iterator. Instead they are initialized to 0 and can be set by `set_states()`. group2ctxs : dict of str to context or list of context, or list of dict of str to context Default is `None`. Mapping the `ctx_group` attribute to the context assignment. compression_params : dict Specifies type of gradient compression and additional arguments depending on the type of compression being used. For example, 2bit compression requires a threshold. Arguments would then be {'type':'2bit', 'threshold':0.5} See mxnet.KVStore.set_gradient_compression method for more details on gradient compression. tdatat softmax_labelc Csbtt|jd|t|tjr7|g}n||_|dkredgt|j}nt|t|jkst ||_ | |_ ||_ |dk rt |ng}|dk rt |ng}|dk rt |ng}|dk rt |ng}t||dtt||dtt||dtt||dt|j} |||} g| D]} | | kr| ^q|_||_|j|_||_||_||_|j|_d|_d|_t|_| |_d|_d|_ d|_!d|_"d|_#d|_$d|_%d|_&d|_'dS(NtloggeriRtlabeltstatet fixed_param((tsuperRt__init__t isinstancetctxtContextt_contexttNonetlentAssertionErrort_work_load_listt _group2ctxst_symboltlistRtTruetFalsetlist_argumentst _param_namest_fixed_param_namestlist_auxiliary_statest _aux_namest _data_namest _label_namest _state_namest list_outputst _output_namest _arg_paramst _aux_paramst _params_dirtyt_compression_paramst _optimizert_kvstoret_update_on_kvstoret_updatert_preload_opt_statest _grad_reqt _exec_groupt _data_shapest _label_shapes(tselftsymbolt data_namest label_namesRRtwork_load_listtfixed_param_namest state_namest group2ctxstcompression_paramst arg_namest input_namestx((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRHsP   !    (                cKset||\}}}td||}||_||_t|_|rad||f|_n|S(sxCreates a model from previously saved checkpoint. Parameters ---------- prefix : str path prefix of saved model files. You should have "prefix-symbol.json", "prefix-xxxx.params", and optionally "prefix-xxxx.states", where xxxx is the epoch number. epoch : int epoch to load. load_optimizer_states : bool whether to load optimizer states. Checkpoint needs to have been made with save_optimizer_states=True. data_names : list of str Default is `('data')` for a typical model used in image classification. label_names : list of str Default is `('softmax_label')` for a typical model used in image classification. logger : Logger Default is `logging`. context : Context or list of Context Default is ``cpu()``. work_load_list : list of number Default ``None``, indicating uniform workload. fixed_param_names: list of str Default ``None``, indicating no network parameters are fixed. R>s%s-%04d.states(RRR0R1R$tparams_initializedR8(tprefixtepochtload_optimizer_statestkwargstsymtargstauxstmod((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pytload~s   cCs|jjd|d|d||f}|j|tjd||r}d||f}|j|tjd|ndS(sSaves current progress to checkpoint. Use `mx.callback.module_checkpoint` as `epoch_end_callback` to save during training. Parameters ---------- prefix : str The file prefix to checkpoint to. epoch : int The current epoch number. save_optimizer_states : bool Whether to save optimizer states to continue training. s%s-symbol.jsontremove_amp_casts%s-%04d.paramssSaved checkpoint to "%s"s%s-%04d.statessSaved optimizer state to "%s"N(R"tsavet save_paramstloggingtinfotsave_optimizer_states(R=RJRKRXRSt param_namet state_name((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pytsave_checkpoints   cCs(t|_d|_d|_d|_dS(s(Internal function to reset binded state.N(R%tbindedRR:R;R<(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt _reset_binds   cCs|jS(s1A list of names for data required by this module.(R+(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR?scCs|jS(s3A list of names for labels required by this module.(R,(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR@scCs|jS(s/A list of names for the outputs of this module.(R/(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt output_namesscCs|jst|jS(sdGets data shapes. Returns ------- A list of `(name, shape)` pairs. (R\RR;(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt data_shapesscCs|jst|jS(s/Gets label shapes. Returns ------- A list of `(name, shape)` pairs. The return value could be ``None`` if the module does not need labels, or if the module is not bound for training (in this case, label information is not available). (R\RR<(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt label_shapess cCs|jst|jjS(sfGets output shapes. Returns ------- A list of `(name, shape)` pairs. (R\RR:tget_output_shapes(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt output_shapesscCs>|jr|jst|jr.|jn|j|jfS(sGets current parameters. Returns ------- `(arg_params, aux_params)` A pair of dictionaries each mapping parameter names to NDArray values. (R\RIRR2t_sync_params_from_devicesR0R1(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt get_paramss  g{Gz?c s4|jr'| r'tjddddS|js<tdfd}|jj}xNt|jj D]7\} } t | |j | d} || | |qsWxNt|j j D]7\} } t | |j | d} || | |qWt|_t|_|jj|j|j d|dS(sInitializes the parameters and auxiliary states. Parameters ---------- initializer : Initializer Called to initialize parameters if needed. arg_params : dict If not ``None``, should be a dictionary of existing arg_params. Initialization will be copied from that. aux_params : dict If not ``None``, should be a dictionary of existing aux_params. Initialization will be copied from that. allow_missing : bool If ``True``, params could contain missing values, and the initializer will be called to fill those missing params. force_init : bool If ``True``, will force re-initialize even if already initialized. allow_extra : boolean, optional Whether allow extra parameters that are not needed by symbol. If this is True, no error will be thrown when arg_params or aux_params contain extra parameters that is not needed by the executor. sNParameters already initialized and force_init=False. init_params call ignored.t stackleveliNs,call bind before initializing the parameterscs|dk ry||krA||}||k rv|j|qvqsZtd|ndk r||qn ||dS(s,Internal helper for parameter initializations%s is not presentedN(Rtcopytot RuntimeError(tnametarrtcachet cache_arr(t allow_missingt initializer(s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt_impl!s     t allow_extra(RItwarningstwarnR\RR"t attr_dicttsortedR0titemsR tgetRR1R$R%R2R:t set_params( R=Rmt arg_paramst aux_paramsRlt force_initRoRntattrsRhRitdesc((RlRms4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyt init_paramss" ""  c Cs|s8|jddd|d|d|d|d|dS|jr_| r_tjdd d dS|jj||d|t|_t|_dS( sAssigns parameter and aux state values. Parameters ---------- arg_params : dict Dictionary of name to `NDArray`. aux_params : dict Dictionary of name to `NDArray`. allow_missing : bool If ``True``, params could contain missing values, and the initializer will be called to fill those missing params. force_init : bool If ``True``, will force re-initialize even if already initialized. allow_extra : boolean, optional Whether allow extra parameters that are not needed by symbol. If this is True, no error will be thrown when arg_params or aux_params contain extra parameters that is not needed by the executor. Examples -------- >>> # An example of setting module parameters. >>> sym, arg_params, aux_params = mx.model.load_checkpoint(model_prefix, n_epoch_load) >>> mod.set_params(arg_params=arg_params, aux_params=aux_params) RmRwRxRlRyRoNsMParameters already initialized and force_init=False. set_params call ignored.Rei( R|RRIRpRqR:RvR$R2(R=RwRxRlRyRo((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRvBs   twritec Cs|r|jn|jr0|jjddS||_||_||_|sa| satnt|j |j ||\|_ |_ |dk rt|tr|jr|jst|j}t|jt|jkstnd}t|j|j|j|j |j |j|||d|jd|jd|d|jd|j |_|jj|_|dk rt|_|j|_|j|_n |jr|jj |j|jn|jdkr|jdkstg|jj!D]6} t"d| d j#d | d j$d | d j%^q} d t&|j| D|_g|jj'D]&} t"| d j#d | d j$^qQ} d t&|j(| D|_|dk r|j)r|j*|nt|_dS(sBinds the symbols to construct executors. This is necessary before one can perform computation with the module. Parameters ---------- data_shapes : list of (str, tuple) Typically is ``data_iter.provide_data``. label_shapes : list of (str, tuple) Typically is ``data_iter.provide_label``. for_training : bool Default is ``True``. Whether the executors should be bound for training. inputs_need_grad : bool Default is ``False``. Whether the gradients to the input data need to be computed. Typically this is not needed. But this might be needed when implementing composition of modules. force_rebind : bool Default is ``False``. This function does nothing if the executors are already bound. But with this ``True``, the executors will be forced to rebind. shared_module : Module Default is ``None``. This is used in bucketing. When not ``None``, the shared module essentially corresponds to a different bucket -- a module with different symbol but with the same sets of parameters (e.g. unrolled RNNs with different lengths). sAlready bound, ignoring bind()NRRBtgrad_reqRDRCtshapeitdtypetstypecSsi|]\}}||qS(((t.0RhRi((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pys s cSsi|]\}}||qS(((RRhRi((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pys s (+R]R\Rtwarningt for_trainingtinputs_need_gradR9RRR?R@R;R<RRRRIR:RtexecsRRR"R R'R(R!R-t_total_exec_bytesR$R0R1Rvt param_arraysR RRRtzipt aux_arraysR*toptimizer_initializedtborrow_optimizer( R=R_R`RRt force_rebindt shared_moduleR~t shared_groupRHRR((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pytbindlsV     $  '       $F6cCsS|jstt|j|j||\|_|_|jj|j|jdS(sReshapes the module for new input shapes. Parameters ---------- data_shapes : list of (str, tuple) Typically is ``data_iter.provide_data``. label_shapes : list of (str, tuple) Typically is ``data_iter.provide_label``. N( R\RRR?R@R;R<R:treshape(R=R_R`((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRs $tlocaltsgdt learning_ratec sjrjstjr<| r<jjddSjrRjnt|t j j \}}j j }|rd|jkrd|jkr||j9}nd|}i}|r|jtj jnLxItt j D]2|jfdtj jDqWt|trt|}d|krh||ds t rescale_gradRNtparam_idx2names;Optimizer created manually outside Module but rescale_grad s=is not normalized to 1.0/batch_size/num_workers (%s vs. %s). sIs this intended?ReitkvstoreRRwt param_namestupdate_on_kvstore(/R\RIRRRRR2RcRRRR0R:t batch_sizettypet num_workerstupdatet enumerateRtrangeRtstrtdicttopttcreateR>t OptimizerRRpRqtidx2nametcopyR4R5R6RR7R3tset_gradient_compressiont set_optimizerRRR't get_updaterR$R8RL( R=RRtoptimizer_paramsRyRRRR((RR=s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pytinit_optimizersf  $ $                   cCsL|jst|j|_|j|_|j|_|j|_t|_dS(sBorrows optimizer from a shared module. Used in bucketing, where exactly the same optimizer (esp. kvstore) is used. Parameters ---------- shared_module : Module N(RRR4R5R6R7R$(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR0s     c CsH|jr|jsttd|jD}t|tr|d k sXtdg}xtt |dj D]y}|dj |j }x0|D](}||j |j kstdqWt ||d}|j |f|dqxWt|}ntd|j D}||kr1t |drM|jrM|j} nCgt|j|D]*\}}t|j||j|j^q`} t |dr|jr|j} njt |d r|jrgt|j|jD]-\}} t|j| j |j|j^q} nd } |j| | n|jj||d S( sForward computation. It supports data batches with different shapes, such as different batch sizes or different image sizes. If reshaping of data batch relates to modification of symbol or module, such as changing image layout ordering or switching from training to predicting, module rebinding is required. See Also ---------- :meth:`BaseModule.forward`. Parameters ---------- data_batch : DataBatch Could be anything with similar API implemented. is_train : bool Default is ``None``, which means ``is_train`` takes the value of ``self.for_training``. css|]}|jVqdS(N(R(RR((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pys SssEncountered empty data batchis6All data batches in a list need to have the same shapeicss|]}|jVqdS(N(R(RR((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pys `st provide_datat provide_labelRN(R\RIRttupleR;RR#RRRRRtappendthasattrRRR RhRtlayoutRRR<RR:tforward( R=t data_batchtis_traintcurr_data_shapestnew_data_shapesRRtdbtnew_batch_sizet new_dshapet new_lshapetj((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR?s6     @ IcCs/|jr|jst|jjd|dS(svBackward computation. See Also ---------- :meth:`BaseModule.backward`. Parameters ---------- out_grads : NDArray or list of NDArray, optional Gradient on the outputs to be propagated back. This parameter is only needed when bind is called on outputs that are not a loss function. t out_gradsN(R\RIRR:tbackward(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRusc Cs|jr|jr|js!tt|_|jr^t|jj |jj |j |jj nFt |jj |jj d|jdt|jd|j d|jj dS(sUpdates parameters according to the installed optimizer and the gradients computed in the previous forward-backward batch. When KVStore is used to update parameters for multi-device or multi-machine training, a copy of the parameters are stored in KVStore. Note that for `row_sparse` parameters, this function does update the copy of parameters in KVStore, but doesn't broadcast the updated parameters to all devices / machines. Please call `prepare` to broadcast `row_sparse` parameters with the next batch of data. See Also ---------- :meth:`BaseModule.update`. tupdatert num_deviceRRN(R\RIRRR$R2R6RR:Rt grad_arraysR5RRR7RR(R=((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRs!        cCs+|jr|jst|jjd|S(s6Gets outputs of the previous forward computation. If ``merge_multi_context`` is ``True``, it is like ``[out1, out2]``. Otherwise, it is like ``[[out1_dev1, out1_dev2], [out2_dev1, out2_dev2]]``. All the output elements are `NDArray`. When `merge_multi_context` is `False`, those `NDArray` might live on different devices. Parameters ---------- merge_multi_context : bool Default is ``True``. In the case when data-parallelism is used, the outputs will be collected from multiple devices. A ``True`` value indicate that we should merge the collected results so that they look like from a single executor. Returns ------- list of NDArray or list of list of NDArray Output. tmerge_multi_context(R\RIRR:t get_outputs(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRscCs4|jr|jr|js!t|jjd|S(sGets the gradients with respect to the inputs of the module. If ``merge_multi_context`` is ``True``, it is like ``[grad1, grad2]``. Otherwise, it is like ``[[grad1_dev1, grad1_dev2], [grad2_dev1, grad2_dev2]]``. All the output elements are `NDArray`. Parameters ---------- merge_multi_context : bool Default is ``True``. In the case when data-parallelism is used, the outputs will be collected from multiple devices. A ``True`` value indicate that we should merge the collected results so that they look like from a single executor. Returns ------- list of NDArray or list of list of NDArray Input gradients R(R\RIRRR:tget_input_grads(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRs!cCs+|jr|jst|jjd|S(sGets states from all devices. If `merge_multi_context` is ``True``, it is like ``[out1, out2]``. Otherwise, it is like ``[[out1_dev1, out1_dev2], [out2_dev1, out2_dev2]]``. All the output elements are `NDArray`. Parameters ---------- merge_multi_context : bool Default is ``True``. In the case when data-parallelism is used, the states will be collected from multiple devices. A ``True`` value indicate that we should merge the collected results so that they look like from a single executor. Returns ------- list of NDArray or list of list of NDArray States R(R\RIRR:t get_states(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRscCs/|jr|jst|jj||dS(sjSets value for states. Only one of the states & value can be specified. Parameters ---------- states : list of list of NDArrays source states arrays formatted like ``[[state1_dev1, state1_dev2], [state2_dev1, state2_dev2]]``. value : number a single scalar value for all state arrays. N(R\RIRR:t set_states(R=tstatestvalue((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRs cCs|jj|||dS(s!Evaluates and accumulates evaluation metric on outputs of the last forward computation. See Also ---------- :meth:`BaseModule.update_metric`. Parameters ---------- eval_metric : EvalMetric Evaluation metric to use. labels : list of NDArray if `pre_sliced` parameter is set to `False`, list of lists of NDArray otherwise. Typically `data_batch.label`. pre_sliced: bool Whether the labels are already sliced per device (default: False). N(R:t update_metric(R=t eval_metrictlabelst pre_sliced((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRscCs|jj|j|j|jr|jrxpt|jjD]V\}}|jdkrAt j d|j ddd}|jj ||d|qAqAWnt |_dS(sdSynchronizes parameters from devices to CPU. This function should be called after calling `update` that updates the parameters on the devices, before one can read the latest parameters from ``self._arg_params`` and ``self._aux_params``. For row_sparse parameters on devices, ther are pulled from KVStore with all row ids. t row_sparseiRtint64trow_idsN(R:RdR0R1R5R6RsRtRtndtarangeRtrow_sparse_pullR%R2(R=RYt param_valR((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRc s"#cCs]|jst|jr+|jj|n.t|d}|j|jjWdQXdS(sSaves optimizer (updater) state to a file. Parameters ---------- fname : str Path to output states file. twbN( RRR6R5RXtopenR}R7R(R=tfnametfout((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRXs  cCsN|jst|jr+|jj|n|jjt|djdS(sLoads optimizer (updater) state from a file. Parameters ---------- fname : str Path to input states file. trbN( RRR6R5RLR7RRtread(R=R((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyRL)s cCs#|jst|jj|dS(s#Installs monitor on all executors. N(R\RR:tinstall_monitor(R=tmon((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR8scCs|jst|dk r|j s/|j rEtjtdq||}t|t sltdx|j D]\}}|j j j |}|j j|}t|ttfst|djdkrtjtd|qy|jj||d|d| qyWndS( sPrepares the module for processing a data batch. Usually involves switching bucket and reshaping. For modules that contain `row_sparse` parameters in KVStore, it prepares the `row_sparse` parameters based on the sparse_row_id_fn. When KVStore is used to update parameters for multi-device or multi-machine training, a copy of the parameters are stored in KVStore. Note that for `row_sparse` parameters, the `update()` updates the copy of parameters in KVStore, but doesn't broadcast the updated parameters to all devices / machines. The `prepare` function is used to broadcast `row_sparse` parameters with the next batch of data. Parameters ---------- data_batch : DataBatch The current batch of data for forward computation. sparse_row_id_fn : A callback function The function takes `data_batch` as an input and returns a dict of str -> NDArray. The resulting dict is used for pulling row_sparse parameters from the kvstore, where the str key is the name of the param, and the value is the row id of the param to pull. sLParameters are not updated in the KVStore. No need to call sparse_row_id_fn.s*Expected dict output from sparse_row_id_fniRsA%s.stype is not 'row_sparse'. No need to perform row_sparse_pull.RtpriorityN(R\RRR5R6RpRqt UserWarningRRRtR:RtindexRRR#RR(R=Rtsparse_row_id_fnRRYtrow_idt param_idxR((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pytprepare=s   (R(RN(Rg{Gz?((Rg{Gz?(*t__name__t __module__t__doc__RVRtcpuRRt staticmethodR%RRR$R[R]tpropertyR?R@R^R_R`RbRdR R|RvRRRRRRRRRRRRRcRXRLRR(((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyR(sP  3&   > )  \ U  6          (RRVRptRRRRRRtexecutor_groupRtmodelRRRRRRmR R tioR R t base_moduleR RRR(((s4/tmp/pip-install-Qvdv_2/mxnet/mxnet/module/module.pyts  "