U C^~@sddlmZddlmZddlmZdaddZd d Zd d d Z e e ej e dddee dddededdGdddeZddZddZddZddZd S)!)unicode_literals)describe)ModelFcCs|adS)zBAllow backwards compatibility with calculations bug from Thinc 6.8N) REPRODUCE_BUG) flag_valuer B/tmp/pip-install-6_kvzl1k/thinc/thinc/neural/_classes/layernorm.pyset_compat_six_eight sr cCs|ddS)N?)fill)Wopsr r r _init_to_onesrNcCs&|jr"|jjD]}||j||qdSN)childZ on_data_hooks)modelXyhookr r r _run_child_hookss rzScaling vectorcCs|jfSrnOobjr r r rz Bias vectorcCs|jfSrrrr r r rrGb)rrd_Gd_bc@s4eZdZdZd ddZddZd dd Zd d ZdS) LayerNormZ layernormNcKs^||_|dk r|g|_ng|_tj|f|d|kr@|d|_nt|ddrT|j|_d|_dS)Nrr)rZ_layersr__init__rgetattrZnr_upd)selfrkwargsr r r r#%s   zLayerNorm.__init__cCsL|jdk r|j|}t|j|\}}}t|j|||}||j|j}|Sr)rpredict _get_momentsr_forwardrr)r%rNmuvarZXhrr r r r'2s   zLayerNorm.predictcsjdk r jjdd\ndtj\tj}|\}d fdd }|dk r|tjdjjdgdd9}j||\}}|j d kst |||fS) Nr-)dropcsj||}tj|\}}}|||d|}|d9}|}dk rb||S|SdS)Ng)_get_d_momentsr)dysgddistZsum_dyZ sum_dy_distZd_xhatr*rZbackprop_childZbackprop_rescaler+r%r,r r finish_updateEs   z-LayerNorm.begin_update..finish_updateZ drop_factorr fdtypeZfloat32)N) r begin_updater(rr)_begin_update_scale_shiftr$asarrayZdropoutr8AssertionError)r%rr.ZXhatrr5Z bp_dropoutr r4r r9:s   zLayerNorm.begin_updatecs$dfdd }jj|fS)NcsZj|jdd7_j}||jdd7}|dk rP|jjjjjd|jS)Nr)axis)key)r!sumr Z_memweightsZgradientidr)Z gradient__BIr2r  input__BIr%r r r5[s z:LayerNorm._begin_update_scale_shift..finish_update)N)rr)r%rCr5r rBr r:Zsz#LayerNorm._begin_update_scale_shift)N)r-)__name__ __module__ __qualname__namer#r'r9r:r r r r r"s   r"cCsJtrt||S|jddd}|jdddd}|j|jdgdd||fS)NrTr=Zkeepdims:0yE>r6r7)r_get_moments_reproduce_bugmeanr,r;shaperrr+r,r r r r(fs  r(cCs<|jddd}|jdddd}|j|jdgdd||fS)z:Replicate bug from Thinc 6.8, for backwards compatibility.rTrHrIrr6r7)rKr,r;rLrMr r r rJnsrJcCs2||}||jj|ddd|jj||dddfS)NrTrH)Zxpr?)rr1rr+r3r r r r0us r0cCs|||dS)Nr/r rMr r r r)~sr))N) __future__rrrrrr rrZon_data attributesZWeightsZBiasesZGradientr"r(rJr0r)r r r r s$    D