U C^ @stddlmZddlmZddlmZmZmZddlmZej ededd d d d ed d GdddeZ dS))unicode_literals)describe) DimensionSynapsesGradient)Modelz Output sizezLearned 'query' vectorcCs |jdfS)Nr)nO)objr B/tmp/pip-install-6_kvzl1k/thinc/thinc/neural/_classes/attention.py rcCs|||jdS)Nr)Z normal_initshape)Qopsr r r rrr)r rdQc@s8eZdZdZdZdddZddd Zd d Zd d ZdS)ParametricAttentionz/Weight inputs by similarity to a learned vectorz para-attnNFcKs,tj|f|||_||_|dd|_dS)N drop_factor?)r __init__r hardgetr)selfr rkwargsr r r rszParametricAttention.__init__csL|\}}j||\}|||\}dfdd }||f|fS)NcsR|\}}|\}}j|7_||7}|dk rN|jjjjjd|S)N)key)rZ_memweightsZgradientid)d_outputZsgddXs d_attentionrZdXs2Z bp_attentionZ bp_outputrr r attention_bwd"s  z7ParametricAttention.begin_update..attention_bwd)N)_get_attentionr_apply_attention)rZ Xs_lengthsZdropXslengths attentionoutputr$r r#r begin_updates  z ParametricAttention.begin_updatec sjr`d}tD]@\}}|||}d|||<d||<||7}qnjfdd}|fS)NrrcsJjr|9}nj|}jj|dd}jj|}||fS)NT)Ztrans1)rrZbackprop_softmax_sequencesZgemmZxpouter)r"rr!rr'r)r(rr r get_attention_bwd9s z=ParametricAttention._get_attention..get_attention_bwd)dotr enumerateargmaxrZsoftmax_sequences) rrr'r(startilengthr1r.r r-r r%-s    z"ParametricAttention._get_attentioncs}fdd}||fS)Ncs"|jddd}|}||fS)NrT)ZaxisZkeepdims)sum)r r"r!r'r)r r apply_attention_bwdIszAParametricAttention._apply_attention..apply_attention_bwdr )rr)r'r(r*r7r r6r r&Fsz$ParametricAttention._apply_attention)NF)r) __name__ __module__ __qualname____doc__namerr+r%r&r r r r r s    rN) __future__rrrrrmodelr attributesrr r r r s