B An]v` @sddlmZddlZddlmZmZmZmZmZddl m Z m Z ddl m Z mZddlmZmZmZddlmZddlmZdd lmZdd lmZmZmZmZmZmZdd lmZm Z dd lm!Z!m"Z"m#Z#dd lm$Z$ddl%m&Z&ddl'm(Z(m)Z)ddl*m+Z+ddl,m-Z-ddl.m/Z/ddl0m1Z1m2Z2m3Z3m4Z4ddl5m6Z6ddl7Z.ddl8m9Z9m:Z:m;Z;mZ>m?Z?ddl@mAZAddlBmCZCyddlDZEddlFmGZGWneHk rdZEYnXdZIddZJddZKedvd d!ZLd"d#ZMedwd$d%ZNd&d'ZOd(d)ZPd*d+ZQedxd,d-ZRe/Se6d.d/e/jTe1d0e1d1e1d2e1d3e2d4d5d/e3d6d7d/e2d8d9d/d:d/e4d;e4d<e4d=d> Gd?d@d@eZUdAdBZVdydDdEZWdFdGZXdHdIZYdJdKZZdLdMZ[dNdOZ\dzdPdQZ]dRdSZ^ed{dTdUZ_ed|dVdWZ`dXdYZaed}dZd[Zbd\d]Zcd^d_Zded~d`daZeddcddZfddfdgZgeddhdiZhdjdkZiddmdnZjGdodpdpekZlddqdrZmddtduZndS))unicode_literalsN)ModelMaxoutSoftmaxAffineReLu) HashEmbed StaticVectors) ExtractWindowParametricAttention)Poolingsum_pool mean_pool)Residual) LayerNorm)FeatureExtracter)addlayerizechainclone concatenate with_flatten) with_getitemflatten_add_lengths)uniquedwrapnoop)with_square_sequences) LinearModel)NumpyOpsCupyOps)get_array_module)Adam)describe) DimensionSynapsesBiasesGradient)_set_dimensions_if_needed)IDORTHLOWERNORMPREFIXSUFFIXSHAPE)Errors)util)PyTorchWrapperRNNZspacy_pretrained_vectorscCsJt|}|j|}|j|}|dks0|dkr4dS||||SdS)Ngr)r!linalgnormdot)Zvec1Zvec2xpZnorm1Znorm2r8l/home/app_decipher_dev_19-4/dev/decipher-analysis/serverless-application/helper/df_spacy/python/spacy/_ml.pycosine's   r:c Ksntdd}tdd}tdd}tdd}td d }td d }t||||||d }||_|j|_|S)N learn_rategMbP?Z optimizer_B1g?Z optimizer_B2g+?Z optimizer_epsg:0yE>Z L2_penaltygư>Zgrad_norm_clipg?)L2beta1beta2eps)r2env_optr" max_grad_normdevice) opscfgr;r=r>r?r<rA optimizerr8r8r9create_default_optimizer1s      rFcsJtjjdd|Ddddfdd }j|d}|f|fS) NcSsg|] }t|qSr8)len).0seqr8r8r9 Asz(_flatten_add_lengths..i)dtypecsj|dS)N)pad) unflatten)d_Xsgd)lengthsrCrNr8r9 finish_updateCsz+_flatten_add_lengths..finish_update)rN)N)rrCasarrayflatten)seqsrNdroprSXr8)rRrCrNr9_flatten_add_lengths>s rYcCs.dd}|j||jdk r*|jd|S)Nc_s|jddS)Nr)Wfill)selfargskwargsr8r8r9_zero_init_implKsz#_zero_init.._zero_init_implg) on_init_hooksappendrZr[)modelr_r8r8r9 _zero_initJs    rccCsPdd|D}tjdd|Dtjd}t|}tj|jdd}|||fdfS)NcSsg|]}|tqSr8)to_arrayr,)rIdocr8r8r9rKVsz#_preprocess_doc..cSsg|]}|jdqS)r)shape)rIarrr8r8r9rKZs)rMf)numpyarrayint_rzerosrf)docsrWkeysrRvalsr8r8r9_preprocess_docTs  rpcs"dfdd }t|S)zVWrap a model that should run on CPU, transferring inputs and outputs as necessary.cs6jt||d\}t|}dfdd }||fS)N)rWcst|}||dS)N)rQ)_to_cpu)Z d_outputsrQZ cpu_d_outputs)backpropr8r9with_cpu_backpropisz=with_cpu..with_cpu_forward..with_cpu_backprop)N) begin_updaterr _to_device)inputsrWZ cpu_outputsZ gpu_outputsrt)rbrC)rsr9with_cpu_forwardes z"with_cpu..with_cpu_forward)rq)to_cpur)rCrbrxr8)rbrCr9with_cpu`s rzcCs^t|tjr|St|tr,tdd|DSt|trDdd|DSt|drV|S|SdS)NcSsg|] }t|qSr8)rr)rIxr8r8r9rKvsz_to_cpu..cSsg|] }t|qSr8)rr)rIr{r8r8r9rKxsget) isinstancerindarraytuplelisthasattrr|)rXr8r8r9rrrs    rrcsJt|tr tfdd|DSt|tr<fdd|DS|SdS)Ncsg|]}t|qSr8)rv)rIr{)rCr8r9rKsz_to_device..csg|]}t|qSr8)rv)rIr{)rCr8r9rKs)r}rrrT)rCrXr8)rCr9rvs   rvcsdd|D}tjfdd|D}fddt||D}tfdd|D\}}jdd|Dtjd}j|}jj|dd}|||fdfS) NcSsg|]}|tqSr8)rdr,)rIrer8r8r9rKsz+_preprocess_doc_bigrams..csg|]}d|qS))ngrams)rIZdoc_unis)rCr8r9rKscsg|]}j|qSr8)r7r)rIfeats)rCr8r9rKscsg|]}jj|ddqS)T) return_counts)r7unique)rIk)rCr8r9rKscSsg|]}|jdqS)r)rf)rIrgr8r8r9rKs)rMrh)rrCziprTrirkr7r)rmrWZunigramsZbigramsrnrorRr8)rCr9_preprocess_doc_bigramss rcCs ||S)N) init_weights)rbrXyr8r8r9rz Input sizezNumber of featuresz Output sizez Maxout pieceszWeights matrixcCs|j|j|j|jfS)N)nFnOnPnI)objr8r8r9rrz Bias vectorcCs |j|jfS)N)rr)rr8r8r9rrZPadcCsd|j|j|jfS)Nr))rrr)rr8r8r9rrcCs ||dS)Ng?) normal_init)MrCr8r8r9rrrZrNb) rrrrrZrrNd_Wd_padd_bc@s<eZdZd ddZdddZddZd d Zed d ZdS)PrecomputableAffineNcKs*tj|f|||_||_||_||_dS)N)r__init__rrrr)r\rrrrr^r8r8r9rs zPrecomputableAffine.__init__csnjjjjjjjfdd}||jdjjjf} |}dfdd }||fS)NT)trans2rcsX|\}}||\}}|}||jdjjf}j|jdd7_||jdjjf}j d}j j |}|jjjjf}j ||jdjjf|}|}|dj j|||dd|jjjjf}j| d7_|dk r>|jjjjjd||jdjjfS) Nr)axis)r)rrgT)outtrans1)rrr)r)key)_backprop_paddingreshaperfrrrsumrrrZ transposerCr7ascontiguousarraygemmr[r_memweightsgradientid)ZdY_idsrQdYidsZXfZWopfiZdXfZdWopfi)rXr\r8r9backwards$ &  z2PrecomputableAffine.begin_update..backward)N) rCrrZrrrrrrf _add_padding)r\rXrWYfrr8)rXr\r9rus ( z PrecomputableAffine.begin_updatecCs|jj|j|f}|S)N)rCr7vstackrN)r\rZ Yf_paddedr8r8r9rsz PrecomputableAffine._add_paddingcCsL|dk}|jdd}|||jdddf}|j|jdd7_||fS)Ngr))rr)rrrfr)r\rrmaskrr8r8r9rs  z%PrecomputableAffine._backprop_paddingc sLjddkrdSj}|j}|jjjjdd|jdjfdd}||j d d |j 7}|j |d d}|jdjfdd}||jj dd |j d |j 7}fdd}d}d}d}d } xt|D]v} |||} jj| } jj| } t| d |kr$jjj| _qt| |krBj| 8_qPqWdS)aThis is like the 'layer sequential unit variance', but instead of taking the actual inputs, we randomly generate whitened data. Why's this all so complicated? We have a huge number of inputs, and the maxout unit makes guessing the dynamics tricky. Instead we set the maxout weights to values that empirically result in whitened outputs given whitened inputs. rgNT)inplaceirh)rMrirLg?)locscalesizecs|dd}jj|jdjjfdd}||jdjjjf}j|||||jdjjf}|j 7}j |}jdkrj |dS||dkSdS)Nrrh)rMr) rCallocaterfrrrr scatter_addrUrrTmaxout)rtokvecsZhiddensvectors)rbr8r9predicts"   z1PrecomputableAffine.init_weights..predictg{Gz? )rZrrCr7rrrrrandomuniformrfrTnormalrrrangevarmeanabssqrtr) rbrCr7rrrZtol_varZtol_meanZt_maxZt_iZacts1rrr8)rbr9rs2    z PrecomputableAffine.init_weights)NNNN)r) __name__ __module__ __qualname__rrurr staticmethodrr8r8r8r9rs   #rcCs|j}|jdkr2t|_|jjdkr2td|jjtj}x.|D]&}|j |j kr^|j |j |_ q>d|_ q>W| |j}|t jjj|j|jf<dS)NrzdWarning: Unnamed vectors -- this won't allow multiple vectors models to be loaded. (Shape: (%d, %d)))rname VECTORS_KEYdatarprintrfrrCorthkey2rowrankrTthincextraZload_nlpZVECTORSrB)vocabrrCwordrr8r8r9link_vectors_to_modelss       r皙?cCs8|dkrttStjj||d|d|d}tt|S)NrrT) bidirectionaldropout)rrtorchnnZLSTMrr3)rrdepthrrbr8r8r9 PyTorchBiLSTM0s rc Ks|dd}|dd}|dd}|dd}|dd }ttttttg}tt t t t t d t|||td d } |rt||d |tdd } t||d |tdd } t||d |tdd } n d\} } } |dk rft|||td} |r8t| | B| B| B| Btt||ddd?|td}n,t| | Btt||d dd?|td}n@|rt| | B| B| Btt||ddd?|td}n| }ttddtt||d|d?}t|t|||?|d?}|dkr|t|||?}||_||_WdQRX|S)Npretrained_vectorscnn_maxout_piecesrsubword_featuresT conv_depth bilstm_depthr)z>>|z**+*Z embed_norm)columnrrZ embed_prefixZ embed_suffixZ embed_shape)NNN)r)piecesr))nW)rN)r|r*r-r.r/r0r+rdefine_operatorsrrrrreapplyrindexr rLNrrr rrrrembed)width embed_sizer^rrrrrcolsr5prefixsuffixrfZgloverZ convolutiontok2vecr8r8r9Tok2Vec7sV        rcsdfdd }t|S)NcsLgx0tD]$}j||d\}}|}|qWdfdd }||fS)N)rWcs<d}x2tD]&}|||d}|dkr,|}q||7}qW|S)N)rQ)reversed)rrQdXrs) backpropsr8r9 reapply_bwd|s  z1reapply..reapply_fwd..reapply_bwd)N)rrura)rXrWrLYrsr)layern_times)rr9 reapply_fwdus zreapply..reapply_fwd)r)r)rrrr8)rrr9rtsrcsdfdd }t|S)Ncsj|ddfS)N)rM)rT)rXrW)rMrCr8r9forwardszasarray..forward)r)r)rCrMrr8)rMrCr9rTsrTcCs<g}d}x.|t|kr6|||||||7}q W|S)Nr)rHra)rXrpartsrr8r8r9 _divide_arrays  rcs0dkrttjjddfdd }t|S)Nr)valuecsRttjrtntjjddfjd}dfdd }||fS)N)rMcs(j}|ddf|7<|S)N)rrf)rrQr)rXidxrCr8r9rs z*get_col..forward..backward)N)r}rir~rr r7rrM)rXrWoutputr)r)rXrCr9rs  zget_col..forward)r) IndexErrorr1E066formatr)rrr8)rr9get_colsr cs8dkrttttttgdfdd }t|}|_|S)Ncs*g}x|D]}||q W|dfS)N)rard)rmrWrre)rr8r9rs zdoc2feats..forward)r )r*r-r.r/r0r+rr)rrrbr8)rr9 doc2featss r cCsddd}t|S)NcSs |ddfS)Nc[s|S)Nr8)rr^r8r8r9rrz.print_shape..forward..r8)rXrWr8r8r9rszprint_shape..forward)r)r)rrr8r8r9 print_shapes rcs |\}}dfdd }||fS)Ncs|fS)Nr8)d_outputrQ)tokensr8r9rsz#get_token_vectors..backward)Nr8)Ztokens_attrs_vectorsrWattrsrrr8)rr9get_token_vectorss rcsdt|}t||js||}||d|}||d|}dd|| dfdd }|fS)Ng$@g$g?cs|d}|S)Nr)r8)rrQr)rr8r9 logistic_bwdszlogistic..logistic_bwd)N)r!r}r~rTminimummaximumexp)rXrWr7rr8)rr9logistics  rcCsdd}|j||S)NcSs|jddS)Nr)rZr[)r\rXrr8r8r9r_sz"zero_init.._zero_init_impl) on_data_hooksra)rbr_r8r8r9 zero_inits rcCsVdd|D}tj}|dd|D}|j|}||jdd}|||fdfS)NcSsg|]}|tgqSr8)rdr,)rIrer8r8r9rKsz"preprocess_doc..cSsg|]}|jdqS)r)rf)rIrgr8r8r9rKsrr))rrCrTr7rrrf)rmrWrnrCrRror8r8r9preprocess_docs  rcsdfdd }t|S)Ncs |dfS)Nr8)rXrW)rLr8r9 getitem_fwdszgetitem..getitem_fwd)r)r)rLrr8)rLr9getitemsrc Kstdd}d|kr|d}n tdd}|d}|dd}tttd>d |krd|d }nt||||d }tt ||}||?}WdQRXd|_ ||_ ||_ |S) Nritoken_vector_width`rrT)z>>rr)rr) r2r@r|rrrrrrrrrsoftmax) nr_classrDrrrrrr!rbr8r8r9build_tagger_models(      r#cCsg}x||D]t}tjt|fdd}xBt|D]6\}}|j|jjjkrZ|jjj|j||<q,d||<q,W|jjj|}| |q W|dfS)NrL)rMr) rirlrH enumeraterrrrrra)rmrWbatchreindicesrLrrr8r8r9 SpacyVectorss  r'@c KsZ|dd}|dd}|dd}tttttd |dr|rtt?t dt ||?t |?t t ?tt||d?tt ||d d ?t?}|St||d d }t|d|dd }t|d|d d } t|d|dd } tttttttgtt||B| B| Btt|||dd ?dd ?} |rXttt ||?} t| | } |d}n | } |}d} | ttt||ttd dtt||d ?|?|d?}|t?t |?t t ?ttt||?tt ||d d ?}t t!tj"t#|?}|drt$||d}ntt ||dd d t?}||B|?}t|t%|_&WdQRX||_'d|_(|S)Nrr nr_vectoripretrained_dimsr)z>>rrz**Zlow_datag) drop_factorr))rrr)r)rNexclusive_classesF))r|rrrrrrr'rrrr r r rrrrrrr+r,r.r/r0r*rrrrconcatenate_listsr rprzrCrrrUrrlsuv)r"rrDrr)r*rblowerrrrfZtrained_vectorsZstatic_vectorsrZ vectors_widthrZ cnn_modelZ linear_model output_layerr8r8r9build_text_classifier#sV    P    $ < r1Fc KsntdtiD|r"t||j}ntt||jddt?}|t?t t ?|?}WdQRXt|t |_ ||_|S)a1 Build a simple CNN text classifier, given a token-to-vector model as inputs. If exclusive_classes=True, a softmax non-linearity is applied, so that the outputs sum to 1. If exclusive_classes=False, a logistic non-linearity is applied instead, so that outputs are in the range [0, 1]. z>>g)r+N) rrrrrrrrrr rrUr)rr"r,rDr0rbr8r8r9 build_simple_cnn_text_classifierrs r2csDtjjdd|Dddd fdd }j|dd}||fS) NcSsg|] }t|qSr8)rH)rIrJr8r8r9rKszflatten..rL)rMcsj|ddS)Nr)rN)rO)rPrQ)rRrCr8r9rSszflatten..finish_updater)rN)N)rrCrTrU)rVrWrSrXr8)rRrCr9rUs rUcsV|s tS|dd|djdd|D}t|d fdd }t|}|S) zCompose two or more models `f`, `g`, etc, such that their outputs are concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))` r+g?rcSsg|]}t|tqSr8)rrU)rIrr8r8r9rKsz%concatenate_lists..csV|9}jdd|Ddd}j||d\}||}dfdd }||fS) NcSsg|] }t|qSr8)rH)rIrXr8r8r9rKszDconcatenate_lists..concatenate_lists_fwd..rL)rM)rWcs||dS)N)rQ)rU)Zd_ysrQ) bp_flat_yrCr8r9concatenate_lists_bwdszOconcatenate_lists..concatenate_lists_fwd..concatenate_lists_bwd)N)rTrurO)XsrWrRZflat_yysr5)concatr+rC)r4r9concatenate_lists_fwds  z0concatenate_lists..concatenate_lists_fwd)r3)rr|rCrr)Zlayersr^r9rbr8)r8r+rCr9r-s   r-333333?cs$t|dfdd }t|S)z7Convert a model into a BERT-style masked language modelcsXt|d\}jjddfj||d\}dfdd }||fS)N) mask_probrr))rWcs|d9}||dS)Nr))rQr8)rrQ)rsrr8r9 mlm_backwards z@masked_language_model..mlm_forward..mlm_backward)N) _apply_maskrCrTrrfru)rmrWrr=)r<rb random_words)rsrr9 mlm_forwards z*masked_language_model..mlm_forward)r;) _RandomWordsr)rrbr<r@r8)r<rbr?r9masked_language_models rBc@seZdZddZddZdS)rAcCsvdd|D|_dd|D|_|jdd|_|jdd|_ttj|jdd|_|j|j_g|_dS)NcSsg|]}|jdkr|jqS)g)probtext)rIlexr8r8r9rKsz)_RandomWords.__init__..cSsg|]}|jdkr|jqS)g)rC)rIrEr8r8r9rKsi'rh)rM)wordsprobsrirrjr_cache)r\rr8r8r9rsz_RandomWords.__init__cCs<|js(|jtjjt|jd|jd|j}|j|S)Ni')p) rHextendrirchoicerHrFrGpop)r\rr8r8r9nexts  z_RandomWords.nextN)rrrrrMr8r8r8r9rAs rAc Csddlm}tdd|D}tjdd|f}||k}d}g}xp|D]h}g} x:|D]2} ||snt| j|} n| j} | | |d7}qTWdd |D} |||j | | d qFW||fS) Nr))Doccss|]}t|VqdS)N)rH)rIrer8r8r9 sz_apply_mask..gg?rcSsg|]}t|jqSr8)boolZ whitespace_)rIwr8r8r9rKsz_apply_mask..)rFspaces) Z tokens.docrNrrirr _replace_wordrDrar) rmr?r<rNNrrLZ masked_docsrerFtokenrrRr8r8r9r>s"     r>[MASK]cCs.tj}|dkr|S|dkr&|S|SdS)Ng?g?)rirrM)rr?rrollr8r8r9rSs  rS)rrG)rG)rG)r)N)rG)rG)rG)rG)r()F)rG)r:)r:)rV)o __future__rriZ thinc.v2vrrrrrZ thinc.i2vrr Z thinc.t2tr r Z thinc.t2vr r rZ thinc.miscrrrrZ thinc.apirrrrrrrrrrrrZthinc.linear.linearrthinc.neural.opsrr thinc.neural.utilr!thinc.neural.optimizersr"rr#Zthinc.describer$r%r&r'Zthinc.neural._classes.affiner(Zthinc.extra.load_nlprr*r+r,r-r.r/r0errorsr1r2Ztorch.nnrZthinc.extra.wrappersr3 ImportErrorrr:rFrYrcrprzrrrvrZon_data attributesrrrrrrTrr r rrrrrrr#r'r1r2rUr-rBobjectrAr>rSr8r8r8r9s           $               p =       O