ó <¿CVc@säddlmZmZddlZddlmZddlmZyddlZWne k renXddl m Z ddl m Z de fd„ƒYZd „Zd „Zd efd „ƒYZe d efd„ƒYƒZdS(iÿÿÿÿ(tprint_functiontunicode_literalsN(tstdout(tsqrt(tClusterI(tpython_2_unicode_compatibletVectorSpaceClusterercBskeZdZed d„Zeed„Zd„Zd„Zd„Z d„Z d„Z d„Z d „Z RS( u© Abstract clusterer which takes tokens and maps them into a vector space. Optionally performs singular value decomposition to reduce the dimensionality. cCsd|_||_||_dS(u) :param normalise: should vectors be normalised to length 1 :type normalise: boolean :param svd_dimensions: number of dimensions to use in reducing vector dimensionsionality with SVD :type svd_dimensions: int N(tNonet_Ttt_should_normaliset_svd_dimensions(tselft normalisetsvd_dimensions((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyt__init__s  c CsVt|ƒdkst‚|jr<tt|j|ƒƒ}n|jr|jt|dƒkrtjj tj tj |ƒƒƒ\}}}||j tj |jtj ƒ}|dd…d|j…f}|d|j…dd…f} tj tj|| ƒƒ}tj |ƒ|_n|j||ƒ|rRg|D]} |j| ƒ^q9SdS(Ni(tlentAssertionErrorR tlisttmapt _normaliseR tnumpytlinalgtsvdt transposetarraytidentitytfloat64tdotRtcluster_vectorspacetclassify( R tvectorstassign_clustersttracetutdtvttStTtDttvector((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pytcluster's "- cCs tƒ‚dS(uD Finds the clusters using the given set of vectors. N(tNotImplementedError(R RR ((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR?scCs^|jr|j|ƒ}n|jdk rBtj|j|ƒ}n|j|ƒ}|j|ƒS(N(R RRRRRtclassify_vectorspacet cluster_name(R R'R(((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyREs  cCs tƒ‚dS(uN Returns the index of the appropriate cluster for the vector. N(R)(R R'((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR*MscCsR|jr|j|ƒ}n|jdk rBtj|j|ƒ}n|j||ƒS(N(R RRRRRtlikelihood_vectorspace(R R'tlabel((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyt likelihoodSs  cCs#|j|ƒ}||krdSdS(uP Returns the likelihood of the vector belonging to the cluster. gð?g(R*(R R'R(t predicted((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR,ZscCsF|jr|j|ƒ}n|jdk rBtj|j|ƒ}n|S(uU Returns the vector after normalisation and dimensionality reduction N(R RRRRR(R R'((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR'as  cCs|ttj||ƒƒS(u7 Normalises the vector to unit length. (RRR(R R'((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyRksN(t__name__t __module__t__doc__tFalseRRR(RRR*R.R,R'R(((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyRs       cCs ||}ttj||ƒƒS(u} Returns the euclidean distance between vectors u and v. This is equivalent to the length of the vector (u - v). (RRR(R!tvtdiff((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyteuclidean_distanceqs cCs@dtj||ƒttj||ƒƒttj||ƒƒS(us Returns 1 minus the cosine of the angle between vectors v and u. This is equal to 1 - (u.v / |u||v|). i(RRR(R!R4((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pytcosine_distanceyst_DendrogramNodecBs,eZdZd„Zed„Zd„ZRS(u Tree node of a dendrogram. cGs||_||_dS(N(t_valuet _children(R tvaluetchildren((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyRƒs cCsX|jr=g}x'|jD]}|j|j|ƒƒqW|S|rM|jgS|gSdS(N(R:textendtleavesR9(R tvaluesR>tchild((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR>‡s  cCsä|j|fg}x›t|ƒ|kr¯|jƒ\}}|jsY|j||fƒPnxF|jD];}|jr‹|j|j|fƒqc|jd|fƒqcW|jƒqWg}x'|D]\}}|j|jƒƒq½W|S(Ni(R9RtpopR:tpushtappendtsortR>(R tntqueuetprioritytnodeR@tgroups((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyRI’s  (R0R1R2RtTrueR>RI(((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR8€s  t DendrogramcBsAeZdZgd„Zd„Zd„Zgd„Zd„ZRS(u Represents a dendrogram, a tree with a specified branching order. This must be initialised with the leaf items, then iteratively call merge for each branch. This class constructs a tree representing the order of calls to the merge function. cCsDg|D]}t|ƒ^q|_tj|jƒ|_d|_dS(us :param items: the items at the leaves of the dendrogram :type items: sequence of (any) iN(R8t_itemstcopyt_original_itemst_merge(R titemstitem((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyR±s"cGs‡t|ƒdkst‚t|jg|D]}|j|^q(Œ}|jd7_||j|dR3(tc((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pytüsu uc3s|]}|jˆƒVqdS(N(tcenter(t.0RQ(twidth(sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pys sN(u+u-u|(RRLR8RORNR9tmaxRRARR:tindextmintrangeRCRDtjoin(R t leaf_labelstJOINtHLINKtVLINKRUR>tlast_rowtleafR[R^RFt verticalsRGRHtchild_left_leafRRtmin_idxtmax_idxRSR@tvertical((RYRZRcsc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pytshowÖsV              #cCsWt|jƒdkr-t|j|jŒ}n |jd}|jtƒ}dt|ƒS(Niiu(RRLR8ROR>R3(R RUR>((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyt__repr__s  (R0R1R2RRTRIRtRu(((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyRK¨s   C(t __future__RRRMtsysRtmathRRt ImportErrortnltk.cluster.apiRt nltk.compatRRR6R7tobjectR8RK(((sc/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/cluster/util.pyts  \  (