ó <¿CVc@s°dZddlmZmZddlmZddlmZmZm Z ddl m Z ddl m Z e de fd„ƒYƒZd „Zd „Zed kr¬eƒnd S( uË A classifier model that decides which label to assign to a token on the basis of a tree structure, where branches correspond to conditions on feature values, and leaves correspond to label assignments. iÿÿÿÿ(tprint_functiontunicode_literals(t defaultdict(tFreqDistt MLEProbDisttentropy(t ClassifierI(tpython_2_unicode_compatibletDecisionTreeClassifiercBsãeZdddd„Zd„Zd„Zd„Zdddd„Zddd„Zd „Z e d d d e de d „ƒZ e d„ƒZ e d„ƒZe de d„Ze e d„ƒZe d„ƒZe e d„ƒZRS(cCs(||_||_||_||_dS(uø :param label: The most likely label for tokens that reach this node in the decision tree. If this decision tree has no children, then this label will be assigned to any token that reaches this decision tree. :param feature_name: The name of the feature that this decision tree selects for. :param decisions: A dictionary mapping from feature values for the feature identified by ``feature_name`` to child decision trees. :param default: The child that will be used if the value of feature ``feature_name`` does not match any of the keys in ``decisions``. This is used when constructing binary decision trees. N(t_labelt_fnamet _decisionst_default(tselftlabelt feature_namet decisionstdefault((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyt__init__s   cCsƒ|jg}|jdk rKx-|jjƒD]}|j|jƒƒq+Wn|jdk rs|j|jjƒƒntt|ƒƒS(N( R R tNonetvaluestextendtlabelsR tlisttset(R Rtdt((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR,s cCsu|jdkr|jS|j|jƒ}||jkrK|j|j|ƒS|jdk rj|jj|ƒS|jSdS(N(R RR tgetR tclassifyR (R t featuresettfval((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR5scCsSd}x6|D].\}}|j|ƒ|kr |d7}q q Wt|ƒt|ƒS(Nii(Rtfloattlen(R tlabeled_featuresetsterrorsRR((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyterrorCs iFuic CsŠ|jd kr;|t|ƒd}d|d||jfSd}x¶tt|jjƒƒƒD]™\}\}}d||j|f} |dt| ƒ}|d| d||jf7}|jd k r]|dkr]||j||d|dƒ7}q]q]W|j d k r†|t|ƒd}|d |d||j jf7}|j jd k r†|dkr†||j j||d|dƒ7}q†n|S( u  Return a string containing a pretty-printed version of this decision tree. Each line in this string corresponds to a single decision tree node or leaf, and indentation is used to display the structure of the decision tree. iu%s%s %s u.uu %s%s=%s? iu iu%selse: %s %s N( R RRR t enumeratetsortedR titemst pretty_formatR ( R twidthtprefixtdepthtntstiRtresultthdr((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR&Js .(!*cCs}|jd kr d||jfSd}xŽt|jjƒƒD]w\}}|d||j|f7}|jd k r¢|dkr¢|d|j|d|dƒ7}q<|d|j7}q<W|jd k ryt|jƒdkr |d||jt |jj ƒƒd f7}n|d |f7}|jjd k rb|dkrb|d|jj|d|dƒ7}qy|d|jj7}n|S( u© Return a string representation of this decision tree that expresses the decisions it makes as a nested set of pseudocode if statements. u %sreturn %r uu%sif %s == %r: iu u u return %r u%sif %s != %r: iu%selse: N( R RR R$R R%t pseudocodeR RRtkeys(R R(R)R+RR-((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR/cs""%!(cCs |jƒS(N(R&(R ((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyt__str__~sgš™™™™™©?idi c Cstƒ}x2|D]*\}} x|D]} |j| ƒq#WqW|dkr¤|r¤ttƒ}xE|D]:\}} x+|jƒD]\} } || j| ƒq|WqcWn|sÂtj|||ƒ} ntj||||ƒ} | j|||d||||ƒ| S(u½ :param binary: If true, then treat all feature/value pairs as individual binary features, rather than using a single n-way branch for each feature. iN( RtaddRRR%Rt best_stumptbest_binary_stumptrefine( R tentropy_cutofft depth_cutofftsupport_cutofftbinarytfeature_valuestverboset feature_namesRRtfnameRttree((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyttrains"   cCs&td„|Dƒƒjƒ}t|ƒS(Ncss|]\}}|VqdS(N((t.0RR((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pys ¨s(RtmaxR(R R((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pytleaf¦s csŽtd„|Dƒƒjƒ}ttƒ‰x7|D]/\}}|j|ƒ}ˆ||cd7®sic3s+|]!}|tˆ|jƒƒfVqdS(N(RRA(R@tval(tfreqs(sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pys ·s(RRARRtdictR(RR RRt feature_valueR((RDsl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pytstump¬s   c CsŽt|ƒ|krdS|jdkr)dS|dkr9dSx§|jD]œ}g|D]0\} } | j|jƒ|krP| | f^qP} td„| Dƒƒ} tt| ƒƒ|krCtj | ||||||ƒ|j|Æscss|]\}}|VqdS(N((R@RR((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pys Ñs( RR RR RRRRRR?R (R R R6R7R8R9R:R;RRRtfval_featuresetst label_freqstdefault_featuresets((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR5»s6 *       cCs˜tj|ƒ}|j|ƒ}xJ|D]B}tj||ƒ}|j|ƒ}||kr%|}|}q%q%W|r”tdt|ƒ|j|fƒn|S(Nu,best stump for %6d toks uses %-20s err=%6.4f(RRBR"RGtprintRR (R<R R;R3t best_errorR=RGt stump_error((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR3Øs   c Csñtd„|Dƒƒjƒ}tƒ}tƒ}xL|D]D\}}|j|ƒ|kri||cd7ésii(RRARtNR( RRFR Rt pos_fdistt neg_fdistRRR((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyt binary_stumpçs   c Csåtj|ƒ}|j|ƒ}xb|D]Z}xQ||D]E}tj|||ƒ}|j|ƒ} | |kr6| }|}q6q6Wq%W|jrµd|jt|jjƒƒdf} nd} |rátdt |ƒ| |fƒn|S(Nu%s=%siu (default)u,best stump for %6d toks uses %-20s err=%6.4f( RRBR"RQR R RR0RKR( R<R R:R;R3RLR=RRGRMtdescr((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyR4s$     N(t__name__t __module__RRRRR"R&R/R1t staticmethodtFalseR?RBRGR5R3RQR4(((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyRs(    " cCstj|dtdtƒS(NR9R;(RR?tTrue(tx((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pytfscCsUddlm}m}|t|ƒ}t|jddƒƒt|jddƒƒdS(Niÿÿÿÿ(t names_demotbinary_names_demo_featuresR)i(tnltk.classify.utilRZR[RYRKtppR/(RZR[t classifier((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pytdemos  u__main__N(t__doc__t __future__RRt collectionsRtnltk.probabilityRRRtnltk.classify.apiRt nltk.compatRRRYR_RS(((sl/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/decisiontree.pyt sÿ