є
<┐CVc           @  sЧ   d  Z  d d l m Z m Z d d l m Z d d l m Z m Z m	 Z	 m
 Z
 d d l m Z d e f d Д  Г  YZ d Д  Z e d	 k rУ e Г  n  d
 S(   uы  
A classifier based on the Naive Bayes algorithm.  In order to find the
probability for a label, this algorithm first uses the Bayes rule to
express P(label|features) in terms of P(label) and P(features|label):

|                       P(label) * P(features|label)
|  P(label|features) = ------------------------------
|                              P(features)

The algorithm then makes the 'naive' assumption that all features are
independent, given the label:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                                         P(features)

Rather than computing P(featues) explicitly, the algorithm just
calculates the denominator for each label, and normalizes them so they
sum to one:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
i    (   t   print_functiont   unicode_literals(   t   defaultdict(   t   FreqDistt   DictionaryProbDistt   ELEProbDistt   sum_logs(   t   ClassifierIt   NaiveBayesClassifierc           B  s\   e  Z d  Z d Д  Z d Д  Z d Д  Z d Д  Z d d Д Z d d Д Z e	 e
 d	 Д Г Z RS(
   u  
    A Naive Bayes classifier.  Naive Bayes classifiers are
    paramaterized by two probability distributions:

      - P(label) gives the probability that an input will receive each
        label, given no information about the input's features.

      - P(fname=fval|label) gives the probability that a given feature
        (fname) will receive a given value (fval), given that the
        label (label).

    If the classifier encounters an input with a feature that has
    never been seen with any label, then rather than assigning a
    probability of 0 to all labels, it will ignore that feature.

    The feature value 'None' is reserved for unseen feature values;
    you generally should not use 'None' as a feature value for one of
    your own features.
    c         C  s+   | |  _  | |  _ t | j Г  Г |  _ d S(   u=  
        :param label_probdist: P(label), the probability distribution
            over labels.  It is expressed as a ``ProbDistI`` whose
            samples are labels.  I.e., P(label) =
            ``label_probdist.prob(label)``.

        :param feature_probdist: P(fname=fval|label), the probability
            distribution for feature values, given labels.  It is
            expressed as a dictionary whose keys are ``(label, fname)``
            pairs and whose values are ``ProbDistI`` objects over feature
            values.  I.e., P(fname=fval|label) =
            ``feature_probdist[label,fname].prob(fval)``.  If a given
            ``(label,fname)`` is not a key in ``feature_probdist``, then
            it is assumed that the corresponding P(fname=fval|label)
            is 0 for all values of ``fval``.
        N(   t   _label_probdistt   _feature_probdistt   listt   samplest   _labels(   t   selft   label_probdistt   feature_probdist(    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   __init__?   s    		c         C  s   |  j  S(   N(   R   (   R   (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   labelsT   s    c         C  s   |  j  | Г j Г  S(   N(   t   prob_classifyt   max(   R   t
   featureset(    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   classifyW   s    c         C  s.  | j  Г  } xN t | j Г  Г D]: } x1 |  j D] } | | f |  j k r/ Pq/ q/ W| | =q Wi  } x' |  j D] } |  j j | Г | | <qm WxИ |  j D]} } xt | j Г  D]f \ } } | | f |  j k r· |  j | | f } | | c | j | Г 7<qк | | c t g  Г 7<qк WqЧ Wt	 | d t
 d t
 ГS(   Nt	   normalizet   log(   t   copyR   t   keysR   R
   R	   t   logprobt   itemsR   R   t   True(   R   R   t   fnamet   labelR   t   fvalt   feature_probs(    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyR   Z   s     i
   c           sE  |  j  Й  t d Г x+|  j | Г D]\ Й Й З  З З f d Ж  } t g  |  j D]( } И И  | И f j Г  k rQ | ^ qQ d | Г} t | Г d k rа q# n  | d } | d } И  | И f j И Г d k r▄ d } n4 d И  | И f j И Г И  | И f j И Г } t d	 И И d
 | d  d
 | d  | f Г q# Wd  S(   Nu   Most Informative Featuresc           s   И  |  И f j  И Г S(   N(   t   prob(   t   l(   t   cpdistR   R    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt	   labelprobБ   s    t   keyi   i    i    u   INFu   %8.1fu"   %24s = %-14r %6s : %-6s = %s : 1.0u   %si   (   R
   t   printt   most_informative_featurest   sortedR   R   t   lenR"   (   R   t   nR%   R#   R   t   l0t   l1t   ratio(    (   R$   R   R    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   show_most_informative_features{   s"    	
(	

	id   c   	        s   t  Г  } t d Д  Г Й  t d Д  Г Й x░ |  j j Г  D]Я \ \ } } } xК | j Г  D]| } | | f } | j | Г | j | Г } t | И  | Г И  | <t | И | Г И | <И | d k rV | j	 | Г qV qV Wq7 Wt
 | d З  З f d Ж  Г} | |  S(   uЧ  
        Return a list of the 'most informative' features used by this
        classifier.  For the purpose of this function, the
        informativeness of a feature ``(fname,fval)`` is equal to the
        highest value of P(fname=fval|label), for any label, divided by
        the lowest value of P(fname=fval|label), for any label:

        |  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
        c           S  s   d S(   Ng        (    (    (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   <lambda>б   s    c           S  s   d S(   Ng      Ё?(    (    (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyR0   в   s    i    R&   c           s   И |  И  |  S(   N(    (   t   feature_(   t   maxprobt   minprob(    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyR0   ▒   s    (   t   setR   R
   R   R   t   addR"   R   t   mint   discardR)   (	   R   R+   t   featuresR   R   t   probdistR    t   featuret   p(    (   R2   R3   sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyR(   У   s    	"	c         C  sШ  t  Г  } t t  Г } t t Г } t Г  } xy | D]q \ } } | | c d 7<xR | j Г  D]D \ }	 }
 | | |	 f |
 c d 7<| |	 j |
 Г | j |	 Г qZ Wq1 WxБ | D]y } | | } xf | D]^ }	 | | |	 f j Г  } | | d k r─ | | |	 f d c | | 7<| |	 j d Г q─ q─ Wqн W| | Г } i  } xL | j Г  D]> \ \ } }	 } | | d t | |	 Г Г} | | | |	 f <qIW|  | | Г S(   uЛ   
        :param labeled_featuresets: A list of classified featuresets,
            i.e., a list of tuples ``(featureset, label)``.
        i   i    t   binsN(   R   R   R4   R   R5   t   Nt   NoneR*   (   t   clst   labeled_featuresetst	   estimatort   label_freqdistt   feature_freqdistt   feature_valuest   fnamesR   R   R   R    t   num_samplest   countR   R   t   freqdistR9   (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   train╡   s.    		
(   t   __name__t
   __module__t   __doc__R   R   R   R   R/   R(   t   classmethodR   RI   (    (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyR   +   s   				!"c          C  s-   d d l  m }  |  t j Г } | j Г  d  S(   Ni    (   t
   names_demo(   t   nltk.classify.utilRN   R   RI   R/   (   RN   t
   classifier(    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   demoъ   s    u   __main__N(   RL   t
   __future__R    R   t   collectionsR   t   nltk.probabilityR   R   R   R   t   nltk.classify.apiR   R   RQ   RJ   (    (    (    sj   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/classify/naivebayes.pyt   <module>   s   "┐	