є
<┐CVc           @  sЫ   d  d l  m Z m Z d  d l m Z d  d l m Z m Z d  d l m	 Z	 d  d l
 m Z e	 d e f d Д  Г  YГ Z d Д  Z e d	 k rЧ e Г  n  d
 S(   i    (   t   print_functiont   unicode_literals(   t   reduce(   t   Treet   ProbabilisticTree(   t   python_2_unicode_compatible(   t   ParserIt   ViterbiParserc           B  sn   e  Z d  Z d d Д Z d Д  Z d d Д Z d Д  Z d Д  Z d Д  Z d	 Д  Z	 d
 Д  Z
 d Д  Z d Д  Z RS(   u╖
  
    A bottom-up ``PCFG`` parser that uses dynamic programming to find
    the single most likely parse for a text.  The ``ViterbiParser`` parser
    parses texts by filling in a "most likely constituent table".
    This table records the most probable tree representation for any
    given span and node value.  In particular, it has an entry for
    every start index, end index, and node value, recording the most
    likely subtree that spans from the start index to the end index,
    and has the given node value.

    The ``ViterbiParser`` parser fills in this table incrementally.  It starts
    by filling in all entries for constituents that span one element
    of text (i.e., entries where the end index is one greater than the
    start index).  After it has filled in all table entries for
    constituents that span one element of text, it fills in the
    entries for constitutants that span two elements of text.  It
    continues filling in the entries for constituents spanning larger
    and larger portions of the text, until the entire table has been
    filled.  Finally, it returns the table entry for a constituent
    spanning the entire text, whose node value is the grammar's start
    symbol.

    In order to find the most likely constituent with a given span and
    node value, the ``ViterbiParser`` parser considers all productions that
    could produce that node value.  For each production, it finds all
    children that collectively cover the span and have the node values
    specified by the production's right hand side.  If the probability
    of the tree formed by applying the production to the children is
    greater than the probability of the current entry in the table,
    then the table is updated with this new tree.

    A pseudo-code description of the algorithm used by
    ``ViterbiParser`` is:

    | Create an empty most likely constituent table, *MLC*.
    | For width in 1...len(text):
    |   For start in 1...len(text)-width:
    |     For prod in grammar.productions:
    |       For each sequence of subtrees [t[1], t[2], ..., t[n]] in MLC,
    |         where t[i].label()==prod.rhs[i],
    |         and the sequence covers [start:start+width]:
    |           old_p = MLC[start, start+width, prod.lhs]
    |           new_p = P(t[1])P(t[1])...P(t[n])P(prod)
    |           if new_p > old_p:
    |             new_tree = Tree(prod.lhs, t[1], t[2], ..., t[n])
    |             MLC[start, start+width, prod.lhs] = new_tree
    | Return MLC[0, len(text), start_symbol]

    :type _grammar: PCFG
    :ivar _grammar: The grammar used to parse sentences.
    :type _trace: int
    :ivar _trace: The level of tracing output that should be generated
        when parsing a text.
    i    c         C  s   | |  _  | |  _ d S(   u▓  
        Create a new ``ViterbiParser`` parser, that uses ``grammar`` to
        parse texts.

        :type grammar: PCFG
        :param grammar: The grammar used to parse texts.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        N(   t   _grammart   _trace(   t   selft   grammart   trace(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   __init__L   s    	c         C  s   |  j  S(   N(   R   (   R
   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR   \   s    i   c         C  s   | |  _  d S(   uP  
        Set the level of tracing output that should be generated when
        parsing a text.

        :type trace: int
        :param trace: The trace level.  A trace level of ``0`` will
            generate no tracing output; and higher trace levels will
            produce more verbose tracing output.
        :rtype: None
        N(   R	   (   R
   R   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR   _   s    c   	      c  sn  t  | Г } |  j j | Г i  } |  j r< t d d Г n  xf t t | Г Г D]R } | | } | | | | d | f <|  j d k rO |  j | | t | Г Г qO qO WxЗ t d t | Г d Г D]l } |  j rу t d d | Г n  xE t t | Г | d Г D]) } | | | f } |  j | | | Г q■ Wq┐ W| j	 d t | Г |  j j
 Г  f Г } | d  k	 rj| Vn  d  S(   Nu%   Inserting tokens into the most likelyu    constituents table...i   u$   Finding the most likely constituentsu    spanning %d text elements...i    (   t   listR   t   check_coverageR	   t   printt   ranget   lent   _trace_lexical_insertiont   _add_constituents_spanningt   gett   startt   None(	   R
   t   tokenst   constituentst   indext   tokent   lengthR   t   spant   tree(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   parsel   s,    	 
  	!'c         C  s┤  t  } xз| rпt } |  j | | Г } xВ| D]z\ } } g  | D] } t | t Г rA | ^ qA }	 t d Д  |	 | j Г  Г }
 | j Г  j Г  } t	 | | d |
 Г} | j
 | d | d | j Г  f Г } |  j d k rZ| d	 k sё | | k rZ| d	 k s| j Г  | j Г  k  r(t d d d Гn t d d d Г|  j | |
 | t | Г Г qZn  | d	 k s~| j Г  | j Г  k  r. | | | d | d | j Г  f <t  } q. q. Wq	 Wd	 S(
   u*  
        Find any constituents that might cover ``span``, and add them
        to the most likely constituents table.

        :rtype: None
        :type span: tuple(int, int)
        :param span: The section of the text for which we are
            trying to find possible constituents.  The span is
            specified as a pair of integers, where the first integer
            is the index of the first token that should be included in
            the constituent; and the second integer is the index of
            the first token that should not be included in the
            constituent.  I.e., the constituent should cover
            ``text[span[0]:span[1]]``, where ``text`` is the text
            that we are parsing.

        :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
        :param constituents: The most likely constituents table.  This
            table records the most probable tree representation for
            any given span and node value.  In particular,
            ``constituents(s,e,nv)`` is the most likely
            ``ProbabilisticTree`` that covers ``text[s:e]``
            and has a node value ``nv.symbol()``, where ``text``
            is the text that we are parsing.  When
            ``_add_constituents_spanning`` is called, ``constituents``
            should contain all possible constituents that are shorter
            than ``span``.

        :type tokens: list of tokens
        :param tokens: The text we are parsing.  This is only used for
            trace output.
        c         S  s   |  | j  Г  S(   N(   t   prob(   t   prt   t(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   <lambda>╞   s    R    i    i   u
      Insert:t   endu    u
     Discard:N(   t   Truet   Falset   _find_instantiationst
   isinstanceR   R   R    t   lhst   symbolR   R   R	   R   R   t   _trace_productionR   (   R
   R   R   R   t   changedt   instantiationst
   productiont   childrent   ct   subtreest   pt   nodeR   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR   Ф   s(    $	(	&$"$!c         C  sc   g  } xV |  j  j Г  D]E } |  j | j Г  | | Г } x! | D] } | j | | f Г q> Wq W| S(   u	  
        :return: a list of the production instantiations that cover a
            given span of the text.  A "production instantiation" is
            a tuple containing a production and a list of children,
            where the production's right hand side matches the list of
            children; and the children cover ``span``.  :rtype: list
            of ``pair`` of ``Production``, (list of
            (``ProbabilisticTree`` or token.

        :type span: tuple(int, int)
        :param span: The section of the text for which we are
            trying to find production instantiations.  The span is
            specified as a pair of integers, where the first integer
            is the index of the first token that should be covered by
            the production instantiation; and the second integer is
            the index of the first token that should not be covered by
            the production instantiation.
        :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
        :param constituents: The most likely constituents table.  This
            table records the most probable tree representation for
            any given span and node value.  See the module
            documentation for more information.
        (   R   t   productionst
   _match_rhst   rhst   append(   R
   R   R   t   rvR.   t
   childlistst	   childlist(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR'   ┘   s    c         C  s▌   | \ } } | | k r+ | d k r+ g  g S| | k sC | d k rG g  Sg  } xЙ t  | | d Г D]t } | j | | | d f Г } | d k	 ra |  j | d | | f | Г }	 | g  |	 D] }
 | g |
 ^ q╕ 7} qa qa W| S(   ul  
        :return: a set of all the lists of children that cover ``span``
            and that match ``rhs``.
        :rtype: list(list(ProbabilisticTree or token)

        :type rhs: list(Nonterminal or any)
        :param rhs: The list specifying what kinds of children need to
            cover ``span``.  Each nonterminal in ``rhs`` specifies
            that the corresponding child should be a tree whose node
            value is that nonterminal's symbol.  Each terminal in ``rhs``
            specifies that the corresponding child should be a token
            whose type is that terminal.
        :type span: tuple(int, int)
        :param span: The section of the text for which we are
            trying to find child lists.  The span is specified as a
            pair of integers, where the first integer is the index of
            the first token that should be covered by the child list;
            and the second integer is the index of the first token
            that should not be covered by the child list.
        :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
        :param constituents: The most likely constituents table.  This
            table records the most probable tree representation for
            any given span and node value.  See the module
            documentation for more information.
        i   i    (    (    N(   R   R   R   R5   (   R
   R6   R   R   R   R$   R9   t   splitt   lt   rightst   r(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR5   ∙   s      +c         C  sД   d d | d } | d | d | d 7} | d | | d d 7} | d | 7} |  j  d k rv d	 | | f } n  t | Г d
 S(   uЫ  
        Print trace output indicating that a given production has been
        applied at a given location.

        :param production: The production that has been applied
        :type production: Production
        :param p: The probability of the tree produced by the production.
        :type p: float
        :param span: The span of the production
        :type span: tuple
        :rtype: None
        u   |u   .i    u   =i   u   | u   %si   u   %-40s %12.10f N(   R	   R   (   R
   R.   R2   R   t   widtht   str(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR+   #  s     c         C  sE   d d | d d | | d d } | d | f 7} t  | Г d  S(   Nu      Insert: |u   .u   =i   u   | u   %s(   R   (   R
   R   R   R?   R@   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR   9  s    &c         C  s   d |  j  S(   Nu   <ViterbiParser for %r>(   R   (   R
   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   __repr__>  s    (   t   __name__t
   __module__t   __doc__R   R   R   R   R   R'   R5   R+   R   RA   (    (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR      s   7		(	E	 	*		c          C  sF  d d l  }  d d l } d d l m } d d l m } d d l m } m } d | f d | f g } t	 Г  xW t
 t | Г Г D]C } t	 d | d	 | | d
 f Г t	 d | | d	 Г t	 Г  qА Wt	 d d	 t | Г f d d Гy3 t |  j j Г  j Г  Г d	 } | | \ }	 }
 Wn t	 d Г d SX|	 j Г  } | |
 Г } i  } t	 d |	 | |
 f Г | j d Г | j Г  } | j | Г } | j Г  | } | r┴t d Д  | d
 Г t | Г n d
 } t | Г } x | D] } d	 | | j Г  <q┌Wt	 Г  t	 d Г t	 d Г t	 d | | | f Г | j Г  } | rZt d Д  | d
 Г t | Г } n d
 } t	 d Г t	 d d t | Г | f Г t	 Г  t	 d d d Г|  j j Г  j Г  j Г  j d Г rщd d l m } t	 d Г | | М  n  t	 Г  t	 d d d Г|  j j Г  j Г  j Г  j d Г rBx | D] } t	 | Г q+Wn  d S(   uї   
    A demonstration of the probabilistic parsers.  The user is
    prompted to select which demo to run, and how many parses should
    be found; and then each parser is run on the same demo, and a
    summary of the results are displayed.
    i    N(   t   tokenize(   R   (   t	   toy_pcfg1t	   toy_pcfg2u   I saw the man with my telescopeu:   the boy saw Jack with Bob under the table with a telescopeu   %3s: %si   i    u        %ru   Which demo (%d-%d)? R$   u    u   Bad sentence numberu    
sent: %s
parser: %s
grammar: %si   c         S  s   |  | j  Г  S(   N(   R    (   t   at   b(    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR#   o  s    u)   Time (secs)   # Parses   Average P(parse)u)   -----------------------------------------u   %11.4f%11d%19.14fc         S  s   |  | j  Г  S(   N(   R    (   RH   RI   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyR#   |  s    u*   ------------------------------------------u   %11s%11d%19.14fu   n/au   Draw parses (y/n)? u   y(   t
   draw_treesu     please wait...u   Print parses (y/n)? (   t   syst   timet   nltkRE   t
   nltk.parseR   t   nltk.grammarRF   RG   R   R   R   t   intt   stdint   readlinet   stripR;   R   t	   parse_allR   t   freezet   keyst   lowert
   startswitht   nltk.draw.treeRJ   (   RK   RL   RE   R   RF   RG   t   demost   it   snumt   sentR   R   t   parsert
   all_parsesR"   t   parsest   averaget
   num_parsesR2   RJ   R   (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   demoF  sf    	  
+

"
$
$u   __main__N(   t
   __future__R    R   t	   functoolsR   t	   nltk.treeR   R   t   nltk.compatR   t   nltk.parse.apiR   R   Rc   RB   (    (    (    sd   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/viterbi.pyt   <module>   s     2	J