ó
<¿CVc           @  s×   d  Z  d d l m Z d d l m Z m Z m Z d d l m Z d d l	 m
 Z
 m Z d d l m Z d d l m Z m Z d d d d d	 „ Z d
 „  Z d „  Z d e f d „  ƒ  YZ d d d „ Z e e _ d S(   s    
Utility functions for parsers.
iÿÿÿÿ(   t   print_function(   t   CFGt   FeatureGrammart   PCFG(   t   load(   t   Chartt   ChartParser(   t   InsideChartParser(   t   FeatureChartt   FeatureChartParseri    c         K  sú   t  |  |  } t | t ƒ s- t d ƒ ‚ n  t | t ƒ rg | d k rQ t } n  | | d | d | ƒSt | t ƒ r¶ | d k r‹ t } n  | d k r  t	 } n  | | d | d | ƒS| d k rË t
 } n  | d k rà t } n  | | d | d | ƒSd S(   s¦  
    Load a grammar from a file, and build a parser based on that grammar.
    The parser depends on the grammar format, and might also depend
    on properties of the grammar itself.

    The following grammar formats are currently supported:
      - ``'cfg'``  (CFGs: ``CFG``)
      - ``'pcfg'`` (probabilistic CFGs: ``PCFG``)
      - ``'fcfg'`` (feature-based CFGs: ``FeatureGrammar``)

    :type grammar_url: str
    :param grammar_url: A URL specifying where the grammar is located.
        The default protocol is ``"nltk:"``, which searches for the file
        in the the NLTK data package.
    :type trace: int
    :param trace: The level of tracing that should be used when
        parsing a text.  ``0`` will generate no tracing output;
        and higher numbers will produce more verbose tracing output.
    :param parser: The class used for parsing; should be ``ChartParser``
        or a subclass.
        If None, the class depends on the grammar format.
    :param chart_class: The class used for storing the chart;
        should be ``Chart`` or a subclass.
        Only used for CFGs and feature CFGs.
        If None, the chart class depends on the grammar format.
    :type beam_size: int
    :param beam_size: The maximum length for the parser's edge queue.
        Only used for probabilistic CFGs.
    :param load_args: Keyword parameters used when loading the grammar.
        See ``data.load`` for more information.
    s1   The grammar must be a CFG, or a subclass thereof.t   tracet	   beam_sizet   chart_classN(   R   t
   isinstanceR   t
   ValueErrorR   t   NoneR   R   R	   R   R   R   (   t   grammar_urlR
   t   parserR   R   t	   load_argst   grammar(    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   load_parser   s$    "					c         c  so   xh t  |  d d ƒD]T \ } \ } } t | ƒ | d | | d d d d d g
 } d j | ƒ d } | Vq Wd S(	   st  
	A module to convert a single POS tagged sentence into CONLL format.
	
	>>> from nltk import word_tokenize, pos_tag
	>>> text = "This is a foobar sentence."
	>>> for line in taggedsent_to_conll(pos_tag(word_tokenize(text))):
	... 	print(line, end="")
        1	This	_	DT	DT	_	0	a	_	_
        2	is	_	VBZ	VBZ	_	0	a	_	_
        3	a	_	DT	DT	_	0	a	_	_
        4	foobar	_	JJ	JJ	_	0	a	_	_
        5	sentence	_	NN	NN	_	0	a	_	_
        6	.		_	.	.	_	0	a	_	_
	
	:param sentence: A single input sentence to parse
	:type sentence: list(tuple(str, str))
	:rtype: iter(str) 
	:return: a generator yielding a single sentence in CONLL format.
	t   starti   t   _t   0t   as   	s   
N(   t	   enumeratet   strt   join(   t   sentencet   it   wordt   tagt	   input_str(    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   taggedsent_to_conllO   s    %*c         c  s6   x/ |  D]' } x t  | ƒ D] } | Vq Wd Vq Wd S(   sK  
	A module to convert the a POS tagged document stream
	(i.e. list of list of tuples, a list of sentences) and yield lines 
	in CONLL format. This module yields one line per word and two newlines 
	for end of sentence. 

	>>> from nltk import word_tokenize, sent_tokenize, pos_tag
	>>> text = "This is a foobar sentence. Is that right?"
	>>> sentences = [pos_tag(word_tokenize(sent)) for sent in sent_tokenize(text)]
	>>> for line in taggedsents_to_conll(sentences):
        ...     if line:
	...         print(line, end="")
        1	This	_	DT	DT	_	0	a	_	_
        2	is	_	VBZ	VBZ	_	0	a	_	_
        3	a	_	DT	DT	_	0	a	_	_
        4	foobar	_	JJ	JJ	_	0	a	_	_
        5	sentence	_	NN	NN	_	0	a	_	_
        6	.		_	.	.	_	0	a	_	_
        <BLANKLINE>
        <BLANKLINE>
        1	Is	_	VBZ	VBZ	_	0	a	_	_
        2	that	_	IN	IN	_	0	a	_	_
        3	right	_	NN	NN	_	0	a	_	_
        4	?	_	.	.	_	0	a	_	_
        <BLANKLINE>
        <BLANKLINE>

	:param sentences: Input sentences to parse
	:type sentence: list(list(tuple(str, str)))
	:rtype: iter(str) 
	:return: a generator yielding sentences in CONLL format.
	s   

N(   R!   (   t	   sentencesR   R    (    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   taggedsents_to_conlli   s    !	t   TestGrammarc           B  s)   e  Z d  Z d d d „ Z e d „ Z RS(   s   
    Unit tests for  CFG.
    c         C  s=   | |  _  t | d d ƒ|  _ | |  _ | |  _ | |  _ d  S(   NR
   i    (   t   test_grammarR   t   cpt   suitet   _acceptt   _reject(   t   selfR   R'   t   acceptt   reject(    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   __init__—   s
    			c   
      C  s'  x |  j  D]} t | d d d d ƒxÛ d d g D]Í } xÄ | | D]¸ } | j ƒ  } t |  j j | ƒ ƒ } | r« | r« t ƒ  t | ƒ x | D] } t | ƒ q” Wn  | d k rß | g  k rÖ t d | ƒ ‚ qþ t } qF | rø t d | ƒ ‚ qF t }	 qF Wq5 W| r
 |	 r
 t d	 ƒ q
 q
 Wd
 S(   s|  
        Sentences in the test suite are divided into two classes:
         - grammatical (``accept``) and
         - ungrammatical (``reject``).
        If a sentence should parse accordng to the grammar, the value of
        ``trees`` will be a non-empty list. If a sentence should be rejected
        according to the grammar, then the value of ``trees`` will be None.
        t   doct   :t   endt    R+   R,   s   Sentence '%s' failed to parse's   Sentence '%s' received a parse's   All tests passed!N(   R'   t   printt   splitt   listR&   t   parseR   t   True(
   R*   t
   show_treest   testt   keyt   sentt   tokenst   treest   treet   acceptedt   rejected(    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   run    s(    	
	N(   t   __name__t
   __module__t   __doc__R   R-   t   FalseR@   (    (    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyR$   “   s   	s   #%;c         C  s	  | d k	 r |  j | ƒ }  n  g  } xÞ |  j d ƒ D]Í } | d k s4 | d | k r\ q4 n  | j d d ƒ } d } t | ƒ d k rÐ | d d k r³ | d d k } | d } qÐ t | d ƒ } | d } n  | j ƒ  } | g  k rî q4 n  | | | f g 7} q4 W| S(   sŽ  
    Parses a string with one test sentence per line.
    Lines can optionally begin with:
      - a bool, saying if the sentence is grammatical or not, or
      - an int, giving the number of parse trees is should have,
    The result information is followed by a colon, and then the sentence.
    Empty lines and lines beginning with a comment char are ignored.

    :return: a list of tuple of sentences and expected results,
        where a sentence is a list of str,
        and a result is None, or bool, or int

    :param comment_chars: ``str`` of possible comment characters.
    :param encoding: the encoding of the string, if it is binary
    s   
t    i    R/   i   i   R6   t   trueRD   t   falseN(   s   Trues   trues   Falses   false(   s   Trues   true(   R   t   decodeR3   t   lent   int(   t   stringt   comment_charst   encodingR"   R   t
   split_infot   resultR;   (    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   extract_test_sentencesÁ   s&    N(   RC   t
   __future__R    t   nltk.grammarR   R   R   t	   nltk.dataR   t   nltk.parse.chartR   R   t   nltk.parse.pchartR   t   nltk.parse.featurechartR   R	   R   R   R!   R#   t   objectR$   RP   RD   t   __test__(    (    (    sa   /private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/util.pyt   <module>   s   7		*.&