ó <¿CVc@s3ddlmZddlZddlZddlZddlZddlmZddlm Z ddl m Z ddl m Z mZmZmZmZddlmZddlmZdd lmZd Zd efd „ƒYZd efd„ƒYZdefd„ƒYZdefd„ƒYZd„ZdS(iÿÿÿÿ(tunicode_literalsN(tPIPE(tStringIO(tcompat(tfind_jart find_jar_itert config_javatjavat _java_options(tParserI(tDependencyGraph(tTreeu1http://nlp.stanford.edu/software/lex-parser.shtmltGenericStanfordParsercBs›eZdZdZdZdZeZeZddddeddd„Z d „Z ed „Z ed „Z ed „Zed „Zed„Zed„ZRS(u Interface to the Stanford Parseru+stanford-parser-(\d+)(\.(\d+))+-models\.jarustanford-parser\.jaru3edu.stanford.nlp.parser.lexparser.LexicalizedParseru4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzuutf8u-mx1000muc sÁttˆj|dd dd dtd|dtƒd‡fd †ƒ}ttˆj|dddddtd|dtƒd‡fd †ƒ} || fˆ_|ˆ_|ˆ_|ˆ_ |ˆ_ dS(Ntenv_varsuSTANFORD_PARSERuSTANFORD_CORENLPt searchpathturltverbosetis_regextkeycstjˆj|ƒS(N(tretmatcht_JAR(t model_name(tself(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt3suSTANFORD_MODELScstjˆj|ƒS(N(RRt_MODEL_JAR_PATTERN(R(R(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR=s(uSTANFORD_PARSERuSTANFORD_CORENLP((uSTANFORD_MODELSuSTANFORD_CORENLP(( tmaxRRt _stanford_urltTrueRt _classpatht model_patht _encodingtcorenlp_optionst java_options( Rt path_to_jartpath_to_models_jarRtencodingRR!R t stanford_jart model_jar((Rse/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt__init__&s&       cCsëg}g}g}t}xÆ|jtƒD]µ}|dkrÊ|rb|jt|ƒƒg}t}qÝ|jr™|j|jdj|ƒƒƒg}t}qÝ|jt|jdj|ƒƒgƒƒg}q(|j|ƒt}q(Wt|ƒS(Nuu (tFalset splitlinestappendtitert_DOUBLE_SPACED_OUTPUTt _make_treetjoinR(Rtoutput_trest cur_linest cur_treestblanktline((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt_parse_trees_outputGs&    (   c Cs\|jd|jddd|jdddg }|j|j|djd „|Dƒƒ|ƒƒS( uâ Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list where each sentence is a list of words. Each sentence will be automatically tagged with this StanfordParser instance's tagger. If whitespaces exists inside a token, then the token will be treated as separate tokens. :param sentences: Input sentences to parse :type sentences: list(list(str)) :rtype: iter(iter(Tree)) u-modelu -sentencesunewlineu -outputFormatu -tokenizedu-escaperu-edu.stanford.nlp.process.PTBEscapingProcessoru css|]}dj|ƒVqdS(u N(R.(t.0tsentence((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ts(t _MAIN_CLASSRt_OUTPUT_FORMATR5t_executeR.(Rt sentencesRtcmd((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt parse_sents^s    cCst|j|g|ƒƒS(u& Use StanfordParser to parse a sentence. Takes a sentence as a string; before parsing, it will be automatically tokenized and tagged by the Stanford Parser. :param sentence: Input sentence to parse :type sentence: str :rtype: iter(Tree) (tnexttraw_parse_sents(RR7R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt raw_parsevs cCsI|jd|jddd|jg}|j|j|dj|ƒ|ƒƒS(uI Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list of strings. Each sentence will be automatically tokenized and tagged by the Stanford Parser. :param sentences: Input sentences to parse :type sentences: list(str) :rtype: iter(iter(Tree)) u-modelu -sentencesunewlineu -outputFormatu (R8RR9R5R:R.(RR;RR<((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR?‚s  cCst|j|g|ƒƒS(u0 Use StanfordParser to parse a sentence. Takes a sentence as a list of (word, tag) tuples; the sentence must have already been tokenized and tagged. :param sentence: Input sentence to parse :type sentence: list(tuple(str, str)) :rtype: iter(Tree) (R>ttagged_parse_sents(RR7R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt tagged_parse”s cstd‰|jd|jddd|jddˆdd d d g}|j|j|d j‡fd †|Dƒƒ|ƒƒS(ud Use StanfordParser to parse multiple sentences. Takes multiple sentences where each sentence is a list of (word, tag) tuples. The sentences must have already been tokenized and tagged. :param sentences: Input sentences to parse :type sentences: list(list(tuple(str, str))) :rtype: iter(iter(Tree)) u/u-modelu -sentencesunewlineu -outputFormatu -tokenizedu -tagSeparatoru-tokenizerFactoryu,edu.stanford.nlp.process.WhitespaceTokenizeru-tokenizerMethodunewCoreLabelTokenizerFactoryu c3s.|]$}dj‡fd†|DƒƒVqdS(u c3s|]}ˆj|ƒVqdS(N(R.(R6ttagged(t tag_separator(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ·sN(R.(R6R7(RD(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ·s(R8RR9R5R:R.(RR;RR<((RDse/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRA s     c Csr|j}|jd|gƒ|jr8|j|jƒndjtƒ}td|jd|ƒtj dddt ƒÔ}t |t j ƒr¢|r¢|j|ƒ}n|j|ƒ|jƒ|jrÿ|jdƒt|d |jd |d td tƒ\}}n7|j|jƒt|d |jd td tƒ\}}|j|ƒ}WdQXtj|jƒtd|dt ƒ|S( Nu -encodingu toptionsRtmodeuwbtdeleteit classpathtstdintstdouttstderr(RtextendR R*R.RRR!ttempfiletNamedTemporaryFileR(t isinstanceRt text_typetencodetwritetflusht _USE_STDINtseekRRRtnametdecodetostunlink( RR<tinput_RR$tdefault_optionst input_fileRJRK((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR:¹s,      N(t__name__t __module__t__doc__RRR8R(RTR,tNoneR'R5R=R@R?RBRAR:(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR s"    tStanfordParsercBseZdZdZd„ZRS(u} >>> parser=StanfordParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents(( ... "the quick brown fox jumps over the lazy dog", ... "the quick grey wolf jumps over the lazy fox" ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']), Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] upenncCs tj|ƒS(N(R t fromstring(Rtresult((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-s(R]R^R_R9R-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRaßs1tStanfordDependencyParsercBseZdZdZd„ZRS(uT >>> dep_parser=StanfordDependencyParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] u conll2007cCst|ddƒS(Nttop_relation_labeluroot(R (RRc((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-Ls(R]R^R_R9R-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRds0tStanfordNeuralDependencyParsercBsPeZdZdZdZdZdZeZeZ d„Z e d„Z d„Z RS(uÆ >>> from nltk.parse.stanford import StanfordNeuralDependencyParser >>> dep_parser=StanfordNeuralDependencyParser() >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] uconllu)edu.stanford.nlp.pipeline.StanfordCoreNLPu%stanford-corenlp-(\d+)(\.(\d+))+\.jaru,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarcOs,tt|ƒj||Ž|jd7_dS(Nu(-annotators tokenize,ssplit,pos,depparse(tsuperRfR'R (Rtargstkwargs((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR'sscCstdƒ‚dS(u¶ Currently unimplemented because the neural dependency parser (and the StanfordCoreNLP pipeline class) doesn't support passing in pre- tagged tokens. uxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.N(tNotImplementedError(RR;R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRAwscCst|ddƒS(NReuROOT(R (RRc((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-ƒs(R]R^R_R9R8RRRRTR,R'R(RAR-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRfPs  cCsLddlm}ytddƒtƒWntk rG|dƒ‚nXdS(Niÿÿÿÿ(tSkipTestRu4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzundoctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn't exist(tnoseRkRaRft LookupError(tmoduleRk((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt setup_module‡s  (t __future__RRMRXRtwarningst subprocessRtioRtnltkRtnltk.internalsRRRRRtnltk.parse.apiR tnltk.parse.dependencygraphR t nltk.treeR RR RaRdRfRo(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt s"    (Ã987