є <┐CVc@s3ddlmZddlZddlZddlZddlZddlmZddlm Z ddl mZddlm Z mZmZmZmZddlmZddlmZdd lmZd ZdefdДГYZd efdДГYZdefdДГYZdefdДГYZdДZdS(i (tunicode_literalsN(tPIPE(tStringIO(tcompat(tfind_jart find_jar_itertconfig_javatjavat _java_options(tParserI(tDependencyGraph(tTreeu1http://nlp.stanford.edu/software/lex-parser.shtmltGenericStanfordParsercBsЫeZdZdZdZdZeZeZddddedddДZ d ДZed ДZedДZ edДZed ДZedДZedДZRS(u Interface to the Stanford Parseru+stanford-parser-(\d+)(\.(\d+))+-models\.jarustanford-parser\.jaru3edu.stanford.nlp.parser.lexparser.LexicalizedParseru4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzuutf8u-mx1000muc s┴ttИj|dddd dtd|dtГdЗfd ЖГ}ttИj|dddddtd|dtГdЗfdЖГ} || fИ_|И_|И_|И_ |И_ dS(Ntenv_varsuSTANFORD_PARSERuSTANFORD_CORENLPt searchpathturltverbosetis_regextkeycstjИj|ГS(N(tretmatcht_JAR(t model_name(tself(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt3suSTANFORD_MODELScstjИj|ГS(N(RRt_MODEL_JAR_PATTERN(R(R(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR=s(uSTANFORD_PARSERuSTANFORD_CORENLP((uSTANFORD_MODELSuSTANFORD_CORENLP((tmaxRRt _stanford_urltTrueRt _classpatht model_patht _encodingtcorenlp_optionstjava_options( Rtpath_to_jartpath_to_models_jarRtencodingRR!R tstanford_jart model_jar((Rse/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt__init__&s& cCsыg}g}g}t}x╞|jtГD]╡}|dkr╩|rb|jt|ГГg}t}q▌|jrЩ|j|jdj|ГГГg}t}q▌|jt|jdj|ГГgГГg}q(|j|Гt}q(Wt|ГS(Nuu (tFalset splitlinestappendtitert_DOUBLE_SPACED_OUTPUTt _make_treetjoinR(Rtoutput_trest cur_linest cur_treestblanktline((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt_parse_trees_outputGs& ( c Cs\|jd|jddd|jdddg }|j|j|djd Д|DГГ|ГГS( uт Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list where each sentence is a list of words. Each sentence will be automatically tagged with this StanfordParser instance's tagger. If whitespaces exists inside a token, then the token will be treated as separate tokens. :param sentences: Input sentences to parse :type sentences: list(list(str)) :rtype: iter(iter(Tree)) u-modelu -sentencesunewlineu -outputFormatu -tokenizedu-escaperu-edu.stanford.nlp.process.PTBEscapingProcessoru css|]}dj|ГVqdS(u N(R.(t.0tsentence((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ts(t_MAIN_CLASSRt_OUTPUT_FORMATR5t_executeR.(Rt sentencesRtcmd((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pytparse_sents^s cCst|j|g|ГГS(u& Use StanfordParser to parse a sentence. Takes a sentence as a string; before parsing, it will be automatically tokenized and tagged by the Stanford Parser. :param sentence: Input sentence to parse :type sentence: str :rtype: iter(Tree) (tnexttraw_parse_sents(RR7R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt raw_parsevs cCsI|jd|jddd|jg}|j|j|dj|Г|ГГS(uI Use StanfordParser to parse multiple sentences. Takes multiple sentences as a list of strings. Each sentence will be automatically tokenized and tagged by the Stanford Parser. :param sentences: Input sentences to parse :type sentences: list(str) :rtype: iter(iter(Tree)) u-modelu -sentencesunewlineu -outputFormatu (R8RR9R5R:R.(RR;RR<((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR?Вs cCst|j|g|ГГS(u0 Use StanfordParser to parse a sentence. Takes a sentence as a list of (word, tag) tuples; the sentence must have already been tokenized and tagged. :param sentence: Input sentence to parse :type sentence: list(tuple(str, str)) :rtype: iter(Tree) (R>ttagged_parse_sents(RR7R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyttagged_parseФs cstdЙ|jd|jddd|jddИdd d dg}|j|j|djЗfd Ж|DГГ|ГГS(ud Use StanfordParser to parse multiple sentences. Takes multiple sentences where each sentence is a list of (word, tag) tuples. The sentences must have already been tokenized and tagged. :param sentences: Input sentences to parse :type sentences: list(list(tuple(str, str))) :rtype: iter(iter(Tree)) u/u-modelu -sentencesunewlineu -outputFormatu -tokenizedu -tagSeparatoru-tokenizerFactoryu,edu.stanford.nlp.process.WhitespaceTokenizeru-tokenizerMethodunewCoreLabelTokenizerFactoryu c3s.|]$}djЗfdЖ|DГГVqdS(u c3s|]}Иj|ГVqdS(N(R.(R6ttagged(t tag_separator(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ╖sN(R.(R6R7(RD(se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pys ╖s(R8RR9R5R:R.(RR;RR<((RDse/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRAаs c Csr|j}|jd|gГ|jr8|j|jГndjtГ}td|jd|Гtj dddt ГП╘}t|tj Гrв|rв|j|Г}n|j|Г|jГ|jr |jdГt|d |jd |dtdtГ\}}n7|j|jГt|d |jdtdtГ\}}|j|Г}WdQXtj|jГtd|dt Г|S( Nu -encodingu toptionsRtmodeuwbtdeleteit classpathtstdintstdouttstderr(RtextendR R*R.RRR!ttempfiletNamedTemporaryFileR(t isinstanceRt text_typetencodetwritetflusht _USE_STDINtseekRRRtnametdecodetostunlink( RR<tinput_RR$tdefault_optionst input_fileRJRK((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR:╣s, N(t__name__t __module__t__doc__RRR8R(RTR,tNoneR'R5R=R@R?RBRAR:(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRs" tStanfordParsercBseZdZdZdДZRS(u} >>> parser=StanfordParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) >>> list(parser.raw_parse("the quick brown fox jumps over the lazy dog")) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.raw_parse_sents(( ... "the quick brown fox jumps over the lazy dog", ... "the quick grey wolf jumps over the lazy fox" ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])])])]), Tree('ROOT', [Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['quick']), Tree('JJ', ['grey']), Tree('NN', ['wolf'])]), Tree('NP', [Tree('NP', [Tree('NNS', ['jumps'])]), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['fox'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('PRP', ['I'])]), Tree('VP', [Tree('VBP', ["'m"]), Tree('NP', [Tree('DT', ['a']), Tree('NN', ['dog'])])])])]), Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['This'])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('PRP$', ['my']), Tree('NNS', ['friends']), Tree('POS', ["'"])]), Tree('NN', ['cat'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']), Tree('NP', [Tree('DT', ['the']), Tree('NN', ['tabby'])]), Tree('-RRB-', ['-RRB-'])])])])])])] >>> sum([list(dep_graphs) for dep_graphs in parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE [Tree('ROOT', [Tree('S', [Tree('NP', [Tree('DT', ['The']), Tree('JJ', ['quick']), Tree('JJ', ['brown']), Tree('NN', ['fox'])]), Tree('VP', [Tree('VBD', ['jumped']), Tree('PP', [Tree('IN', ['over']), Tree('NP', [Tree('DT', ['the']), Tree('JJ', ['lazy']), Tree('NN', ['dog'])])])]), Tree('.', ['.'])])])] upenncCs tj|ГS(N(Rt fromstring(Rtresult((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-s(R]R^R_R9R-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRa▀s1tStanfordDependencyParsercBseZdZdZdДZRS(uT >>> dep_parser=StanfordDependencyParser( ... model_path="edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz" ... ) >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] >>> sum([[list(parse.triples()) for parse in dep_graphs] for dep_graphs in dep_parser.tagged_parse_sents(( ... ( ... ("The", "DT"), ... ("quick", "JJ"), ... ("brown", "JJ"), ... ("fox", "NN"), ... ("jumped", "VBD"), ... ("over", "IN"), ... ("the", "DT"), ... ("lazy", "JJ"), ... ("dog", "NN"), ... (".", "."), ... ), ... ))],[]) # doctest: +NORMALIZE_WHITESPACE [[((u'jumped', u'VBD'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumped', u'VBD'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] u conll2007cCst|ddГS(Nttop_relation_labeluroot(R (RRc((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-Ls(R]R^R_R9R-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRds0tStanfordNeuralDependencyParsercBsPeZdZdZdZdZdZeZeZ dДZ edДZdДZ RS(u╞ >>> from nltk.parse.stanford import StanfordNeuralDependencyParser >>> dep_parser=StanfordNeuralDependencyParser() >>> [parse.tree() for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])])] >>> [list(parse.triples()) for parse in dep_parser.raw_parse("The quick brown fox jumps over the lazy dog.")] # doctest: +NORMALIZE_WHITESPACE [[((u'jumps', u'VBZ'), u'nsubj', (u'fox', u'NN')), ((u'fox', u'NN'), u'det', (u'The', u'DT')), ((u'fox', u'NN'), u'amod', (u'quick', u'JJ')), ((u'fox', u'NN'), u'amod', (u'brown', u'JJ')), ((u'jumps', u'VBZ'), u'nmod', (u'dog', u'NN')), ((u'dog', u'NN'), u'case', (u'over', u'IN')), ((u'dog', u'NN'), u'det', (u'the', u'DT')), ((u'dog', u'NN'), u'amod', (u'lazy', u'JJ'))]] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.raw_parse_sents(( ... "The quick brown fox jumps over the lazy dog.", ... "The quick grey wolf jumps over the lazy fox." ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('jumps', [Tree('fox', ['The', 'quick', 'brown']), Tree('dog', ['over', 'the', 'lazy'])]), Tree('jumps', [Tree('wolf', ['The', 'quick', 'grey']), Tree('fox', ['over', 'the', 'lazy'])])] >>> sum([[parse.tree() for parse in dep_graphs] for dep_graphs in dep_parser.parse_sents(( ... "I 'm a dog".split(), ... "This is my friends ' cat ( the tabby )".split(), ... ))], []) # doctest: +NORMALIZE_WHITESPACE [Tree('dog', ['I', "'m", 'a']), Tree('cat', ['This', 'is', Tree('friends', ['my', "'"]), Tree('tabby', ['the'])])] uconllu)edu.stanford.nlp.pipeline.StanfordCoreNLPu%stanford-corenlp-(\d+)(\.(\d+))+\.jaru,stanford-corenlp-(\d+)(\.(\d+))+-models\.jarcOs,tt|Гj||О|jd7_dS(Nu(-annotators tokenize,ssplit,pos,depparse(tsuperRfR'R (Rtargstkwargs((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR'sscCstdГВdS(u╢ Currently unimplemented because the neural dependency parser (and the StanfordCoreNLP pipeline class) doesn't support passing in pre- tagged tokens. uxtagged_parse[_sents] is not supported by StanfordNeuralDependencyParser; use parse[_sents] or raw_parse[_sents] instead.N(tNotImplementedError(RR;R((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRAwscCst|ddГS(NReuROOT(R (RRc((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyR-Гs(R]R^R_R9R8RRRRTR,R'R(RAR-(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyRfPs cCsLddlm}ytddГtГWntk rG|dГВnXdS(Ni (tSkipTestRu4edu/stanford/nlp/models/lexparser/englishPCFG.ser.gzundoctests from nltk.parse.stanford are skipped because one of the stanford parser or CoreNLP jars doesn't exist(tnoseRkRaRftLookupError(tmoduleRk((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pytsetup_moduleЗs (t __future__RRMRXRtwarningst subprocessRtioRtnltkRtnltk.internalsRRRRRtnltk.parse.apiR tnltk.parse.dependencygraphR t nltk.treeRRRRaRdRfRo(((se/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/parse/stanford.pyt s"(├987