ó <¿CVc@s¯ddlmZddlZddlmZddlmZddlTddlTde fd„ƒYZ de fd „ƒYZ d „Z d „Zed kr«e ƒeƒndS( iÿÿÿÿ(tprint_functionN(tutil(tcompat(t*tChasenCorpusReadercBskeZdd d„Zd d„Zd d„Zd d„Zd d„Zd d„Zd d„Z d d„Z RS( tutf8cCs#||_tj||||ƒdS(N(t_sent_splittert CorpusReadert__init__(tselftroottfileidstencodingt sent_splitter((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyRs cCsb|dkr|j}nt|tjƒr6|g}ntg|D]}|j|ƒjƒ^q@ƒS(N(tNonet_fileidst isinstanceRt string_typestconcattopentread(R R tf((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pytraws   c CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RtabspathstTruetChasenCorpusViewtFalseR(R R tfileidtenc((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pytwordssc CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RRRRRR(R R RR((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyt tagged_words"sc CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RRRRRR(R R RR((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pytsents'sc CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RRRRRR(R R RR((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyt tagged_sents,sc CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RRRRRR(R R RR((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pytparas1sc CsGtg|j|tƒD]*\}}t||ttt|jƒ^qƒS(N(RRRRR(R R RR((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyt tagged_paras6sN( t__name__t __module__RRRRRRR R!R"(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyRs      RcBs#eZdZdd„Zd„ZRS(s• A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``, but this'll use fixed sets of word and sentence tokenizer. cCs>||_||_||_||_tj||d|ƒdS(NR (t_taggedt_group_by_sentt_group_by_paraRtStreamBackedCorpusViewR(R t corpus_fileR ttaggedt group_by_sentt group_by_paraR ((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyRBs     c Cs¢g}x•t|ddƒD]}g}g}xà|jƒD]Ò}|jƒdk}|jdƒ}|ddj|dƒf} |s’|j| ƒn|s°|jr8|j| ƒr8|jsÛg|D]\} } | ^qÀ}n|jrô|j|ƒn |j |ƒg}q8q8Wt |ƒdkrt|jsKg|D]\} } | ^q0}n|jrd|j|ƒqt|j |ƒn|j r|j|ƒq|j |ƒqW|S(sReads one paragraph at a time.t.s^EOS\ntEOSs ii( tread_regexp_blockt splitlineststriptsplittjointappendRR%R&textendtlenR'( R tstreamtblocktpara_strtparatsenttlinet_eost_cellstwtt((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyt read_blockKs6 "    "  N(R#R$t__doc__RRRA(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyR<s cCs‚ddl}ddlm}|dtdddƒ}tdj|jƒdd !ƒƒtd jd „|jƒd d !DƒƒƒdS(Niÿÿÿÿ(tLazyCorpusLoadertjeitas.*chasenR sutf-8t/iTVi|Vs EOS css(|]}djd„|DƒƒVqdS(s css4|]*}d|d|djdƒdfVqdS(s%s/%siis iN(R2(t.0R?((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pys }sN(R3(RFR;((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pys }sizi}(tnltktnltk.corpus.utilRCRtprintR3RR (RGRCRD((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pytdemoss  cCsRddlm}|dtdddƒ}t|jƒddtjƒsNt‚dS( Niÿÿÿÿ(RCRDs.*chasenR sutf-8ii(RHRCRRRRRtAssertionError(RCRD((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyttest€st__main__(t __future__Rtsystnltk.corpus.readerRRGRtnltk.corpus.reader.utiltnltk.corpus.reader.apiRRR(RRJRLR#(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/chasen.pyts   *7