ó <¿CVc@sþddlZddlZddlmZddlmZmZddlTddlTddlm Z ej dƒZ ej dƒZ ej dƒZ ej d ƒZej d ƒZej d ƒZej d ƒZd efd„ƒYZdee fd„ƒYZdS(iÿÿÿÿN(tcompat(ttokenizettree(t*(tXMLCorpusReaders]*){0,1}>(.*?)

s]*){0,1}>(.*?)s#<([wc](?: [^>]*){0,1}>)(.*?)s!<[wc](?: [^>]*){0,1}>(.*?)s type="(.*?)"s ana="(.*?)"stext id="(.*?)"t TEICorpusViewcBs2eZdddd„ZdZd„Zd„ZRS(icCs>||_||_||_||_tj||d|ƒdS(Ntstartpos(t_taggedt_textidst_group_by_sentt_group_by_paratStreamBackedCorpusViewt__init__(tselft corpus_filettaggedt group_by_sentt group_by_parattagsettheadLenttextids((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyR !s     ic Csæ|j|jƒ}t|ƒ}xc|jdƒ|jdƒksT|jdƒdkrƒ|jƒ}t|ƒdkrvPn||7}q!W|jddƒ}tj|ƒ}|j rxi|D]^}||j krµ|j |ƒd}||j dƒtdƒ}|| |||}qµqµWng}x¿t j|ƒD]®} g} xyt j| ƒD]h} |j smtj| ƒ} n!tt|jtj| ƒƒƒ} |jr§| j| ƒqL| j| ƒqLW|jrÑ|j| ƒq0|j| ƒq0W|S(Nsis ti(t readlinest _pagesizetconcattcounttreadlinetlentreplacetTEXTIDtfindallRtfindtPARAtSENTRtWORDtlisttmapt _parse_tagt TAGGEDWORDR tappendtextendR ( R tstreamtblockttmpRttidtbegtendtoutputtpara_strtparatsent_strtsent((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt read_block.s< !     !  cCsX|\}}|jdƒr6tj|ƒjdƒ}ntj|ƒjdƒ}||fS(Ntwi(t startswithtANAtsearchtgrouptTYPE(R ttag_word_tuplettagtword((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyR%Ts  N(t__name__t __module__tNoneR RR4R%(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyR s  &tPl196xCorpusReadercBs×eZdZd„Zd„Zd„Zdd„Zd„Zddd„Z dddd„Z dddd„Z dddd „Z dddd „Z dddd „Zdddd „Zddd „Zddd„ZRS(iÒ cOsSd|kr|d|_n d|_tj||Œtj||ƒ|jƒdS(Nt textid_file(RR@RR tCategorizedCorpusReadert _init_textids(R targstkwargs((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyR as   cCsÓttƒ|_ttƒ|_|jdk rÏxŸ|j|jƒjƒD]‚}|jƒ}|j ddƒ\}}||j ƒkr›t dt |fƒ‚nx*|j |j ƒD]}|j||ƒq®WqFWndS(Nt is(In text_id mapping file %s: %s not found(t defaultdictR#t_f2tt_t2fRR@topenRtstriptsplittfileidst ValueErrortcatfilet _delimitert _add_textids(R tlinetfile_idttext_idsttext_id((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyRDjs cCs,|j|j|ƒ|j|j|ƒdS(N(RIR'RJ(R RTRV((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyRRwscsd}|dk r6|s'|df}q6tdƒ‚n|dk ro|s`ˆj|ƒdf}qotdƒ‚n|dk r|st|tjƒrŸ|g}nt‡fd†|Dƒgƒ}tƒ}x/|D]'}tˆj |ƒt|ƒ@||Šs(NN( R@RORNt isinstanceRt string_typestsumtdicttsetRI(R RNt categoriesRR+tfilesttdicttf((R sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt_resolve{s(      %cCs|S(N((R R<((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt decode_tag“scsrˆj||ƒ\}}|dkr1tˆjƒSt|tjƒrO|g}ntt‡fd†|DƒgƒƒS(sJ In the pl196x corpus each category is stored in single file and thus both methods provide identical functionality. In order to accommodate finer granularity, a non-standard textids() method was implemented. All the main functions can be supplied with a list of required chunks---giving much more control to the user. c3s|]}ˆj|VqdS(N(RI(RWtd(R (sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pys ¤sN(RbR@tsortedRJRYRRZR[(R RNR^t_((R sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyR—s   c Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@t_fileidsRYRRZRRtabspathtFalseR(R RNR^Rtfileid((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pytwords¦s  Bc Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@RgRYRRZRRRhRitTrueR(R RNR^RRj((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pytsents·s  Bc Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@RgRYRRZRRRhRiRlR(R RNR^RRj((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pytparasÈs  Bc Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@RgRYRRZRRRhRlRiR(R RNR^RRj((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt tagged_wordsÙs  Bc Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@RgRYRRZRRRhRlRiR(R RNR^RRj((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt tagged_sentsês  Bc Csá|j|||ƒ\}}|dkr3|j}nt|tjƒrQ|g}n|rŸtg|D]7}t|j|ƒt t t d|j d||ƒ^qaƒStg|D]-}t|j|ƒt t t d|j ƒ^q©ƒSdS(NRR( RbR@RgRYRRZRRRhRlR(R RNR^RRj((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyt tagged_parasûs  BcCsN|j||ƒ\}}t|ƒdkr>tj||dƒStdƒ‚dS(NiisExpected a single file(RbRRtxmlt TypeError(R RNR^Rf((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyRr scCsz|j||ƒ\}}|dkr0|j}nt|tjƒrN|g}ntg|D]}|j|ƒjƒ^qXƒS(N( RbR@RgRYRRZRRKtread(R RNR^RfRa((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pytraws   N(R>R?RR RDRRR@RbRcRRkRmRnRoRpRqRrRu(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyRA]s   (tostretnltkRRRtnltk.corpus.reader.utiltnltk.corpus.reader.apitnltk.corpus.reader.xmldocsRtcompileR R!R&R"R:R7RR RRCRA(((sk/private/var/folders/cc/xm4nqn811x9b50x1q_zpkmvdjlphkp/T/pip-build-FUwmDn/nltk/nltk/corpus/reader/pl196x.pyts    =