U C^$ @sZddlmZddlmZddlmZddlmZddlm Z dd d Z d d Z ddZ dS))unicode_literals)Printer) iob_to_biluo) minibatch) n_sents_info FcOs8t|d}t|d}|dkr4t||t||}|S)a Convert IOB files with one sentence per line and tags separated with '|' into JSON format for use with train cli. IOB and IOB2 are accepted. Sample formats: I|O like|O London|I-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O I|O like|O London|B-GPE and|O New|B-GPE York|I-GPE City|I-GPE .|O I|PRP|O like|VBP|O London|NNP|I-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O I|PRP|O like|VBP|O London|NNP|B-GPE and|CC|O New|NNP|B-GPE York|NNP|I-GPE City|NNP|I-GPE .|.|O )no_print r)rread_iobsplitrmerge_sentences)Z input_datan_sentsr argskwargsmsgdocsr@/tmp/pip-install-6_kvzl1k/spacy/spacy/cli/converters/iob2json.pyiob2json s   rc Csg}|D]}|sqdd|D}t|ddkrHt|\}}}n4t|ddkrtt|\}}dgt|}ntdt|}|ddt|||Dqd d|D}d d|D}d dt|D} | S) NcSsg|]}|dqS)|)r ).0trrr $szread_iob..rr-zThe sentence-per-line IOB/IOB2 file is not formatted correctly. Try checking whitespace and delimiters. See https://spacy.io/api/cli#convertcSsg|]\}}}|||dqS))ZorthtagZnerr)rwpentrrrr0scSsg|] }d|iqS)tokensrrsentrrrr5scSsg|]}d|giqS) sentencesrr"rrrr6scSsg|]\}}||gdqS))id paragraphsr)ripararrrr7s)stripr lenzip ValueErrorrappend enumerate) Z raw_sentsr$liner!wordsposZiobZbiluor&rrrrr s.  r cCsfg}t||dD]P}t|}|d}|ddd}|D]}||dddq:||q|S)N)sizerr&r$)rlistpopextendr-)rrZmergedgroupfirstZ to_extendr#rrrr;s  rN)r F) __future__rZwasabirZgoldrutilrZconll_ner2jsonrrr rrrrrs