U Dx`'@sTdZdddgZddlZddlmZmZz0ddlmZmZm Z m Z m Z m Z m Z e e fZWn8ek rddlmZmZm Z m Z m Z m Z e ZYnXdd dZdd dZdd dZd d ZedejjZGdddZddZddZzddlmZWn"ek rddlmZYnXedjZze Wne!k rFe"Z YnXddZ#dS)z5External interface to the BeautifulSoup HTML parser. fromstringparse convert_treeN)etreehtml) BeautifulSoupTagCommentProcessingInstructionNavigableString DeclarationDoctype)rrr r r r cKst|||f|S)aParse a string of HTML data into an Element tree using the BeautifulSoup parser. Returns the root ```` Element of the tree. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. )_parse)data beautifulsoup makeelementbsargsr;/tmp/pip-target-zr53vnty/lib/python/lxml/html/soupparser.pyrs cKs,t|dst|}t|||f|}t|S)aYParse a file into an ElemenTree using the BeautifulSoup parser. You can pass a different BeautifulSoup parser through the `beautifulsoup` keyword, and a diffent Element factory function through the `makeelement` keyword. By default, the standard ``BeautifulSoup`` class and the default factory of `lxml.html` are used. read)hasattropenrr ElementTree)filerrrrootrrrr$s cCs*t||}|}|D]}||q|S)aConvert a BeautifulSoup tree to a list of Element trees. Returns a list instead of a single root Element to support HTML-like soup with more than one root element. You can pass a different Element factory through the `makeelement` keyword. ) _convert_tree getchildrenremove)beautiful_soup_treerrchildrenchildrrrr3s  cKs|dkr t}t|dr&d|kr&d|d<t|dr@d|kr@d|d<||f|}t||}t|dkrx|djdkrx|dSd|_|S) NZ HTML_ENTITIESZconvertEntitiesrZDEFAULT_BUILDER_FEATURESfeaturesz html.parserr)rrrlentag)sourcerrrtreerrrrrEs    rz`(?:\s|[|\}}| j}|o|dd|_|o|dd|_| S)Nrr")r html_parserr enumerate isinstancerr(lower_DECLARATION_OR_DOCTYPEr*indexr'_init_node_convertersreversedZ addpreviousZaddnextZ output_readyAttributeErrorstring_parse_doctype_declarationgroupsZ getroottreedocinfo public_idZ system_url)rrZfirst_element_idxZlast_element_idxZ html_rootZ declarationieZpre_rootZ post_rootroots convert_nodeZres_rootprevZ convertedZdoctype_stringmatchZ external_idZsys_urir?rrrrisd   &          rcsigfdd}fdddfdd ddd d |ttfd d }|td d}|tdd}|tfdd}S)Ncsfdd}|S)Ncs D]}||<|q|Sr-)append)handlert) convertersordered_node_typestypesrradds z5_init_node_converters..converter..addr)rLrMrJrK)rLr convertersz(_init_node_converters..convertercs$D]}t||r|SqdSr-)r5)noderIrNrrfind_best_converters z2_init_node_converters..find_best_convertercsPzt|}Wn(tk r8|}t|<YnX|dkrFdS|||Sr-)typeKeyError)bs_nodeparentrH)rJrQrrrDsz+_init_node_converters..convert_nodecSsXt|trBi}|D](\}}t|tr2d|}t|||<qntdd|D}|S)N css|]\}}|t|fVqdSr-unescape).0kvrrr sz;_init_node_converters..map_attrs..)r5dictitemslistjoinrX)Zbs_attrsattribsrZr[rrr map_attrss   z(_init_node_converters..map_attrscSs:t|dkr|jpd||_n|djp*d||d_dS)Nrr2)r#texttail)rUrdrrr append_texts z*_init_node_converters..append_textc s|j}|dk r2|r|nd}tj||j|d}n|r>|ni}|j|d}|D]H}zt|}Wntk r|YnX|dk rT|||qT||qT|S)N)attrib)r)r SubElementr(rRrS)rTrUr)raresr rH)rDrJrrbrr convert_tags   z*_init_node_converters..convert_tagcSs t|}|dk r|||Sr-)rZ HtmlCommentrGrTrUrirrrconvert_comments  z._init_node_converters..convert_commentcSs>|dr|dd}tj|dd}|dk r:|||S)N?r2rVr")endswithrr splitrGrkrrr convert_pi s    z)_init_node_converters..convert_pics|dk r|t|dSr-rW)rTrU)rfrr convert_textsz+_init_node_converters..convert_text)N)rr'r r r )rrOrjrlrprqr)rfrDrJrQrrbrKrr9s     r9)name2codepointz&(\w+);cCs|sdSdd}t||S)NrccSs8ztt|dWStk r2|dYSXdS)Nr"r)unichrrrgrouprS)mrrrunescape_entity5sz!unescape..unescape_entity)handle_entities)r<rvrrrrX1srX)NN)NN)N)$__doc____all__relxmlrrZbs4rrr r r r r r7 ImportErrorrrrrcompile IGNORECASErFr=r'rr9 html.entitiesrrhtmlentitydefssubrwrs NameErrorchrrXrrrrs< $       Uc