a 97a:Mã@sdZddlmZmZmZmZddlTddlmZddl Z ddl Z e   d¡Z e   d¡Z e   d¡Ze   d ¡Ze   d ¡Ze   d ¡Ze   d ¡Ze   d ¡Ze   d¡Ze   d¡Ze   d¡Ze   de j¡Ze   de j¡Ze   d ¡Ze   d¡ZGdd„deƒZGdd„dejƒZdS)zLA parser for HTML and XHTML. Backported for python-future from Python 3.3. é)Úabsolute_importÚdivisionÚprint_functionÚunicode_literals)Ú*)Ú _markupbaseNz[&<]z &[a-zA-Z#]z%&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]z)&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]z <[a-zA-Z]ú>z--\s*>z(([a-zA-Z][-.a-zA-Z0-9:_]*)(?:\s|/(?!>))*z[a-zA-Z][^ />]*zJ\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?z]((?<=[\'"\s/])[^\s/>][^\s/=>]*)(\s*=+\s*(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?(?:\s|/(?!>))*aê <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:\s+ # whitespace before attribute name (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name (?:\s*=\s* # value indicator (?:'[^']*' # LITA-enclosed value |\"[^\"]*\" # LIT-enclosed value |[^'\">\s]+ # bare value ) )? ) )* \s* # trailing whitespace aF <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name (?:[\s/]* # optional whitespace before attribute name (?:(?<=['"\s/])[^\s/>][^\s/=>]* # attribute name (?:\s*=+\s* # value indicator (?:'[^']*' # LITA-enclosed value |"[^"]*" # LIT-enclosed value |(?!['"])[^>\s]* # bare value ) (?:\s*,)* # possibly followed by a comma )?(?:\s|/(?!>))* )* )? \s* # trailing whitespace z#c@s"eZdZdZddd„Zdd„ZdS) ÚHTMLParseErrorz&Exception raised for all parse errors.©NNcCs&|sJ‚||_|d|_|d|_dS)Nré©ÚmsgÚlinenoÚoffset)Úselfr Úposition©rúw/private/var/folders/s6/9n5zrl012gv99k63s4q6ccsd4s6mqz/T/pip-target-f5cq3f2q/lib/python/future/backports/html/parser.pyÚ__init__Us zHTMLParseError.__init__cCs>|j}|jdur|d|j}|jdur:|d|jd}|S)Nz , at line %dz , column %dr r )rÚresultrrrÚ__str__[s   zHTMLParseError.__str__N)r )Ú__name__Ú __module__Ú __qualname__Ú__doc__rrrrrrr Rs r c@sìeZdZdZdZd:dd„Zdd„Zdd „Zd d „Zd d „Z dZ dd„Z dd„Z dd„Z dd„Zdd„Zd;dd„Zdd„Zdd„Zd d!„Zd"d#„Zd$d%„Zd&d'„Zd(d)„Zd*d+„Zd,d-„Zd.d/„Zd0d1„Zd2d3„Zd4d5„Zd6d7„Zd8d9„ZdS)<Ú HTMLParseraÇFind tags and other markup and call handler functions. Usage: p = HTMLParser() p.feed(data) ... p.close() Start tags are handled by calling self.handle_starttag() or self.handle_startendtag(); end tags by self.handle_endtag(). The data between tags is passed from the parser to the derived class by calling self.handle_data() with the data as argument (the data may be split up in arbitrary chunks). Entity references are passed by calling self.handle_entityref() with the entity reference as the argument. Numeric character references are passed to self.handle_charref() with the string containing the reference as the argument. )ÚscriptÚstyleFcCs&|rtjdtdd||_| ¡dS)zßInitialize and reset this instance. If strict is set to False (the default) the parser will parse invalid markup, otherwise it will raise an error. Note that the strict mode is deprecated. zThe strict mode is deprecated.é)Ú stacklevelN)ÚwarningsÚwarnÚDeprecationWarningÚstrictÚreset)rr#rrrrzs ÿzHTMLParser.__init__cCs(d|_d|_t|_d|_tj |¡dS)z1Reset this instance. Loses all unprocessed data.Úz???N)ÚrawdataÚlasttagÚinteresting_normalÚ interestingÚ cdata_elemrÚ ParserBaser$©rrrrr$‡s zHTMLParser.resetcCs|j||_| d¡dS)z‘Feed data to the parser. Call this as often as you want, with as little or as much text as you want (may include '\n'). rN)r&Úgoahead©rÚdatarrrÚfeeds zHTMLParser.feedcCs| d¡dS)zHandle any buffered data.r N)r-r,rrrÚclose˜szHTMLParser.closecCst|| ¡ƒ‚dS©N)r Úgetpos)rÚmessagerrrÚerrorœszHTMLParser.errorNcCs|jS)z)Return full source of start tag: '<...>'.)Ú_HTMLParser__starttag_textr,rrrÚget_starttag_text¡szHTMLParser.get_starttag_textcCs$| ¡|_t d|jtj¡|_dS)Nz )Úlowerr*ÚreÚcompileÚIr))rÚelemrrrÚset_cdata_mode¥s zHTMLParser.set_cdata_modecCst|_d|_dSr2)r(r)r*r,rrrÚclear_cdata_mode©szHTMLParser.clear_cdata_modec Cs¬|j}d}t|ƒ}||krd|j ||¡}|r8| ¡}n|jrBqd|}||kr`| |||…¡| ||¡}||krxqd|j}|d|ƒrÂt   ||¡r¢|  |¡}n”|d|ƒr¸|  |¡}n~|d|ƒrÎ|  |¡}nh|d|ƒrä| |¡}nR|d|ƒr|jr| |¡}n | |¡}n&|d|krd| d¡|d}nqd|dkr´|sJqd|jr\| d¡| d |d¡}|dkrš| d|d¡}|dkr¢|d}n|d7}| |||…¡| ||¡}q|d |ƒr`t  ||¡}|r*| ¡d d …} | | ¡| ¡}|d |dƒs|d}| ||¡}qn4d ||d…vrd| |dd …¡| |d ¡}qdq|d|ƒrVt  ||¡}|rÀ| d¡} | | ¡| ¡}|d |dƒs²|d}| ||¡}qt  ||¡}|r&|rd| ¡||d…krd|jr| d¡n||kr|}| ||d¡}qdn.|d|krd| d¡| ||d¡}nqdqdsJdƒ‚q|rš||krš|jsš| |||…¡| ||¡}||d…|_dS)Nrú<údS|rX| ||d|…¡|dS)Nr)rCr@z"unexpected call to parse_comment()rrEr )r&rVÚhandle_comment)rr^Úreportr&Úposrrrrh-szHTMLParser.parse_bogus_commentcCsd|j}|||d…dks"Jdƒ‚t ||d¡}|s:dS| ¡}| ||d|…¡| ¡}|S)NrrBzunexpected call to parse_pi()rE)r&ÚpicloserIrJÚ handle_pirZ)rr^r&rOr`rrrrS9szHTMLParser.parse_picCs.d|_| |¡}|dkr|S|j}|||…|_g}t ||d¡}|sPJdƒ‚| ¡}| d¡ ¡|_}||krH|j rŠt  ||¡}n t  ||¡}|sžqH| ddd¡\} } } | s¼d} n`| dd…dkrâ| dd…ksn| dd…dkr | dd…krnn | dd…} | r,|  | ¡} |  |  ¡| f¡| ¡}ql|||… ¡} | d vrî| ¡\} }d |jvr¢| |j d ¡} t|jƒ|j d ¡}n|t|jƒ}|j rØ| d |||…dd …f¡| |||…¡|S|  d ¡r| ||¡n"| ||¡||jvr*| |¡|S)Nrr z#unexpected call to parse_starttag()rrdú'rEú")rú/>Ú z junk characters in start tag: %rérq)r6Úcheck_for_whole_start_tagr&ÚtagfindrOrZrXr8r'r#ÚattrfindÚattrfind_tolerantÚunescapeÚappendÚstripr3ÚcountrHÚrfindr5rKÚendswithÚhandle_startendtagÚhandle_starttagÚCDATA_CONTENT_ELEMENTSr=)rr^Úendposr&ÚattrsrOraÚtagÚmÚattrnameÚrestÚ attrvaluerZrrrrrrPEsf    ( ÿ  ÿ        ÿÿ    zHTMLParser.parse_starttagcCs|j}|jrt ||¡}n t ||¡}|rú| ¡}|||d…}|dkrR|dS|dkr²| d|¡rn|dS| d|¡r~dS|jrž| ||d¡| d¡||krª|S|dS|dkr¾dS|d vrÊdS|jræ| ||¡| d ¡||krò|S|dSt d ƒ‚dS) Nr rú/rqrrEzmalformed empty start tagr%z6abcdefghijklmnopqrstuvwxyz=/ABCDEFGHIJKLMNOPQRSTUVWXYZzmalformed start tagzwe should not get here!) r&r#ÚlocatestarttagendrOÚlocatestarttagend_tolerantrZrMrLr5ÚAssertionError)rr^r&r„r`Únextrrrrt~s>      z$HTMLParser.check_for_whole_start_tagcCsN|j}|||d…dks"Jdƒ‚t ||d¡}|s:dS| ¡}t ||¡}|sø|jdurr| |||…¡|S|jr|  d|||…f¡t  ||d¡}|sÊ|||d…dkrÀ|dS|  |¡S|  ¡  ¡}| d | ¡¡}| |¡|dS|  d¡  ¡}|jdur4||jkr4| |||…¡|S| |  ¡¡| ¡|S) Nrr@zunexpected call to parse_endtagr rEzbad end tag: %rrdzr)r&Ú endendtagrIrZÚ endtagfindrOr*rKr#r5Útagfind_tolerantrhrXr8rVÚ handle_endtagr>)rr^r&rOriÚ namematchÚtagnamer<rrrrQ©s<       zHTMLParser.parse_endtagcCs| ||¡| |¡dSr2)rr©rrƒr‚rrrr~Ós zHTMLParser.handle_startendtagcCsdSr2rr“rrrrØszHTMLParser.handle_starttagcCsdSr2r)rrƒrrrrÜszHTMLParser.handle_endtagcCsdSr2r©rrbrrrrYàszHTMLParser.handle_charrefcCsdSr2rr”rrrr\äszHTMLParser.handle_entityrefcCsdSr2rr.rrrrKèszHTMLParser.handle_datacCsdSr2rr.rrrrjìszHTMLParser.handle_commentcCsdSr2r)rÚdeclrrrrgðszHTMLParser.handle_declcCsdSr2rr.rrrrnôszHTMLParser.handle_picCs|jr| d|f¡dS)Nzunknown declaration: %r)r#r5r.rrrÚ unknown_decl÷szHTMLParser.unknown_declcCs"d|vr |Sdd„}t d||¡S)NrGcSs| ¡d}zZ|ddkrd|dd…}|ddvrLt|dd… d¡dƒ}nt| d¡ƒ}t|ƒWSWnty€d|YS0ddlm}||vrž||S| d¡r°d |Std t |ƒƒD]4}|d|…|vr¾||d|…||d…Sq¾d |SdS) Nrú#r )ÚxÚXrFérD)Úhtml5rGr) ÚgroupsÚintÚrstripÚchrÚ ValueErrorZfuture.backports.html.entitiesr›r}ÚrangerH)ÚsÚcr›r˜rrrÚreplaceEntitiesÿs&       "z,HTMLParser.unescape..replaceEntitiesz&&(#?[xX]?(?:[0-9a-fA-F]+;|\w{1,32};?)))r9Úsub)rr¢r¤rrrrxüs ÿzHTMLParser.unescape)F)r ) rrrrr€rr$r0r1r5r6r7r=r>r-rUrhrSrPrtrQr~rrrYr\rKrjrgrnr–rxrrrrrds:  h  9+*r) rÚ __future__rrrrZfuture.builtinsZfuture.backportsrr9r r:r(r]r[rWrNrmÚ commentcloserurrvrwÚVERBOSEr‰rŠrrŽÚ Exceptionr r+rrrrrÚs<           ÿÿ óò