3 L]!@sdZddlmZmZmZmZddlmZddlZddl Z ddl Z ddl m Z ddl mZmZmZmZmZe jdkrdd lmZmZdd lmZmZndd lmZmZmZmZd d ZddZddZej dedddZ!dddZ"dS)z Functions to convert unicode IRIs into ASCII byte string URIs and back. Exports the following items: - iri_to_uri() - uri_to_iri() )unicode_literalsdivisionabsolute_importprint_function)idnaN)unwrap)byte_clsstr_cls type_name bytes_to_list int_types)urlsplit urlunsplit)quoteunquote)runquote_to_bytesrrcCs t|tsttdt|d}tjdkr|jd r|jd rd}tj d|}|rt|j d}d|t |d}t |}|r||dd}t |dd}nt |}|dkrt |j}|j}|dk r|jd }t |jd d }t |jd d }|j}|dk rt|jd }d} |dk r>| |7} |r6| d|7} | d7} |dk rP| |7} |dk r|dkoj|dk} |dko||dk} | r| r| d|7} t |jdd } t |jdd } t |jdd }| dkr|dkr| dkrd} | dkrd} t|| | | |f}t|tr|jd}|S)z Normalizes and encodes a unicode IRI into an ASCII byte string URI :param value: A unicode string of an IRI :return: A byte string of the ASCII-encoded URI z@ value must be a unicode string, not %s Nzhttp://zhttps://z ^[^:]*://rrrz !$&'()*+,;=)safeascii:@shttps80shttpss443z/!$&'()*+,;=@:z/?!$&'()*+,;=@:/latin1)rr) isinstancer TypeErrorrr sys version_info startswithrematchgrouplenr _urlquoteschemehostnameencodeusernamepasswordportpathqueryfragmentr)valuer) real_prefixZ prefix_matchparsedr*r,r-r.netlocZ default_httpZ default_httpsr/r0r1outputr77/tmp/pip-install-wfra5znf/asn1crypto/asn1crypto/_iri.py iri_to_uri%sd  "             r9c Cs*t|tsttdt|t|}|j}|dk r<|jd}t|j ddgd}t|j ddgd}|j }|rt|jd}|j }|rt|t  r|jd}d}|dk r||7}|r|d|7}|d7}|dk r||7}|dk r|dt|7}t|jd gd d }t|jd d gd d } t|j} t|||| | fS)z Converts an ASCII URI byte string into a unicode IRI :param value: An ASCII-encoded byte string of the URI :return: A unicode string of the IRI z= value must be a byte string, not %s Nr:@)remaprr/T)r<preserve&=)rr r rr rr)decode _urlunquoter,r-r*r.r r r/r0r1r) r2r4r)r,r-r*r.r5r/r0r1r7r7r8 uri_to_irius<       rCcCs4t|j|j|j}dd|D}dj||jfS)a> Error handler for decoding UTF-8 parts of a URI into an IRI. Leaves byte sequences encoded in %XX format, but as part of a unicode string. :param exc: The UnicodeDecodeError exception :return: A 2-element tuple of (replacement unicode string, integer index to resume at) cSsg|] }d|qS)z%%%02xr7).0numr7r7r8 sz,_iri_utf8_errors_handler..r)r objectstartendjoin)excZ bytes_as_ints replacementsr7r7r8_iri_utf8_errors_handlers rMiriutf8rcs|dks|dkrdSgtjd|rXfdd}tjd||}fdd}tjd||}t|jd jd d }t|ts|jd }td krfd d}tjd||}|S)a Quotes a unicode string for use in a URL :param string: A unicode string :param safe: A unicode string of character to not encode :return: None (if string is None) or an ASCII byte string of the quoted string Nrz%[0-9a-fA-F]{2}csDt|jd}|jdd}x$tD]}|j|dt|}q$W|S)Nrzutf-8rNz%%%02x)rr&rAlistreplaceord)r% byte_stringZunicode_stringZ safe_char)rr7r8 _try_unescapes  z _urlquote.._try_unescapez(?:%[0-9a-fA-F]{2})+csj|jdjddS)Nrr)appendr&r+)r%)escapesr7r8_extract_escapesz"_urlquote.._extract_escapezutf-8)rrrcs jdS)Nr)pop)_)rVr7r8_return_escapesz!_urlquote.._return_escapes%00)r$searchsuburlquoter+rr r')stringrrSrWr6rZr7)rVrr8r(s       r(c Cs|dkr |S|dkrdS|rfdddddg}i}x6|D].}|jd }|||<|j|jd |jd }q4Wt|}|rx,|D]$}|j|jd d t|jd }qxW|jd d }|rx |jD]\}}|j||}qW|S)a Unquotes a URI portion from a byte string into unicode using UTF-8 :param byte_string: A byte string of the data to unquote :param remap: A list of characters (as unicode) that should be re-mapped to a %XX encoding. This is used when characters are not valid in part of a URL. :param preserve: A bool - indicates that the chars to be remapped if they occur in non-hex form, should be preserved. E.g. / for URL path. :return: A unicode string Nrrrrz%%%02xzutf-8rN)rXrPr+rrQrAitems) rRr<r>rLZpreserve_unmapchar replacementr6originalr7r7r8rBs(   $ rB)r)r)NN)#__doc__ __future__rrrr encodingsrcodecsr$r!_errorsr_typesr r r r r r"urlparserrurllibrr]rr urllib.parser9rCrMregister_errorr(rBr7r7r7r8 s"   P4  4