B /Wb-@sy ddlZWnek r(ddlZYnXddlZddlZddlmZddlmZddl m Z ddl m Z ddl mZmZmZmZmZmZddlmZdd lmZmZmZmZmZmZe ed eed d d Ze ed eed ddZ e ed eeed ddZ!e ed eed ddZ"e ed eed ddZ#e ed eed ddZ$e ed eed ddZ%e ed eed ddZ&e ed eed ddZ'e ed eed ddZ(eed d d!Z)e ed eed d"d#Z*e ed eed d$d%Z+e ed eed d&d'Z,e ed eed d(d)Z-e ed eed d*d+Z.e e/ed eed,d-d.Z0e ed eed d/d0Z1dSe2e3eed2d3d4Z4e d5d eed6d7d8Z5e2eeee2fd9d:d;Z6eed<d=d>Z7dTeeed@dAdBZ8eeedCdDdEZ9eee:dFdGdHZ;eeedFdIdJZdUe2ee?e3eee2eeeeeddfdP dQdRZ@dS)VN)IncrementalDecoder)aliases) lru_cache)findall) GeneratorListOptionalSetTupleUnion)MultibyteIncrementalDecoder)ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATION)maxsize) characterreturncCsTyt|}Wntk r"dSXd|kpRd|kpRd|kpRd|kpRd|kpRd|kS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLAzWITH DIAERESISzWITH CIRCUMFLEXz WITH TILDE) unicodedataname ValueError)r descriptionr?/tmp/pip-target-ju3w756n/lib/python/charset_normalizer/utils.pyis_accentuatedsrcCs.t|}|s|S|d}tt|ddS)N r)r decompositionsplitchrint)rZ decomposedcodesrrr remove_accent,s   r%cCs.t|}x tD]\}}||kr|SqWdS)zK Retrieve the Unicode range official name from a single character. N)ordritems)rZ character_ord range_nameZ ord_rangerrr unicode_range7s r)cCs,yt|}Wntk r"dSXd|kS)NFZLATIN)rrr)rrrrris_latinEs r*cCs(y|dWntk r"dSXdS)NasciiFT)encodeUnicodeEncodeError)rrrris_asciiNs r.cCs2t|}d|krdSt|}|dkr*dSd|kS)NPTFZ Punctuation)rcategoryr))rcharacter_categorycharacter_rangerrris_punctuationWs r3cCs:t|}d|ksd|krdSt|}|dkr2dSd|kS)NSNTFZForms)rr0r))rr1r2rrr is_symbolfs r6cCst|}|dkrdSd|kS)NFZ Emoticons)r))rr2rrr is_emoticonusr7cCs&|s|dkrdSt|}d|kS)N>,>;+<|TZ)isspacerr0)rr1rrr is_separators r@cCs||kS)N)islowerisupper)rrrris_case_variablesrCcCst|}|dkS)NCo)rr0)rr1rrris_private_use_onlys rEcCs,yt|}Wntk r"dSXd|kS)NFCJK)rrr)rcharacter_namerrris_cjks rHcCs,yt|}Wntk r"dSXd|kS)NFZHIRAGANA)rrr)rrGrrr is_hiraganas rIcCs,yt|}Wntk r"dSXd|kS)NFZKATAKANA)rrr)rrGrrr is_katakanas rJcCs,yt|}Wntk r"dSXd|kS)NFZHANGUL)rrr)rrGrrr is_hanguls rKcCs,yt|}Wntk r"dSXd|kS)NFZTHAI)rrr)rrGrrris_thais rL)r(rcstfddtDS)Nc3s|]}|kVqdS)Nr).0keyword)r(rr sz-is_unicode_range_secondary..)anyr)r(r)r(ris_unicode_range_secondarysrQcCs.|dko,|dko,|dko,|ddkS)NFsutf_8)r? isprintabledecode)rrrris_unprintables  rV)sequence search_zonercCst|tstt|}tt|dt||jddd}t|dkrHdSxJ|D]B}| dd}x,t D] \}}||kr|S||krl|SqlWqNWdS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nr+ignore)errorsr-_) isinstancebytes TypeErrorlenrrminrUlowerreplacerr')rXrYZseq_lenresultsZspecified_encodingencoding_alias encoding_ianarrrany_specified_encodings     rh)rrcCs |dkpttd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_32utf_7utf_16 utf_16_be utf_32_le utf_32_be utf_16_le utf_8_sigrSz encodings.{}) issubclass importlib import_moduleformatrr )rrrris_multi_byte_encodings rv)rXrcCsJxDtD]<}t|}t|tr"|g}x|D]}||r(||fSq(WqWdS)z9 Identify and extract SIG/BOM in given sequence. )N)rr^r_ startswith)rX iana_encodingZmarksmarkrrridentify_sig_or_boms    r{)ryrcCs|dkS)N>rjrlr)ryrrrshould_strip_sig_or_bomsr|T)cp_namestrictrcCsL|dd}x$tD]\}}|||gkr|SqW|rHtd||S)Nr\r]z Unable to retrieve IANA for '{}')rcrdrr'rru)r}r~rfrgrrr iana_names r)decoded_sequencercCs8t}x(|D] }t|}|dkr"q ||q Wt|S)N)setr)addlist)rrangesrr2rrr range_scan+s r) iana_name_a iana_name_brc Cst|st|rdStd|j}td|j}|dd}|dd}d}x6tdD]*}t|g}||||krZ|d7}qZW|dS) Ngz encodings.{}rZ)r[rr )rvrsrtrurranger_rU) rrZ decoder_aZ decoder_bZid_aZid_bZcharacter_match_countiZ to_be_decodedrrr cp_similarity9s    rcCs|tko|t|kS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rrrrr is_cp_similarNsrZcharset_normalizerz)%(asctime)s | %(levelname)s | %(message)s)rlevel format_stringrcCs:t|}||t}|t|||dS)N)logging getLoggersetLevel StreamHandler setFormatter Formatter addHandler)rrrloggerhandlerrrrset_logging_handlerYs   r) sequencesrgoffsets chunk_sizebom_or_sig_availablestrip_sig_or_bom sig_payloadis_multi_byte_decoderdecoded_payloadrc csF|r:|dkr:x(|D] } || | |} | s,P| VqWnx|D]} | |} | t|dkr`qB|| | |} |r|dkr|| } | j||rdndd} |r8| dkr8|| dkr8t|d} |r8| d| |kr8x\t| | d d D]H}||| } |r|dkr|| } | j|dd} | d| |krPqW| VqBWdS) NFrZr~)r[rrir)rarUrbr)rrgrrrrrrrrchunkZ chunk_endZ cut_sequenceZchunk_partial_size_chkjrrrcut_sequence_chunksgs8      r)rW)T)N)AZ unicodedata2r ImportErrorrsrcodecsrZencodings.aliasesr functoolsrrertypingrrrr r r Z_multibytecodecr Zconstantrrrrrrstrboolrr%r)r*r.r3r6r7r@rCrErHrIrJrKrLrarQrVr_r#rhrvr{r|rrfloatrrINFOrrrrrrrs|