B `G@sPy ddlZWnek r(ddlZYnXddlmZddlmZddlmZm Z m Z m Z m Z ddl Z ddlmZddlmZddlmZddlmZmZmZmZmZmZeed eed d d Zeed eed d dZeed eeed ddZeed eed ddZ eed eed ddZ!eed eed ddZ"eed eed ddZ#eed ddZ$eed ddZ%ee&ed eedddZ'd:e(e)eed!d"d#Z*ed$d eed%d&d'Z+e(e eee(fd(d)d*Z,eed+d,d-Z-d;eeed/d0d1Z.ee ed2d3d4Z/eee0d5d6d7Z1eeed5d8d9Z2dS)<N)IncrementalDecoder)findall)OptionalTupleUnionListSet)MultibyteIncrementalDecoder)aliases) lru_cache)UNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDRE_POSSIBLE_ENCODING_INDICATIONENCODING_MARKSUTF8_MAXIMAL_ALLOCATIONIANA_SUPPORTED_SIMILAR)maxsize) characterreturncCs<yt|}Wntk r"dSXd|kp:d|kp:d|kS)NFz WITH GRAVEz WITH ACUTEz WITH CEDILLA) unicodedataname ValueError)r descriptionrs/private/var/folders/7j/8686xlfs15q3tgljmghtvg0r0000gn/T/pip-target-isidps9b/lib/python/charset_normalizer/utils.pyis_accentuateds rcCs.t|}|s|S|d}tt|ddS)N r)r decompositionsplitchrint)rZ decomposedcodesrrr remove_accents  r#cCs.t|}x tD]\}}||kr|SqWdS)zK Retrieve the Unicode range official name from a single character. N)ordr items)rZ character_ord range_nameZ ord_rangerrr unicode_range,s r'cCs,yt|}Wntk r"dSXd|kS)NFZLATIN)rrr)rrrrris_latin:s r(cCs2t|}d|krdSt|}|dkr*dSd|kS)NPTFZ Punctuation)rcategoryr')rcharacter_categorycharacter_rangerrris_punctuationCs r-cCs:t|}d|ksd|krdSt|}|dkr2dSd|kS)NSNTFZForms)rr*r')rr+r,rrr is_symbolRs r0cCs&|s|dkrdSt|}d|kS)N)u|+TZ)isspacerr*)rr+rrr is_separatoras r4cCst|}d|kS)NCo)rr*)rr+rrris_private_use_onlyks r6cCs,yt|}Wntk r"dSXd|kS)NFCJK)rrr)rZcharacter_namerrris_cjkqs r8)r&rcCsxtD]}||krdSqWdS)NTF)r )r&keywordrrris_unicode_range_secondaryzs r:)sequence search_zonercCst|tstt|}tt|d||kr*|n|jddd}t|dkrNdSxJ|D]B}|dd}x,t D] \}}||kr|S||krr|SqrWqTWdS)zW Extract using ASCII-only decoder any specified encoding in the first n-bytes. Nasciiignore)errorsr-_) isinstancebytes TypeErrorlenrrdecodelowerreplacer r%)r<r=Zseq_lenresultsZspecified_encodingencoding_alias encoding_ianarrrany_specified_encodings  $   rM)rrcCs |dkpttd|jtS)zQ Verify is a specific encoding is a multi byte one based on it IANA name > utf_16 utf_16_leutf_32 utf_32_le utf_8_sig utf_16_be utf_32_beutf_7utf_8z encodings.{}) issubclass importlib import_moduleformatrr )rrrris_multi_byte_encodings r\)r<rcCsJxDtD]<}t|}t|tr"|g}x|D]}||r(||fSq(WqWdS)z9 Identify and extract SIG/BOM in given sequence. )N)rrCrD startswith)r< iana_encodingZmarksmarkrrridentify_sig_or_boms    ra)r_rcCs|dkS)N>rOrQr)r_rrrshould_strip_sig_or_bomsrbT)cp_namestrictrcCsP|dd}x(tD]\}}||ks2||kr|SqW|rLtd||S)NrArBz Unable to retrieve IANA for '{}')rHrIr r%rr[)rcrdrKrLrrr iana_namesre)decoded_sequencercCs8t}x(|D] }t|}|dkr"q ||q Wt|S)N)setr'addlist)rfrangesrr,rrr range_scans  rk) iana_name_a iana_name_brc Cst|st|rdStd|j}td|j}|dd}|dd}d}x8tddD]*}t|g}||||kr\|d7}q\W|dS) Ngz encodings.{}r?)r@r)r\rYrZr[rrangerDrG) rlrmZ decoder_aZ decoder_bZid_aZid_bZcharacter_match_countiZ to_be_decodedrrr cp_similaritys    rscCs|tko|t|kS)z Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using the function cp_similarity. )r)rlrmrrr is_cp_similarsrt)r;)T)3Z unicodedata2r ImportErrorcodecsrrertypingrrrrrrYZ_multibytecodecr Zencodings.aliasesr functoolsr Zcharset_normalizer.constantr r rrrrstrboolrr#r'r(r-r0r4r6r8rFr:rDr!rMr\rarbrerkfloatrsrtrrrrsL