U Dx`u5 @sLUdZddlZddlZddlZddlZddlZddlZddlZddlm Z m Z m Z ddl m Z mZmZmZmZmZmZmZmZmZddlZddlZddlZddlZddlmZm Z ddl!m"Z"ddl#m$Z$e%e&Z'ej(e)d<ee*ee*fZ+dNedej,e+fej,d d d Z-dOeej,e+d d dZ.dPee+ej,dddZ/ej0j1dddZ2e*ee*dddZ3e$dQe*eej,eej0j1ej4dddZ4e$dRe*eej,eej0j1ej5dddZ5e*ee*e*fdddZ6dSe7e8d!d"d#Z9dTee e8ee8eee d%d&d'Z:ed(dd)d*Z;e*e*dd+d,ZdWeej,ej?j@d d3d4ZAee eBee d5d6d7ZCejDejDd8d9d:ZEejDe d8d;d<ZFdXed?e fe ee*eBe8e e d@dAdBZGe8e8e7ee8d?fdCdDdEZHee ee d?fdFdGdHZIee ddFdIdJZJee e8ddKdLdMZKdS)Yz$Internal (private) Utilities Module.N)FIRST_COMPLETEDFuturewait) AnyCallableDict GeneratorListOptionalSequenceTupleUnioncast)_config exceptions) __version__) apply_configs_logger)sessionreturncCs8t|trt|dS|dk r |Stjdk r0tjStS)z3Ensure that a valid boto3.Session will be returned.) primitivesN) isinstancedictboto3_from_primitivesboto3ZDEFAULT_SESSIONSessionrr9/tmp/pip-target-zr53vnty/lib/python/awswrangler/_utils.pyensure_sessions   r) boto3_sessionrcCs>t|d}|}t|ddt|ddt|dd|j|jdS)z+Convert Boto3 Session to Python primitives.rZ access_keyNZ secret_keytoken)Zaws_access_key_idZaws_secret_access_keyZaws_session_token region_name profile_name)rget_credentialsgetattrr"r#)r Z_boto3_session credentialsrrrboto3_to_primitives(s    r')rrcCsV|dkrtSt|}|dd}|dkr0dn||d<dd|D}tjf|S)z+Convert Python primitives to Boto3 Session.Nr#)NdefaultcSsi|]\}}|dk r||qSNr).0kvrrr <sz)boto3_from_primitives..)rcopydeepcopygetitemsrr)rZ _primitivesr#argsrrrr5s  r)rcCsDdttddi}td}|r*||d<tjj|dddtdS) zBotocore configuration.Z max_attemptsZAWS_MAX_ATTEMPTS5ZAWS_RETRY_MODEmode z awswrangler/)retriesconnect_timeoutZmax_pool_connectionsZuser_agent_extra)intosgetenvbotocoreconfigConfigr)Zretries_configr4rrrdefault_botocore_config@s r>) service_namercCsd}|dkr"tjjdk r"tjj}n|dkr@tjjdk r@tjj}n|dkr^tjjdk r^tjj}nv|dkr|tjjdk r|tjj}nX|dkrtjjdk rtjj}n:|dkrtjjdk rtjj}n|dkrtjjdk rtjj}|S)NZs3ZathenastsZglueZredshiftZkmsZemr) rr<Zs3_endpoint_urlZathena_endpoint_urlZsts_endpoint_urlZglue_endpoint_urlZredshift_endpoint_urlZkms_endpoint_urlZemr_endpoint_url)r? endpoint_urlrrr_get_endpoint_urlPs       rB)r?rbotocore_configrcCs0t|d}t|dj||d|dkr(tn|dS)zCreate a valid boto3.client.r?rTNr?rAZuse_sslr<)rBrclientr>r?rrCrArrrrFcs  rFcCs0t|d}t|dj||d|dkr(tn|dS)zCreate a valid boto3.resource.rDrTNrE)rBrresourcer>rGrrrrHqs  rH)pathrcCs|ddkr td|d|dddddd }|d }d|kr^td|d d}t|d kr|d d kr~|n|d }||fS)a(Split a full S3 path in bucket and key strings. 's3://bucket/key' -> ('bucket', 'key') Parameters ---------- path : str S3 path (e.g. s3://bucket/key). Returns ------- Tuple[str, str] Tuple of bucket and key strings Examples -------- >>> from awswrangler._utils import parse_path >>> bucket, key = parse_path('s3://bucket/key') >>> from awswrangler._utils import parse_path >>> bucket, key = parse_path('s3://arn:aws:s3:::accesspoint//') zs3://F'z1' is not a valid path. It MUST start with 's3://'z :accesspoint/z :accesspoint:/rz' is not a valid bucket name.N) startswithrInvalidArgumentValuereplacesplitlen)rIpartsZbucketkeyrrr parse_paths rVT) use_threadsrcCs0d}|dkr,t}|dk r,||kr(|n|}|S)a)Get the number of cpu cores to be used. Note ---- In case of `use_threads=True` the number of threads that could be spawned will be get from os.cpu_count(). Parameters ---------- use_threads : bool True to enable multi-core utilization, False to disable. Returns ------- int Number of cpu cores to be used. Examples -------- >>> from awswrangler._utils import ensure_cpu_count >>> ensure_cpu_count(use_threads=True) 4 >>> ensure_cpu_count(use_threads=False) 1 rMTN)r9 cpu_count)rWZcpusZcpu_cntrrrensure_cpu_counts rYrM)lst num_chunks max_lengthrcCsL|sgS|dkr|ntttt|t|}t||}dd|DS)aSplit a list in a List of List (chunks) with even sizes. Parameters ---------- lst: List List of anything to be splitted. num_chunks: int, optional Maximum number of chunks. max_length: int, optional Max length of each chunk. Has priority over num_chunks. Returns ------- List[List[Any]] List of List (chunks) with even sizes. Examples -------- >>> from awswrangler._utils import chunkify >>> chunkify(list(range(13)), num_chunks=3) [[0, 1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]] >>> chunkify(list(range(13)), max_length=4) [[0, 1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]] NcSs g|]}t|dkr|qS)r)rStolist)r*Zarrrrr s zchunkify..)r8mathceilfloatrSnpZ array_split)rZr[r\nZ np_chunksrrrchunkifys * rd)NNNccsdEdHdS)zEmpty Generator.rNrrrrrempty_generatorsrecCs|jdddddS)zExtract directory path.rLrM)sepmaxsplitr)rsplit)rIrrr get_directorysri) subnet_idr rcCs>t|d}td|d}tt|j|gdddddd S) zExtract region from Subnet ID.rZec2)r?r)Z SubnetIdsZSubnetsrZAvailabilityZoneN)rrFrstrZdescribe_subnets)rjr rZ client_ec2rrrget_region_from_subnets  rm)r default_regionrcCs6t|d}|j}|dk r|S|dk r(|StddS)zExtract region from session.rNz>There is no region_name defined on boto3, please configure it.)rr"rZInvalidArgument)r rnrZregionrrrget_region_from_sessions rocCst|d}|}|}|S)z'Get AWS credentials from boto3 session.r)rr$Zget_frozen_credentials)r rr&Zfrozen_credentialsrrrget_credentials_from_sessions rp)rZsamplingrcCs|dkr |S|dks|dkr.td|dt|}|dkrBgStt||}||kr^|n|}|dkrndn|}td|td|td |tj||d }| |S) zRandom List sampling.?gz2Argument must be [0.0 < value <= 1.0]. z received.rrMz_len: %sz sampling: %sznum_samples: %s) populationr+) rrPrSr8roundrdebugrandomsamplesort)rZrq_lenZ num_samplesZ random_lstrrr list_sampling s    rz)dfrcCsR|jD]B}t||jddkr ||jjjdkr ||}d||<|||<q |S)z4Ensure that all columns has the writeable flag True.flagsTFN)columnsto_listhasattrvaluesr|Z writeable)r{columnsrrrensure_df_is_mutables rcCs8|jdd|jfj}|r4td|ddS)z9Raise an exception if there are duplicated columns names.Nz5There are duplicated column names in your DataFrame: a. Note that your columns may have been sanitized and it can be the cause of the duplicity. Wrangler sanitization removes all special characters and also converts CamelCase to snake_case. So you must avoid columns like ['MyCol', 'my_col'] in your DataFrame.)locr} duplicatedr~rZInvalidDataFrame)r{rrrrcheck_duplicated_columns*s  rrr.)fexex_codebase max_num_trieskwargsrc Ks|}t|D]}z|f|WS|k r}zh|dk rXt|drX|jdd|krX||dkrft||d}td||d||t|W5d}~XYq Xq t dS)zRun function with decorrelated Jitter. Reference: https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ NresponseErrorZCoderMrz/Retrying %s | Fail number %s/%s | Exception: %s) rangerrrvuniformrerrortimesleep RuntimeError) rrrrrrdelayi exceptionrrrtry_it7s   r) total_size chunk_size upper_boundrc Cs|dkrtjntj}t|t|t|}|dkr8dn|}t||}||}tt||}t|D] }|t |} || d7<qht |S)z*Calculate even chunks sizes (Best effort).TrM) r_r`floorr8ralist itertoolsrepeatrrStuple) rrrZ round_funcr[Z base_sizerestsizesrZi_cycledrrrget_even_chunks_sizesSs   r)seqrcCstdd|DS)zFilter only running futures.css|]}|r|VqdSr))running)r*rrrr csz&get_running_futures..)rrrrrget_running_futuresasrcCst|dtddS)z'Wait until any future became available.N)fstimeoutZ return_when)rrrrrrwait_any_future_availablefsr)r max_workersrcCs0t|d}t||kr,t|dt|d}q dS)z(Block until any thread became available.rN)rrSr)rrrrrrblock_waiting_available_threadks   r)N)N)N)NN)NN)T)rMN)N)NN)N)Nrrr)L__doc__r.rloggingr_r9rvrconcurrent.futuresrrrtypingrrrrr r r r r rrZbotocore.configr;numpyrbZpandaspdZ awswranglerrrZawswrangler.__metadata__rZawswrangler._configr getLogger__name__rLogger__annotations__rlZBoto3PrimitivesTyperrr'rr<r=r>rBrFrHrVboolr8rYrdrerirmror&ZReadOnlyCredentialsrprarzZ DataFramerrrrrrrrrrrs0         #"&!