a =tEc@sHddlZddlZddlZdaddZddZddZdd Zd d Z dS) NcCstdurtdatS)Ns3) _s3Clientboto3clientrrw/Users/sathyaws/work/code/ml/opensource/amazon-textract-pdf-text-extractor/lambda/textractpostprocessor/app/awsUtils.py getS3Clients rcCs8t}td|d||j||d}|d}|dSNzreading from bucket z key )BucketKeyBodyzutf-8rprint get_objectreaddecode bucketNameprefixZs3ClientdatacontentsrrrreadTextFileFromS3 s  rcCs.|ddd}|d}d|}||fS)Nzs3:///r)replacesplitpopjoin)Zs3_path path_partsbucketkeyrrr split_s3_paths  r!cCs8t}td|d||j||d}|d}|dSr r rrrrreadJSONFileFromS3s  r"cCs<t||}t|}t|d}t||jd}|S)NZBlocksZLINE)rjsonloadspd DataFrameZ BlockTypeeq)rrrZ json_contentr dataFilteredrrrgetExtractedDataFromS3!s   r)) rr#pandasr%rrrr!r"r)rrrrs