import csv
import io
import json
import os
import zipfile

import boto3
import requests

# Public BSEE/BOEM archive that holds the raw borehole table.
_SOURCE_URL = 'https://www.data.boem.gov/Well/Files/BoreholeRawData.zip'
# Directory the archive is unpacked into.
_EXTRACT_ROOT = '/tmp/'
# Directory the code expects the archive to create when unpacked.
_DATA_DIR = '/tmp/BoreholeRawData/'
# Number of data rows (header excluded) written to each uploaded chunk.
_ROWS_PER_FILE = 5000
# (connect, read) timeouts in seconds so a stalled download fails fast
# instead of hanging until the function itself times out.
_DOWNLOAD_TIMEOUT = (10, 120)


def _upload_chunk(s3_client, bucket, file_number, lines):
    """Write *lines* to a numbered chunk file, upload it to S3, then delete it.

    The local file is named ``mv_boreholes_<file_number>.txt`` inside
    ``_DATA_DIR`` and is uploaded under the ``mv_boreholes/`` key prefix.
    The local copy is removed even when the write or the upload fails.
    """
    file_name = 'mv_boreholes_' + str(file_number) + '.txt'
    local_path = os.path.join(_DATA_DIR, file_name)
    try:
        with open(local_path, 'w') as outfile:
            outfile.writelines(lines)
        s3_client.upload_file(local_path, bucket, 'mv_boreholes/' + file_name)
    finally:
        # Always free the scratch space, including after a failed upload.
        if os.path.exists(local_path):
            os.remove(local_path)


def wellLocation(event, context):
    """Download the BSEE borehole data and upload it to S3 in chunks.

    Fetches ``BoreholeRawData.zip``, unpacks it under ``/tmp/``, then splits
    ``mv_boreholes.txt`` into files of at most ``_ROWS_PER_FILE`` data rows.
    Every chunk starts with a copy of the source file's first line (the
    header) and is uploaded as ``mv_boreholes/mv_boreholes_<n>.txt`` to the
    bucket named by the ``S3Bucket`` environment variable.

    A chunk is only uploaded when it holds at least one data row, so an
    empty or header-only input produces no upload, and an input whose row
    count is an exact multiple of the chunk size produces no trailing
    header-only file.

    Args:
        event: Invocation payload; not used.
        context: Invocation context; not used.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded message in ``body``.

    Raises:
        KeyError: If the ``S3Bucket`` environment variable is not set.
        requests.HTTPError: If the download returns an error status.
    """
    # Read the configuration first so a missing variable fails before any
    # download work is done.
    bucket = os.environ['S3Bucket']
    s3_client = boto3.resource('s3').meta.client

    with requests.get(_SOURCE_URL, timeout=_DOWNLOAD_TIMEOUT) as response:
        # Fail on an HTTP error rather than handing an error page to zipfile.
        response.raise_for_status()
        archive_bytes = response.content

    # Extract to an explicit directory instead of changing the process-wide
    # working directory.
    with zipfile.ZipFile(io.BytesIO(archive_bytes)) as archive:
        archive.extractall(_EXTRACT_ROOT)

    file_number = 1
    with open(os.path.join(_DATA_DIR, 'mv_boreholes.txt')) as infile:
        # The first line is the header; it is repeated at the top of each chunk.
        header = infile.readline()
        chunk = [header]
        for line in infile:
            chunk.append(line)
            # len(chunk) counts the header, so "> _ROWS_PER_FILE" means the
            # chunk now holds exactly _ROWS_PER_FILE data rows.
            if len(chunk) > _ROWS_PER_FILE:
                _upload_chunk(s3_client, bucket, file_number, chunk)
                file_number += 1
                chunk = [header]

    # Upload the remaining rows, skipping a chunk that only has the header.
    if len(chunk) > 1:
        _upload_chunk(s3_client, bucket, file_number, chunk)

    # NOTE(review): the message mentions DynamoDB although this function only
    # uploads to S3; the text is kept unchanged in case callers match on it.
    return {
        'statusCode': 200,
        'body': json.dumps('Completed Uploading Geo Data to DynamoDB!')
    }