from __future__ import print_function import subprocess import sys #check if module requests is present and install if it is not try: import requests except ImportError: subprocess.check_call([sys.executable, "-m", "pip", "install", 'requests']) import requests # Load the dataset source_data = 'abalone_dataset.csv' with open(source_data, 'wb') as file: print("Downloading dataset file, this might take couple of minutes...") response=requests.get("https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/regression/abalone", stream=True) total_length = response.headers.get('content-length') if total_length is None: # no content length header file.write(response.content) else: dl = 0 total_length = int(total_length) for data in response.iter_content(chunk_size=4096): dl += len(data) file.write(data) done = int(50 * dl / total_length) sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50-done)) ) sys.stdout.flush() sys.stdout.write("\n") #divide the data into two files data=[l for l in open(source_data,'r')] middle_point=len(data)//2 output_files= ['dataset1.csv','dataset2.csv'] output_data=[data[:middle_point], data[middle_point:]] for index,outfile in enumerate(output_files): with open(outfile, 'w') as to_write: for line in output_data[index]: to_write.write(line) #printing the success of file creation print(f"Half of the data saved successfully to {outfile}")