{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setting Up the Configuration for the data storage in S3"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook is designed to be run with the `Python 3 (Data Science)` kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import boto3\n",
    "\n",
    "# Read the dataset download URL and the upload destination from SSM parameters.\n",
    "session = boto3.Session()\n",
    "ssm = session.client('ssm')\n",
    "\n",
    "download_url = ssm.get_parameter(Name=\"/aik/download_url\")[\"Parameter\"][\"Value\"]\n",
    "raw_data = ssm.get_parameter(Name=\"/aik/raw_data\")[\"Parameter\"][\"Value\"]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We use the `opendatasets` library to download the dataset from Kaggle."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install opendatasets==0.1.20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import opendatasets as od"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You will need a Kaggle account. The download step asks for your Kaggle username (not your email) and a Kaggle API key. Refer to https://github.com/Kaggle/kaggle-api if you need to create an API key."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "od.download(download_url)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Copy data from local storage to the S3 bucket"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Local directory to upload, resolved against the current working directory.\n",
    "# Assumes the download above placed the dataset under ./ipinyou - adjust if it did not.\n",
    "source = os.path.join(os.getcwd(), \"ipinyou\", \"ipinyou.contest.dataset\")\n",
    "source"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quote both values so that paths containing spaces survive shell word-splitting.\n",
    "!aws s3 cp --recursive \"{source}\" \"{raw_data}\""
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:eu-west-1:470317259841:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}