{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Install and/or update required third-party libraries\n",
    "\n",
    "`%pip` is used instead of `!python -m pip` so that the packages are installed into the environment of the running kernel."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -Uq pip\n",
    "%pip install -Uq sagemaker awswrangler"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Select the S3 bucket and key prefix\n",
    "\n",
    "Everything below is uploaded to the default bucket of the current SageMaker session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "\n",
    "bucket = sagemaker.Session().default_bucket()\n",
    "prefix = 'sagemaker/DEMO-xgboost-tripfare'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Upload the contents of `../glue/` to S3\n",
    "\n",
    "The local `../glue/` directory is copied recursively to the `scripts/` prefix of the bucket."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp --recursive ../glue/ s3://$bucket/scripts/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Define the S3 input locations\n",
    "\n",
    "`input_data` deliberately has no trailing slash because the dataset name (for example `/green`) is appended to it in the upload commands below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_source = f's3://{bucket}/{prefix}/input/'\n",
    "input_data = input_source + 'data'\n",
    "input_zones = input_source + 'zones/'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Persist the locations with `%store` so that they can be restored in another notebook with `%store -r`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%store input_source\n",
    "%store input_zones"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Upload the input data\n",
    "\n",
    "Because of an access issue with the `nyc-tlc` bucket, the commands in the next cell (kept commented out for reference) no longer work. The data is now included in the git repo and is uploaded from the local copy instead. This will be changed back once the S3 bucket issue is fixed by the NYC trip data office."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !aws s3 cp --recursive 's3://nyc-tlc/trip data/' $input_data/green --exclude '*' --include 'green_tripdata_2018-1*'\n",
    "# !aws s3 cp --recursive 's3://nyc-tlc/trip data/' $input_data/yellow --exclude '*' --include 'yellow_tripdata_2018-1*'\n",
    "# !aws s3 cp 's3://nyc-tlc/misc/taxi_zones.zip' $input_zones"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp --recursive green/ $input_data/green\n",
    "!aws s3 cp input_zones/taxi_zones.zip $input_zones"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Verify the upload\n",
    "\n",
    "List the objects that were copied under the `green` data prefix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 ls $input_data/green/\n",
    "# !aws s3 ls $input_data/yellow/"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}