{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Setup for Pipelines lab" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sagemaker" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%store -r bucket\n", "%store -r prefix\n", "%store -r region\n", "%store -r docker_image_name" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "s3uri_code = f\"s3://{bucket}/{prefix}/code\"\n", "s3uri_code" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Create and upload python scripts (created in the labs) to S3 \n", "\n", "## (if you have ran some of the labs but not all, just pick the parts you skipped)\n", "\n", "### 1-DataPrep" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile preprocess.py\n", "\"\"\"Feature engineers the customer churn dataset.\"\"\"\n", "import argparse\n", "import logging\n", "import pathlib\n", "\n", "import boto3\n", "import numpy as np\n", "import pandas as pd\n", "\n", "logger = logging.getLogger()\n", "logger.setLevel(logging.INFO)\n", "logger.addHandler(logging.StreamHandler())\n", "\n", "if __name__ == \"__main__\":\n", " logger.info(\"Starting preprocessing.\")\n", " parser = argparse.ArgumentParser()\n", " parser.add_argument(\"--input-data\", type=str, required=True)\n", " args = parser.parse_args()\n", "\n", " base_dir = \"/opt/ml/processing\"\n", " pathlib.Path(f\"{base_dir}/data\").mkdir(parents=True, exist_ok=True)\n", " input_data = args.input_data\n", " print(input_data)\n", " bucket = input_data.split(\"/\")[2]\n", " key = \"/\".join(input_data.split(\"/\")[3:])\n", "\n", " logger.info(\"Downloading data from bucket: %s, key: %s\", bucket, key)\n", " fn = f\"{base_dir}/data/raw-data.csv\"\n", " s3 = boto3.resource(\"s3\")\n", " s3.Bucket(bucket).download_file(key, fn)\n", "\n", " logger.info(\"Reading downloaded data.\")\n", "\n", " # read in csv\n", " df = pd.read_csv(fn)\n", "\n", " # drop the \"Phone\" feature column\n", " df = df.drop([\"Phone\"], axis=1)\n", "\n", " # Change the data type of \"Area Code\"\n", " df[\"Area Code\"] = df[\"Area Code\"].astype(object)\n", "\n", " # Drop several other columns\n", " df = df.drop([\"Day Charge\", \"Eve Charge\", \"Night Charge\", \"Intl Charge\"], axis=1)\n", "\n", " # Convert categorical variables into dummy/indicator variables.\n", " model_data = pd.get_dummies(df)\n", "\n", " # Create one binary classification target column\n", " model_data = pd.concat(\n", " [\n", " model_data[\"Churn?_True.\"],\n", " model_data.drop([\"Churn?_False.\", \"Churn?_True.\"], axis=1),\n", " ],\n", " axis=1,\n", " )\n", "\n", " # Split the data\n", " train_data, validation_data, test_data = np.split(\n", " model_data.sample(frac=1, random_state=1729),\n", " [int(0.7 * len(model_data)), int(0.9 * len(model_data))],\n", " )\n", "\n", " pd.DataFrame(train_data).to_csv(\n", " f\"{base_dir}/train/train.csv\", header=False, index=False\n", " )\n", " pd.DataFrame(validation_data).to_csv(\n", " f\"{base_dir}/validation/validation.csv\", header=False, index=False\n", " )\n", " pd.DataFrame(test_data).to_csv(\n", " f\"{base_dir}/test/test.csv\", header=False, index=False\n", " )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "s3_dataprep_code_uri = 
sagemaker.s3.S3Uploader.upload(\"preprocess.py\", s3uri_code)\n", "%store s3_dataprep_code_uri\n", "s3_dataprep_code_uri" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "### 2-Modeling" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile xgboost_customer_churn.py\n", "import argparse\n", "import json\n", "import os\n", "import pickle\n", "import random\n", "import tempfile\n", "import urllib.request\n", "\n", "import xgboost\n", "from smdebug import SaveConfig\n", "from smdebug.xgboost import Hook\n", "\n", "\n", "def parse_args():\n", "\n", " parser = argparse.ArgumentParser()\n", "\n", " parser.add_argument(\"--max_depth\", type=int, default=5)\n", " parser.add_argument(\"--eta\", type=float, default=0.2)\n", " parser.add_argument(\"--gamma\", type=int, default=4)\n", " parser.add_argument(\"--min_child_weight\", type=int, default=6)\n", " parser.add_argument(\"--subsample\", type=float, default=0.8)\n", " parser.add_argument(\"--verbosity\", type=int, default=0)\n", " parser.add_argument(\"--objective\", type=str, default=\"binary:logistic\")\n", " parser.add_argument(\"--num_round\", type=int, default=50)\n", " parser.add_argument(\"--smdebug_path\", type=str, default=None)\n", " parser.add_argument(\"--smdebug_frequency\", type=int, default=1)\n", " parser.add_argument(\"--smdebug_collections\", type=str, default='metrics')\n", " parser.add_argument(\"--output_uri\", type=str, default=\"/opt/ml/output/tensors\",\n", " help=\"S3 URI of the bucket where tensor data will be stored.\")\n", "\n", " parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))\n", " parser.add_argument('--validation', type=str, default=os.environ.get('SM_CHANNEL_VALIDATION'))\n", " parser.add_argument('--model-dir', type=str, default=os.environ['SM_MODEL_DIR'])\n", " \n", " args = parser.parse_args()\n", "\n", " return args\n", "\n", "\n", "def create_smdebug_hook(out_dir, train_data=None, validation_data=None, frequency=1, collections=None,):\n", "\n", " save_config = SaveConfig(save_interval=frequency)\n", " hook = Hook(\n", " out_dir=out_dir,\n", " train_data=train_data,\n", " validation_data=validation_data,\n", " save_config=save_config,\n", " include_collections=collections,\n", " )\n", "\n", " return hook\n", "\n", "\n", "def main():\n", " \n", " args = parse_args()\n", "\n", " train, validation = args.train, args.validation\n", " parse_csv = \"?format=csv&label_column=0\"\n", " dtrain = xgboost.DMatrix(train+parse_csv)\n", " dval = xgboost.DMatrix(validation+parse_csv)\n", "\n", " watchlist = [(dtrain, \"train\"), (dval, \"validation\")]\n", "\n", " params = {\n", " \"max_depth\": args.max_depth,\n", " \"eta\": args.eta,\n", " \"gamma\": args.gamma,\n", " \"min_child_weight\": args.min_child_weight,\n", " \"subsample\": args.subsample,\n", " \"verbosity\": args.verbosity,\n", " \"objective\": args.objective}\n", "\n", " # The output_uri is a the URI for the s3 bucket where the metrics will be\n", " # saved.\n", " output_uri = (\n", " args.smdebug_path\n", " if args.smdebug_path is not None\n", " else args.output_uri\n", " )\n", "\n", " collections = (\n", " args.smdebug_collections.split(',')\n", " if args.smdebug_collections is not None\n", " else None\n", " )\n", "\n", " hook = create_smdebug_hook(\n", " out_dir=output_uri,\n", " frequency=args.smdebug_frequency,\n", " collections=collections,\n", " train_data=dtrain,\n", " validation_data=dval,\n", " )\n", "\n", " bst = xgboost.train(\n", " 
params=params,\n", " dtrain=dtrain,\n", " evals=watchlist,\n", " num_boost_round=args.num_round,\n", " callbacks=[hook])\n", " \n", " if not os.path.exists(args.model_dir):\n", " os.makedirs(args.model_dir)\n", "\n", " model_location = os.path.join(args.model_dir, 'xgboost-model')\n", " pickle.dump(bst, open(model_location, 'wb'))\n", "\n", "\n", "if __name__ == \"__main__\":\n", "\n", " main()\n", "\n", "\n", "def model_fn(model_dir):\n", " \"\"\"Load a model. For XGBoost Framework, a default function to load a model is not provided.\n", " Users should provide customized model_fn() in script.\n", " Args:\n", " model_dir: a directory where model is saved.\n", " Returns:\n", " A XGBoost model.\n", " XGBoost model format type.\n", " \"\"\"\n", " model_files = (file for file in os.listdir(model_dir) if os.path.isfile(os.path.join(model_dir, file)))\n", " model_file = next(model_files)\n", " try:\n", " booster = pickle.load(open(os.path.join(model_dir, model_file), 'rb'))\n", " format = 'pkl_format'\n", " except Exception as exp_pkl:\n", " try:\n", " booster = xgboost.Booster()\n", " booster.load_model(os.path.join(model_dir, model_file))\n", " format = 'xgb_format'\n", " except Exception as exp_xgb:\n", " raise ModelLoadInferenceError(\"Unable to load model: {} {}\".format(str(exp_pkl), str(exp_xgb)))\n", " booster.set_param('nthread', 1)\n", " return booster, format\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!tar -czf sourcedir.tar.gz xgboost_customer_churn.py" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "train_script_name = \"xgboost_customer_churn.py\"\n", "script_artifact = \"sourcedir.tar.gz\"\n", "s3_modeling_code_uri = sagemaker.s3.S3Uploader.upload(script_artifact, s3uri_code)\n", "\n", "%store train_script_name\n", "%store s3_modeling_code_uri\n", "\n", "train_script_name, s3_modeling_code_uri" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "### 3-Evaluation" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile evaluate.py\n", "\"\"\"Evaluation script for measuring model accuracy.\"\"\"\n", "\n", "import json\n", "import os\n", "import tarfile\n", "import logging\n", "import pickle\n", "\n", "import pandas as pd\n", "import xgboost\n", "\n", "logger = logging.getLogger()\n", "logger.setLevel(logging.INFO)\n", "logger.addHandler(logging.StreamHandler())\n", "\n", "# May need to import additional metrics depending on what you are measuring.\n", "# See https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html\n", "from sklearn.metrics import classification_report, roc_auc_score, accuracy_score\n", "\n", "def get_dataset(dir_path, dataset_name) -> pd.DataFrame:\n", " files = [ os.path.join(dir_path, file) for file in os.listdir(dir_path) ]\n", " if len(files) == 0:\n", " raise ValueError(('There are no files in {}.\\n' +\n", " 'This usually indicates that the channel ({}) was incorrectly specified,\\n' +\n", " 'the data specification in S3 was incorrectly specified or the role specified\\n' +\n", " 'does not have permission to access the data.').format(files, dataset_name))\n", " raw_data = [ pd.read_csv(file, header=None) for file in files ]\n", " df = pd.concat(raw_data)\n", " return df\n", "\n", "if __name__ == \"__main__\":\n", " model_path = \"/opt/ml/processing/model/model.tar.gz\"\n", " with tarfile.open(model_path) as tar:\n", " tar.extractall(path=\"..\")\n", "\n", 
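The report written by `evaluate.py` has to follow the model-quality metric naming linked above. A quick local sanity check of the metrics and report structure, on hypothetical toy labels and probabilities:

```python
# Local sanity check of the metrics and report structure used in evaluate.py.
# The labels and probabilities below are hypothetical toy values.
import json
from sklearn.metrics import accuracy_score, roc_auc_score

y_true = [0, 0, 1, 1, 1, 0]
y_prob = [0.1, 0.4, 0.8, 0.9, 0.3, 0.2]
y_pred = [round(p) for p in y_prob]

report_dict = {
    "binary_classification_metrics": {
        "accuracy": {"value": accuracy_score(y_true, y_pred), "standard_deviation": "NaN"},
        "auc": {"value": roc_auc_score(y_true, y_prob), "standard_deviation": "NaN"},
    }
}
print(json.dumps(report_dict, indent=2))
```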
" logger.debug(\"Loading xgboost model.\")\n", " model = pickle.load(open(\"xgboost-model\", \"rb\"))\n", "\n", " logger.info(\"Loading test input data\")\n", " test_path = \"/opt/ml/processing/test\"\n", " df = get_dataset(test_path, \"test_set\")\n", "\n", " logger.debug(\"Reading test data.\")\n", " y_test = df.iloc[:, 0].to_numpy()\n", " df.drop(df.columns[0], axis=1, inplace=True)\n", " X_test = xgboost.DMatrix(df.values)\n", "\n", " logger.info(\"Performing predictions against test data.\")\n", " predictions_probs = model.predict(X_test)\n", " predictions = predictions_probs.round()\n", "\n", " logger.info(\"Creating classification evaluation report\")\n", " acc = accuracy_score(y_test, predictions)\n", " auc = roc_auc_score(y_test, predictions_probs)\n", "\n", " # The metrics reported can change based on the model used, but it must be a specific name per (https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-metrics.html)\n", " report_dict = {\n", " \"binary_classification_metrics\": {\n", " \"accuracy\": {\n", " \"value\": acc,\n", " \"standard_deviation\": \"NaN\",\n", " },\n", " \"auc\": {\"value\": auc, \"standard_deviation\": \"NaN\"},\n", " },\n", " }\n", "\n", " logger.info(\"Classification report:\\n{}\".format(report_dict))\n", "\n", " evaluation_output_path = os.path.join(\n", " \"/opt/ml/processing/evaluation\", \"evaluation.json\"\n", " )\n", " logger.info(\"Saving classification report to {}\".format(evaluation_output_path))\n", "\n", " with open(evaluation_output_path, \"w\") as f:\n", " f.write(json.dumps(report_dict))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "s3_evaluation_code_uri = sagemaker.s3.S3Uploader.upload(\"evaluate.py\", s3uri_code)\n", "\n", "%store s3_evaluation_code_uri\n", "s3_evaluation_code_uri" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "# Create functions to get processors and estimators (created in the labs)\n", "\n", "## (if you have ran some of the labs but not all, just pick the parts you skipped)\n", "\n", "### 1-DataPrep" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile ../my_labs_solutions/dataprep_solution.py\n", "# DataPrep\n", "import sagemaker\n", "from sagemaker.sklearn.processing import SKLearnProcessor\n", "\n", "def get_dataprep_processor(\n", " processing_instance_type,\n", " processing_instance_count,\n", " role,\n", " base_job_prefix=\"CustomerChurn\"\n", ") -> SKLearnProcessor:\n", " \n", " sm_sess = sagemaker.session.Session()\n", " \n", " # Processing step for feature engineering\n", " sklearn_processor = SKLearnProcessor(\n", " framework_version=\"0.23-1\",\n", " instance_type=processing_instance_type,\n", " instance_count=processing_instance_count,\n", " base_job_name=f\"{base_job_prefix}/sklearn-CustomerChurn-preprocess\", # choose any name\n", " sagemaker_session=sm_sess,\n", " role=role,\n", " )\n", " return sklearn_processor" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2-Modeling" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%writefile ../my_labs_solutions/modeling_solution.py\n", "# Modeling\n", "import boto3\n", "import sagemaker\n", "from sagemaker.inputs import TrainingInput\n", "\n", "from sagemaker.debugger import rule_configs, Rule, DebuggerHookConfig\n", "\n", "def get_modeling_estimator(bucket,\n", " prefix,\n", " s3_modeling_code_uri,\n", " docker_image_name,\n", " role,\n", " 
### 3-Evaluation

```python
%%writefile ../my_labs_solutions/evaluation_solution.py
# Evaluation
import sagemaker
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,
)


def get_evaluation_processor(docker_image_name, role) -> ScriptProcessor:

    sm_sess = sagemaker.session.Session()

    # Processing step for evaluation
    processor = ScriptProcessor(
        image_uri=docker_image_name,
        command=["python3"],
        instance_type="ml.m5.xlarge",
        instance_count=1,
        base_job_name="CustomerChurn/eval-script",
        sagemaker_session=sm_sess,
        role=role,
    )

    return processor
```

Save vars for later:

```python
%store -r s3uri_raw
```

```python
role = sagemaker.get_execution_role()
role
```

```python
import json

my_vars = {
    "bucket": bucket,
    "prefix": prefix,
    "region": region,
    "docker_image_name": docker_image_name,
    "s3uri_raw": s3uri_raw,
    "s3_dataprep_code_uri": s3_dataprep_code_uri,
    "s3_modeling_code_uri": s3_modeling_code_uri,
    "train_script_name": train_script_name,
    "s3_evaluation_code_uri": s3_evaluation_code_uri,
    "role": role
}

with open("../my_labs_solutions/my-solution-vars.json", "w") as f:
    f.write(json.dumps(my_vars))
```

### [You can now go back to the main notebook for this lab](../pipelines.ipynb)
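Back in the main notebook, the variables saved above can be loaded again. A hedged sketch, assuming `pipelines.ipynb` sits next to the `my_labs_solutions/` folder (as the relative link above suggests):

```python
# Hedged sketch: reload the saved lab variables from the main notebook.
# The relative path assumes pipelines.ipynb lives next to my_labs_solutions/.
import json

with open("my_labs_solutions/my-solution-vars.json") as f:
    my_vars = json.load(f)

bucket, role = my_vars["bucket"], my_vars["role"]
print(bucket, my_vars["docker_image_name"])
```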
purpose", "gpuNum": 0, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 21, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 
26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 28, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 29, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { 
"_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 4 }