{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "635ad8a7-009c-4d7d-8bb6-68e094e45604",
   "metadata": {},
   "source": [
    "# Test your FSx for Lustre and SageMaker training connection\n",
    "\n",
    "This notebook submits a small SageMaker training job that receives an FSx for Lustre directory as its `train` channel and lists it, so you can check the file system, security group and subnet settings before running real training."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a81d69ae-8dff-4cdb-bfde-d34d8498d57d",
   "metadata": {},
   "source": [
    "### 1. Point to relevant configs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e46debb2-5240-4943-b166-7f831c6346c9",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker.inputs import FileSystemInput\n",
    "from sagemaker.pytorch import PyTorch\n",
    "\n",
    "# One session and role for the whole notebook; the later cells reuse them.\n",
    "sess = sagemaker.Session()\n",
    "role = sagemaker.get_execution_role()\n",
    "\n",
    "bucket = sess.default_bucket()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "83b50122-5314-40a8-ab07-d71589294182",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Specify FSx Lustre file system id.\n",
    "file_system_id = \"\"\n",
    "\n",
    "# Specify the security group and subnet used by the FSx file system.\n",
    "# They are passed to the SageMaker estimator so the job uses them as well.\n",
    "fsx_security_group_id = \"\"\n",
    "fsx_subnet = \"\"\n",
    "\n",
    "# Specify the directory path for input data on the file system.\n",
    "# You need to provide a normalized and absolute path below.\n",
    "# The mount name is either chosen by you when creating the file system or\n",
    "# generated automatically; you can find it on the FSx page in the console.\n",
    "# Example of an FSx-generated mount_name: \"3x5lhbmv\"\n",
    "# NOTE(review): the path is expected to start with the mount name,\n",
    "# e.g. \"/3x5lhbmv\" - confirm against your file system.\n",
    "base_path = \"\"\n",
    "\n",
    "# Specify your file system type.\n",
    "file_system_type = \"FSxLustre\"\n",
    "\n",
    "# Fail fast on unfilled placeholders instead of at job submission time.\n",
    "required_settings = {\n",
    "    \"file_system_id\": file_system_id,\n",
    "    \"fsx_security_group_id\": fsx_security_group_id,\n",
    "    \"fsx_subnet\": fsx_subnet,\n",
    "    \"base_path\": base_path,\n",
    "}\n",
    "missing = [name for name, value in required_settings.items() if not value]\n",
    "if missing:\n",
    "    raise ValueError(\"Fill in these settings before continuing: \" + \", \".join(missing))\n",
    "\n",
    "train = FileSystemInput(\n",
    "    file_system_id=file_system_id,\n",
    "    file_system_type=file_system_type,\n",
    "    directory_path=base_path,\n",
    "    file_system_access_mode=\"rw\",\n",
    ")\n",
    "\n",
    "# The channel name \"train\" is what the training script reads back as SM_CHANNEL_TRAIN.\n",
    "data_channels = {\"train\": train}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13aeeeed-df80-440e-9c7d-b22cfc0b88a2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Use the security group and subnet that were used to create the FSx file system.\n",
    "# These are forwarded to the estimator as extra keyword arguments.\n",
    "kwargs = {\n",
    "    \"security_group_ids\": [fsx_security_group_id],\n",
    "    \"subnets\": [fsx_subnet],\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6104c74b-aa28-445a-aba3-ba8586d9cfdf",
   "metadata": {},
   "source": [
    "### 2. Write a basic script"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce619465-6663-4dea-82ef-643150a372a2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# -p keeps this cell re-runnable when the directory already exists\n",
    "!mkdir -p fsx_scripts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b25960c4-cc20-45cb-a155-02d2b555d328",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%writefile fsx_scripts/test.py\n",
    "\n",
    "import argparse\n",
    "import os\n",
    "import subprocess\n",
    "\n",
    "\n",
    "def parse_args():\n",
    "    \"\"\"Parse the command-line arguments of the test script.\n",
    "\n",
    "    --train_folder defaults to the SM_CHANNEL_TRAIN environment variable and\n",
    "    is only required on the command line when that variable is not set.\n",
    "    \"\"\"\n",
    "    parser = argparse.ArgumentParser()\n",
    "\n",
    "    # remember this environment variable needs to exactly match the channel\n",
    "    # name you defined earlier (\"train\" -> SM_CHANNEL_TRAIN)\n",
    "    channel_dir = os.environ.get(\"SM_CHANNEL_TRAIN\")\n",
    "    parser.add_argument(\n",
    "        \"--train_folder\",\n",
    "        type=str,\n",
    "        default=channel_dir,\n",
    "        required=channel_dir is None,\n",
    "    )\n",
    "\n",
    "    return parser.parse_args()\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "\n",
    "    # flush so these lines appear before the output of the child process\n",
    "    print(\"running your job!\", flush=True)\n",
    "\n",
    "    args = parse_args()\n",
    "\n",
    "    print(\"train path looks like {}, now we will try an ls\".format(args.train_folder), flush=True)\n",
    "\n",
    "    # Argument list instead of a shell string, and check=True so the job\n",
    "    # fails when the directory cannot be listed.\n",
    "    subprocess.run([\"ls\", args.train_folder], check=True)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2b30ac95-caa6-44f1-97d0-462aff50b1dd",
   "metadata": {},
   "source": [
    "### 3. Run on SageMaker training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8423d332-b1b7-42a4-a8ca-41f98aae7b10",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "estimator = PyTorch(\n",
    "    entry_point=\"test.py\",\n",
    "    base_job_name=\"lustre-test\",\n",
    "    role=role,\n",
    "    source_dir=\"fsx_scripts\",\n",
    "    # configures the SageMaker training resource, you can increase as you need\n",
    "    instance_count=1,\n",
    "    instance_type=\"ml.m5.large\",\n",
    "    py_version=\"py38\",\n",
    "    framework_version=\"1.10\",\n",
    "    sagemaker_session=sess,\n",
    "    debugger_hook_config=False,\n",
    "    # enable warm pools for 60 minutes, useful for debugging\n",
    "    # NOTE(review): warm pools presumably need a matching account quota and are\n",
    "    # billed while kept alive - remove this argument if job creation is rejected.\n",
    "    keep_alive_period_in_seconds=60 * 60,\n",
    "    # security group and subnet of the FSx file system\n",
    "    **kwargs,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f47d449-8aaa-4372-bdd6-82d8df6cabdd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# wait=False submits the job and returns immediately;\n",
    "# look at the training job logs to see the output of the ls.\n",
    "estimator.fit(inputs=data_channels, wait=False)"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Python 3 (Data Science)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}