{ "cells": [ { "cell_type": "markdown", "id": "e0dbc36f-a3be-4b47-a12e-144339f97b7d", "metadata": {}, "source": [ "# Cerebras SageMaker Finetuning\n", "\n", "This is a sample code to finetune and deploy Cerebras with traditional fine-tuning on SageMaker." ] }, { "cell_type": "code", "execution_count": null, "id": "523ccc27-2d4f-4544-9ac1-f3c6408b4090", "metadata": { "tags": [] }, "outputs": [], "source": [ "import sagemaker, boto3, json\n", "from sagemaker import get_execution_role\n", "from sagemaker.pytorch.model import PyTorchModel\n", "from sagemaker.huggingface import HuggingFace\n", "\n", "role = get_execution_role()\n", "region = boto3.Session().region_name\n", "sess = sagemaker.Session()\n", "bucket = sess.default_bucket()\n", "\n", "sagemaker.__version__" ] }, { "cell_type": "markdown", "id": "ca100824-fc18-4d8d-a004-698cdc192cea", "metadata": {}, "source": [ "## Upload Data\n", "\n", "We will use Databricks-dolly-15k as sample dataset to finetune the model. (License: [Creative Commons Attribution-ShareAlike 3.0 Unported License](https://creativecommons.org/licenses/by-sa/3.0/legalcode))\n", "\n", "You may also choose to use custom dataset." ] }, { "cell_type": "code", "execution_count": null, "id": "b7a5fa4e-b4fb-48fc-a40b-a5ccfed973e2", "metadata": {}, "outputs": [], "source": [ "!curl -L https://huggingface.co/datasets/databricks/databricks-dolly-15k/resolve/main/databricks-dolly-15k.jsonl --create-dirs -o data/databricks-dolly-15k.jsonl" ] }, { "cell_type": "code", "execution_count": null, "id": "b9b40d8b-78fb-4b5e-97d0-9f6d045e05ee", "metadata": { "tags": [] }, "outputs": [], "source": [ "!head -n 2 data/databricks-dolly-15k.jsonl" ] }, { "cell_type": "code", "execution_count": null, "id": "e4fb068c-1c26-4da8-b045-22bcaf726f74", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Convet .jsonl to .json\n", "import pandas as pd\n", "df = pd.read_json('data/databricks-dolly-15k.jsonl', orient='records', lines=True)\n", "df = df.rename(columns={\"context\": \"input\", \"response\": \"output\"})\n", "df.to_json(\"data/databricks-dolly-15k.json\", orient='records')" ] }, { "cell_type": "code", "execution_count": null, "id": "b8c25643-ccfd-4265-951f-49a42d141dd7", "metadata": { "tags": [] }, "outputs": [], "source": [ "input_train = sess.upload_data(\n", " path=\"./data/databricks-dolly-15k.json\",\n", " key_prefix=\"Dolly\"\n", ")\n", "input_train" ] }, { "cell_type": "code", "execution_count": null, "id": "40848b86-c629-44fd-9b49-4cd19c9be0cb", "metadata": {}, "outputs": [], "source": [ "hyperparameters={\n", " 'model_name_or_path':'cerebras/Cerebras-GPT-111M',\n", " 'train_file': '/opt/ml/input/data/train/databricks-dolly-15k.json',\n", " 'num_train_epochs': 5,\n", " 'fp16': True,\n", " 'output_dir': '/opt/ml/model',\n", " 'prompt_template_name': 'stable_lm',\n", " 'per_device_train_batch_size': 8,\n", " 'per_device_eval_batch_size': 8,\n", " 'save_total_limit': 1,\n", " 'do_train': True,\n", " 'do_eval': True,\n", "}" ] }, { "cell_type": "code", "execution_count": null, "id": "55bee687-0bf2-4c41-a526-1f4217cd88e6", "metadata": { "scrolled": true, "tags": [] }, "outputs": [], "source": [ "huggingface_estimator = HuggingFace(\n", " base_job_name=\"Cerebras\",\n", " role=role,\n", " entry_point='run_clm.py',\n", " source_dir='./scripts/code',\n", " instance_type='ml.g5.2xlarge',\n", " instance_count=1,\n", " volume_size=200,\n", " transformers_version='4.26',\n", " pytorch_version='1.13',\n", " py_version='py39',\n", " # use_spot_instances=True,\n", " hyperparameters=hyperparameters,\n", ")\n", "huggingface_estimator.fit({'train': input_train})" ] }, { "cell_type": "markdown", "id": "816f72f1-8674-47ca-9a8b-861b5ef39873", "metadata": {}, "source": [ "## Download and Extract Model" ] }, { "cell_type": "code", "execution_count": null, "id": "6559efcd-1861-4656-bc2d-d82c8e81efb2", "metadata": { "tags": [] }, "outputs": [], "source": [ "import boto3\n", "import sagemaker\n", "\n", "def get_latest_training_job_artifact(base_job_name):\n", " sagemaker_client = boto3.client('sagemaker')\n", " response = sagemaker_client.list_training_jobs(NameContains=base_job_name, SortBy='CreationTime', SortOrder='Descending')\n", " training_job_arn = response['TrainingJobSummaries'][0]['TrainingJobArn']\n", " training_job_description = sagemaker_client.describe_training_job(TrainingJobName=training_job_arn.split('/')[-1])\n", " return training_job_description['ModelArtifacts']['S3ModelArtifacts']\n", "\n", "try:\n", " model_data = huggingface_estimator.model_data\n", "except:\n", " # Retrieve artifact url when kernel is restarted\n", " model_data = get_latest_training_job_artifact('Cerebras')\n", " \n", "!aws s3 cp {model_data} cerebras.tar.gz" ] }, { "cell_type": "code", "execution_count": null, "id": "8aa6177f-7a02-4930-aea4-2dbcd26af271", "metadata": { "tags": [] }, "outputs": [], "source": [ "!rm -rf scripts/model && mkdir scripts/model\n", "!tar -xf cerebras.tar.gz -C scripts/model --no-same-owner --exclude checkpoint*" ] }, { "cell_type": "markdown", "id": "3eec9932-dda4-4698-a233-f33c1ed123cd", "metadata": {}, "source": [ "## Package and Upload Model" ] }, { "cell_type": "code", "execution_count": null, "id": "78c1ce81-ea44-48eb-84e8-79f8d137686f", "metadata": { "scrolled": true, "tags": [] }, "outputs": [], "source": [ "%cd scripts\n", "!tar -czvf ../package.tar.gz *\n", "%cd -" ] }, { "cell_type": "code", "execution_count": null, "id": "ecc40777-67a6-4cb7-96a8-8f590427e3d9", "metadata": { "tags": [] }, "outputs": [], "source": [ "model_path = sess.upload_data('package.tar.gz', bucket=bucket, key_prefix=f\"Cerebras\")\n", "model_path" ] }, { "cell_type": "markdown", "id": "dc83cc9f-8f0c-44b3-a6a0-779893f71105", "metadata": {}, "source": [ "## Deploy Model" ] }, { "cell_type": "code", "execution_count": null, "id": "7f6e3832-db38-44d0-95b9-7d5bb9b8fb96", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sagemaker.async_inference import AsyncInferenceConfig\n", "from sagemaker.serializers import JSONSerializer\n", "\n", "endpoint_name = \"Cerebras\"\n", "\n", "huggingface_model = PyTorchModel(\n", " model_data=model_path,\n", " framework_version=\"1.13\",\n", " py_version='py39',\n", " role=role,\n", " name=endpoint_name,\n", " env={\n", " \"model_params\": json.dumps({\n", " \"base_model\": \"model\",\n", " \"peft\": False,\n", " \"load_8bit\": True,\n", " \"prompt_template\": \"stable_lm\",\n", " })\n", " }\n", ")\n", "\n", "# deploy model to SageMaker Inference\n", "predictor = huggingface_model.deploy(\n", " initial_instance_count=1,\n", " instance_type='ml.g5.2xlarge',\n", " endpoint_name=endpoint_name,\n", " serializer=JSONSerializer(),\n", " async_inference_config=AsyncInferenceConfig()\n", ")" ] }, { "cell_type": "markdown", "id": "dbe88233-f50c-4fd3-b004-a1db3abe89b6", "metadata": {}, "source": [ "## Run Inference" ] }, { "cell_type": "code", "execution_count": null, "id": "b570c672-e6ed-4888-9840-6b41ead0e16d", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sagemaker.predictor import Predictor\n", "from sagemaker.predictor_async import AsyncPredictor\n", "from sagemaker.serializers import JSONSerializer\n", "from sagemaker.deserializers import NumpyDeserializer\n", "\n", "predictor_client = AsyncPredictor(\n", " predictor=Predictor(\n", " endpoint_name=endpoint_name,\n", " sagemaker_session=sess,\n", " serializer=JSONSerializer(),\n", " deserializer=NumpyDeserializer()\n", " ),\n", " name=endpoint_name\n", ")\n", "data = {\n", " \"instruction\": \"Who is the founder of Amazon?\",\n", " \"input\": \"Amazon was founded by Jeff Bezos from his garage in Bellevue, Washington,[7] on July 5, 1994. Initially an online marketplace for books, it has expanded into a multitude of product categories, a strategy that has earned it the moniker The Everything Store.\",\n", " \"max_new_tokens\": 64,\n", " \"temperature\": 0.7,\n", " \"do_sample\": True,\n", "}\n", "response = predictor_client.predict(\n", " data=data\n", ")\n", "print(response)" ] }, { "cell_type": "markdown", "id": "1d23db87-99d5-416f-8036-36e13800ee02", "metadata": {}, "source": [ "## Delete Endpoint" ] }, { "cell_type": "code", "execution_count": null, "id": "4057b16d-79f8-46cc-bc66-0c11d4fda006", "metadata": { "tags": [] }, "outputs": [], "source": [ "predictor.delete_model()\n", "predictor.delete_endpoint()" ] }, { "cell_type": "code", "execution_count": null, "id": "a6e51ae1-b94d-4e1a-af67-215240abe93b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }