{ "cells": [ { "cell_type": "markdown", "id": "f24a09e9-262f-4943-84a2-b60b6c24c389", "metadata": {}, "source": [ "# OpenCALM SageMaker Inference for JAQKET dataset\n", "\n", "[OpenCALM](https://huggingface.co/spaces/kyo-takano/OpenCALM-7B) を SageMaker の推論エンドポイントに Hosting し、[JAQKET](https://www.nlp.ecei.tohoku.ac.jp/projects/jaqket/) の評価データセットについて回答を得る Notebook です。\n", "\n", "以下の環境で Hosting し動作確認を行ってます。\n", "\n", "* `ml.g5.2xlarge(NVIDIA A10G Tensor Core GPU 搭載 VRAM 24GB, RAM 32GB, vCPU 8)` : `PyTorch 1.13 Python 3.9 GPU Optimized`\n", " \n", "[各インスタンスの料金についてはこちら](https://aws.amazon.com/jp/sagemaker/pricing/)をご確認ください。" ] }, { "cell_type": "code", "execution_count": null, "id": "0428530b-8f42-4ab7-83ce-c3bdfa96a51b", "metadata": { "tags": [] }, "outputs": [], "source": [ "# %pip (rather than !pip) installs into the environment of the running kernel.\n", "%pip install \"sagemaker>=2.143.0\" -U" ] }, { "cell_type": "code", "execution_count": null, "id": "d91b104b-51f1-4f2a-b77e-7afac73de372", "metadata": {}, "outputs": [], "source": [ "%pip install tqdm" ] }, { "cell_type": "code", "execution_count": null, "id": "3f94c4e8-de95-4657-97e0-dc213415bee5", "metadata": { "tags": [] }, "outputs": [], "source": [ "import sagemaker, boto3, json\n", "from sagemaker import get_execution_role\n", "from sagemaker.pytorch.model import PyTorchModel\n", "from sagemaker.huggingface import HuggingFace\n", "\n", "# Execution role, region, SageMaker session and default S3 bucket for this notebook.\n", "role = get_execution_role()\n", "region = boto3.Session().region_name\n", "sess = sagemaker.Session()\n", "bucket = sess.default_bucket()\n", "\n", "# Display the installed SDK version as the cell output.\n", "sagemaker.__version__" ] }, { "cell_type": "markdown", "id": "9882e8b3-4b8a-4700-8233-8f7e504c6b8d", "metadata": {}, "source": [ "## Package and Upload Model" ] }, { "cell_type": "code", "execution_count": null, "id": "a08d3c5f-2537-433b-a7c9-e2da3480a5f4", "metadata": { "scrolled": true, "tags": [] }, "outputs": [], "source": [ "# Remove scripts/model, then archive everything under scripts/ into package.tar.gz.\n", "!rm -rf scripts/model\n", "%cd scripts\n", "!tar -czvf ../package.tar.gz *\n", "%cd -" ] }, { "cell_type": "code", "execution_count": null, "id": "11d0ae99-8cfe-43f5-8677-863e91bce7a0", 
"metadata": { "tags": [] }, "outputs": [], "source": [ "# Upload the archive to the session's default bucket under the OpenCALM prefix.\n", "model_path = sess.upload_data(\n", "    \"package.tar.gz\",\n", "    bucket=bucket,\n", "    key_prefix=\"OpenCALM\",\n", ")\n", "model_path" ] }, { "cell_type": "markdown", "id": "6432b637-a883-4d94-ba28-54fc1be73408", "metadata": {}, "source": [ "## Deploy Model" ] }, { "cell_type": "code", "execution_count": null, "id": "30357ace-1d7e-4947-ab7b-6d716d6a8c6c", "metadata": {}, "outputs": [], "source": [ "model_name = \"cyberagent/open-calm-7b\"\n", "# Part after the last slash; reused below as the SageMaker model and endpoint name.\n", "model_name_base = model_name.rsplit(\"/\", 1)[-1]" ] }, { "cell_type": "code", "execution_count": null, "id": "a20f1592-7ff0-45a2-bbb7-3f9528a11a98", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sagemaker.serializers import JSONSerializer\n", "\n", "# Model settings are JSON-encoded and handed to the container as an environment variable.\n", "model_params = {\n", "    \"base_model\": model_name,\n", "    \"peft\": False,\n", "    \"load_8bit\": False,\n", "    \"prompt_template\": \"simple_qa_ja\",\n", "}\n", "container_env = {\n", "    \"model_params\": json.dumps(model_params),\n", "    \"SAGEMAKER_MODEL_SERVER_TIMEOUT\": \"3600\",\n", "}\n", "\n", "huggingface_model = PyTorchModel(\n", "    model_data=model_path,\n", "    role=role,\n", "    name=model_name_base,\n", "    framework_version=\"1.13\",\n", "    py_version=\"py39\",\n", "    env=container_env,\n", ")\n", "\n", "# Create the endpoint on a single ml.g5.2xlarge instance; requests are serialized as JSON.\n", "predictor = huggingface_model.deploy(\n", "    endpoint_name=model_name_base,\n", "    instance_type=\"ml.g5.2xlarge\",\n", "    initial_instance_count=1,\n", "    serializer=JSONSerializer(),\n", ")" ] }, { "cell_type": "markdown", "id": "d07b0901-2675-49e5-8ccd-968f3ca2820e", "metadata": {}, "source": [ "## Run Inference" ] }, { "cell_type": "code", "execution_count": null, "id": "b6eb4904-4b77-4cd0-bbe9-4d4685bc018e", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sagemaker.predictor import Predictor\n", "from sagemaker.predictor_async import AsyncPredictor\n", "from sagemaker.deserializers import JSONDeserializer\n", "\n", "predictor_client = Predictor(\n", " endpoint_name=model_name_base,\n", " sagemaker_session=sess,\n", " 
serializer=JSONSerializer(),\n", " deserializer=JSONDeserializer()\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "5526e52c-2ae8-40e6-aaa0-136ec8d9caf2", "metadata": {}, "outputs": [], "source": [ "import re\n", "\n", "\n", "def inference(instruction):\n", "    \"\"\"Send one question to the endpoint and return the extracted answer.\n", "\n", "    Args:\n", "        instruction: Question text placed in the \"instruction\" field of the request.\n", "\n", "    Returns:\n", "        The text of the last 「...」 pair found after prefixing the response with 「,\n", "        or the raw response when no closing 」 is present.\n", "    \"\"\"\n", "    data = {\n", "        \"instruction\": instruction,\n", "        \"input\": \"\",\n", "        \"max_new_tokens\": 32,\n", "        \"temperature\": 0.1,\n", "        \"do_sample\": False,\n", "        \"num_beams\": 5,\n", "        \"pad_token_id\": 1,\n", "        \"bos_token_id\": 0,\n", "        \"eos_token_id\": 0,\n", "        # \"repetition_penalty\": 1.05,\n", "        \"stop_ids\": [1, 0],\n", "    }\n", "    response = predictor_client.predict(data=data)\n", "    # Prefix an opening bracket so text closed by 」 is captured even when the\n", "    # response itself does not contain the opening 「.\n", "    quoted = re.findall(\"「(.*?)」\", f\"「{response}\")\n", "    return quoted[-1] if quoted else response" ] }, { "cell_type": "code", "execution_count": null, "id": "66890ae1-091d-4750-b205-fb90438bc732", "metadata": { "tags": [] }, "outputs": [], "source": [ "print(inference(\"映画『ウエスト・サイド物語』に登場する2つの少年グループといえば、シャーク団と何団?\"))" ] }, { "cell_type": "markdown", "id": "1bd0dcb0-4a6a-4e34-9bd0-9c81e19ebcf4", "metadata": { "tags": [] }, "source": [ "JAQKET データセットをダウンロード。" ] }, { "cell_type": "code", "execution_count": null, "id": "69eeadb0-5c50-4d67-bd8c-f55dc1755a86", "metadata": { "tags": [] }, "outputs": [], "source": [ "!wget -P data https://jaqket.s3.ap-northeast-1.amazonaws.com/data/aio_02/aio_02_dev_v1.0.jsonl" ] }, { "cell_type": "code", "execution_count": null, "id": "1810485f-6e2e-4b29-9375-54fe018e0d5d", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from tqdm import tqdm\n", "\n", "\n", "df = pd.read_json(\"data/aio_02_dev_v1.0.jsonl\", orient=\"records\", lines=True)\n", "\n", "llm_answers = []\n", "matches = []\n", "# iterrows() yields a generator without a length, so pass total for a real progress bar.\n", "for idx, row in tqdm(df.iterrows(), total=len(df)):\n", "    llm_answer = inference(row[\"question\"])\n", "    llm_answers.append(llm_answer)\n", "    # A hit means the extracted answer equals one of the row's reference answers.\n", "    matches.append(llm_answer in row[\"answers\"])\n", "\n", "\n", "df[\"llm_answers\"] = 
pd.Series(llm_answers)\n", "df[\"match\"] = pd.Series(matches)" ] }, { "cell_type": "code", "execution_count": null, "id": "380186a7-593a-4e5a-90c8-699ab81781de", "metadata": {}, "outputs": [], "source": [ "# Number of matched answers out of all evaluated questions.\n", "print(df[\"match\"].sum(), \"/\", len(df))" ] }, { "cell_type": "code", "execution_count": null, "id": "89011735-b7b6-417d-a24b-07b452a87d29", "metadata": {}, "outputs": [], "source": [ "# Save questions, model answers and match flags next to the downloaded data.\n", "output_csv = f\"data/{model_name_base}_inference.csv\"\n", "df.to_csv(output_csv, index=False)" ] }, { "cell_type": "markdown", "id": "219ebbfd-3b41-4305-95af-8d35c013969b", "metadata": {}, "source": [ "## Delete Endpoint" ] }, { "cell_type": "code", "execution_count": null, "id": "1cd9be37-f988-4394-b920-61d1cfeaf066", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Keep this order: delete_model() resolves the model through the endpoint,\n", "# so it has to run while the endpoint still exists.\n", "predictor.delete_model()\n", "predictor.delete_endpoint()" ] }, { "cell_type": "code", "execution_count": null, "id": "7b8974c6-3f58-40c8-b849-ebe7968e0310", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": 
"ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { 
"_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, 
"name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, 
"hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, 
"hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 }, { "_defaultOrder": 55, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 56, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4de.24xlarge", "vcpuNum": 96 } ], "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science 3.0)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-data-science-310-v1" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" } }, "nbformat": 4, "nbformat_minor": 5 }