{ "cells": [ { "cell_type": "code", "execution_count": 3, "id": "1be69f0e-bc9c-41c1-ba7b-c5b9a42c2e7e", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting sentence-transformers\n", " Using cached sentence_transformers-2.2.2-py3-none-any.whl\n", "Collecting transformers<5.0.0,>=4.6.0 (from sentence-transformers)\n", " Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (4.65.0)\n", "Requirement already satisfied: torch>=1.6.0 in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (2.0.0)\n", "Requirement already satisfied: torchvision in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (0.15.1)\n", "Requirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (1.23.5)\n", "Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (1.2.2)\n", "Requirement already satisfied: scipy in /opt/conda/lib/python3.10/site-packages (from sentence-transformers) (1.10.1)\n", "Collecting nltk (from sentence-transformers)\n", " Using cached nltk-3.8.1-py3-none-any.whl (1.5 MB)\n", "Collecting sentencepiece (from sentence-transformers)\n", " Using cached sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "Collecting huggingface-hub>=0.4.0 (from sentence-transformers)\n", " Using cached huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (3.12.0)\n", "Requirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2023.5.0)\n", "Requirement already satisfied: requests in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (2.28.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (5.4.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (4.5.0)\n", "Requirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub>=0.4.0->sentence-transformers) (23.1)\n", "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.6.0->sentence-transformers) (1.11.1)\n", "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.6.0->sentence-transformers) (3.1)\n", "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.6.0->sentence-transformers) (3.1.2)\n", "Collecting regex!=2019.12.17 (from transformers<5.0.0,>=4.6.0->sentence-transformers)\n", " Using cached regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (770 kB)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers)\n", " Using cached tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "Collecting safetensors>=0.3.1 (from transformers<5.0.0,>=4.6.0->sentence-transformers)\n", " Using cached safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "Requirement already satisfied: click in /opt/conda/lib/python3.10/site-packages (from nltk->sentence-transformers) (8.1.3)\n", "Requirement already satisfied: joblib in /opt/conda/lib/python3.10/site-packages (from nltk->sentence-transformers) (1.2.0)\n", "Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.10/site-packages (from scikit-learn->sentence-transformers) (3.1.0)\n", "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /opt/conda/lib/python3.10/site-packages (from torchvision->sentence-transformers) (9.4.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.6.0->sentence-transformers) (2.1.2)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (3.1.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests->huggingface-hub>=0.4.0->sentence-transformers) (2023.5.7)\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.6.0->sentence-transformers) (1.3.0)\n", "Installing collected packages: tokenizers, sentencepiece, safetensors, regex, nltk, huggingface-hub, transformers, sentence-transformers\n", "Successfully installed huggingface-hub-0.16.4 nltk-3.8.1 regex-2023.6.3 safetensors-0.3.1 sentence-transformers-2.2.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.31.0\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install sentence-transformers" ] }, { "cell_type": "code", "execution_count": 7, "id": "736dc101-5863-44ff-b086-a25a69f0b51d", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets in /opt/conda/lib/python3.10/site-packages (2.14.1)\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from datasets) (1.23.5)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (12.0.0)\n", "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.3.6)\n", "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from datasets) (2.0.1)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (2.28.2)\n", "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (4.65.0)\n", "Requirement already satisfied: xxhash in /opt/conda/lib/python3.10/site-packages (from datasets) (3.3.0)\n", "Requirement already satisfied: multiprocess in /opt/conda/lib/python3.10/site-packages (from datasets) (0.70.14)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (2023.5.0)\n", "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.10/site-packages (from datasets) (3.8.5)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from datasets) (0.16.4)\n", "Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from datasets) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.10/site-packages (from datasets) (5.4.1)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (22.2.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (3.1.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.9.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.10/site-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (3.12.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.10/site-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.5.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (3.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.10/site-packages (from requests>=2.19.0->datasets) (2023.5.7)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->datasets) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Requirement already satisfied: accelerate in /opt/conda/lib/python3.10/site-packages (0.19.0)\n", "Collecting accelerate\n", " Using cached accelerate-0.21.0-py3-none-any.whl (244 kB)\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.10/site-packages (from accelerate) (1.23.5)\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (23.1)\n", "Requirement already satisfied: psutil in /opt/conda/lib/python3.10/site-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /opt/conda/lib/python3.10/site-packages (from accelerate) (5.4.1)\n", "Requirement already satisfied: torch>=1.10.0 in /opt/conda/lib/python3.10/site-packages (from accelerate) (2.0.0)\n", "Requirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.12.0)\n", "Requirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (4.5.0)\n", "Requirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (1.11.1)\n", "Requirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.1)\n", "Requirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch>=1.10.0->accelerate) (3.1.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.2)\n", "Requirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", "Installing collected packages: accelerate\n", " Attempting uninstall: accelerate\n", " Found existing installation: accelerate 0.19.0\n", " Uninstalling accelerate-0.19.0:\n", " Successfully uninstalled accelerate-0.19.0\n", "Successfully installed accelerate-0.21.0\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install datasets\n", "!pip install accelerate -U " ] }, { "cell_type": "markdown", "id": "b59b9525-038f-4820-9d22-1de0d83bfbf7", "metadata": {}, "source": [ "# finetune 模型" ] }, { "cell_type": "code", "execution_count": 8, "id": "9c2ae124-2a55-4713-afe0-c40be9b3bb5f", "metadata": { "tags": [] }, "outputs": [], "source": [ "from sentence_transformers import SentenceTransformer, LoggingHandler\n", "from datasets import load_dataset\n", "from sentence_transformers import InputExample\n", "from torch.utils.data import DataLoader\n", "from sentence_transformers import losses\n", "import torch\n", "import logging" ] }, { "cell_type": "code", "execution_count": 9, "id": "07bc65f6-c9a2-47bc-a2da-094b8924ca15", "metadata": { "tags": [] }, "outputs": [], "source": [ "logging.basicConfig(format='%(asctime)s - %(message)s', datefmt = '%Y-%m-%d %H:%M:S', level=logging.INFO, handlers =[LoggingHandler()] )" ] }, { "cell_type": "markdown", "id": "4e2e9e75-5fdf-4e85-967f-e4d86f85902a", "metadata": {}, "source": [ "## 从hf加载模型" ] }, { "cell_type": "code", "execution_count": 10, "id": "66eade76-c737-4abc-8a2d-625347306f2c", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2023-07-31 02:19:S - Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-mpnet-base-v2\n", "2023-07-31 02:19:S - Use pytorch device: cuda\n" ] } ], "source": [ "modelB = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')" ] }, { "cell_type": "code", "execution_count": 4, "id": "4e5cf155-2608-4cad-934f-c7281e3c8fce", "metadata": { "tags": [] }, "outputs": [], "source": [ "# dataset_id = \"embedding-data/sentence-compression\"\n", "# dataset = load_dataset(dataset_id)" ] }, { "cell_type": "code", "execution_count": 11, "id": "fcfa89a5-e6af-45fa-9b76-7b05900aabbb", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "['3_faq.faq', '.ipynb_checkpoints', '1_faq.faq', '4_faq.faq', '2_faq.faq']" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import os,datasets\n", "filenames = os.listdir('topwar_faq')\n", "filenames" ] }, { "cell_type": "code", "execution_count": 13, "id": "5808a2dd-569a-4513-9ded-3c25b35a64c9", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data size:126\n" ] } ], "source": [ "def parse_faq(file_content,QA_SEP='====='):\n", " arr = file_content.split(QA_SEP)\n", " list_arr = []\n", " for item in arr:\n", " question, answer = item.strip().split(\"\\n\", 1)\n", " question = question.replace(\"Question: \", \"\")\n", " answer = answer.replace(\"Answer: \", \"\")\n", " list_arr.append((answer,question))\n", " return list_arr\n", "\n", "all_datas = []\n", "for fn in filenames:\n", " if fn == '.ipynb_checkpoints':\n", " continue\n", " with open(f\"topwar_faq/{fn}\") as f:\n", " data = f.read()\n", " all_datas += parse_faq(data)\n", "print(f\"data size:{len(all_datas)}\")" ] }, { "cell_type": "code", "execution_count": 14, "id": "734c79a3-92b1-4740-909e-e7e195099bf6", "metadata": { "tags": [] }, "outputs": [], "source": [ "train_examples = []\n", "\n", "for i in range(len(all_datas)):\n", " example = all_datas[i]\n", " train_examples.append(InputExample(texts=[example[0], example[1]],label = [0.85]*len(all_datas)))" ] }, { "cell_type": "code", "execution_count": 15, "id": "95c874f8-1f0c-40d9-b9b3-6d4a219646fe", "metadata": { "tags": [] }, "outputs": [], "source": [ "\n", "train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=64)\n", "train_loss = losses.MultipleNegativesRankingLoss(model=modelB)\n", "num_epochs = 10\n", "warmup_steps = int(len(train_dataloader) * num_epochs * 0.1) #10% of train data" ] }, { "cell_type": "code", "execution_count": null, "id": "5c287779-5147-4d07-93d3-e595b674914f", "metadata": { "tags": [] }, "outputs": [], "source": [ "# from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator\n", "\n", "# # evaluator = EmbeddingSimilarityEvaluator.from_input_examples(\n", "# # train_examples\n", "# # )\n", "# sentences1 = ['世界上最好玩的游戏,江娱互动旗下的一款SLG游戏', '专属技能碎片可以通过多种途径获得,例如礼包商城-特惠礼包界面可以购买专属技能碎片礼包']\n", "# sentences2 = ['口袋奇兵是什么?', '专属技能碎片在哪里获得?']\n", "# scores = [0.75, 0.8]\n", "\n", "# evaluator = EmbeddingSimilarityEvaluator(sentences1, sentences2, scores)" ] }, { "cell_type": "code", "execution_count": 18, "id": "7353170e-a8af-494b-8981-40f586446955", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Epoch: 0%| | 0/10 [00:00" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sns.histplot(simsvalues, kde=True)" ] }, { "cell_type": "code", "execution_count": 47, "id": "f993f290-677a-4a61-93b7-0977d7534d7b", "metadata": { "tags": [] }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 49, "id": "573f3968-56dc-4d93-9b92-2924019993c0", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "count 126.000000\n", "mean 0.655297\n", "std 0.099786\n", "min 0.439509\n", "25% 0.583280\n", "50% 0.661202\n", "75% 0.720619\n", "max 0.912604\n", "dtype: float64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series(simsvalues).describe()" ] }, { "cell_type": "markdown", "id": "b8715caf-e85a-4690-beb2-0d732a8b244d", "metadata": { "tags": [] }, "source": [ "## 将question和answer进行cross 对比,生成126*126个结果,再查看这个分布" ] }, { "cell_type": "code", "execution_count": 54, "id": "e529a082-e4d0-4f0e-85bd-f38160a09cfb", "metadata": { "tags": [] }, "outputs": [], "source": [ "cross_simsvalues = util.cos_sim(emb_answer,emb_question).flatten()" ] }, { "cell_type": "code", "execution_count": 67, "id": "23cfc0f1-4468-48bb-8349-1011605d5870", "metadata": { "tags": [] }, "outputs": [], "source": [ "cross_sims_s = pd.Series(cross_simsvalues)" ] }, { "cell_type": "code", "execution_count": 92, "id": "cdd17e5a-01cc-481b-9f36-e56944f2cd86", "metadata": { "tags": [] }, "outputs": [], "source": [ "N = len(input_question)\n", "pos_indices = [ i*N+i for i in range(N)] ##正例的index" ] }, { "cell_type": "code", "execution_count": 95, "id": "92f376bf-da76-4f80-bf93-ca0165f2dbbf", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "count 126.000000\n", "mean 0.655297\n", "std 0.099786\n", "min 0.439509\n", "25% 0.583280\n", "50% 0.661202\n", "75% 0.720619\n", "max 0.912604\n", "dtype: float64" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 正样本的得分\n", "pos_cross_sims_s = cross_sims_s[pos_indices]\n", "pos_cross_sims_s.describe()" ] }, { "cell_type": "code", "execution_count": 96, "id": "a1b58af3-47be-423a-8999-da07609eaa9f", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "count 15750.000000\n", "mean 0.099584\n", "std 0.123988\n", "min -0.246504\n", "25% 0.015882\n", "50% 0.086160\n", "75% 0.162934\n", "max 0.813635\n", "dtype: float64" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#负样本得分\n", "neg_cross_sims_s = cross_sims_s.drop(pos_indices)\n", "neg_cross_sims_s.describe()" ] }, { "cell_type": "code", "execution_count": 98, "id": "98bfbea3-1274-4ea0-91d9-69af1784600c", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "sns.histplot(pos_cross_sims_s, color='green',kde=True)\n", "sns.histplot(neg_cross_sims_s.sample(N), color='red',kde=True)" ] }, { "cell_type": "markdown", "id": "5a9b8002-1cb8-4899-a903-c43fabf37ddc", "metadata": {}, "source": [ "## 输出具体的结果" ] }, { "cell_type": "code", "execution_count": 59, "id": "7f5224be-74af-410b-b165-e211399bc26b", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "def similarity(v1,v2):\n", " dot_product = np.dot(v1, v2)\n", "\n", " magnitude_v1 = np.linalg.norm(v1)\n", " magnitude_v2 = np.linalg.norm(v2)\n", "\n", " return dot_product / (magnitude_v1 * magnitude_v2)" ] }, { "cell_type": "code", "execution_count": 60, "id": "833ee658-32b0-49ab-b6eb-2c9b258e1762", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Question:口袋奇兵是什么?\n", "Answer:世界上最好玩的游戏,江娱互动旗下的一款SLG游戏\n", "0.5740935802459717\n", "\n", "Question:芭芭拉\n", "Answer:口袋奇兵5月份最新推出的空军英雄,既美丽又强大,触发技能后最擅长连击击毙敌军。\n", "0.6880062818527222\n", "\n", "Question:阵容推荐、推荐个阵容\n", "Answer:\n", "1、空军:推荐芭芭拉、尤里卡、邦尼\n", "2、陆军:推荐尤里卡、流浪者、波里克\n", "3、海军:推荐蕾拉、蔻缇雅、暴风赤红\n", "0.6594719290733337\n", "\n", "Question:小游戏在哪\n", "Answer:目前只有10-20级玩家可以看到小游戏中心,您可以点击右上角的(大喇叭)常规活动,翻到最后面就可以看到了哦!\n", "0.5766243934631348\n", "\n", "Question:头像同步不了\n", "Answer:建议您尝试登陆游戏后,点击头像、右上角的三个点、选择设置、用户信息,然后切换一下用户信息,看看是否可以解决您的不能同步的问题。然后回到游戏后在游戏中点击左上角头像、再次点击头像、将头像栏拉动到最下方看看是否有您需要更换的头像。\n", "0.6618648171424866\n", "\n", "Question:误操作了怎么办\n", "Answer:军级达到5级、并且操作在7天之内(英雄类需操作两小时内并且英雄没有出征过),90天内没有过申请撤销操作的请求才可以申请一次撤销操作哦~如果您符合条件,可以点击头像-联系客服,将您的情况详细描述一下\n", "0.5097072124481201\n", "\n", "Question:礼包码\n", "Answer:几个长期有效的:KDQB666、TOPWAR666、topwar888、KD2023 \n", "[温馨提示]点击头像→设置→礼品码,输入礼品码领取,即可直接进入您的背包中;\n", "0.6091381311416626\n", "\n", "Question:内部号、我想当托\n", "Answer:这个游戏良心运营,没有托儿\n", "0.5079904198646545\n", "\n", "Question:芭芭拉\n", "Answer:口袋奇兵5月份最新推出的空军英雄,既美丽又强大,触发技能后最擅长连击击毙敌军。\n", "0.6880062818527222\n", "\n", "Question:口袋奇兵是什么?\n", "Answer:世界上最好玩的游戏,江娱互动旗下的一款SLG游戏\n", "0.5740935802459717\n", "\n", "Question:芭芭拉\n", "Answer:口袋奇兵5月份最新推出的空军英雄,既美丽又强大,触发技能后最擅长连击击毙敌军。\n", "0.6880062818527222\n", "\n", "Question:口袋奇兵是什么?\n", "Answer:世界上最好玩的游戏,江娱互动旗下的一款SLG游戏\n", "0.5740935802459717\n", "\n", "Question:芭芭拉\n", "Answer:口袋奇兵5月份最新推出的空军英雄,既美丽又强大,触发技能后最擅长连击击毙敌军。\n", "0.6880062818527222\n", "\n", "Question:无法登陆\n", "Answer:①点击小程序右上角“···”(三个点的图标)→选择“重新进入小程序”;\n", "②切换4G/5G/WIFI进行尝试,或错开网络拥堵时段(如0点整),稍候尝试;\n", "③尝试重启登录设备(手机、平板等);\n", "④在小程序选择界面中,长按游戏图标,将小程序拖动删除,再重新搜索“口袋奇兵”。\n", "0.5282047986984253\n", "\n", "Question:概率低、运气差\n", "Answer:我们游戏中所有活动,都是有概率可以获取奖励预览中的所有展示奖励的哦。\n", "相关概率,所有玩家都是一样的呢~\n", "您可以多尝试,一定有机会中大奖的哦!(~ ̄▽ ̄)~\n", "0.5829479694366455\n", "\n", "Question:战锤集结\n", "Answer:点击右侧的联盟、战争、自动加入、就可以看到加入自动上车功能了\n", "0.5071412324905396\n", "\n", "Question:换区\n", "Answer:一般是开服120天(121天起)之后开启迁服功能。需要您的人物等级达到80级才能转区~\n", "开启迁出功能的服务器可以在世界地图(能看到各个服务器的那个地图)查询哦。\n", "请您点进 世界,依次点击:左上角的“小地图”右边的小图标→右下角第二个图标 圆圈圈→左下角第二个按钮,查看迁服列表。\n", "【后方阵线】一栏里的战区就是您可以转区的哦~选择您想要转入的战区,如果满足调动要求,点击【准备调动】之后会显示相关说明和所需“调动申请书”的数量\n", "0.5703070163726807\n", "\n", "Question:怎么改名称\n", "Answer:Answer:请您点击左上角的 头像(个人信息),右上角有个小小的方框图标,点击就可以修改名字啦~\n", "头像-性别-右边的小方框,就可以修改性别啦~\n", "0.6675779819488525\n", "\n", "Question:专属技能碎片在哪里获得?\n", "Answer:专属技能碎片可以通过多种途径获得,例如礼包商城-特惠礼包界面可以购买专属技能碎片礼包\n", "0.8391760587692261\n", "\n", "Question:不能快速删除工厂(比如兵工厂、船厂、机场),要一个一个点太麻烦了\n", "Answer:我们非常感谢您能与我们分享您的想法。口袋奇兵团队全员将会认真听取您的意见,努力改进与优化,为打造更加公平多彩的游戏环境而努力。\n", "期待您不吝赐教,如果您还有其他好的意见与建议,欢迎随时告诉我们。\n", "小芙会将您的提议搜集后上报给开发团队,让开发团队在后续的优化或者更新中优先考虑您的提议加以改进,再次感谢您的反馈!\n", "0.43950924277305603\n", "\n", "Question:为什么我删除不了角色?\n", "Answer:在同一绑定账号下,当前账号必须保留一个游戏角色才可以删除另外一个角色,\n", "您可以登陆游戏点击头像、设置、角色管理、就可以选择创建角色或删除角色,如果没有删除角色按键,\n", "可能是因为您并不是在当前app创建的角色,您需要回到创建账号的小程序或app等才可以删除角色哦~\n", "0.7761311531066895\n", "\n", "Question:怎么帮小号送给别人\n", "Answer:平台不支持转送或者账号交易\n", "0.47375765442848206\n", "\n", "Question:三个强化,伤害加深,背包只显示一个,在安装强化界面能看到3个,但是不能合成\n", "Answer:您这边需要把强化部件方案1,2,3,4需要合成的部件全部取下来然后再合成\n", "0.6903927326202393\n", "\n", "Question:在哪里能看到我刚刚获得的装备图纸\t\n", "Answer:您可以在军火库制作对应装备的界面查看到您拥有的图纸\n", "0.658401608467102\n", "\n", "Question:我购买的资源不见了\t\n", "Answer:礼包购买或者箱子开出的资源道具会出现在背包里,其他资源会直接加入到资源总数中喔\n", "0.6858749389648438\n", "\n", "Question:为什么我买了精英月卡却没有获得钻石\t\n", "Answer:精英月卡中并不包含钻石喔,精英月卡的特权是不用看视频,直接获得当天看视频可以获得的钻石,如果已经通过看视频获得了钻石,购买精英月卡时就不会获得这部分钻石了噢\n", "0.7498976588249207\n", "\n", "Question:你们是真的吃相难看,就这样对你们的衣食父母?\t\n", "Answer:指挥官大人您好!我们的活动是有一定概率的哦,非常抱歉未能完全满足您的期望。我们一直在努力,希望您能继续享受我们精彩的游戏体验!这边祝您后续欧气满满,好运常在\n", "0.5918810963630676\n", "\n", "Question:我使用了自选兵种+1/+2宝箱没有获得士兵\t\n", "Answer:使用道具并选择您想要的士兵后,请您到背包里‘士兵’的选项里查看噢\n", "0.617608368396759\n", "\n", "Question:为什么我的修理厂是空的,受伤的士兵却没有收进去\t\n", "Answer:如果是在攻击其他玩家基地时产生的伤兵,那不会进入修理厂喔,VIP13-VIP16的特权可以按一定比例转化成伤兵进入修理厂\n", "0.6001214385032654\n", "\n", "Question:为什么我用了两个额外队列的道具,只增加了一个队列\t\n", "Answer:如果使用多个额外限时队列道具,只会增加限时队列的持续时间,并不会增加队列数量。\n", "0.6737362146377563\n", "\n", "Question:礼品码怎么使用,在哪领取\t\n", "Answer:点击头像-设置-礼品码进行兑换使用,获取方式请留意后续公告牌与游戏内公告邮件\n", "0.6476421356201172\n", "\n", "Question:我的获得的雷神元素背包里怎么看不见\t\n", "Answer:需要80级之后解锁超级武器实验室建筑之后才可进行查看,80级前获得的雷神元素会正常累计增加\n", "0.6691535115242004\n", "\n", "Question:装饰属性在城市详情内怎么不显示啊\t\n", "Answer:装饰属性是在神奇宝箱-buff界面进行显示的\n", "0.786723792552948\n", "\n", "Question:旧的装饰套装比如假日酒店等能否返厂让人再次购买呢\t\n", "Answer:计划后续会增加获取途径,建议可以通过公告牌留意后续活动奖励以及礼包\n", "0.525277853012085\n", "\n", "Question:黄金坦克\n", "Answer:黄金坦克附体,坦克血量大增。生效数量最多:1\n", "0.7387195825576782\n", "\n", "Question:双子摇摇马的功能\n", "Answer:(有几率)破损坦克修理时不降级。生效数量最多:1\n", "0.6075373291969299\n", "\n", "Question:指挥官雕像\n", "Answer:(有几率)训练坦克时立即完成。生效数量最多:1\n", "0.5459924936294556\n", "\n", "Question:礼物箱\n", "Answer:训练速度增加。生效数量最多:5\n", "0.6708224415779114\n", "\n", "Question:长椅\n", "Answer:行军速度加快。生效数量最多:5\n", "0.8136347532272339\n", "\n", "Question:幸运泉\n", "Answer:金币产出增加。生效数量最多:5\n", "0.6920217275619507\n", "\n", "Question:高速公路\n", "Answer:对黑暗军团行军速度略微加快。生效数量最多:100\n", "0.6422338485717773\n", "\n", "Question:紫色地板\n", "Answer:训练速度略微增加。生效数量最多:100\n", "0.5185257196426392\n", "\n", "Question:灯笼\n", "Answer:金币产出略微增加。生效数量最多:10\n", "0.5136443972587585\n", "\n", "Question:青苹果树\n", "Answer:修理速度略微增加。生效数量最多:20\n", "0.5166247487068176\n", "\n", "Question:环太平洋联动活动后续还会有三期吗?\n", "Answer:您好,本次环太平洋联动活动为最后一次返场。为了避免不必要的损失,本次活动道具还请在活动结束前及时兑换使用。\n", "0.6672731041908264\n", "\n", "Question:我怎么找不到异界能源研究所了?\n", "Answer:您好,异界能源研究所已合并至机甲猎人研究所页面。您可以前往机甲猎人研究所中,在页面右上方位置找到异界能量研究所图标并点击进入。\n", "0.7120376229286194\n", "\n", "Question:为啥别的玩家能看到游戏里主动推送的联动活动相关视频,我号上怎么看不到?\n", "Answer:您好,感谢您的反馈。每位玩家可以点开联动活动页面左上角的作战影像观看活动相关完整视频。\n", "0.617574155330658\n", "\n", "Question:红色能源碎片如何获得?\n", "Answer:您好,精炼能源碎片可用于在物资补给站中获得兑换高级奖励。研究红色能量试管必得精炼能源碎片或者击败遗迹内怪兽有概率获得精炼能源碎片。\n", "0.7186340689659119\n", "\n", "Question:我在研究物资补给站误兑换了东西,可以帮我申请误操作处理吗?\n", "Answer:您好,可以按照三个月一次的误操作机会为您扣除所得道具并返还所使用的对应能源碎片,但是请注意每样道具的兑换次数无法重置。\n", "0.6458031535148621\n", "\n", "Question:我如何往遗迹深渊中进驻多个队列?\n", "Answer:您好,每位玩家在遗迹深渊中只能进驻一个行军队列及相应部队。进驻到遗迹深渊的部队和行军队列会被实际占用,无法在原地图其他玩法中使用。\n", "0.6925995945930481\n", "\n", "Question:给5个机甲猎人充能有什么作用?\n", "Answer:您好,毒妇血量较高,玩家可配合机甲猎人进行击杀,并可根据机甲猎人分别的能量值剩余情况选择给任意机甲猎人充能。每日前10个异界能源的提交会获得一份奖励。\n", "5个机甲猎人捐献共享每日可获得奖励的次数。即捐献任意一个机甲猎人拿到每日全部次数奖励后,捐献其他机甲也无法再获得次数奖励。\n", "0.5138034224510193\n", "\n", "Question:[坚定]效果,[专注]效果和[加固]效果分别是什么?\n", "Answer:[坚定]效果:无论是否出战,增加全部单位生命;[专注]效果:无论是否出战,增加全部单位攻击;[加固]效果:无论是否出战,增加全部单位伤害减免。\n", "拥有多个[坚定]、[专注]、[加固]效果时,只会生效一个[坚定]、[专注]、[加固]效果,其中加成最高的[坚定]、[专注]、[加固]效果生效。\n", "0.863585889339447\n", "\n", "Question:如何增加切尔诺阿尔法的[坚定],[专注]和[加固]效果?\n", "Answer:英雄升星会增加[坚定]效果,英雄5星时[坚定]效果提升至30%。\n", "英雄专属技能拥有[专注]和[加固]效果,英雄专属技能1级可获得[专注]效果1%,英雄专属技能10级时[专注]效果可提升至90%。加固效果是专属附加属性,专属技能5级时英雄可获得加固效果4%,专属技能7级时英雄[加固]效果在4%的基础上增加12%,共计16%。\n", "0.7647935748100281\n", "\n", "Question:什么是[援护]效果?\n", "Answer:拥有[援护]效果的单位存活时,同列其它单位受到的伤害减少,减少的伤害由[援护]单位承担(尤里卡突袭者还可将分担伤害减少40%),释放顺序低于洛克菲尔德技能。\n", "0.6907435059547424\n", "\n", "Question:尤里卡突袭者的[援护]效果是否能分担各种英雄技能打出的伤害?\n", "Answer:您好,[援护]效果不分担燃烧伤害(除燃烧伤害外,都可以分担)。拥有点燃技能的英雄目前有寂,914,马克西莫,异化娜迪亚等。\n", "0.5859891176223755\n", "\n", "Question:升星或升级是否可以增加尤里卡突袭者的[援护]效果?\n", "Answer:您好,升星或升级无法增加该英雄的[援护]效果。只有配置尤里卡突袭者7级专属技能时可以将该英雄的分担伤害减少效果从40%提升至60%。\n", "0.817891538143158\n", "\n", "Question:探戈狼技能礼包的价格档位有哪些,各包含多少个专属技能?\n", "Answer:您好,该礼包有98元和128元两个价格档位,分别包含300个和450个探戈狼1级专属技能。\n", "0.7253335118293762\n", "\n", "Question:探戈狼技能后续是否可以在专属技能商店中获得?\n", "Answer:您好,本次联动活动期间玩家只可以通过礼包购买的时候获得探戈狼专属技能。预计后续可以在专属技能商店中兑换获得该英雄技能。\n", "0.8022236824035645\n", "\n", "Question:获得探戈狼以及它的专属技能后,是否会影响新迁服功能上线后我迁服所需的调动申请书数量?\n", "Answer:您好,获得探戈狼以及它的专属技能不会影响后续迁服时所需调动申请书的数量,还请您知悉。\n", "0.8008280396461487\n", "\n", "Question:为何我点击活动时提示受政策原因,该地区IP无法参与活动?\n", "Answer:您好,官方坚决遵守各国家及地区对内容管理的政策要求,若部分内容无法正常体验,请您更换IP并重新登录尝试,具体方法可与其他玩家或社群进行交流。\n", "0.5046846866607666\n", "\n", "Question:什么是秘密武器?为什么我们服没有秘密武器?\n", "Answer:为了给新手玩家更多的帮助,辅助成长,目前部分新服会开放“秘密武器”功能,可以提供士兵生产、护盾开启、空投宝箱等特技,帮助您快速发展。\n", "目前秘密武器功能还在发展初期,仅在部分服务器开放,感谢您的理解。我们也会请团队考虑后续为更多服务器开启此功能,敬请期待。\n", "0.7733469605445862\n", "\n", "Question:秘密武器的开放条件是什么?\n", "Answer:需要指挥官基地等级到达12级,通过在基地内开启对应地块即可获得该建筑。\n", "0.5704920887947083\n", "\n", "Question:秘密武器如何组装?基础部件道具如何获得?\n", "Answer:您可以点击秘密武器建筑,在第一个页签“机械组装”中查看所需基础部件道具,包括无后坐力炮、多管火箭弹、数字阵列雷达、核心动力泵、多地形地盘、重甲控制舱。\n", "在第二个页签“秘密部署”中完成对应的任务(包括雷达任务、训练士兵、合成建筑等)后,即可领取对应的基础部件,回到第一页签进行组装。\n", "0.8004412055015564\n", "\n", "Question:秘密武器的三个特技介绍。\n", "Answer:智能训练:点击使用智能训练特技,启动后会有2分钟的时间,在这个时间内点击添加新的造兵任务都会即刻生产完毕。不过需要注意是每次使用均有冷却时间以及快速造兵的数量上限。\n", "援军定位:使用特技后,可立即进行一次空投支援,获得随机奖励。空投次数每480分钟回复一次,最多拥有3次。点击技能旁边的感叹号可了解空投获取的随机奖励内容。\n", "中枢壁垒:使用秘密武器的能源开启60分钟护盾, 开启后,敌人无法对您的基地发起进攻。冷却时间:2880分钟。\n", "0.5943860411643982\n", "\n", "Question:秘密武器特技的冷却时间、上限之类的数值,是固定的吗?会随着升级变化吗?\n", "Answer:您好,这些数值是固定的。\n", "0.5146230459213257\n", "\n", "Question:秘密武器在危险地带提示不能开启护盾,是正常的吗?\n", "Answer:您好,秘密武器的护盾功能与游戏本身限制相同。游戏内如危险地带等本身无法开启护盾的地点,秘密武器的护盾也无法开启。是游戏的正常设定。\n", "0.7784391641616821\n", "\n", "Question:秘密武器如何升级?\n", "Answer:您可以通过完成秘密部署任务和开启地块,获得稀有金属零件来进行升级。任务奖励中还包括钻石、核心设计图等不错的奖励,十分建议您优先参与并完成。\n", "0.594862163066864\n", "\n", "Question:秘密武器最高等级是多少?\n", "Answer:您好,目前秘密武器最高可升到50级。\n", "0.7704099416732788\n", "\n", "Question:稀有金属零件怎么获得?\n", "Answer:稀有金属零件可以通过完成秘密部署任务,或者开启基地内地块获得。(注:基地内第二岛屿仅有20个左右的部分地块解锁时会提供稀有金属零件。)\n", "0.7430362701416016\n", "\n", "Question:秘密部署的任务,我只领取上面的总奖励了,下面的小任务的奖励我忘记领了。\n", "Answer:您好,当您领取秘密部署总任务的奖励时,下方未领取的子任务奖励会一并帮您领取的。请不要担心,您前面完成的秘密部署任务的奖励已经全部领取到了。\n", "0.7813732624053955\n", "\n", "Question:秘密部署中的帝国宝藏任务无法完成!\n", "Answer:非常抱歉指挥官,曙光之地里搜不到帝国宝藏。您可能需要等曙光之地结束后回到原服才能继续攻打帝国宝藏。我们也会将此问题反馈给团队,请团队考虑后续优化此任务。感谢您的理解与支持。\n", "0.6322575807571411\n", "\n", "Question:秘密武器合成等级和建造科技等级一致的情况下,无法完成“合成≥N级建筑”任务怎么办?\n", "Answer:您好,建议您将“合成”科技的等级优先提升一级,在基地建造多个低等级兵厂建筑后,拖动建筑合成以完成该任务。 前期资源有限,建议您优先升级“合成等级”科技,从而更快的成长。\n", "0.6219929456710815\n", "\n", "Question:重装机兵模块研究活动会持续多久?\n", "Answer:您好,破浪突袭者BW-3的基础机体、复合模块、战斗模块、传动模块需要通过重装机兵模块研究活动获得,在活动页面消耗研究次数以增加模块研究进度,有几率暴击。当研究进度达到100%时,即可获得当前研究的模块,多余的进度会自动累计到下一模块的研究进度中。\n", "若指挥官未全部获取4个部件,该活动页面不会出现结束倒计时(会一直存在),直至指挥官全部获得4个模块后,该活动页面才会出现结束倒计时(48h)。\n", "0.6693931818008423\n", "\n", "Question:如何获得重装机兵模块研究次数?\n", "Answer:您好,指挥官可通过【完成每日任务】和【购买模块研究礼包】2种方式来获得重装机兵模块研究次数。\n", "0.8398867845535278\n", "\n", "Question:研究进程结束后,多余的研究次数怎么办?\n", "Answer:您好,研究进程结束后,多余研究次数将自动转化为钻石,指挥官可前往【超值活动-活动领奖中心】领取。\n", "0.6442395448684692\n", "\n", "Question:破浪突袭者BW-3的4个基础模块需要多久才能都获得嘛?其它的模块怎么获得?\n", "Answer:您好,模块研究进度会受暴击因素影响而浮动,因此每位玩家所需的获取时间可能会有不同。指挥官也可以选择购买模块研究礼包的方式来加速获得4个基础模块。至于其它的模块,通过【荒野行动】会概率掉落,以及开启【艾斯道刻穿甲剑重装芯片宝箱】可以有概率获取。\n", "0.6554750204086304\n", "\n", "Question:小蓝是拥有控制技能的重装机兵,这个控制技能怎么理解?它更适合攻击还是防守?\n", "Answer:您好,破浪突袭者BW-3的控制技能主要体现在 技能中有一项是潮汐震荡,如果被击目标处在攻击力降低状态,50%概率对其附加[眩晕](无法普攻和释放主动技能)。从技能定位来看,其更偏向攻击,但防守效果也不错。\n", "0.6177594065666199\n", "\n", "Question:你们这个新机甲的获取时间周期也太长了吧?不花钱就不能快速获得,你们也太过分了吧\n", "Answer:您好!非常感谢您的反馈。我们非常理解您所描述的问题。本次重装机兵模块获取方式确实不同以往,一成不变的游戏模式可能会让指挥官感到疲劳,因此我们一直在寻找新的游戏模式,希望为玩家带来更多的乐趣。我们会认真倾听玩家的反馈,并始终努力改进我们的游戏以符合您的期望。感谢您的信任与支持。\n", "0.4816896915435791\n", "\n", "Question:如何获得艾斯道刻穿甲剑芯片?\n", "Answer:您好,5月25日零点后重装芯片宝箱、重装秘藏、芯片超频有几率开出艾斯道刻穿甲剑重装芯片。开启艾斯道刻穿甲剑重装芯片宝箱(参与5月25日零点上线的劲爆囤货节活动获得),或者购买相关礼包也可以获得艾斯道刻穿甲剑芯片。\n", "0.7271589040756226\n", "\n", "Question:征服者活动的匹配机制是什么?\n", "Answer:在征服者活动里,地图是重新分配的,参与服务器的地图会【随机】打乱成一个新的地图。征服者系统是由多个轮次组成的连续战役活动,宣战后可确定本轮次的征服目标。只可以对本势力战区相邻的其他势力战区进行宣战。\n", "0.6017512679100037\n", "\n", "Question:什么是势力?势力霸主是什么,有什么作用?\n", "Answer:势力是战区的集合。在活动一开始时,每个战区各为一个独立势力,随着战事不断推进,战区之间相互吞并,会出现由许多战区构成的势力,吞并其他战区的战区称为宗主战区,被吞并的战区叫做附属战区。势力能够包含的战区数量无上限。势力霸主就是一个势力中宗主战区的首领,拥有绝对的领导权,可以任命官职,组建军团并任命军团长。\n", "0.8214671611785889\n", "\n", "Question:对某个战区宣战后是否可以取消宣战?\n", "Answer:势力的管理层可以在活动页面进行宣战,宣战时会弹出二次确认面板,宣战成功后不可以取消宣战。\n", "0.6400095820426941\n", "\n", "Question:势力排名以什么为标准?如何增加势力声望?\n", "Answer:每一轮战役结束后,会以势力排名为基准,对势力发奖。势力排名以势力国家数量为排名基准,在势力版图中包含的战区数量相同情况下,势力声望将决定势力排名顺序。积极参与战备任务和势力兵营会增加势力声望。\n", "0.8081082701683044\n", "\n", "Question:军团是什么,有什么用,怎么组建军团? \n", "Answer:以个人为单位申请,细化势力部队,方便战斗指挥,一个势力的军团数量上限与所拥有的附属战区数量有关。势力霸主可以任命军团长用来设置军团成员,方便军团作战的指挥。\n", "0.6972187161445618\n", "\n", "Question:当一个势力把另一个势力的宗主战区吞并,它能吞并该势力下的所有战区吗?\n", "Answer:不能的,当一个宗主战区被吞并时,该势力下的所有附属战区变会为独立战区。\n", "0.7655807137489319\n", "\n", "Question:贡献积分和军功怎么获得,可以用来做什么?\n", "Answer:指挥官完成每日任务和领取捐兵积分进度奖励可以获得贡献积分;在活动战场与敌方战区指挥官发生战斗,击杀敌方士兵和损失己方士兵都可以获得军功。两者都可以在征服者商店兑换奖励。\n", "0.6894623041152954\n", "\n", "Question:获得军功的数量与哪些因素有关?\n", "Answer:您好,主要与两个因素有关。\n", "1. 战斗场景:当玩家在战场中【杀死敌人士兵】时会获得军功,在不同场景下发生的战斗获得的军功数量不同,三个建筑内(首府+2个王者遗迹-守卫)和在战场区域内发生战斗的军功较多,在战场区域外发生战斗的军功较少。\n", "2. 士兵等级&基地等级:40级以下士兵没有军功,高于对方指挥官20级以后击杀敌方士兵不得军功,指挥官受伤的士兵等级低于指挥官自身5级后不会获得军功。\n", "0.6687365174293518\n", "\n", "Question:什么叫起义?什么时候可以起义呢?起义需要具备什么条件吗?\n", "Answer:本战区向宗主势力发起挑战,胜利后可脱离该势力并恢复成单独势力。开启起义准备后,有起义意愿的战区需要先进行【检阅】,满足条件才可进行起义。检阅不会消耗士兵,点击检阅按键后,自己基地里拥有并且满足条件的士兵,会计入到目标兵数里;只有在宣战阶段截止前审核通过的服务器才能宣战成功。起义宣告成功后会在战场公示阶段变成一个独立势力,宗主势力会自动对起义战区宣战。起义失败后仍然是附属战区。\n", "0.6850916147232056\n", "\n", "Question:最多可以同时和多少个战区宣战呢?有什么限制要求吗?\n", "Answer:势力的管理层可对一个或多个可以相邻战区(世界大地图可以通过直线相连)进行宣战标记。宣战阶段结束,这些标记的战区会被锁定。如果未进行任何宣战,系统会随机锁定一个战区作为宣战目标。\n", "0.6037830114364624\n", "\n", "Question:怎么判断防守失败和战斗胜者?\n", "Answer:当战区首府和至少1个王者遗迹--守卫被摧毁后,则该战区防守失败,成为附属战区。多个势力攻打某一个势力的情况下,假如该势力防守失败,在战斗进入结算状态后,会根据本次战斗进攻势力的拆除的城防值排名来决定本次战斗的胜者,出现同分的情况时,则依据势力总军功值决定胜者。\n", "在前线奋斗的时候,也要注意守住自己的战区,己方战区如果失守,即便打下别的战区也不算数。\n", "0.7212809324264526\n", "\n", "Question:先锋对决怎么玩,胜负规则是什么?\n", "Answer:玩家拥有建筑的占领权后,拥有【先锋】权限的玩家,可以开启对决。开启对决后,该玩家在此建筑中的所有部队都将会进入对决状态中,其他势力的拥有【先锋】权限的玩家,可以发起对决;当击败玩家所有进入对决状态的部队后,视为攻击成功。对决连续守住一定的攻击次数,或守住规定的时间,则对决成功,否则对决失败。对决获胜,可获得一定时间的无敌时间,并立即摧毁一定的城防值;对决失败,则击败对决的玩家获得建筑的控制权。\n", "0.7062654495239258\n", "\n", "Question:征服者活动的个人参与限制条件是什么?一个服可以无限进人吗?\n", "Answer:需要个人基地等级≥60级后才能进行参与该活动。一个服不可以无限进人,会有人数限制。\n", "0.6693925857543945\n", "\n", "Question:我跨服进入战场后,我的坐标会在哪个位置?\n", "Answer:战斗阶段开启后,玩家可以通过活动界面或跨服按键进入目标战场,进入目标战场后会随机分配坐标;在战斗阶段期间,非原服玩家无法开盾;在战场区域不能开盾。\n", "0.5842746496200562\n", "\n", "Question:战斗阶段是否可以攻击其他战区指挥官?\n", "Answer:全力交锋--战斗阶段开始后,跨服到其它战区后,除了攻打首府和两个2个王者遗迹-守卫外(其他遗迹和发射塔为和平状态,不可攻击),还可以攻打该服玩家的城堡,采集田和强化弱化塔。\n", "0.5541176199913025\n", "\n", "Question:表彰奖励和封赏奖励有什么区别?\n", "Answer:每轮全力交锋--结算阶段后,根据势力排名宗主战区将获得用于表彰的奖励,由势力首领分发表彰给本战区指挥官。\n", "当势力首领封赏奖励给附属战区后,附属战区获得本战区表彰奖励,由附属战区首领分发给本战区指挥官。\n", "0.6605383157730103\n", "\n", "Question:如何开启装饰物商店功能?\n", "Answer:您好,所在服开服时间≥84天或已经开过一次曙光之地活动后才可以在金融中心里看到装饰物商店页签,且需要满足玩家基地等级≥60级后才可以解锁装饰物商店功能。\n", "0.6786451935768127\n", "\n", "Question:为什么我们服没有全息幻视功能?\n", "Answer:您好,需要玩家所在服开完一次曙光之地活动后才能使用该功能\n", "0.6019400954246521\n", "\n", "Question:橙色装备改造功能的开启条件是什么?\n", "Answer:需要玩家装备4件橙色装备后,才能开启橙色装备改造功能。符合开启条件后玩家可以在军火库改造页面内将装备进行进阶操作。装备改造后可以获得额外属性,额外属性在佩戴装备后可对对应兵种生效。\n", "0.742383599281311\n", "\n", "Question:每个橙色装备最多可以改造到多少阶?\n", "Answer:目前的设定是每个橙色装备最多可以改造到8阶。4件橙色装备改造品阶总和达到一定值后可以激活额外套装属性效果\n", "0.8099770545959473\n", "\n", "Question:分解已经改造过的橙色装备时,改造材料会返还吗?\n", "Answer:分解已经改造过的橙色装备时,装备制造和改造消耗的材料、以及装备改造消耗的改造图纸会返还,但是装备制造图纸不会返还~\n", "0.9126039147377014\n", "\n", "Question:装备改造是什么?装备怎么改造\n", "Answer:可以在军火库改造页面内将装备进行进阶操作,装备改造后可以获得额外属性,额外属性在佩戴装备后可对对应兵种生效。\n", "0.5781945586204529\n", "\n", "Question:新区开放\n", "Answer:我们会不定期开放新服务器,您可以留意下头像-设置-角色管理里的服务器列表\n", "0.44808635115623474\n", "\n", "Question:为什么我买了至尊永久卡后,永久卡头像框不能设置\t\n", "Answer:需要您在背包里使用该道具,再到个人中心页面或者聊天频道里进行设置\n", "0.5235860347747803\n", "\n", "Question:商店购买的石油粮食为什么在背包里找不到呢\t\n", "Answer:从商店里购买的石油粮食不会在背包内显示哦,会直接加到总量里的\n", "0.7838613986968994\n", "\n", "Question:我经常忘记切换强化,有什么建议或者改善方法吗\t\n", "Answer:感谢您的反馈,我们会把您的想法转达给团队的,后期可能会添加一个出征时强化选择提醒功能\n", "0.5529630184173584\n", "\n", "Question:在哪里能看到我刚刚获得的装备图纸\t\n", "Answer:您可以在军火库制作对应装备的界面查看到您拥有的图纸\n", "0.6584015488624573\n", "\n", "Question:我购买的资源不见了\t\n", "Answer:礼包购买或者箱子开出的资源道具会出现在背包里,其他资源会直接加入到资源总数中喔\n", "0.6858749389648438\n", "\n", "Question:为什么我买了精英月卡却没有获得钻石\t\n", "Answer:精英月卡中并不包含钻石喔,精英月卡的特权是不用看视频,直接获得当天看视频可以获得的钻石,如果已经通过看视频获得了钻石,购买精英月卡时就不会获得这部分钻石了噢\n", "0.7498976588249207\n", "\n", "Question:自选兵种+1/+2宝箱获得的士兵等级是怎么算的?\t\n", "Answer:是根据您的个人科技【士兵单位】的等级计算+1/+2,比如您当前的个人科技中【陆军单位】等级是80,可以通过该道具获得81/82级陆军士兵。\n", "0.7085302472114563\n", "\n", "Question:我使用了自选兵种+1/+2宝箱没有获得士兵\t\n", "Answer:使用道具并选择您想要的士兵后,请您到背包里‘士兵’的选项里查看噢\n", "0.617608368396759\n", "\n", "Question:为什么我的修理厂是空的,受伤的士兵却没有收进去\t\n", "Answer:如果是在攻击其他玩家基地时产生的伤兵,那不会进入修理厂喔,VIP13-VIP16的特权可以按一定比例转化成伤兵进入修理厂\n", "0.6001214385032654\n", "\n", "Question:为什么我用了两个额外队列的道具,只增加了一个队列\t\n", "Answer:如果使用多个额外限时队列道具,只会增加限时队列的持续时间,并不会增加队列数量。\n", "0.6737362146377563\n", "\n", "Question:礼品码怎么使用,在哪领取\t\n", "Answer:点击头像-设置-礼品码进行兑换使用,获取方式请留意后续公告牌与游戏内公告邮件\n", "0.6476421356201172\n", "\n", "Question:我的获得的雷神元素背包里怎么看不见\t\n", "Answer:需要80级之后解锁超级武器实验室建筑之后才可进行查看,80级前获得的雷神元素会正常累计增加\n", "0.6691535115242004\n", "\n", "Question:装饰属性在城市详情内怎么不显示啊\t\n", "Answer:装饰属性是在神奇宝箱-buff界面进行显示的\n", "0.786723792552948\n", "\n", "Question:旧的装饰套装比如假日酒店等能否返厂让人再次购买呢\t\n", "Answer:计划后续会增加获取途径,建议可以通过公告牌留意后续活动奖励以及礼包\n", "0.525277853012085\n", "\n", "Question:黄金坦克\n", "Answer:黄金坦克附体,坦克血量大增。生效数量最多:1\n", "0.7387195825576782\n", "\n", "Question:双子摇摇马的功能\n", "Answer:(有几率)破损坦克修理时不降级。生效数量最多:1\n", "0.6075373291969299\n", "\n", "Question:指挥官雕像\n", "Answer:(有几率)训练坦克时立即完成。生效数量最多:1\n", "0.5459924936294556\n", "\n", "Question:礼物箱\n", "Answer:训练速度增加。生效数量最多:5\n", "0.6708224415779114\n", "\n", "Question:长椅\n", "Answer:行军速度加快。生效数量最多:5\n", "0.8136347532272339\n", "\n", "Question:幸运泉\n", "Answer:金币产出增加。生效数量最多:5\n", "0.6920217275619507\n", "\n", "Question:高速公路\n", "Answer:对黑暗军团行军速度略微加快。生效数量最多:100\n", "0.6422338485717773\n", "\n", "Question:紫色地板\n", "Answer:训练速度略微增加。生效数量最多:100\n", "0.5185257196426392\n", "\n", "Question:灯笼\n", "Answer:金币产出略微增加。生效数量最多:10\n", "0.5136443972587585\n", "\n", "Question:青苹果树\n", "Answer:修理速度略微增加。生效数量最多:20\n", "0.5166247487068176\n", "\n" ] } ], "source": [ "for i in range(len(input_question)):\n", " sims = similarity(emb_answer[i],emb_question[i])\n", " print(f\"Question:{input_question[i]}\\nAnswer:{input_answer[i]}\\n{sims}\\n\")" ] }, { "cell_type": "code", "execution_count": null, "id": "cd63deec-e324-470e-aad4-ff13cabcbaba", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 64, "id": "348b3ab6-f42d-4c54-a9b7-3a6223387168", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "[[{'corpus_id': 93, 'score': 0.4407898187637329},\n", " {'corpus_id': 63, 'score': 0.2768343389034271},\n", " {'corpus_id': 76, 'score': 0.27134737372398376}]]" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "input_queries = ['怎么攻击别人?']\n", "q_embedding = modelB.encode(input_queries)\n", "results = util.semantic_search(query_embeddings = q_embedding,corpus_embeddings= emb_answer,top_k=3)\n", "results" ] }, { "cell_type": "code", "execution_count": 65, "id": "37fbf424-9b29-44ba-8300-65aab4b56a35", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0.4407898" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "similarity(q_embedding[0],emb_answer[93])" ] }, { "cell_type": "code", "execution_count": 212, "id": "6b7015ab-0ab1-4b5a-bb70-1f1eff19c8cf", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('全力交锋--战斗阶段开始后,跨服到其它战区后,除了攻打首府和两个2个王者遗迹-守卫外(其他遗迹和发射塔为和平状态,不可攻击),还可以攻打该服玩家的城堡,采集田和强化弱化塔。', '战斗阶段是否可以攻击其他战区指挥官?') score:0.4162951409816742\n", "('玩家拥有建筑的占领权后,拥有【先锋】权限的玩家,可以开启对决。开启对决后,该玩家在此建筑中的所有部队都将会进入对决状态中,其他势力的拥有【先锋】权限的玩家,可以发起对决;当击败玩家所有进入对决状态的部队后,视为攻击成功。对决连续守住一定的攻击次数,或守住规定的时间,则对决成功,否则对决失败。对决获胜,可获得一定时间的无敌时间,并立即摧毁一定的城防值;对决失败,则击败对决的玩家获得建筑的控制权。', '先锋对决怎么玩,胜负规则是什么?') score:0.33781808614730835\n", "('您好,破浪突袭者BW-3的控制技能主要体现在 技能中有一项是潮汐震荡,如果被击目标处在攻击力降低状态,50%概率对其附加[眩晕](无法普攻和释放主动技能)。从技能定位来看,其更偏向攻击,但防守效果也不错。', '小蓝是拥有控制技能的重装机兵,这个控制技能怎么理解?它更适合攻击还是防守?') score:0.29779693484306335\n" ] } ], "source": [ "for ret in results[0]:\n", " print(f\"{all_datas[ret['corpus_id']]} score:{ret['score']}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ec80db02-8714-4132-8457-2acf024b1a59", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "2d47bd28-2482-47cb-b058-9f2f8bce9944", "metadata": {}, "source": [ "# 使用pre trained 模型对比" ] }, { "cell_type": "code", "execution_count": 136, "id": "da83ef84-16fd-4366-9a0e-3be21e731fa0", "metadata": { "tags": [] }, "outputs": [], "source": [ "from transformers import AutoTokenizer,AutoModel\n", "import torch" ] }, { "cell_type": "code", "execution_count": 137, "id": "66d5dff5-b3eb-44d6-b871-926cda9de11a", "metadata": { "tags": [] }, "outputs": [], "source": [ "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')" ] }, { "cell_type": "code", "execution_count": 138, "id": "ccf9a3e4-15c9-4233-aad4-c65c68c6e7d7", "metadata": { "tags": [] }, "outputs": [], "source": [ "def mean_pooling(model_output, attention_mask):\n", " token_embeddings = model_output[0].to(device) #First element of model_output contains all token embeddings\n", " input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float().to(device)\n", " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n", "\n", "def load_model(model_location): \n", " tokenizer = AutoTokenizer.from_pretrained(model_location)\n", " \n", " model = AutoModel.from_pretrained(\n", " model_location, \n", " # device_map=\"balanced_low_0\", \n", " #load_in_8bit=True\n", " )\n", " # load the model on GPU\n", " model.to(device) \n", " model.eval()\n", " \n", " return model, tokenizer" ] }, { "cell_type": "code", "execution_count": 171, "id": "fedb25cf-5c33-41fd-8131-0c0c36cdebd5", "metadata": { "tags": [] }, "outputs": [], "source": [ "model_location = 'sentence-transformers/paraphrase-multilingual-mpnet-base-v2'\n", "modelA,tokenizer = load_model(model_location)" ] }, { "cell_type": "code", "execution_count": 172, "id": "1f875dd8-c54c-4eb3-9516-9c9a7a16ae8e", "metadata": { "tags": [] }, "outputs": [], "source": [ "encoded_input = tokenizer(input_question, padding=True, truncation=True, max_length=128, return_tensors='pt').to(device)" ] }, { "cell_type": "code", "execution_count": 173, "id": "1d2ee9b2-e277-4e8e-90bd-6759665b091d", "metadata": { "tags": [] }, "outputs": [], "source": [ "with torch.no_grad():\n", " model_output = modelA(**encoded_input)\n", "\n", "# Perform pooling. In this case, max pooling.\n", "sentence_embeddings_q = mean_pooling(model_output, encoded_input['attention_mask']).to(device).cpu().numpy()" ] }, { "cell_type": "code", "execution_count": 174, "id": "116a097b-ab2d-41d6-a2e8-309e5c27bf51", "metadata": { "tags": [] }, "outputs": [], "source": [ "encoded_input = tokenizer(input_answer, padding=True, truncation=True, max_length=128, return_tensors='pt').to(device)\n", "with torch.no_grad():\n", " model_output = modelA(**encoded_input)\n", "\n", "# Perform pooling. In this case, max pooling.\n", "sentence_embeddings_a = mean_pooling(model_output, encoded_input['attention_mask']).to(device).cpu().numpy()" ] }, { "cell_type": "code", "execution_count": null, "id": "15726d48-4201-4edf-82ed-c0222afd82a5", "metadata": { "tags": [] }, "outputs": [], "source": [ "for i in range(len(input_question)):\n", " sims = similarity(sentence_embeddings_a[i],sentence_embeddings_q[i])\n", " print(f\"Question:{input_question[i]}\\nAnswer:{input_answer[i]}\\n{sims}\\n\")" ] }, { "cell_type": "code", "execution_count": 213, "id": "f29850ba-cfa6-476b-9a2b-8262aa55f22b", "metadata": { "tags": [] }, "outputs": [], "source": [ "input_queries = ['怎么攻击别人?']\n", "encoded_input = tokenizer(input_queries, padding=True, truncation=True, max_length=128, return_tensors='pt').to(device)\n", "with torch.no_grad():\n", " model_output = modelA(**encoded_input)\n", "\n", "q_embedding = mean_pooling(model_output, encoded_input['attention_mask']).to(device).cpu().numpy()" ] }, { "cell_type": "code", "execution_count": 214, "id": "fa12cf2d-097c-4a0d-8725-d83b7df885fc", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('您好,[援护]效果不分担燃烧伤害(除燃烧伤害外,都可以分担)。拥有点燃技能的英雄目前有寂,914,马克西莫,异化娜迪亚等。', '尤里卡突袭者的[援护]效果是否能分担各种英雄技能打出的伤害?') score:0.44886574149131775\n", "('玩家拥有建筑的占领权后,拥有【先锋】权限的玩家,可以开启对决。开启对决后,该玩家在此建筑中的所有部队都将会进入对决状态中,其他势力的拥有【先锋】权限的玩家,可以发起对决;当击败玩家所有进入对决状态的部队后,视为攻击成功。对决连续守住一定的攻击次数,或守住规定的时间,则对决成功,否则对决失败。对决获胜,可获得一定时间的无敌时间,并立即摧毁一定的城防值;对决失败,则击败对决的玩家获得建筑的控制权。', '先锋对决怎么玩,胜负规则是什么?') score:0.4420698285102844\n", "('您好,破浪突袭者BW-3的控制技能主要体现在 技能中有一项是潮汐震荡,如果被击目标处在攻击力降低状态,50%概率对其附加[眩晕](无法普攻和释放主动技能)。从技能定位来看,其更偏向攻击,但防守效果也不错。', '小蓝是拥有控制技能的重装机兵,这个控制技能怎么理解?它更适合攻击还是防守?') score:0.43532323837280273\n" ] } ], "source": [ "results = util.semantic_search(query_embeddings = q_embedding,corpus_embeddings= sentence_embeddings_a,top_k=3)\n", "results\n", "for ret in results[0]:\n", " print(f\"{all_datas[ret['corpus_id']]} score:{ret['score']}\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "f9fa8a3c-ce82-406c-b274-e1157838689c", "metadata": { "tags": [] }, "outputs": [ { "ename": "NameError", "evalue": "name 'tokenizer' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m input_queries \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m中国的首都在哪里?\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m中国的首都在北京\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m----> 2\u001b[0m encoded_input \u001b[38;5;241m=\u001b[39m \u001b[43mtokenizer\u001b[49m(input_queries, padding\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, truncation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, max_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m128\u001b[39m, return_tensors\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m'\u001b[39m)\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[1;32m 4\u001b[0m model_output \u001b[38;5;241m=\u001b[39m modelA(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mencoded_input)\n", "\u001b[0;31mNameError\u001b[0m: name 'tokenizer' is not defined" ] } ], "source": [ "input_queries = ['中国的首都在哪里?','中国的首都在北京']\n", "encoded_input = tokenizer(input_queries, padding=True, truncation=True, max_length=128, return_tensors='pt').to(device)\n", "with torch.no_grad():\n", " model_output = modelA(**encoded_input)\n", "\n", "embedding = mean_pooling(model_output, encoded_input['attention_mask']).to(device).cpu().numpy()" ] }, { "cell_type": "markdown", "id": "a7cc99a0-ee6a-4902-b8af-b7c6cc9da18a", "metadata": {}, "source": [ "# 部署模型到sagemaker" ] }, { "cell_type": "code", "execution_count": 12, "id": "57dc4832-b17a-47b0-8662-4ced85f29952", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "Requirement already satisfied: sagemaker in /opt/conda/lib/python3.10/site-packages (2.154.0)\n", "Collecting sagemaker\n", " Downloading sagemaker-2.169.0.tar.gz (851 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m851.8/851.8 kB\u001b[0m \u001b[31m8.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n", "\u001b[?25hCollecting attrs<24,>=23.1.0 (from sagemaker)\n", " Using cached attrs-23.1.0-py3-none-any.whl (61 kB)\n", "Requirement already satisfied: boto3<2.0,>=1.26.131 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (1.26.132)\n", "Requirement already satisfied: cloudpickle==2.2.1 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (2.2.1)\n", "Requirement already satisfied: google-pasta in /opt/conda/lib/python3.10/site-packages (from sagemaker) (0.2.0)\n", "Requirement already satisfied: numpy<2.0,>=1.9.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (1.23.5)\n", "Requirement already satisfied: protobuf<4.0,>=3.1 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (3.20.3)\n", "Requirement already satisfied: protobuf3-to-dict<1.0,>=0.1.5 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (0.1.5)\n", "Requirement already satisfied: smdebug_rulesconfig==1.0.1 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (1.0.1)\n", "Requirement already satisfied: importlib-metadata<5.0,>=1.4.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (4.13.0)\n", "Requirement already satisfied: packaging>=20.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (23.1)\n", "Requirement already satisfied: pandas in /opt/conda/lib/python3.10/site-packages (from sagemaker) (2.0.1)\n", "Requirement already satisfied: pathos in /opt/conda/lib/python3.10/site-packages (from sagemaker) (0.3.0)\n", "Requirement already satisfied: schema in /opt/conda/lib/python3.10/site-packages (from sagemaker) (0.7.5)\n", "Collecting PyYAML==6.0 (from sagemaker)\n", " Using cached PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)\n", "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.10/site-packages (from sagemaker) (4.17.3)\n", "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.10/site-packages (from sagemaker) (3.5.0)\n", "Requirement already satisfied: tblib==1.7.0 in /opt/conda/lib/python3.10/site-packages (from sagemaker) (1.7.0)\n", "Requirement already satisfied: botocore<1.30.0,>=1.29.132 in /opt/conda/lib/python3.10/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.29.132)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.7.0,>=0.6.0 in /opt/conda/lib/python3.10/site-packages (from boto3<2.0,>=1.26.131->sagemaker) (0.6.1)\n", "Requirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.10/site-packages (from importlib-metadata<5.0,>=1.4.0->sagemaker) (3.15.0)\n", "Requirement already satisfied: six in /opt/conda/lib/python3.10/site-packages (from protobuf3-to-dict<1.0,>=0.1.5->sagemaker) (1.16.0)\n", "Requirement already satisfied: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema->sagemaker) (0.19.3)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.10/site-packages (from pandas->sagemaker) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.10/site-packages (from pandas->sagemaker) (2023.3)\n", "Requirement already satisfied: tzdata>=2022.1 in /opt/conda/lib/python3.10/site-packages (from pandas->sagemaker) (2023.3)\n", "Requirement already satisfied: ppft>=1.7.6.6 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker) (1.7.6.6)\n", "Requirement already satisfied: dill>=0.3.6 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker) (0.3.6)\n", "Requirement already satisfied: pox>=0.3.2 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker) (0.3.2)\n", "Requirement already satisfied: multiprocess>=0.70.14 in /opt/conda/lib/python3.10/site-packages (from pathos->sagemaker) (0.70.14)\n", "Requirement already satisfied: contextlib2>=0.5.5 in /opt/conda/lib/python3.10/site-packages (from schema->sagemaker) (21.6.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.25.4 in /opt/conda/lib/python3.10/site-packages (from botocore<1.30.0,>=1.29.132->boto3<2.0,>=1.26.131->sagemaker) (1.26.15)\n", "Building wheels for collected packages: sagemaker\n", " Building wheel for sagemaker (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Created wheel for sagemaker: filename=sagemaker-2.169.0-py2.py3-none-any.whl size=1158252 sha256=0ec721d00f4427b64ea6d9d4e052bb5989328fde366bb4acd16933fb90d67f82\n", " Stored in directory: /root/.cache/pip/wheels/42/17/69/c2089332a0db669b4a27888e1d76e825168014112d5eb44231\n", "Successfully built sagemaker\n", "Installing collected packages: PyYAML, attrs, sagemaker\n", " Attempting uninstall: PyYAML\n", " Found existing installation: PyYAML 5.4.1\n", " Uninstalling PyYAML-5.4.1:\n", " Successfully uninstalled PyYAML-5.4.1\n", " Attempting uninstall: attrs\n", " Found existing installation: attrs 22.2.0\n", " Uninstalling attrs-22.2.0:\n", " Successfully uninstalled attrs-22.2.0\n", " Attempting uninstall: sagemaker\n", " Found existing installation: sagemaker 2.154.0\n", " Uninstalling sagemaker-2.154.0:\n", " Successfully uninstalled sagemaker-2.154.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "awscli 1.27.132 requires PyYAML<5.5,>=3.10, but you have pyyaml 6.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed PyYAML-6.0 attrs-23.1.0 sagemaker-2.169.0\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m" ] } ], "source": [ "!pip install -U sagemaker" ] }, { "cell_type": "markdown", "id": "b0490240-edff-4fff-8eec-bd295bf43f99", "metadata": {}, "source": [ "## 2. 把模型拷贝到S3为后续部署做准备" ] }, { "cell_type": "code", "execution_count": 80, "id": "df4a6f16-63af-4c84-9dda-0af0dfa7b487", "metadata": { "tags": [] }, "outputs": [], "source": [ "import sagemaker\n", "from sagemaker import image_uris\n", "import boto3\n", "import os\n", "import time\n", "import json\n", "\n", "role = sagemaker.get_execution_role() # execution role for the endpoint\n", "sess = sagemaker.session.Session() # sagemaker session for interacting with different AWS APIs\n", "bucket = sess.default_bucket() # bucket to house artifacts\n", "\n", "region = sess._region_name\n", "account_id = sess.account_id()\n", "\n", "s3_client = boto3.client(\"s3\")\n", "sm_client = boto3.client(\"sagemaker\")\n", "smr_client = boto3.client(\"sagemaker-runtime\")" ] }, { "cell_type": "code", "execution_count": 81, "id": "2fb24429-b2e5-4259-bd2d-70aa006c8cd1", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "s3_code_prefix: LLM-RAG/workshop/finetuned-sentence2emb_deploy_code\n", "model_snapshot_path: ./finetuned-sentence-embedding\n" ] } ], "source": [ "s3_model_prefix = \"LLM-RAG/workshop/finetuned-sentence2emb-model\" # folder where model checkpoint will go\n", "model_snapshot_path = \"./finetuned-sentence-embedding\"\n", "s3_code_prefix = \"LLM-RAG/workshop/finetuned-sentence2emb_deploy_code\"\n", "print(f\"s3_code_prefix: {s3_code_prefix}\")\n", "print(f\"model_snapshot_path: {model_snapshot_path}\")" ] }, { "cell_type": "code", "execution_count": 82, "id": "e53e83fd-305a-42f8-83cf-e0a17779eaac", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "upload: finetuned-sentence-embedding/config_sentence_transformers.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/config_sentence_transformers.json\n", "upload: finetuned-sentence-embedding/config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/config.json\n", "upload: finetuned-sentence-embedding/1_Pooling/config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/1_Pooling/config.json\n", "upload: finetuned-sentence-embedding/modules.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/modules.json\n", "upload: finetuned-sentence-embedding/README.md to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/README.md\n", "upload: finetuned-sentence-embedding/eval/.ipynb_checkpoints/similarity_evaluation_results-checkpoint.csv to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/eval/.ipynb_checkpoints/similarity_evaluation_results-checkpoint.csv\n", "upload: finetuned-sentence-embedding/special_tokens_map.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/special_tokens_map.json\n", "upload: finetuned-sentence-embedding/eval/similarity_evaluation_results.csv to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/eval/similarity_evaluation_results.csv\n", "upload: finetuned-sentence-embedding/sentence_bert_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/sentence_bert_config.json\n", "upload: finetuned-sentence-embedding/tokenizer_config.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/tokenizer_config.json\n", "upload: finetuned-sentence-embedding/tokenizer.json to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/tokenizer.json\n", "upload: finetuned-sentence-embedding/sentencepiece.bpe.model to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/sentencepiece.bpe.model\n", "upload: finetuned-sentence-embedding/pytorch_model.bin to s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/pytorch_model.bin\n" ] } ], "source": [ "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}" ] }, { "cell_type": "markdown", "id": "4e4c4651-d731-4e8d-8deb-350918d8d721", "metadata": {}, "source": [ "### 3. 模型部署准备(entrypoint脚本,容器镜像,服务配置)" ] }, { "cell_type": "code", "execution_count": 83, "id": "8a07ff35-d547-4df3-b511-4be2862dcb6e", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117\n" ] } ], "source": [ "inference_image_uri = (\n", " f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117\"\n", ")\n", "\n", "#中国区需要替换为下面的image_uri\n", "# inference_image_uri = (\n", "# f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.21.0-deepspeed0.8.3-cu117\"\n", "# )\n", "\n", "print(f\"Image going to be used is ---- > {inference_image_uri}\")" ] }, { "cell_type": "code", "execution_count": 84, "id": "4858bfe1-0392-41fd-8c46-d4a04a520397", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] } ], "source": [ "!mkdir -p sentence2emb_deploy_code" ] }, { "cell_type": "code", "execution_count": 85, "id": "5fe26158-4b50-4659-a869-6d64f3d152e4", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting sentence2emb_deploy_code/model.py\n" ] } ], "source": [ "%%writefile sentence2emb_deploy_code/model.py\n", "from djl_python import Input, Output\n", "import torch\n", "import logging\n", "import math\n", "import os\n", "from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, AutoModel\n", "\n", "device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\n", "print(f'--device={device}')\n", "\n", "\n", "def load_model(properties):\n", " tensor_parallel = properties[\"tensor_parallel_degree\"]\n", " model_location = properties['model_dir']\n", " if \"model_id\" in properties:\n", " model_location = properties['model_id']\n", " logging.info(f\"Loading model in {model_location}\")\n", " \n", " tokenizer = AutoTokenizer.from_pretrained(model_location)\n", " \n", " model = AutoModel.from_pretrained(\n", " model_location, \n", " # device_map=\"balanced_low_0\", \n", " #load_in_8bit=True\n", " )\n", " # load the model on GPU\n", " model.to(device) \n", " model.requires_grad_(False)\n", " model.eval()\n", " \n", " return model, tokenizer\n", "\n", "\n", "model = None\n", "tokenizer = None\n", "generator = None\n", "\n", "def mean_pooling(model_output, attention_mask):\n", " token_embeddings = model_output[0].to(device) #First element of model_output contains all token embeddings\n", " input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float().to(device)\n", " return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)\n", "\n", "\n", "def handle(inputs: Input):\n", " global model, tokenizer\n", " if not model:\n", " model, tokenizer = load_model(inputs.get_properties())\n", "\n", " if inputs.is_empty():\n", " return None\n", " data = inputs.get_as_json()\n", " \n", " input_sentences = data[\"inputs\"]\n", " params = data[\"parameters\"]\n", " logging.info(f\"inputs: {input_sentences}\")\n", " logging.info(f\"parameters: {params}\")\n", " \n", " encoded_input = tokenizer(input_sentences, padding=True, truncation=True, return_tensors='pt').to(device)\n", " # Compute token embeddings\n", " with torch.no_grad():\n", " model_output = model(**encoded_input)\n", "\n", " # Perform pooling. In this case, max pooling.\n", " sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask']).to(device).cpu().numpy()\n", "\n", "# # preprocess\n", "# input_ids = tokenizer(input_sentences, return_tensors=\"pt\").input_ids\n", "# # pass inputs with all kwargs in data\n", "# if params is not None:\n", "# outputs = model.generate(input_ids, **params)\n", "# else:\n", "# outputs = model.generate(input_ids)\n", "\n", "# # postprocess the prediction\n", "# prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", " \n", " result = {\"sentence_embeddings\": sentence_embeddings}\n", " return Output().add_as_json(result)" ] }, { "cell_type": "markdown", "id": "d5c02fa9-d046-42a7-88fc-fc81fe890313", "metadata": {}, "source": [ "#### Note: option.s3url 需要按照自己的账号进行修改" ] }, { "cell_type": "code", "execution_count": 86, "id": "64254ec0-69ab-4c2f-a9bb-05f259ab5218", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Overwriting sentence2emb_deploy_code/serving.properties\n" ] } ], "source": [ "%%writefile sentence2emb_deploy_code/serving.properties\n", "engine=Python\n", "option.tensor_parallel_degree=1\n", "option.s3url = s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb-model/" ] }, { "cell_type": "code", "execution_count": 87, "id": "d76dad38-02f0-46f9-9121-9cb5155d7a76", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n", "sentence2emb_deploy_code/\n", "sentence2emb_deploy_code/model.py\n", "sentence2emb_deploy_code/serving.properties\n" ] } ], "source": [ "!rm s2e_model.tar.gz\n", "!cd sentence2emb_deploy_code && rm -rf \".ipynb_checkpoints\"\n", "!tar czvf s2e_model.tar.gz sentence2emb_deploy_code" ] }, { "cell_type": "code", "execution_count": 88, "id": "76d3d300-4d4d-4ea3-a7d8-2667619cd727", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-2-946277762357/LLM-RAG/workshop/finetuned-sentence2emb_deploy_code/s2e_model.tar.gz\n" ] } ], "source": [ "s3_code_artifact = sess.upload_data(\"s2e_model.tar.gz\", bucket, s3_code_prefix)\n", "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")" ] }, { "cell_type": "markdown", "id": "dc1a07e1-0955-497f-ba2d-9991a6deb833", "metadata": {}, "source": [ "### 4. 创建模型 & 创建endpoint" ] }, { "cell_type": "code", "execution_count": 89, "id": "b9318f47-36c1-4f16-962b-bdfe84c77c16", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "finetuned-paraphrase-2023-07-05-03-14-23-508\n", "Image going to be used is ---- > 763104351884.dkr.ecr.us-east-2.amazonaws.com/djl-inference:0.21.0-deepspeed0.8.3-cu117\n", "Created Model: arn:aws:sagemaker:us-east-2:946277762357:model/finetuned-paraphrase-2023-07-05-03-14-23-508\n" ] } ], "source": [ "from sagemaker.utils import name_from_base\n", "import boto3\n", "\n", "model_name = name_from_base(\"finetuned-paraphrase\") #Note: Need to specify model_name\n", "print(model_name)\n", "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n", "\n", "create_model_response = sm_client.create_model(\n", " ModelName=model_name,\n", " ExecutionRoleArn=role,\n", " PrimaryContainer={\n", " \"Image\": inference_image_uri,\n", " \"ModelDataUrl\": s3_code_artifact\n", " },\n", " \n", ")\n", "model_arn = create_model_response[\"ModelArn\"]\n", "\n", "print(f\"Created Model: {model_arn}\")" ] }, { "cell_type": "markdown", "id": "9a7a2371-de79-4240-b029-80c15a674a7f", "metadata": {}, "source": [ "### 如果批量创建索引量较多,建议改成\"InstanceType\": \"ml.g4dn.xlarge\"," ] }, { "cell_type": "code", "execution_count": 90, "id": "e337d531-3324-4ae4-ac76-66c3418fd548", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'EndpointConfigArn': 'arn:aws:sagemaker:us-east-2:946277762357:endpoint-config/finetuned-paraphrase-2023-07-05-03-14-23-508-config',\n", " 'ResponseMetadata': {'RequestId': '0b754ef3-6cb2-4243-bd14-518fbf35d080',\n", " 'HTTPStatusCode': 200,\n", " 'HTTPHeaders': {'x-amzn-requestid': '0b754ef3-6cb2-4243-bd14-518fbf35d080',\n", " 'content-type': 'application/x-amz-json-1.1',\n", " 'content-length': '132',\n", " 'date': 'Wed, 05 Jul 2023 03:14:30 GMT'},\n", " 'RetryAttempts': 0}}" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "endpoint_config_name = f\"{model_name}-config\"\n", "endpoint_name = f\"{model_name}-endpoint\"\n", "\n", "endpoint_config_response = sm_client.create_endpoint_config(\n", " EndpointConfigName=endpoint_config_name,\n", " ProductionVariants=[\n", " {\n", " \"VariantName\": \"variant1\",\n", " \"ModelName\": model_name,\n", " \"InstanceType\": \"ml.m5.2xlarge\",\n", " \"InitialInstanceCount\": 1,\n", " # \"VolumeSizeInGB\" : 400,\n", " # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n", " \"ContainerStartupHealthCheckTimeoutInSeconds\": 10*60,\n", " },\n", " ],\n", ")\n", "endpoint_config_response" ] }, { "cell_type": "code", "execution_count": 92, "id": "75f6a4b6-c36a-4ec5-8dd9-35b237ba55ba", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮\n",
       " /tmp/ipykernel_1225/4188345917.py:1 in <module>                                                  \n",
       "                                                                                                  \n",
       " [Errno 2] No such file or directory: '/tmp/ipykernel_1225/4188345917.py'                         \n",
       "                                                                                                  \n",
       " /opt/conda/lib/python3.10/site-packages/botocore/client.py:530 in _api_call                      \n",
       "                                                                                                  \n",
       "    527 │   │   │   │   │   f\"{py_operation_name}() only accepts keyword arguments.\"              \n",
       "    528 │   │   │   │   )                                                                         \n",
       "    529 │   │   │   # The \"self\" in this scope is referring to the BaseClient.                    \n",
       "  530 │   │   │   return self._make_api_call(operation_name, kwargs)                            \n",
       "    531 │   │                                                                                     \n",
       "    532 │   │   _api_call.__name__ = str(py_operation_name)                                       \n",
       "    533                                                                                           \n",
       "                                                                                                  \n",
       " /opt/conda/lib/python3.10/site-packages/botocore/client.py:960 in _make_api_call                 \n",
       "                                                                                                  \n",
       "    957 │   │   if http.status_code >= 300:                                                       \n",
       "    958 │   │   │   error_code = parsed_response.get(\"Error\", {}).get(\"Code\")                     \n",
       "    959 │   │   │   error_class = self.exceptions.from_code(error_code)                           \n",
       "  960 │   │   │   raise error_class(parsed_response, operation_name)                            \n",
       "    961 │   │   else:                                                                             \n",
       "    962 │   │   │   return parsed_response                                                        \n",
       "    963                                                                                           \n",
       "╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\n",
       "ClientError: An error occurred (ValidationException) when calling the CreateEndpoint operation: Cannot create \n",
       "already existing endpoint \n",
       "\"arn:aws:sagemaker:us-east-2:946277762357:endpoint/finetuned-paraphrase-2023-07-05-03-14-23-508-endpoint\".\n",
       "
\n" ], "text/plain": [ "\u001b[31m╭─\u001b[0m\u001b[31m──────────────────────────────\u001b[0m\u001b[31m \u001b[0m\u001b[1;31mTraceback \u001b[0m\u001b[1;2;31m(most recent call last)\u001b[0m\u001b[31m \u001b[0m\u001b[31m───────────────────────────────\u001b[0m\u001b[31m─╮\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/tmp/ipykernel_1225/\u001b[0m\u001b[1;33m4188345917.py\u001b[0m:\u001b[94m1\u001b[0m in \u001b[92m\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[3;31m[Errno 2] No such file or directory: '/tmp/ipykernel_1225/4188345917.py'\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/opt/conda/lib/python3.10/site-packages/botocore/\u001b[0m\u001b[1;33mclient.py\u001b[0m:\u001b[94m530\u001b[0m in \u001b[92m_api_call\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 527 \u001b[0m\u001b[2m│ │ │ │ │ \u001b[0m\u001b[33mf\u001b[0m\u001b[33m\"\u001b[0m\u001b[33m{\u001b[0mpy_operation_name\u001b[33m}\u001b[0m\u001b[33m() only accepts keyword arguments.\u001b[0m\u001b[33m\"\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 528 \u001b[0m\u001b[2m│ │ │ │ \u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 529 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[2m# The \"self\" in this scope is referring to the BaseClient.\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 530 \u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m \u001b[96mself\u001b[0m._make_api_call(operation_name, kwargs) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 531 \u001b[0m\u001b[2m│ │ \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 532 \u001b[0m\u001b[2m│ │ \u001b[0m_api_call.\u001b[91m__name__\u001b[0m = \u001b[96mstr\u001b[0m(py_operation_name) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 533 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2;33m/opt/conda/lib/python3.10/site-packages/botocore/\u001b[0m\u001b[1;33mclient.py\u001b[0m:\u001b[94m960\u001b[0m in \u001b[92m_make_api_call\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 957 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94mif\u001b[0m http.status_code >= \u001b[94m300\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 958 \u001b[0m\u001b[2m│ │ │ \u001b[0merror_code = parsed_response.get(\u001b[33m\"\u001b[0m\u001b[33mError\u001b[0m\u001b[33m\"\u001b[0m, {}).get(\u001b[33m\"\u001b[0m\u001b[33mCode\u001b[0m\u001b[33m\"\u001b[0m) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 959 \u001b[0m\u001b[2m│ │ │ \u001b[0merror_class = \u001b[96mself\u001b[0m.exceptions.from_code(error_code) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[31m❱ \u001b[0m 960 \u001b[2m│ │ │ \u001b[0m\u001b[94mraise\u001b[0m error_class(parsed_response, operation_name) \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 961 \u001b[0m\u001b[2m│ │ \u001b[0m\u001b[94melse\u001b[0m: \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 962 \u001b[0m\u001b[2m│ │ │ \u001b[0m\u001b[94mreturn\u001b[0m parsed_response \u001b[31m│\u001b[0m\n", "\u001b[31m│\u001b[0m \u001b[2m 963 \u001b[0m \u001b[31m│\u001b[0m\n", "\u001b[31m╰──────────────────────────────────────────────────────────────────────────────────────────────────╯\u001b[0m\n", "\u001b[1;91mClientError: \u001b[0mAn error occurred \u001b[1m(\u001b[0mValidationException\u001b[1m)\u001b[0m when calling the CreateEndpoint operation: Cannot create \n", "already existing endpoint \n", "\u001b[32m\"arn:aws:sagemaker:us-east-2:946277762357:endpoint/finetuned-paraphrase-2023-07-05-03-14-23-508-endpoint\"\u001b[0m.\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "create_endpoint_response = sm_client.create_endpoint(\n", " EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n", ")\n", "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")" ] }, { "cell_type": "code", "execution_count": 33, "id": "277cc50a-d67e-4d01-ae58-18af86bf6259", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Status: InService\n", "Arn: arn:aws:sagemaker:us-east-2:946277762357:endpoint/finetuned-paraphrase-2023-07-04-16-08-10-303-endpoint\n", "Status: InService\n" ] } ], "source": [ "import time\n", "\n", "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", "status = resp[\"EndpointStatus\"]\n", "print(\"Status: \" + status)\n", "\n", "while status == \"Creating\":\n", " time.sleep(60)\n", " resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n", " status = resp[\"EndpointStatus\"]\n", " print(\"Status: \" + status)\n", "\n", "print(\"Arn: \" + resp[\"EndpointArn\"])\n", "print(\"Status: \" + status)" ] }, { "cell_type": "markdown", "id": "ef9ac01c-e8d9-4887-be4e-baded6d36abc", "metadata": {}, "source": [ "## 5. 模型测试" ] }, { "cell_type": "code", "execution_count": 34, "id": "1bef7a45-2484-4c77-8b14-1f6d5fea6b01", "metadata": { "tags": [] }, "outputs": [], "source": [ "def get_vector_by_sm_endpoint(questions, sm_client, endpoint_name):\n", " parameters = {\n", " \"max_new_tokens\": 50,\n", " \"temperature\": 0,\n", " \"min_length\": 10,\n", " \"no_repeat_ngram_size\": 2,\n", " }\n", "\n", " response_model = sm_client.invoke_endpoint(\n", " EndpointName=endpoint_name,\n", " Body=json.dumps(\n", " {\n", " \"inputs\": questions,\n", " \"parameters\": parameters\n", " }\n", " ),\n", " ContentType=\"application/json\",\n", " )\n", " json_str = response_model['Body'].read().decode('utf8')\n", " json_obj = json.loads(json_str)\n", " embeddings = json_obj['sentence_embeddings']\n", " return embeddings" ] }, { "cell_type": "code", "execution_count": 38, "id": "61b0cb25-1e16-4a70-a77c-f5e644d1166e", "metadata": { "tags": [] }, "outputs": [], "source": [ "prompts1 = \"\"\"专属技能碎片在哪里获得?\"\"\"\n", "prompts1 = \"\"\"中国首都在哪里?\"\"\"\n", "\n", "emb1 = get_vector_by_sm_endpoint(prompts1, smr_client, endpoint_name)" ] }, { "cell_type": "code", "execution_count": 39, "id": "2f37256a-7fa3-4e4c-b0c5-f17e0e8f858a", "metadata": { "tags": [] }, "outputs": [], "source": [ "prompts2 = \"\"\"专属技能碎片可以通过多种途径获得,例如礼包商城-特惠礼包界面可以购买专属技能碎片礼包\"\"\"\n", "emb2 = get_vector_by_sm_endpoint(prompts2, smr_client, endpoint_name)" ] }, { "cell_type": "code", "execution_count": 40, "id": "9377d20d-31c8-493e-9247-135052e35cbb", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "tensor([[-0.0653]])" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "util.cos_sim(emb1,emb2)" ] }, { "cell_type": "code", "execution_count": null, "id": "4f3a2f25-e206-439e-b8ee-b5f4216f09a9", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "availableInstances": [ { "_defaultOrder": 0, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.t3.medium", "vcpuNum": 2 }, { "_defaultOrder": 1, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.t3.large", "vcpuNum": 2 }, { "_defaultOrder": 2, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.t3.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 3, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.t3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 4, "_isFastLaunch": true, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5.large", "vcpuNum": 2 }, { "_defaultOrder": 5, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 6, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 7, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 8, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 9, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 10, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 11, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 12, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.m5d.large", "vcpuNum": 2 }, { "_defaultOrder": 13, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.m5d.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 14, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.m5d.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 15, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.m5d.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 16, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.m5d.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 17, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.m5d.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 18, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.m5d.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 19, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.m5d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 20, "_isFastLaunch": false, "category": "General purpose", "gpuNum": 0, "hideHardwareSpecs": true, "memoryGiB": 0, "name": "ml.geospatial.interactive", "supportedImageNames": [ "sagemaker-geospatial-v1-0" ], "vcpuNum": 0 }, { "_defaultOrder": 21, "_isFastLaunch": true, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 4, "name": "ml.c5.large", "vcpuNum": 2 }, { "_defaultOrder": 22, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 8, "name": "ml.c5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 23, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.c5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 24, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.c5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 25, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 72, "name": "ml.c5.9xlarge", "vcpuNum": 36 }, { "_defaultOrder": 26, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 96, "name": "ml.c5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 27, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 144, "name": "ml.c5.18xlarge", "vcpuNum": 72 }, { "_defaultOrder": 28, "_isFastLaunch": false, "category": "Compute optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.c5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 29, "_isFastLaunch": true, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g4dn.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 30, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g4dn.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 31, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g4dn.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 32, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g4dn.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 33, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g4dn.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 34, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g4dn.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 35, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 61, "name": "ml.p3.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 36, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 244, "name": "ml.p3.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 37, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 488, "name": "ml.p3.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 38, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.p3dn.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 39, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.r5.large", "vcpuNum": 2 }, { "_defaultOrder": 40, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.r5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 41, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.r5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 42, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.r5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 43, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.r5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 44, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.r5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 45, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 512, "name": "ml.r5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 46, "_isFastLaunch": false, "category": "Memory Optimized", "gpuNum": 0, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.r5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 47, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 16, "name": "ml.g5.xlarge", "vcpuNum": 4 }, { "_defaultOrder": 48, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 32, "name": "ml.g5.2xlarge", "vcpuNum": 8 }, { "_defaultOrder": 49, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 64, "name": "ml.g5.4xlarge", "vcpuNum": 16 }, { "_defaultOrder": 50, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 128, "name": "ml.g5.8xlarge", "vcpuNum": 32 }, { "_defaultOrder": 51, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 1, "hideHardwareSpecs": false, "memoryGiB": 256, "name": "ml.g5.16xlarge", "vcpuNum": 64 }, { "_defaultOrder": 52, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 192, "name": "ml.g5.12xlarge", "vcpuNum": 48 }, { "_defaultOrder": 53, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 4, "hideHardwareSpecs": false, "memoryGiB": 384, "name": "ml.g5.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 54, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 768, "name": "ml.g5.48xlarge", "vcpuNum": 192 }, { "_defaultOrder": 55, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4d.24xlarge", "vcpuNum": 96 }, { "_defaultOrder": 56, "_isFastLaunch": false, "category": "Accelerated computing", "gpuNum": 8, "hideHardwareSpecs": false, "memoryGiB": 1152, "name": "ml.p4de.24xlarge", "vcpuNum": 96 } ], "instance_type": "ml.g4dn.xlarge", "kernelspec": { "display_name": "Python 3 (PyTorch 2.0.0 Python 3.10 GPU Optimized)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-2:429704687514:image/pytorch-2.0.0-gpu-py310" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.8" } }, "nbformat": 4, "nbformat_minor": 5 }