{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8d0fe91c-5c00-4f62-984b-bc158b80973d",
   "metadata": {},
   "source": [
    "# Testing with API Gateway"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "875d39fc-9fe5-4817-8614-ee344721913b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %%capture\n",
    "# %pip install aws-requests-auth"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17e9d857-7f22-4720-8f72-82df56acee38",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import time\n",
    "from urllib.parse import urlparse\n",
    "\n",
    "import boto3\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import requests\n",
    "from aws_requests_auth.boto_utils import BotoAWSRequestsAuth\n",
    "from tqdm.contrib.concurrent import thread_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "153c4bbe-4946-4e4a-beaf-b147aaf28f34",
   "metadata": {},
   "outputs": [],
   "source": [
    "region = boto3.Session().region_name"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f5049d34-e8a6-40ae-8402-b00fa44b8e8b",
   "metadata": {},
   "source": [
    "### Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2140cd06-508a-4ace-b36c-5d9e4194fb28",
   "metadata": {},
   "outputs": [],
   "source": [
    "url = \"\"  # example url: \"https://vcqnacq0k1.execute-api.us-east-1.amazonaws.com/LATEST/HF\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56a75ef9-ffaf-4714-b572-283463d6527c",
   "metadata": {},
   "outputs": [],
   "source": [
    "test_phrase = \"This is an interesting workshop, very helpful!\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10328c56-48d5-4660-9a4c-262f66701179",
   "metadata": {},
   "outputs": [],
   "source": [
    "# aws_host must be the bare hostname: drop the scheme and the stage/resource path.\n",
    "auth = BotoAWSRequestsAuth(\n",
    "    aws_host=urlparse(url).netloc,\n",
    "    aws_region=region,\n",
    "    aws_service=\"execute-api\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33fe2cff-2be3-467e-915d-ceb5c5bb2c8e",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "payload = {\"inputs\": test_phrase}\n",
    "response = requests.post(\n",
    "    url,\n",
    "    # auth=auth,\n",
    "    json=payload,\n",
    ")\n",
    "print(json.dumps(response.json(), indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b242af23-7452-4e32-8c2b-c9647f597f7c",
   "metadata": {},
   "source": [
    "## Benchmark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c39a42fb-4385-4558-9482-1c98eae36102",
   "metadata": {},
   "outputs": [],
   "source": [
    "def time_prediction(payload, return_pred=False):\n",
    "    \"\"\"Send one POST request to ``url`` and time it.\n",
    "\n",
    "    Args:\n",
    "        payload: JSON-serialisable body sent to the endpoint.\n",
    "        return_pred: If True, return the raw ``requests.Response`` instead of\n",
    "            the latency.\n",
    "\n",
    "    Returns:\n",
    "        The response object when ``return_pred`` is True; otherwise the elapsed\n",
    "        time in seconds, or None when the status code is not 200.\n",
    "    \"\"\"\n",
    "    # perf_counter is monotonic, so the measurement cannot be skewed by clock adjustments.\n",
    "    start = time.perf_counter()\n",
    "    pred = requests.post(url, json=payload)\n",
    "    elapsed = time.perf_counter() - start\n",
    "    if return_pred:\n",
    "        return pred\n",
    "    if pred.status_code != 200:\n",
    "        return None\n",
    "    return elapsed\n",
    "\n",
    "\n",
    "def run_benchmark(\n",
    "    payload,\n",
    "    num_preds=100,\n",
    "    print_report=False,\n",
    "    plot_report=False,\n",
    "    n_threads=None,\n",
    "):\n",
    "    \"\"\"Fire ``num_preds`` concurrent requests and summarise latency and throughput.\n",
    "\n",
    "    Args:\n",
    "        payload: JSON-serialisable body sent with every request.\n",
    "        num_preds: Number of requests to send.\n",
    "        print_report: If True, print a short text summary.\n",
    "        plot_report: If True, show a histogram of the successful latencies.\n",
    "        n_threads: Passed to ``thread_map`` as ``max_workers``.\n",
    "\n",
    "    Returns:\n",
    "        Tuple ``(TPS, p95_ms, n_failed)``: requests per second over the whole run\n",
    "        (failed requests included), 95th-percentile latency of the successful\n",
    "        requests in milliseconds (NaN when none succeeded), and the number of\n",
    "        failed requests.\n",
    "    \"\"\"\n",
    "    tic = time.perf_counter()\n",
    "    results = thread_map(\n",
    "        time_prediction,\n",
    "        [payload] * num_preds,\n",
    "        max_workers=n_threads,\n",
    "    )\n",
    "    duration = time.perf_counter() - tic\n",
    "\n",
    "    # time_prediction reports a failure as None; map it to NaN so the array is\n",
    "    # numeric (np.isnan raises TypeError on an object array holding None).\n",
    "    t_vec = np.array([np.nan if t is None else t for t in results], dtype=float)\n",
    "    failed = np.isnan(t_vec)\n",
    "    n_failed = np.count_nonzero(failed)\n",
    "    ok_latencies = t_vec[~failed]\n",
    "    TPS = num_preds / duration\n",
    "\n",
    "    # Percentiles are computed over completed requests only; with none, report NaN.\n",
    "    if ok_latencies.size:\n",
    "        latency_percentiles = np.percentile(ok_latencies, q=[50, 90, 95, 99])\n",
    "    else:\n",
    "        latency_percentiles = np.full(4, np.nan)\n",
    "\n",
    "    if plot_report:\n",
    "        plt.hist(ok_latencies, bins=100)\n",
    "        plt.title(\"Request latency histogram for ml.c5.xlarge\")\n",
    "        plt.xlabel(\"Latency (s)\")\n",
    "        plt.ylabel(\"Requests\")\n",
    "        plt.show()\n",
    "\n",
    "    if print_report:\n",
    "        # sep=\"\" keeps the lines flush left; each piece already ends with a newline.\n",
    "        print(\n",
    "            \"==== HuggingFace model deployed on CPU instance endpoint benchmark ====\\n\",\n",
    "            f\"95 % of requests take less than {latency_percentiles[2]*1000} ms\\n\",\n",
    "            f\"Rough request throughput/second is {TPS}\\n\",\n",
    "            f\"{n_failed} failed invocations\",\n",
    "            sep=\"\",\n",
    "        )\n",
    "    return TPS, latency_percentiles[2] * 1000, n_failed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "642c7e55-991e-4edf-9abb-85ba6fc77f6b",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_benchmark(payload, num_preds=5000, print_report=True, plot_report=True)"
   ]
  }
], "metadata": { "instance_type": "ml.t3.medium", "kernelspec": { "display_name": "Python 3 (Data Science)", "language": "python", "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/datascience-1.0" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.10" } }, "nbformat": 4, "nbformat_minor": 5 }