{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1f0c3d2",
   "metadata": {},
   "source": [
    "# Traffic comparison report\n",
    "\n",
    "This notebook loads the latest results table from the comparisons SQLite database and compares the source and target clusters:\n",
    "\n",
    "- measured response latency (`*_response_latency` columns),\n",
    "- the `took` value extracted from each response body,\n",
    "- the mean of `responses_are_identical` per request URI and method.\n",
    "\n",
    "Set the `COMPARISONS_DB_LOCATION` environment variable to choose the database file; it defaults to `comparisons.db` in the working directory."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "241006b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sqlite3\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from traffic_comparator.sqlite import COLUMN_DATATYPES, COLUMN_JSONS, json_load_function, get_took_value, get_latest_table_name"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b7e94f10",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "340ac0eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "db_file = Path(os.getenv(\"COMPARISONS_DB_LOCATION\", \"comparisons.db\"))\n",
    "print(f\"Loading from DB file: {db_file}\")\n",
    "\n",
    "# sqlite3.connect() creates a new, empty database file when the path does not exist,\n",
    "# so check up front instead of failing later on a missing table.\n",
    "if not db_file.is_file():\n",
    "    raise FileNotFoundError(f\"Comparisons database not found: {db_file}\")\n",
    "\n",
    "con = sqlite3.connect(db_file)\n",
    "try:\n",
    "    cur = con.cursor()\n",
    "    # By default, this reads from the latest table, but this can be modified to a specific table name instead.\n",
    "    table_name = get_latest_table_name(cur)\n",
    "    df = pd.read_sql_query(f\"SELECT * from {table_name}\", con,\n",
    "                           dtype=COLUMN_DATATYPES)\n",
    "finally:\n",
    "    # Always release the connection, even if reading the table fails.\n",
    "    con.close()\n",
    "\n",
    "# This loads the text from each of the `COLUMN_JSONS` columns as a python dictionary\n",
    "for column in COLUMN_JSONS:\n",
    "    df[column] = df[column].apply(json_load_function)\n",
    "\n",
    "# This creates the source and target `took` fields by extracting the took value from the response bodies.\n",
    "df['source_took'] = df['source_response_body'].apply(get_took_value)\n",
    "df['target_took'] = df['target_response_body'].apply(get_took_value)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fc208dae",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c2d85a6e",
   "metadata": {},
   "source": [
    "## Latency\n",
    "\n",
    "Two views of latency are compared: the measured response latency and the `took` value taken from the response body."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e329d114",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary statistics for the target cluster only.\n",
    "print(\"Measured Latency\")\n",
    "print(df['target_response_latency'].describe())\n",
    "print()\n",
    "print(\"Took field\")\n",
    "print(df['target_took'].describe())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "179b1149",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Both panels share the same binning so the histograms are directly comparable.\n",
    "HIST_BINS = 60\n",
    "HIST_RANGE_MS = (0, 300)\n",
    "\n",
    "\n",
    "def plot_source_vs_target(ax, source, target, title):\n",
    "    \"\"\"Overlay source and target histograms on `ax` and mark each median.\n",
    "\n",
    "    ax:     matplotlib Axes to draw on.\n",
    "    source: Series of source-cluster values (ms).\n",
    "    target: Series of target-cluster values (ms).\n",
    "    title:  title for the panel.\n",
    "    \"\"\"\n",
    "    ax.hist([source, target], bins=HIST_BINS, range=HIST_RANGE_MS,\n",
    "            histtype='stepfilled', alpha=0.7, label=['Source', 'Target'])\n",
    "    # Different line styles keep the two medians distinguishable in the legend.\n",
    "    ax.axvline(source.median(), color='k', linestyle='dashed', linewidth=1, label='Source median')\n",
    "    ax.axvline(target.median(), color='k', linestyle='dotted', linewidth=1, label='Target median')\n",
    "    ax.set_title(title)\n",
    "    ax.set_xlabel(\"Latency (ms)\")\n",
    "    ax.set_ylabel(\"Count\")\n",
    "    ax.legend()\n",
    "\n",
    "\n",
    "fig, (ax_latency, ax_took) = plt.subplots(2, 1)\n",
    "plot_source_vs_target(ax_latency, df['source_response_latency'], df['target_response_latency'],\n",
    "                      \"Measured latency of source vs target cluster\")\n",
    "plot_source_vs_target(ax_took, df['source_took'], df['target_took'],\n",
    "                      'Reported latency (\"took\") of source vs target cluster')\n",
    "fig.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print(f\"Source median latency: {df['source_response_latency'].median():.2f} ms\")\n",
    "print(f\"Target median latency: {df['target_response_latency'].median():.2f} ms\")\n",
    "print(f\"Source median 'took': {df['source_took'].median():.2f} ms\")\n",
    "print(f\"Target median 'took': {df['target_took'].median():.2f} ms\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5751cd60",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Estimate the network latency as the mean of (measured response latency - took)\n",
    "print(\"Source cluster network latency\")\n",
    "print((df['source_response_latency'] - df['source_took']).mean())\n",
    "print()\n",
    "print(\"Target cluster network latency\")\n",
    "print((df['target_response_latency'] - df['target_took']).mean())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9036b7f",
   "metadata": {},
   "source": [
    "## Response agreement by request\n",
    "\n",
    "Requests are grouped by URI and method. The first table shows the mean of `responses_are_identical` for each group, formatted as a percentage; the second shows how many requests fall into each group."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "989dc230",
   "metadata": {},
   "outputs": [],
   "source": [
    "uri_groups = df.groupby(['request_uri', 'request_method'])\n",
    "uri_groups.aggregate(func={'responses_are_identical': lambda x: f\"{x.mean():.2%}\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b659080",
   "metadata": {},
   "outputs": [],
   "source": [
    "uri_groups.size()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "038afdf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rows whose request URI is exactly \"/_bulk\"\n",
    "bulk_df = df[df[\"request_uri\"] == \"/_bulk\"]\n",
    "bulk_df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0rc2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}