{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Module 4. Custom Metric 으로 성능 데이터 및 Cold Start 성능 체크 하기\n", "이번 모듈에서는 모듈2에서 테스트 용으로 분리했던 데이터를 가지고 Custom 지표를 통해 추가적인 성능을 평가해 보도록 합니다. \n", "또한 Coldstart 성능도 추가적으로 확인해 보도록 합니다." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: tqdm in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (4.42.1)\n", "\u001b[33mWARNING: You are using pip version 20.0.2; however, version 20.3.2 is available.\n", "You should consider upgrading via the '/home/ec2-user/anaconda3/envs/python3/bin/python -m pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "# Install into the kernel's own environment before anything imports tqdm.\n", "%pip install -q tqdm\n", "\n", "import io\n", "import json\n", "import os\n", "import time\n", "import uuid\n", "from datetime import datetime\n", "from random import randint\n", "\n", "import boto3\n", "import numpy as np\n", "import pandas as pd\n", "import scipy.sparse as ss\n", "from botocore.exceptions import ClientError\n", "# tqdm.tqdm_notebook is a deprecated alias (it emits TqdmDeprecationWarning);\n", "# import the notebook progress bar directly and keep the name the later cells call.\n", "from tqdm.notebook import tqdm as tqdm_notebook\n", "\n", "# Ranking-metric helpers used by the evaluation cells.\n", "from metrics import mean_reciprocal_rank, ndcg_at_k, precision_at_k" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%store -r" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Configure the SDK to Personalize:\n", "personalize = boto3.client('personalize')\n", "personalize_runtime = boto3.client('personalize-runtime')\n", "personalize_events = boto3.client('personalize-events')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df_all=pd.read_csv(interaction_all_file)\n", "df_train=pd.read_csv(interaction_train_file)\n", "df_test=pd.read_csv(interaction_test_file)\n", "\n", "item_all=pd.read_csv(item_file)\n", "item_cold=pd.read_csv(item_cold_file)\n", "item_warm=pd.read_csv(item_warm_file)" ] }, { "cell_type": "code", "execution_count": 5, 
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "unique_user_from_all dataset: 6023\n", "unique_user_from_train dataset: 4641\n", "unique_user_from_test dataset: 1983\n", "Existing users in test dataset: 601\n", "New users in test dataset: 1382\n", "User with cold item interactions in test dataset: 57\n", "Exisiting User with cold item interaction in test dataset: 12\n" ] } ], "source": [ "# Summarise how users split across the datasets and who interacted with cold items.\n", "unique_users = df_all['USER_ID'].unique()\n", "print(\"unique_user_from_all dataset:\", len(unique_users))\n", "\n", "unique_user_from_train = df_train['USER_ID'].unique()\n", "print(\"unique_user_from_train dataset:\", len(unique_user_from_train))\n", "\n", "unique_user_from_test = df_test['USER_ID'].unique()\n", "print(\"unique_user_from_test dataset:\", len(unique_user_from_test))\n", "\n", "# True for test interactions whose user also appears in the training split.\n", "test_user_seen_in_train = df_test['USER_ID'].isin(unique_user_from_train)\n", "\n", "old_user_from_test = df_test.loc[test_user_seen_in_train, 'USER_ID'].unique()\n", "print(\"Existing users in test dataset:\",len(old_user_from_test))\n", "\n", "# `~` is the boolean NOT for a mask; unary `-` is arithmetic negation and NumPy\n", "# rejects it for boolean arrays.\n", "new_user_from_test = df_test.loc[~test_user_seen_in_train, 'USER_ID'].unique()\n", "print(\"New users in test dataset:\",len(new_user_from_test))\n", "\n", "cold_item_interaction = df_test[df_test['ITEM_ID'].isin(item_cold['ITEM_ID'].unique())]\n", "cold_interaction_user = cold_item_interaction['USER_ID'].unique()\n", "print(\"User with cold item interactions in test dataset:\",len(cold_interaction_user))\n", "\n", "# Users already present in training who also interacted with a cold item in the test split.\n", "cold_user_seen_in_train = cold_item_interaction['USER_ID'].isin(old_user_from_test)\n", "old_user_from_test_with_cold = cold_item_interaction.loc[cold_user_seen_in_train, 'USER_ID'].unique()\n", "print(\"Exisiting User with cold item interaction in test dataset:\", len(old_user_from_test_with_cold))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "#### 조금더 상세하고 Custum 평가 지표를 얻기 위해서 이전에 분리해둔 테스트 데이터를 가지고 캠페인 생성 후 별도 테스트를 진행하도록 합니다." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/ipykernel/__main__.py:3: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", " app.launch_new_instance()\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7a1bc8915c4f4c059519d09bc553b650", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=1983.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Evaluate the campaign for every user in the test interaction set.\n", "# Held-out items are grouped once up front instead of re-filtering df_test per user.\n", "true_items_by_user = df_test.groupby('USER_ID')['ITEM_ID'].apply(set)\n", "\n", "relevance = []\n", "counts = []  # stays empty in this cell; build_metric_matrix reads the global `counts`\n", "for user_id in tqdm_notebook(unique_user_from_test):\n", "    true_items = true_items_by_user.get(user_id, set())\n", "\n", "    rec_response = personalize_runtime.get_recommendations(\n", "        campaignArn=user_personalization_campaign_arn,\n", "        userId=str(user_id),\n", "    )\n", "    rec_items = [int(rec['itemId']) for rec in rec_response['itemList']]\n", "    # 1 where the recommended item is among the user's held-out interactions, else 0.\n", "    relevance.append([int(item in true_items) for item in rec_items])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.3944173097544351\n", "precision_at_5 0.21321230458900658\n", "precision_at_10 0.20342914775592538\n", "precision_at_25 0.22642460917801316\n", "normalized_discounted_cumulative_gain_at_5 0.2598970789183912\n", "normalized_discounted_cumulative_gain_at_10 0.2904870526842617\n", "normalized_discounted_cumulative_gain_at_25 0.4707628802541746\n" ] } ], "source": [ "print('mean_reciprocal_rank', np.mean([mean_reciprocal_rank(r) for r in relevance]))\n", "print('precision_at_5', np.mean([precision_at_k(r, 5) for r in 
relevance]))\n", "print('precision_at_10', np.mean([precision_at_k(r, 10) for r in relevance]))\n", "print('precision_at_25', np.mean([precision_at_k(r, 25) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_5', np.mean([ndcg_at_k(r, 5) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_10', np.mean([ndcg_at_k(r, 10) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_25', np.mean([ndcg_at_k(r, 25) for r in relevance]))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.2426\n", "precision_at_5 0.0993\n", "precision_at_10 0.0916\n", "precision_at_25 0.0767\n", "normalized_discounted_cumulative_gain_at_5 0.1682\n", "normalized_discounted_cumulative_gain_at_10 0.2283\n", "normalized_discounted_cumulative_gain_at_25 0.3348\n" ] } ], "source": [ "# 퍼스널라이즈에서 제공하는 메트릭과 비슷한지 비교해 봅니다.\n", "\n", "get_solution_metrics_response = personalize.get_solution_metrics(\n", " solutionVersionArn = user_personalization_solution_version_arn\n", ")\n", "\n", "#print(json.dumps(get_solution_metrics_response, indent=2))\n", "\n", "print('mean_reciprocal_rank',get_solution_metrics_response[\"metrics\"][\"mean_reciprocal_rank_at_25\"], )\n", "print('precision_at_5', get_solution_metrics_response[\"metrics\"][\"precision_at_5\"])\n", "print('precision_at_10', get_solution_metrics_response[\"metrics\"][\"precision_at_10\"])\n", "print('precision_at_25', get_solution_metrics_response[\"metrics\"][\"precision_at_25\"])\n", "print('normalized_discounted_cumulative_gain_at_5', get_solution_metrics_response[\"metrics\"][\"normalized_discounted_cumulative_gain_at_5\"])\n", "print('normalized_discounted_cumulative_gain_at_10', get_solution_metrics_response[\"metrics\"][\"normalized_discounted_cumulative_gain_at_10\"])\n", "print('normalized_discounted_cumulative_gain_at_25', 
get_solution_metrics_response[\"metrics\"][\"normalized_discounted_cumulative_gain_at_25\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 이벤트 트랙커 생성 \n", "\n", "아래 코드 셀은 특정 item과 상호 작용하는 사용자를 시뮬레이트하는 코드 입니다. 이벤트 트레커를 통해 실시간 스트림을 보내도록 하여 고객의 클릭정보에 따라 추천 항목이 변하는 것을 확인할 수 있습니다. " ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "arn:aws:personalize:ap-northeast-2:870180618679:dataset-group/20201215-dataset-group\n", "user-personalization-event-tracker-20201215\n" ] } ], "source": [ "event_tracker_name=\"user-personalization-event-tracker-\"+WORK_DATE\n", "print(dataset_group_arn)\n", "print(event_tracker_name)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "eventTrackerArn:arn:aws:personalize:ap-northeast-2:870180618679:event-tracker/81e1bae6,\n", " eventTrackingId:401c2330-c611-4fb6-bfdf-2858747ca001\n" ] } ], "source": [ "event_tracker_response = personalize.create_event_tracker( \n", " name=event_tracker_name,\n", " datasetGroupArn=dataset_group_arn\n", ")\n", "event_tracker_arn = event_tracker_response['eventTrackerArn']\n", "event_tracking_id = event_tracker_response['trackingId']\n", "\n", "print('eventTrackerArn:{},\\n eventTrackingId:{}'.format(event_tracker_arn, event_tracking_id))\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 사용자 행동 시뮬레이션\n", "\n", "아래 코드 셀은 특정 item과 상호 작용하는 사용자를 시뮬레이트하는 코드 샘플을 제공하며, 시작할 때와 다른 추천 목록을 얻습니다." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def get_movie_title(movie_id):\n", "    \"\"\"Look up the title(s) stored for an item ID.\n", "\n", "    Args:\n", "        movie_id: Item identifier; any value accepted by int().\n", "\n", "    Returns:\n", "        list: TITLE values of the item_all rows whose ITEM_ID equals the ID\n", "        (empty when no row matches).\n", "    \"\"\"\n", "    movie_id = int(movie_id)\n", "    movie_title = item_all[item_all['ITEM_ID'] == movie_id]['TITLE']\n", "    return movie_title.tolist()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "session_dict = {}" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "def send_movie_click(USER_ID, ITEM_ID):\n", "    \"\"\"Send one simulated interaction to the Amazon Personalize event tracker.\n", "\n", "    Reuses the session ID cached in `session_dict` for the user, creating one on\n", "    first use, and attaches a random integer between 0 and 5 as eventValue.\n", "\n", "    Args:\n", "        USER_ID: User identifier, passed to put_events unchanged.\n", "        ITEM_ID: Item identifier; converted with str() for the event properties.\n", "    \"\"\"\n", "    # Create a session only when the user has none yet. An explicit membership\n", "    # test is used so that unrelated errors are not silently swallowed.\n", "    if USER_ID not in session_dict:\n", "        session_dict[USER_ID] = str(uuid.uuid1())\n", "    session_ID = session_dict[USER_ID]\n", "\n", "    value = randint(0, 5)\n", "\n", "    # Event properties are sent as a JSON-encoded string.\n", "    event = {\n", "        \"itemId\": str(ITEM_ID),\n", "        \"eventValue\": value\n", "    }\n", "    event_json = json.dumps(event)\n", "\n", "    # NOTE(review): the bulk cold-item cell later in this notebook sends eventType\n", "    # 'RATINGS'; confirm which spelling matches EVENT_TYPE in the interactions dataset.\n", "    personalize_events.put_events(\n", "        trackingId=event_tracking_id,  # tracking ID returned by create_event_tracker\n", "        userId=USER_ID,\n", "        sessionId=session_ID,\n", "        eventList=[{\n", "            'sentAt': int(time.time()),\n", "            'eventType': 'RATING',\n", "            'properties': event_json\n", "        }]\n", "    )" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OriginalRecs
0Braveheart (1995)
1Star Wars: Episode VI - Return of the Jedi (1983)
2Forrest Gump (1994)
3One Flew Over the Cuckoo's Nest (1975)
4Star Wars: Episode V - The Empire Strikes Back...
5Star Wars: Episode I - The Phantom Menace (1999)
6Star Wars: Episode IV - A New Hope (1977)
7Schindler's List (1993)
8Titanic (1997)
9Silence of the Lambs, The (1991)
10Godfather, The (1972)
11Rain Man (1988)
12Saving Private Ryan (1998)
13Raiders of the Lost Ark (1981)
14Jurassic Park (1993)
15Godfather: Part III, The (1990)
16Rocky II (1979)
17L.A. Confidential (1997)
18Casablanca (1942)
19Princess Bride, The (1987)
20Robin Hood: Prince of Thieves (1991)
21Gone with the Wind (1939)
22Back to the Future (1985)
23Lethal Weapon 4 (1998)
24Last Action Hero (1993)
\n", "
" ], "text/plain": [ " OriginalRecs\n", "0 Braveheart (1995)\n", "1 Star Wars: Episode VI - Return of the Jedi (1983)\n", "2 Forrest Gump (1994)\n", "3 One Flew Over the Cuckoo's Nest (1975)\n", "4 Star Wars: Episode V - The Empire Strikes Back...\n", "5 Star Wars: Episode I - The Phantom Menace (1999)\n", "6 Star Wars: Episode IV - A New Hope (1977)\n", "7 Schindler's List (1993)\n", "8 Titanic (1997)\n", "9 Silence of the Lambs, The (1991)\n", "10 Godfather, The (1972)\n", "11 Rain Man (1988)\n", "12 Saving Private Ryan (1998)\n", "13 Raiders of the Lost Ark (1981)\n", "14 Jurassic Park (1993)\n", "15 Godfather: Part III, The (1990)\n", "16 Rocky II (1979)\n", "17 L.A. Confidential (1997)\n", "18 Casablanca (1942)\n", "19 Princess Bride, The (1987)\n", "20 Robin Hood: Prince of Thieves (1991)\n", "21 Gone with the Wind (1939)\n", "22 Back to the Future (1985)\n", "23 Lethal Weapon 4 (1998)\n", "24 Last Action Hero (1993)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Baseline recommendations for one existing user, before any simulated click.\n", "user_id = old_user_from_test[0]\n", "\n", "get_recommendations_response = personalize_runtime.get_recommendations(\n", "    campaignArn=user_personalization_campaign_arn,\n", "    userId=str(user_id),\n", ")\n", "item_list = get_recommendations_response['itemList']\n", "\n", "# get_movie_title returns a list per item, so each entry becomes one DataFrame row.\n", "recommendation_id_list = [item['itemId'] for item in item_list]\n", "recommendation_title_list = [get_movie_title(item_id) for item_id in recommendation_id_list]\n", "\n", "recommendations_df = pd.DataFrame(recommendation_title_list, columns=['OriginalRecs'])\n", "recommendations_df" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Simulate a click on the item stored at label 0 of item_warm and keep its title\n", "# for labelling the comparison column.\n", "movie_to_click = item_warm['ITEM_ID'][0]\n", "movie_title_clicked = get_movie_title(movie_to_click)\n", "send_movie_click(USER_ID=str(user_id), ITEM_ID=movie_to_click)" 
] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Recommendations for user: 1389\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OriginalRecs(Toy Story (1995),)
0Braveheart (1995)Braveheart (1995)
1Star Wars: Episode VI - Return of the Jedi (1983)Star Wars: Episode VI - Return of the Jedi (1983)
2Forrest Gump (1994)Forrest Gump (1994)
3One Flew Over the Cuckoo's Nest (1975)One Flew Over the Cuckoo's Nest (1975)
4Star Wars: Episode V - The Empire Strikes Back...Star Wars: Episode V - The Empire Strikes Back...
5Star Wars: Episode I - The Phantom Menace (1999)Star Wars: Episode I - The Phantom Menace (1999)
6Star Wars: Episode IV - A New Hope (1977)Star Wars: Episode IV - A New Hope (1977)
7Schindler's List (1993)Schindler's List (1993)
8Titanic (1997)Titanic (1997)
9Silence of the Lambs, The (1991)Silence of the Lambs, The (1991)
10Godfather, The (1972)Godfather, The (1972)
11Rain Man (1988)Rain Man (1988)
12Saving Private Ryan (1998)Saving Private Ryan (1998)
13Raiders of the Lost Ark (1981)Raiders of the Lost Ark (1981)
14Jurassic Park (1993)Jurassic Park (1993)
15Godfather: Part III, The (1990)Godfather: Part III, The (1990)
16Rocky II (1979)Rocky II (1979)
17L.A. Confidential (1997)L.A. Confidential (1997)
18Casablanca (1942)Casablanca (1942)
19Princess Bride, The (1987)Princess Bride, The (1987)
20Robin Hood: Prince of Thieves (1991)Robin Hood: Prince of Thieves (1991)
21Gone with the Wind (1939)Gone with the Wind (1939)
22Back to the Future (1985)Back to the Future (1985)
23Lethal Weapon 4 (1998)Lethal Weapon 4 (1998)
24Last Action Hero (1993)Last Action Hero (1993)
\n", "
" ], "text/plain": [ " OriginalRecs \\\n", "0 Braveheart (1995) \n", "1 Star Wars: Episode VI - Return of the Jedi (1983) \n", "2 Forrest Gump (1994) \n", "3 One Flew Over the Cuckoo's Nest (1975) \n", "4 Star Wars: Episode V - The Empire Strikes Back... \n", "5 Star Wars: Episode I - The Phantom Menace (1999) \n", "6 Star Wars: Episode IV - A New Hope (1977) \n", "7 Schindler's List (1993) \n", "8 Titanic (1997) \n", "9 Silence of the Lambs, The (1991) \n", "10 Godfather, The (1972) \n", "11 Rain Man (1988) \n", "12 Saving Private Ryan (1998) \n", "13 Raiders of the Lost Ark (1981) \n", "14 Jurassic Park (1993) \n", "15 Godfather: Part III, The (1990) \n", "16 Rocky II (1979) \n", "17 L.A. Confidential (1997) \n", "18 Casablanca (1942) \n", "19 Princess Bride, The (1987) \n", "20 Robin Hood: Prince of Thieves (1991) \n", "21 Gone with the Wind (1939) \n", "22 Back to the Future (1985) \n", "23 Lethal Weapon 4 (1998) \n", "24 Last Action Hero (1993) \n", "\n", " (Toy Story (1995),) \n", "0 Braveheart (1995) \n", "1 Star Wars: Episode VI - Return of the Jedi (1983) \n", "2 Forrest Gump (1994) \n", "3 One Flew Over the Cuckoo's Nest (1975) \n", "4 Star Wars: Episode V - The Empire Strikes Back... \n", "5 Star Wars: Episode I - The Phantom Menace (1999) \n", "6 Star Wars: Episode IV - A New Hope (1977) \n", "7 Schindler's List (1993) \n", "8 Titanic (1997) \n", "9 Silence of the Lambs, The (1991) \n", "10 Godfather, The (1972) \n", "11 Rain Man (1988) \n", "12 Saving Private Ryan (1998) \n", "13 Raiders of the Lost Ark (1981) \n", "14 Jurassic Park (1993) \n", "15 Godfather: Part III, The (1990) \n", "16 Rocky II (1979) \n", "17 L.A. 
Confidential (1997) \n", "18 Casablanca (1942) \n", "19 Princess Bride, The (1987) \n", "20 Robin Hood: Prince of Thieves (1991) \n", "21 Gone with the Wind (1939) \n", "22 Back to the Future (1985) \n", "23 Lethal Weapon 4 (1998) \n", "24 Last Action Hero (1993) " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fetch the recommendations again after the simulated click.\n", "get_recommendations_response = personalize_runtime.get_recommendations(\n", "    campaignArn=user_personalization_campaign_arn,\n", "    userId=str(user_id),\n", ")\n", "\n", "print(\"Recommendations for user: \", user_id)\n", "\n", "item_list = get_recommendations_response['itemList']\n", "recommendation_list = [get_movie_title(item['itemId']) for item in item_list]\n", "\n", "# get_movie_title returns a list; a list used as a column label renders as a\n", "# tuple-like header such as `(Toy Story (1995),)`. Label the column with the plain title.\n", "if isinstance(movie_title_clicked, (list, tuple)):\n", "    clicked_title = movie_title_clicked[0] if movie_title_clicked else str(movie_to_click)\n", "else:\n", "    clicked_title = movie_title_clicked\n", "new_rec_DF = pd.DataFrame(recommendation_list, columns=[clicked_title])\n", "\n", "# Join onto the baseline column only, so re-running this cell does not fail on\n", "# overlapping column names from a previous run.\n", "recommendations_df = recommendations_df[['OriginalRecs']].join(new_rec_DF)\n", "recommendations_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cold Item interaction 내보내기 \n", "\n", "이번 실험에서는 콜드 스타트 아이템의 인터렉션을 발생하도록 합니다.\n", "\n", "User-Personalization 레시피를 통해 솔루션을 생성했을경우 새로운 인터렉션이 있을때마다 아마존 퍼스널라이즈는 매 2시간 마다 솔루션 버전을 자동으로 업데이트 합니다. 이때 새로운 아이템이 포함 되어 있다면 설정해 놓은 Exploration Weight 및 Exploration item age cut off 값에 따라 콜드 아이템도 추천하게 됩니다. 자세한 내용은 [여기](https://docs.aws.amazon.com/personalize/latest/dg/recording-events.html)를 참고하세요. \n", "\n", "이 번 실습에서는 시간상 강제로 신규 아이템 인터렉션을 발생한 뒤 매뉴얼로 솔루션 버전 및 캠페인을 업데이트 하도록 합니다. " ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ITEM_IDTITLEGENRE
0136From the Journals of Jean Seberg (1995)Documentary
1138Neon Bible, The (1995)Drama
2192Show, The (1995)Documentary
3402Open Season (1996)Comedy
4576Fausto (1993)Comedy
............
563542Coming Apart (1969)Drama
573647Running Free (2000)Drama
583772Hatchet For the Honeymoon (Rosso Segno Della F...Horror
593800Criminal Lovers (Les Amants Criminels) (1999)Drama|Romance
603892Anatomy (Anatomie) (2000)Horror
\n", "

61 rows × 3 columns

\n", "
" ], "text/plain": [ " ITEM_ID TITLE GENRE\n", "0 136 From the Journals of Jean Seberg (1995) Documentary\n", "1 138 Neon Bible, The (1995) Drama\n", "2 192 Show, The (1995) Documentary\n", "3 402 Open Season (1996) Comedy\n", "4 576 Fausto (1993) Comedy\n", ".. ... ... ...\n", "56 3542 Coming Apart (1969) Drama\n", "57 3647 Running Free (2000) Drama\n", "58 3772 Hatchet For the Honeymoon (Rosso Segno Della F... Horror\n", "59 3800 Criminal Lovers (Les Amants Criminels) (1999) Drama|Romance\n", "60 3892 Anatomy (Anatomie) (2000) Horror\n", "\n", "[61 rows x 3 columns]" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build a string-ID copy of the cold catalogue. item_cold itself is left untouched,\n", "# so its ITEM_ID column keeps the dtype it was loaded with.\n", "new_item_list = item_cold.assign(ITEM_ID=item_cold['ITEM_ID'].astype(str))\n", "new_item_list" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "# Pick one existing user who interacted with a cold item in the test split.\n", "user_id = old_user_from_test_with_cold[2]\n" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "# Send one simulated event per cold item for the selected user.\n", "for i, cold_item_id in enumerate(new_item_list['ITEM_ID']):\n", "    value = randint(0, 5)\n", "    # Event properties are sent as a JSON-encoded string.\n", "    event = {\n", "        \"itemId\": str(cold_item_id),\n", "        \"eventValue\": value\n", "    }\n", "    event_json = json.dumps(event)\n", "\n", "    # NOTE(review): send_movie_click uses eventType 'RATING' while this cell sends\n", "    # 'RATINGS'; confirm which spelling matches EVENT_TYPE in the interactions dataset.\n", "    personalize_events.put_events(\n", "        trackingId=event_tracking_id,\n", "        userId=str(user_id),\n", "        sessionId=str(i),  # every event goes out under its own session ID\n", "        eventList=[{\n", "            'sentAt': datetime.now().timestamp(),\n", "            'eventType' : 'RATINGS',\n", "            'properties' : event_json\n", "        }])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 솔루션 버전 업데이트 및 캠페인 업데이트 \n", "\n", "#### 이벤트 트레커 정보가 데이터 쌓일때가지 충분한 시간을 주기 위해 약 5분정도 후에 아래 셀을 시작합니다." 
] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"solutionVersionArn\": \"arn:aws:personalize:ap-northeast-2:870180618679:solution/user-personalization-20201215/eafc56a3\",\n", " \"ResponseMetadata\": {\n", " \"RequestId\": \"41deaf46-ffd0-43b9-8ff6-1c86d4c552ef\",\n", " \"HTTPStatusCode\": 200,\n", " \"HTTPHeaders\": {\n", " \"content-type\": \"application/x-amz-json-1.1\",\n", " \"date\": \"Tue, 15 Dec 2020 13:48:10 GMT\",\n", " \"x-amzn-requestid\": \"41deaf46-ffd0-43b9-8ff6-1c86d4c552ef\",\n", " \"content-length\": \"120\",\n", " \"connection\": \"keep-alive\"\n", " },\n", " \"RetryAttempts\": 0\n", " }\n", "}\n" ] } ], "source": [ "## 새로운 인터렉션을 포함한 새로운 솔루션 버전 생성 \n", "create_solution_version_response = personalize.create_solution_version(\n", " solutionArn = user_personalization_solution_arn,\n", " trainingMode='UPDATE'\n", ")\n", "\n", "user_personalization_solution_version_arn_new = create_solution_version_response['solutionVersionArn']\n", "print(json.dumps(create_solution_version_response, indent=2))" ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "User-Personalization SolutionVersion: ACTIVE\n", "All solution creation completed\n", "CPU times: user 16.3 ms, sys: 2 µs, total: 16.3 ms\n", "Wall time: 1.06 s\n" ] } ], "source": [ "%%time\n", "\n", "max_time = time.time() + 8*60*60 # 8 hours\n", "while time.time() < max_time:\n", " \n", " #hrnn status\n", " describe_solution_version_response = personalize.describe_solution_version(\n", " solutionVersionArn = user_personalization_solution_version_arn_new\n", " ) \n", " status= describe_solution_version_response[\"solutionVersion\"][\"status\"]\n", " print(\"User-Personalization SolutionVersion: {}\".format(status))\n", " \n", " if (status== \"ACTIVE\" or status == \"CREATE FAILED\"):\n", " break\n", " \n", " time.sleep(300)\n", 
"\n", "print(\"All solution creation completed\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "방금 업데이트된 새로운 솔루션 버전으로 캠페인을 업데이트 합니다. 이 때 explorationWeight를 1로 변경하여 최대한 콜드 아이템에서만 추천 될수 있도록 합니다. " ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"campaignArn\": \"arn:aws:personalize:ap-northeast-2:870180618679:campaign/user-personalization-campaign-20201215\",\n", " \"ResponseMetadata\": {\n", " \"RequestId\": \"24c10ccb-d229-450f-8b31-70d8c883e55e\",\n", " \"HTTPStatusCode\": 200,\n", " \"HTTPHeaders\": {\n", " \"content-type\": \"application/x-amz-json-1.1\",\n", " \"date\": \"Tue, 15 Dec 2020 14:38:04 GMT\",\n", " \"x-amzn-requestid\": \"24c10ccb-d229-450f-8b31-70d8c883e55e\",\n", " \"content-length\": \"113\",\n", " \"connection\": \"keep-alive\"\n", " },\n", " \"RetryAttempts\": 0\n", " }\n", "}\n" ] } ], "source": [ "update_campaing_response = personalize.update_campaign(\n", " campaignArn=user_personalization_campaign_arn,\n", " solutionVersionArn=user_personalization_solution_version_arn_new,\n", " minProvisionedTPS=1,\n", " campaignConfig = {\"itemExplorationConfig\": {\"explorationWeight\": \"1\", \"explorationItemAgeCutOff\": \"7\"}}\n", " \n", ")\n", "\n", "user_personalization_campaign_arn = update_campaing_response['campaignArn']\n", "\n", "print(json.dumps(update_campaing_response, indent=2))" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Current Campaign solution version arn:aws:personalize:ap-northeast-2:870180618679:solution/user-personalization-20201215/eafc56a3\n" ] } ], "source": [ "# Wait for campaign update to reflect the new explorationWeight\n", "explorationWeight = None\n", "max_time = time.time() + 3*60*60 # 3 hours\n", "while time.time() < max_time:\n", " describe_campaign_response = personalize.describe_campaign(\n", " campaignArn = 
user_personalization_campaign_arn\n", " )\n", " solution_version = describe_campaign_response[\"campaign\"][\"solutionVersionArn\"]\n", " \n", "\n", " if (solution_version == user_personalization_solution_version_arn_new):\n", " print(\"Current Campaign solution version {}\".format(solution_version))\n", " break\n", " \n", " time.sleep(60)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cold Start Exploration Weight에 따른 테스트 \n", "\n", "이부분에서는 새롭게 더해진 새로운 아이템(ColdStart)에 대한 추천 성능을 테스트 해보도록 합니다. 또한 Exploration Weight를 변경하면서 추천 아이템 중에 얼마 만큼의 콜드 아이템이 포함 되었는지 확인해 보도록 합니다.\n" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "metrics=[]\n", "\n", "def build_metric_matrix(solution, relevance):\n", "    \"\"\"Append one summary row for a solution to the global `metrics` list.\n", "\n", "    Args:\n", "        solution: Label stored in the first column of the row.\n", "        relevance: Iterable of per-user relevance lists, as built by the evaluation cells.\n", "\n", "    Row layout: [solution, MRR, precision@5, precision@10, precision@25,\n", "    NDCG@5, NDCG@10, NDCG@25, mean of the global `counts`].\n", "    \"\"\"\n", "    def mean_over_users(metric, *args):\n", "        # Average one ranking metric across all users.\n", "        return np.mean([metric(r, *args) for r in relevance])\n", "\n", "    metrics.append([solution,\n", "                    mean_over_users(mean_reciprocal_rank),\n", "                    mean_over_users(precision_at_k, 5),\n", "                    mean_over_users(precision_at_k, 10),\n", "                    # cutoff 25, matching the precision_at_25 value printed by the evaluation cells\n", "                    mean_over_users(precision_at_k, 25),\n", "                    mean_over_users(ndcg_at_k, 5),\n", "                    mean_over_users(ndcg_at_k, 10),\n", "                    mean_over_users(ndcg_at_k, 25),\n", "                    # `counts` is the notebook-level list filled by the evaluation cells\n", "                    np.mean(counts)])\n", "\n", "def is_cold_item(rec_items):\n", "    \"\"\"Return the recommended item IDs (as ints, in order) that appear in item_cold.\"\"\"\n", "    # Build the lookup once per call instead of once per recommended item.\n", "    cold_ids = set(item_cold['ITEM_ID'].astype('int64'))\n", "    return [int(item) for item in rec_items if int(item) in cold_ids]\n" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/ipykernel/__main__.py:4: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "46b968dc27574dbe8e7824da0a5c4a3f", "version_major": 2, "version_minor": 0 }, "text/plain": [ 
"HBox(children=(FloatProgress(value=0.0, max=601.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Re-evaluate existing users and count how many recommended items are cold.\n", "# Held-out items are grouped once instead of re-filtering df_test for every user.\n", "true_items_by_user = df_test.groupby('USER_ID')['ITEM_ID'].apply(set)\n", "\n", "relevance = []\n", "is_cold_item_list = []\n", "counts = []\n", "for user_id in tqdm_notebook(old_user_from_test):\n", "    true_items = true_items_by_user.get(user_id, set())\n", "\n", "    rec_response = personalize_runtime.get_recommendations(\n", "        campaignArn=user_personalization_campaign_arn,\n", "        userId=str(user_id),\n", "    )\n", "    rec_items = [int(rec['itemId']) for rec in rec_response['itemList']]\n", "\n", "    cold_recs = is_cold_item(rec_items)\n", "    counts.append(len(cold_recs))\n", "    is_cold_item_list.append(cold_recs)\n", "    # 1 where the recommended item is among the user's held-out interactions, else 0.\n", "    relevance.append([int(item in true_items) for item in rec_items])\n" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.09362662725586533\n", "precision_at_5 0.03860232945091514\n", "precision_at_10 0.03860232945091514\n", "precision_at_25 0.04492512479201331\n", "normalized_discounted_cumulative_gain_at_5 0.05720520806231752\n", "normalized_discounted_cumulative_gain_at_10 0.07842399863787089\n", "normalized_discounted_cumulative_gain_at_25 0.15459205991887345\n", "average number of cold items 10.84026622296173\n" ] } ], "source": [ "print('mean_reciprocal_rank', np.mean([mean_reciprocal_rank(r) for r in relevance]))\n", "print('precision_at_5', np.mean([precision_at_k(r, 5) for r in relevance]))\n", "print('precision_at_10', np.mean([precision_at_k(r, 10) for r in relevance]))\n", "print('precision_at_25', np.mean([precision_at_k(r, 25) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_5', np.mean([ndcg_at_k(r, 5) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_10', np.mean([ndcg_at_k(r, 10) for r in relevance]))\n", 
"print('normalized_discounted_cumulative_gain_at_25', np.mean([ndcg_at_k(r, 25) for r in relevance]))\n", "print('average number of cold items', np.mean(counts))\n", "build_metric_matrix('user-personalization-coldstart-meta-update-100%',relevance)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "랜덤으로 추천하였을 경우 대비 Coldstart 성능이 얼마나 좋은 것인지 비교하여 보도록 합니다. " ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/ipykernel/__main__.py:2: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n", " from ipykernel import kernelapp as app\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6fc0ad383e624a28bf2f81f41cffcb84", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=601.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Random baseline: recommend 25 cold items per user from a seeded generator,\n", "# so the baseline row is reproducible across runs.\n", "RANDOM_BASELINE_SEED = 42\n", "baseline_rng = np.random.RandomState(RANDOM_BASELINE_SEED)\n", "\n", "# The pool of cold item IDs does not change per user; compute it once.\n", "cold_item_ids = item_cold['ITEM_ID'].astype('int64').unique()\n", "\n", "relevance = []\n", "for user_id in tqdm_notebook(old_user_from_test):\n", "    true_items = set(df_test[df_test['USER_ID']==user_id]['ITEM_ID'].values)\n", "    rec_items = baseline_rng.permutation(cold_item_ids)[:25]\n", "    relevance.append([int(x in true_items) for x in rec_items])\n", "\n", "# Every baseline recommendation is drawn from the cold items, so the reported\n", "# cold-item count is fixed at 25.\n", "counts=[25]" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.0013430897998285684\n", "precision_at_5 0.00033277870216306157\n", "precision_at_10 0.00033277870216306157\n", "precision_at_25 0.0006655574043261231\n", "normalized_discounted_cumulative_gain_at_5 0.0010497999227478494\n", "normalized_discounted_cumulative_gain_at_10 0.0016934817983460884\n", "normalized_discounted_cumulative_gain_at_25 
0.0038242318531639917\n", "average number of cold items 25.0\n" ] } ], "source": [ "# (label, metric function, extra positional args), listed in print order.\n", "metric_report = [\n", "    ('mean_reciprocal_rank', mean_reciprocal_rank, ()),\n", "    ('precision_at_5', precision_at_k, (5,)),\n", "    ('precision_at_10', precision_at_k, (10,)),\n", "    ('precision_at_25', precision_at_k, (25,)),\n", "    ('normalized_discounted_cumulative_gain_at_5', ndcg_at_k, (5,)),\n", "    ('normalized_discounted_cumulative_gain_at_10', ndcg_at_k, (10,)),\n", "    ('normalized_discounted_cumulative_gain_at_25', ndcg_at_k, (25,)),\n", "]\n", "# Each metric is evaluated per user on its relevance vector, then averaged over users.\n", "for label, metric_fn, extra_args in metric_report:\n", "    print(label, np.mean([metric_fn(r, *extra_args) for r in relevance]))\n", "print('average number of cold items', np.mean(counts))\n", "# build_metric_matrix is defined outside this cell; presumably it records this run\n", "# under the given recipe label for the summary table -- verify against its definition.\n", "build_metric_matrix('random',relevance)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Update Campaign with 30% Exploration" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"campaignArn\": \"arn:aws:personalize:ap-northeast-2:870180618679:campaign/user-personalization-campaign-20201215\",\n", " \"ResponseMetadata\": {\n", " \"RequestId\": \"fa1d3f83-a515-433a-92ab-2e8441b9282d\",\n", " \"HTTPStatusCode\": 200,\n", " \"HTTPHeaders\": {\n", " \"content-type\": \"application/x-amz-json-1.1\",\n", " \"date\": \"Tue, 15 Dec 2020 15:16:45 GMT\",\n", " \"x-amzn-requestid\": \"fa1d3f83-a515-433a-92ab-2e8441b9282d\",\n", " \"content-length\": \"113\",\n", " \"connection\": \"keep-alive\"\n", " },\n", " \"RetryAttempts\": 0\n", " }\n", "}\n" ] } ], "source": [ "\n", "update_campaing_response = personalize.update_campaign(\n", " campaignArn=user_personalization_campaign_arn,\n", " solutionVersionArn=user_personalization_solution_version_arn_new,\n", " minProvisionedTPS=1,\n", " campaignConfig = {\"itemExplorationConfig\": {\"explorationWeight\": \"0.3\",
\"explorationItemAgeCutOff\": \"7\"}}\n", " \n", ")\n", "\n", "user_personalization_campaign_arn = update_campaing_response['campaignArn']\n", "\n", "print(json.dumps(update_campaing_response, indent=2))\n" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Current Campaign explorationWeight: 1\n", "Current Campaign explorationWeight: 0.3\n" ] } ], "source": [ "# Wait for campaign update to reflect the new explorationWeight\n", "explorationWeight = None\n", "max_time = time.time() + 3*60*60 # 3 hours\n", "while time.time() < max_time:\n", " describe_campaign_response = personalize.describe_campaign(\n", " campaignArn = user_personalization_campaign_arn\n", " )\n", " explorationWeight = describe_campaign_response[\"campaign\"][\"campaignConfig\"]['itemExplorationConfig']['explorationWeight']\n", " print(\"Current Campaign explorationWeight: {}\".format(explorationWeight))\n", " \n", " if explorationWeight == \"0.3\":\n", " break\n", " \n", " time.sleep(60)\n", "\n", "\n", "# wait 1 minutes\n", "time.sleep(60)" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/ipykernel/__main__.py:4: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f7e5421757784720aa246fd07318c9ea", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=601.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "relevance = []\n", "is_cold_item_list=[]\n", "counts=[]\n", "for user_id in tqdm_notebook(old_user_from_test):\n", " true_items = 
set(df_test[df_test['USER_ID']==user_id]['ITEM_ID'].values)\n", " #print(true_items)\n", " \n", " rec_response = personalize_runtime.get_recommendations(\n", " campaignArn =user_personalization_campaign_arn ,\n", " userId = str(user_id)\n", " )\n", " rec_items = [int(x['itemId']) for x in rec_response['itemList']]\n", " arr=is_cold_item(rec_items) \n", " counts.append(len(arr))\n", " is_cold_item_list.append(arr)\n", " relevance.append([int(x in true_items) for x in rec_items])" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.09029524209663584\n", "precision_at_5 0.03760399334442596\n", "precision_at_10 0.036272878535773705\n", "precision_at_25 0.0443261231281198\n", "normalized_discounted_cumulative_gain_at_5 0.05443476885955114\n", "normalized_discounted_cumulative_gain_at_10 0.07391097882485473\n", "normalized_discounted_cumulative_gain_at_25 0.15053583759154687\n", "average number of cold items 11.118136439267888\n" ] } ], "source": [ "# (label, metric function, extra positional args), listed in print order.\n", "metric_report = [\n", "    ('mean_reciprocal_rank', mean_reciprocal_rank, ()),\n", "    ('precision_at_5', precision_at_k, (5,)),\n", "    ('precision_at_10', precision_at_k, (10,)),\n", "    ('precision_at_25', precision_at_k, (25,)),\n", "    ('normalized_discounted_cumulative_gain_at_5', ndcg_at_k, (5,)),\n", "    ('normalized_discounted_cumulative_gain_at_10', ndcg_at_k, (10,)),\n", "    ('normalized_discounted_cumulative_gain_at_25', ndcg_at_k, (25,)),\n", "]\n", "# Each metric is evaluated per user on its relevance vector, then averaged over users.\n", "for label, metric_fn, extra_args in metric_report:\n", "    print(label, np.mean([metric_fn(r, *extra_args) for r in relevance]))\n", "print('average number of cold items', np.mean(counts))\n", "# build_metric_matrix is defined outside this cell; presumably it records this run\n", "# under the given recipe label for the summary table -- verify against its definition.\n", "build_metric_matrix('user-personalization-coldstart-30%',relevance)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Update Campaign with 0% Exploration" ] }, { "cell_type": "code", "execution_count": 96,
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"campaignArn\": \"arn:aws:personalize:ap-northeast-2:870180618679:campaign/user-personalization-campaign-20201215\",\n", " \"ResponseMetadata\": {\n", " \"RequestId\": \"89519f3f-f856-405e-ade4-18f73344eff9\",\n", " \"HTTPStatusCode\": 200,\n", " \"HTTPHeaders\": {\n", " \"content-type\": \"application/x-amz-json-1.1\",\n", " \"date\": \"Tue, 15 Dec 2020 15:19:19 GMT\",\n", " \"x-amzn-requestid\": \"89519f3f-f856-405e-ade4-18f73344eff9\",\n", " \"content-length\": \"113\",\n", " \"connection\": \"keep-alive\"\n", " },\n", " \"RetryAttempts\": 0\n", " }\n", "}\n" ] } ], "source": [ "\n", "update_campaing_response = personalize.update_campaign(\n", " campaignArn=user_personalization_campaign_arn,\n", " solutionVersionArn=user_personalization_solution_version_arn_new,\n", " minProvisionedTPS=1,\n", " campaignConfig = {\"itemExplorationConfig\": {\"explorationWeight\": \"0\", \"explorationItemAgeCutOff\": \"7\"}}\n", " \n", ")\n", "\n", "user_personalization_campaign_arn = update_campaing_response['campaignArn']\n", "\n", "print(json.dumps(update_campaing_response, indent=2))" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Current Campaign explorationWeight: 0.3\n", "Current Campaign explorationWeight: 0\n" ] } ], "source": [ "# Wait for campaign update to reflect the new explorationWeight\n", "explorationWeight = None\n", "max_time = time.time() + 3*60*60 # 3 hours\n", "while time.time() < max_time:\n", " describe_campaign_response = personalize.describe_campaign(\n", " campaignArn = user_personalization_campaign_arn\n", " )\n", " explorationWeight = describe_campaign_response[\"campaign\"][\"campaignConfig\"]['itemExplorationConfig']['explorationWeight']\n", " print(\"Current Campaign explorationWeight: {}\".format(explorationWeight))\n", " \n", " if explorationWeight == \"0\":\n", " 
break\n", " \n", " time.sleep(60)\n", "\n", "# wait 1 minutes\n", "time.sleep(60)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/ipykernel/__main__.py:4: TqdmDeprecationWarning: This function will be removed in tqdm==5.0.0\n", "Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "37432775326a41978ad61d3c81e126c7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FloatProgress(value=0.0, max=601.0), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "relevance = []\n", "is_cold_item_list=[]\n", "counts=[]\n", "for user_id in tqdm_notebook(old_user_from_test):\n", " true_items = set(df_test[df_test['USER_ID']==user_id]['ITEM_ID'].values)\n", " #print(true_items)\n", " \n", " rec_response = personalize_runtime.get_recommendations(\n", " campaignArn =user_personalization_campaign_arn ,\n", " userId = str(user_id)\n", " )\n", " rec_items = [int(x['itemId']) for x in rec_response['itemList']]\n", " arr=is_cold_item(rec_items) \n", " counts.append(len(arr))\n", " is_cold_item_list.append(arr)\n", " relevance.append([int(x in true_items) for x in rec_items])" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "mean_reciprocal_rank 0.18231356702163648\n", "precision_at_5 0.08452579034941764\n", "precision_at_10 0.08069883527454243\n", "precision_at_25 0.0768053244592346\n", "normalized_discounted_cumulative_gain_at_5 0.1245711493553611\n", "normalized_discounted_cumulative_gain_at_10 0.15925591645458065\n", "normalized_discounted_cumulative_gain_at_25 0.2510513274961014\n", "average number of cold items 0.0\n" ] } ], "source": 
[ "print('mean_reciprocal_rank', np.mean([mean_reciprocal_rank(r) for r in relevance]))\n", "print('precision_at_5', np.mean([precision_at_k(r, 5) for r in relevance]))\n", "print('precision_at_10', np.mean([precision_at_k(r, 10) for r in relevance]))\n", "print('precision_at_25', np.mean([precision_at_k(r, 25) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_5', np.mean([ndcg_at_k(r, 5) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_10', np.mean([ndcg_at_k(r, 10) for r in relevance]))\n", "print('normalized_discounted_cumulative_gain_at_25', np.mean([ndcg_at_k(r, 25) for r in relevance]))\n", "print('average number of cold items', np.mean(counts))\n", "build_metric_matrix('user-personalization-coldstart-0%',relevance)" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
recipemrrp@5p@10p@25ndcg@5ndcg@10ndcg@25cold_item_count
0user-personalization-coldstart-meta-update-100%0.0936270.0386020.0386020.0386020.0572050.0784240.15459210.840266
1random0.0013430.0003330.0003330.0003330.0010500.0016930.00382425.000000
2user-personalization-coldstart-30%0.0902950.0376040.0362730.0362730.0544350.0739110.15053611.118136
3user-personalization-coldstart-0%0.1823140.0845260.0806990.0806990.1245710.1592560.2510510.000000
\n", "
" ], "text/plain": [ " recipe mrr p@5 \\\n", "0 user-personalization-coldstart-meta-update-100% 0.093627 0.038602 \n", "1 random 0.001343 0.000333 \n", "2 user-personalization-coldstart-30% 0.090295 0.037604 \n", "3 user-personalization-coldstart-0% 0.182314 0.084526 \n", "\n", " p@10 p@25 ndcg@5 ndcg@10 ndcg@25 cold_item_count \n", "0 0.038602 0.038602 0.057205 0.078424 0.154592 10.840266 \n", "1 0.000333 0.000333 0.001050 0.001693 0.003824 25.000000 \n", "2 0.036273 0.036273 0.054435 0.073911 0.150536 11.118136 \n", "3 0.080699 0.080699 0.124571 0.159256 0.251051 0.000000 " ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "df_metrics=pd.DataFrame(metrics,columns=['recipe','mrr','p@5','p@10','p@25','ndcg@5','ndcg@10','ndcg@25','cold_item_count'])\n", "df_metrics\n" ] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 4 }