{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.\n", "SPDX-License-Identifier: MIT-0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* [Import libraries](#libraries)\n", "* [Connect to DocumentDB](#dbconn)\n", "* [Preview images](#preview)\n", "* [Upload images](#upload)\n", "* [Analyze and ingest images](#ingest)\n", "* [Explore images using DocumentDB queries](#query)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Import libraries " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import glob\n", "import ipyplot\n", "import json\n", "import os\n", "import pandas as pd\n", "import plotly.express as px\n", "from pymongo import MongoClient, ASCENDING\n", "import s3fs\n", "\n", "pd.set_option('display.max_colwidth', 80)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Set parameters\n", "stack_name = \"docdb-rekognition\" # name of CloudFormation stack\n", "\n", "s3_bucket = 'docdb-blog' # name of your S3 bucket\n", "s3_prefix = 'rekognition/pics/' # S3 path where you want the images uploaded" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "rekognition = boto3.client('rekognition')\n", "s3 = boto3.client(\"s3\")\n", "\n", "local_prefix = 'pics' # path to where the images were downloaded onto your SageMaker instance" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Connect to DocumentDB " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Get DocumentDB credentials stored in Secrets Manager\n", "def get_secret(stack_name):\n", "\n", " # Create a Secrets Manager client\n", " session = boto3.session.Session()\n", " client = session.client(\n", " service_name='secretsmanager',\n", " region_name=session.region_name\n", " )\n", " \n", " secret_name = f'{stack_name}-DocDBSecret'\n", " get_secret_value_response = client.get_secret_value(SecretId=secret_name)\n", " secret = get_secret_value_response['SecretString']\n", " \n", " return json.loads(secret)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# Set up a connection to the Amazon DocumentDB database\n", "secret = get_secret(stack_name)\n", "\n", "db_username = secret['username']\n", "db_password = secret['password']\n", "db_port = secret['port']\n", "db_host = secret['host']\n", "\n", "# SSL connection string\n", "uri_str = f\"mongodb://{db_username}:{db_password}@{db_host}:{db_port}/?ssl=true&ssl_ca_certs=rds-combined-ca-bundle.pem&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false\"\n", "client = MongoClient(uri_str)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['documentdbinstanceone-ba0lmvhl0dml.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017',\n", " 'documentdbinstancetwo-iulkk0vmfiln.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017',\n", " 'documentdbinstancethree-epyakg0eahpb.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show cluster details for the Amazon DocumentDB cluster, which verifies the connection\n", "client[\"admin\"].command(\"ismaster\")[\"hosts\"]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "db_name = \"db\" # name the database\n", "coll_name = \"coll\" # name the collection\n", "\n", "db = client[db_name] # create a database object\n", "coll = db[coll_name] # create a collection object" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Optional: drop existing data in the collection if the collection exists\n", "coll.drop()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Preview images " ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/coleen-rivas-OZ2rS2zCjNo-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

1

\n", "

pics/erik-eastman-4HG5hlhmZg8-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

2

\n", "

pics/fikri-rasyid-amI09sbNZdE-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

3

\n", "

pics/gusandy-maulana-Rs3Z-j8QTEM-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

4

\n", "

pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

5

\n", "

pics/pickawood-8SfXsep8EIA-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

6

\n", "

pics/ranurte-Hnmb9wQucG4-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

7

\n", "

pics/riley-sullivan-kTb76cLODyE-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

8

\n", "

pics/robert-f-9t5sV4KarVA-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

9

\n", "

pics/roberto-carlos-roman-K77xDt7E1fE-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

10

\n", "

pics/simon-berger-39SHYToxfiQ-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

11

\n", "

pics/svetlana-kuznetsova-VgItkeIq6Ek-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

12

\n", "

pics/tom-ungerer-10Kd3Pm4BDg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

13

\n", "

pics/volkan-vardar-tYBlm33PMxU-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

14

\n", "

pics/yeshi-kangrang-wTD1-_u8x1g-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Get local paths of images\n", "pic_local_paths = glob.glob(f\"{local_prefix}/*.jpg\")\n", "pic_local_paths = sorted(pic_local_paths)\n", "\n", "# Preview images\n", "ipyplot.plot_images(\n", " images=pic_local_paths, \n", " max_images=15, \n", " img_width=180,\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Upload images to S3 " ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Upload images to S3\n", "for pic_local_path in pic_local_paths:\n", " pic_filename = os.path.basename(pic_local_path)\n", " boto3.Session().resource('s3').Bucket(s3_bucket).Object(os.path.join(s3_prefix, pic_filename)).upload_file(pic_local_path)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# Get S3 keys for images\n", "fs = s3fs.S3FileSystem()\n", "pic_keylist = fs.ls(f's3://{s3_bucket}/{s3_prefix}/')[1:] ### BMH: I think we should remove the [1:]... I'm losing the first pic.\n", "pic_keylist = [key.split(f'{s3_bucket}/')[1] for key in pic_keylist]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['rekognition/pics/coleen-rivas-OZ2rS2zCjNo-unsplash.jpg',\n", " 'rekognition/pics/erik-eastman-4HG5hlhmZg8-unsplash.jpg',\n", " 'rekognition/pics/fikri-rasyid-amI09sbNZdE-unsplash.jpg',\n", " 'rekognition/pics/gusandy-maulana-Rs3Z-j8QTEM-unsplash.jpg',\n", " 'rekognition/pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg',\n", " 'rekognition/pics/pickawood-8SfXsep8EIA-unsplash.jpg',\n", " 'rekognition/pics/ranurte-Hnmb9wQucG4-unsplash.jpg',\n", " 'rekognition/pics/riley-sullivan-kTb76cLODyE-unsplash.jpg',\n", " 'rekognition/pics/robert-f-9t5sV4KarVA-unsplash.jpg',\n", " 'rekognition/pics/roberto-carlos-roman-K77xDt7E1fE-unsplash.jpg',\n", " 'rekognition/pics/simon-berger-39SHYToxfiQ-unsplash.jpg',\n", " 'rekognition/pics/svetlana-kuznetsova-VgItkeIq6Ek-unsplash.jpg',\n", " 'rekognition/pics/tom-ungerer-10Kd3Pm4BDg-unsplash.jpg',\n", " 'rekognition/pics/volkan-vardar-tYBlm33PMxU-unsplash.jpg',\n", " 'rekognition/pics/yeshi-kangrang-wTD1-_u8x1g-unsplash.jpg']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pic_keylist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyze images with Rekognition and ingest data into DocumentDB " ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'Labels': [{'Name': 'Outdoors',\n", " 'Confidence': 98.58585357666016,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Garden',\n", " 'Confidence': 96.23029327392578,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Outdoors'}]},\n", " {'Name': 'Arbour',\n", " 'Confidence': 93.65332794189453,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Garden'}, {'Name': 'Outdoors'}]},\n", " {'Name': 'Person',\n", " 'Confidence': 93.00440979003906,\n", " 'Instances': [{'BoundingBox': {'Width': 0.016103893518447876,\n", " 'Height': 0.03213529288768768,\n", " 'Left': 0.6525371670722961,\n", " 'Top': 0.9264869689941406},\n", " 'Confidence': 93.00440979003906},\n", " {'BoundingBox': {'Width': 0.010800352320075035,\n", " 'Height': 0.020640190690755844,\n", " 'Left': 0.781416118144989,\n", " 'Top': 0.8592491149902344},\n", " 'Confidence': 78.98234558105469},\n", " {'BoundingBox': {'Width': 0.017044249922037125,\n", " 'Height': 0.02785704843699932,\n", " 'Left': 0.7455113530158997,\n", " 'Top': 0.8547402620315552},\n", " 'Confidence': 66.65809631347656}],\n", " 'Parents': []},\n", " {'Name': 'Human',\n", " 'Confidence': 93.00440979003906,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Amusement Park',\n", " 'Confidence': 82.81632995605469,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Theme Park',\n", " 'Confidence': 76.72222900390625,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Amusement Park'}]},\n", " {'Name': 'Plant',\n", " 'Confidence': 73.67972564697266,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Potted Plant',\n", " 'Confidence': 68.09540557861328,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Plant'},\n", " {'Name': 'Vase'},\n", " {'Name': 'Jar'},\n", " {'Name': 'Pottery'}]},\n", " {'Name': 'Pottery',\n", " 'Confidence': 68.09540557861328,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Jar',\n", " 'Confidence': 68.09540557861328,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Vase',\n", " 'Confidence': 68.09540557861328,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Jar'}, {'Name': 'Pottery'}]},\n", " {'Name': 'Ferris Wheel',\n", " 'Confidence': 64.03275299072266,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Amusement Park'}]},\n", " {'Name': 'Nature',\n", " 'Confidence': 62.964080810546875,\n", " 'Instances': [],\n", " 'Parents': []},\n", " {'Name': 'Planter',\n", " 'Confidence': 58.99358367919922,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Potted Plant'},\n", " {'Name': 'Plant'},\n", " {'Name': 'Vase'},\n", " {'Name': 'Jar'},\n", " {'Name': 'Pottery'}]},\n", " {'Name': 'Herbs',\n", " 'Confidence': 57.66265869140625,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Planter'},\n", " {'Name': 'Potted Plant'},\n", " {'Name': 'Plant'},\n", " {'Name': 'Vase'},\n", " {'Name': 'Jar'},\n", " {'Name': 'Pottery'}]},\n", " {'Name': 'Park',\n", " 'Confidence': 51.91413879394531,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Lawn'},\n", " {'Name': 'Outdoors'},\n", " {'Name': 'Grass'},\n", " {'Name': 'Plant'}]},\n", " {'Name': 'Grass',\n", " 'Confidence': 51.91413879394531,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Plant'}]},\n", " {'Name': 'Lawn',\n", " 'Confidence': 51.91413879394531,\n", " 'Instances': [],\n", " 'Parents': [{'Name': 'Grass'}, {'Name': 'Plant'}]}],\n", " 'LabelModelVersion': '2.0',\n", " 'ResponseMetadata': {'RequestId': 'e9f032ad-6931-4c5c-9911-eee9ab0e8970',\n", " 'HTTPStatusCode': 200,\n", " 'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1',\n", " 'date': 'Thu, 11 Mar 2021 00:35:27 GMT',\n", " 'x-amzn-requestid': 'e9f032ad-6931-4c5c-9911-eee9ab0e8970',\n", " 'content-length': '2512',\n", " 'connection': 'keep-alive'},\n", " 'RetryAttempts': 0}}" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Example Rekognition ouput\n", "rekognition.detect_labels(\n", " Image={\n", " 'S3Object':{\n", " 'Bucket': s3_bucket,\n", " 'Name': pic_keylist[0]\n", " }}, \n", " MinConfidence=50, \n", " MaxLabels=100,\n", ")" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Analyze and ingest images data\n", "for pic_key in pic_keylist:\n", " \n", " # Analyze an image with Rekognition\n", " pic_result = rekognition.detect_labels(\n", " Image={\n", " 'S3Object':{\n", " 'Bucket': s3_bucket,\n", " 'Name': pic_key\n", " }}, \n", " MinConfidence=50, \n", " MaxLabels=100)\n", "\n", " # Extract S3 key and image labels\n", " pic_label = pic_result['Labels']\n", " doc = {\n", " \"img\": pic_key.split('/')[-1], \n", " \"Labels\": pic_result['Labels']\n", " }\n", " \n", " # Ingest results into DocumentDB\n", " coll.insert_one(doc)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Explore images using DocumentDB queries " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "def preview_imgs(result):\n", " \"\"\"\n", " Method to preview images resulting from queries\n", " \"\"\" \n", " # Process result as pandas dataframe\n", " result = pd.DataFrame(result)\n", " \n", " # Get corresponding local image paths\n", " result_list = result['img'].tolist()\n", " result_list = [f'{local_prefix}/{result}' for result in result_list]\n", " \n", " # Display images\n", " ipyplot.plot_images(\n", " images=result_list, \n", " max_images=10, \n", " img_width=180,\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Frequency counts" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "15" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Count images\n", "coll.count_documents({})" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Histogram count of labels with confidence >=90.0\n", "result = coll.aggregate([\n", " {\"$unwind\": \"$Labels\"}, \n", " {\"$match\": {\"Labels.Confidence\": {\"$gte\": 90.0}}}, \n", " {\"$group\": {\"_id\": \"$Labels.Name\", \"count\": {\"$sum\": 1}}},\n", " {\"$sort\": {\"count\": -1} } \n", "])" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "alignmentgroup": "True", "hovertemplate": "_id=%{x}
count=%{y}", "legendgroup": "", "marker": { "color": "#636efa" }, "name": "", "offsetgroup": "", "orientation": "v", "showlegend": false, "textposition": "auto", "type": "bar", "x": [ "Person", "Human", "Plant", "Animal", "Food", "Tree", "Bird", "Transportation", "Building", "Market", "Furniture", "Book", "Vehicle", "Outdoors", "People", "Library", "Team Sport", "Lighting", "Flamingo", "Arbour", "Vacation", "Room", "City", "Arena", "Automobile", "Fruit", "Canine", "Monument", "Mammal", "Metropolis", "Chicken", "Aerial View", "Shelf", "Car", "Bookcase", "Team", "Road", "Urban", "Baseball Field", "Supermarket", "Cafe", "Highway", "Meal", "Landscape", "Freeway", "Boat", "Husky", "Dog", "Train", "Nature", "Cable Car", "Chair", "Fowl", "Sports", "Sport", "Garden", "Softball", "Pet", "Poultry", "Shop", "Grocery Store", "Scenery", "Town", "Indoors", "Baseball", "Restaurant" ], "xaxis": "x", "y": [ 6, 6, 4, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ], "yaxis": "y" } ], "layout": { "barmode": "relative", "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "_id" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "count" } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot histogram\n", "fig = px.bar(result, x='_id', y='count')\n", "fig.write_html(\"label_histogram.html\") # save the histogram as a html file\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Select images with minumum confidence threshold (without index)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

1

\n", "

pics/pickawood-8SfXsep8EIA-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query images with a 'Book' label of 90% or more confidence\n", "result = coll.find(\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Book\", \"Confidence\": {\"$gte\": 90.0}}}}, \n", " {\"_id\": 0, \"img\": 1}\n", ")\n", "\n", "preview_imgs(result)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query images with a 'Book' label with 90% or more confidence, and a 'Person' label with 90% or more confidence\n", "result = coll.find(\n", " {\"$and\": [\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Book\", \"Confidence\": {\"$gte\": 90.0}}}}, \n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Person\", \"Confidence\": {\"$gte\": 90.0}}}}] \n", " }, \n", " {\"_id\": 0, \"img\": 1})\n", "\n", "preview_imgs(result)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Select images with minumum confidence threshold (with index)\n", "You can also create an index to help these last 2 queries. To create the index, run the following:" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'idx_labels'" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create an index to help identify labels in pictures\n", "coll.create_index([\n", " (\"Labels.Name\", ASCENDING), \n", " (\"Labels.Confidence\", ASCENDING)], \n", " name=\"idx_labels\")" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

1

\n", "

pics/pickawood-8SfXsep8EIA-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query for 'Book' label with 90% or more confidence\n", "query_book = coll.find({\"$and\": [\n", " {\"Labels.Name\": \"Book\"},\n", " {\"Labels.Confidence\": {\"$gte\": 90.0}},\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Book\", \"Confidence\": {\"$gte\": 90.0}}}}\n", " ]}, \n", " {\"_id\": 0, \"img\": 1}\n", ")\n", "\n", "preview_imgs(query_book)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'queryPlanner': {'plannerVersion': 1,\n", " 'namespace': 'db.coll',\n", " 'winningPlan': {'stage': 'FETCH',\n", " 'inputStage': {'stage': 'IXSCAN', 'indexName': 'idx_labels'}}},\n", " 'serverInfo': {'host': 'documentdbinstancetwo-iulkk0vmfiln',\n", " 'port': 27017,\n", " 'version': '3.6.0'},\n", " 'ok': 1.0}" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# We can see that the planner has chosen an Index Scan (IXSCAN) for this query now\n", "query_book.explain()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/kourosh-qaffari-RrhhzitYizg-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Query for 'Book' and 'Person' labels, both with 90% or more confidence\n", "query_book_person = coll.find(\n", " {\"$and\": [\n", " {\"Labels.Name\": \"Book\"},\n", " {\"Labels.Confidence\": {\"$gte\": 90.0}},\n", " {\"Labels.Name\": \"Person\"},\n", " {\"Labels.Confidence\": {\"$gte\": 90.0}}, ## unnecessary, but adding for clarity\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Book\", \"Confidence\": {\"$gte\": 90.0}}}}, \n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Person\", \"Confidence\": {\"$gte\": 90.0}}}}] \n", " }, \n", " {\"_id\": 0, \"img\": 1}\n", ")\n", "\n", "preview_imgs(query_book_person)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'queryPlanner': {'plannerVersion': 1,\n", " 'namespace': 'db.coll',\n", " 'winningPlan': {'stage': 'FETCH',\n", " 'inputStage': {'stage': 'IXSCAN', 'indexName': 'idx_labels'}}},\n", " 'serverInfo': {'host': 'documentdbinstancetwo-iulkk0vmfiln',\n", " 'port': 27017,\n", " 'version': '3.6.0'},\n", " 'ok': 1.0}" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Again, we can see that the planner has chosen an Index Scan (IXSCAN) for this query now\n", "query_book_person.explain()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Select images with specified number instances of a label (array queries)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/erik-eastman-4HG5hlhmZg8-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Find all images with at least 4 instances of a person, with 90% or more confidence\n", "# The query checks if the third instance, \"Instances.3\", exists, with instance count starting from zero\n", "result = coll.find(\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Person\", \n", " \"Confidence\": {\"$gte\": 90.0}, \n", " \"Instances.3\": {\"$exists\": True}}}}, \n", " {\"_id\": 0, \"img\": 1}\n", ")\n", "\n", "preview_imgs(result)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", "
\n", " \n", " \n", " \n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", " \n", "
\n", "
\n", "
\n", "

0

\n", "

pics/coleen-rivas-OZ2rS2zCjNo-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", "
\n", "
\n", "

1

\n", "

pics/ranurte-Hnmb9wQucG4-unsplash.jpg

\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Find all images with at least 2 but fewer than 4 instances of a person, with 90% or more confidence\n", "result = coll.find(\n", " {\"Labels\": {\"$elemMatch\": {\"Name\": \"Person\", \n", " \"Confidence\": {\"$gte\": 90.0}, \n", " \"Instances.1\": {\"$exists\": True}, \n", " \"Instances.3\": {\"$exists\": False}}}}, \n", " {\"_id\": 0, \"img\": 1}\n", ")\n", "\n", "preview_imgs(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.10" } }, "nbformat": 4, "nbformat_minor": 4 }