{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.\n",
    "SPDX-License-Identifier: MIT-0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "* [Import libraries](#libraries)\n",
    "* [Connect to DocumentDB](#dbconn)\n",
    "* [Preview images](#preview)\n",
    "* [Upload images](#upload)\n",
    "* [Analyze and ingest images](#ingest)\n",
    "* [Explore images using DocumentDB queries](#query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import libraries "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard library\n",
    "import glob\n",
    "import json\n",
    "import os\n",
    "from urllib.parse import quote_plus\n",
    "\n",
    "# Third-party\n",
    "import boto3\n",
    "import ipyplot\n",
    "import pandas as pd\n",
    "import plotly.express as px\n",
    "from pymongo import MongoClient, ASCENDING\n",
    "import s3fs\n",
    "\n",
    "pd.set_option('display.max_colwidth', 80)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set parameters\n",
    "stack_name = \"docdb-rekognition\"  # name of CloudFormation stack\n",
    "\n",
    "s3_bucket = 'docdb-blog'  # name of your S3 bucket\n",
    "s3_prefix = 'rekognition/pics/'  # S3 path where you want the images uploaded"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# AWS service clients (region and credentials come from the default boto3 configuration)\n",
    "rekognition = boto3.client('rekognition')\n",
    "s3 = boto3.client(\"s3\")\n",
    "\n",
    "local_prefix = 'pics'  # path to where the images were downloaded onto your SageMaker instance"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Connect to DocumentDB "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get DocumentDB credentials stored in Secrets Manager\n",
    "def get_secret(stack_name):\n",
    "    \"\"\"Return the DocumentDB secret for a stack as a dict.\n",
    "\n",
    "    Reads the Secrets Manager secret named ``<stack_name>-DocDBSecret`` in the\n",
    "    region of the default boto3 session and JSON-decodes its ``SecretString``.\n",
    "\n",
    "    Args:\n",
    "        stack_name: Prefix of the secret name (the CloudFormation stack name).\n",
    "\n",
    "    Returns:\n",
    "        The decoded secret; the connection cell below reads its ``username``,\n",
    "        ``password``, ``host`` and ``port`` keys.\n",
    "    \"\"\"\n",
    "    # Create a Secrets Manager client\n",
    "    session = boto3.session.Session()\n",
    "    client = session.client(\n",
    "        service_name='secretsmanager',\n",
    "        region_name=session.region_name\n",
    "    )\n",
    "\n",
    "    secret_name = f'{stack_name}-DocDBSecret'\n",
    "    get_secret_value_response = client.get_secret_value(SecretId=secret_name)\n",
    "    secret = get_secret_value_response['SecretString']\n",
    "\n",
    "    return json.loads(secret)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# Set up a connection to the Amazon DocumentDB database\n",
    "secret = get_secret(stack_name)\n",
    "\n",
    "db_username = secret['username']\n",
    "db_password = secret['password']\n",
    "db_port = secret['port']\n",
    "db_host = secret['host']\n",
    "\n",
    "# TLS connection string.\n",
    "# - The credentials are percent-escaped because reserved characters such as\n",
    "#   '@', ':', '/' or '%' in a password would otherwise corrupt the URI.\n",
    "# - tls/tlsCAFile are used instead of ssl/ssl_ca_certs, which PyMongo 4 removed.\n",
    "# - The CA bundle path is relative, so the .pem file is looked up in the\n",
    "#   notebook's working directory.\n",
    "uri_str = (\n",
    "    f\"mongodb://{quote_plus(db_username)}:{quote_plus(db_password)}@{db_host}:{db_port}/\"\n",
    "    \"?tls=true&tlsCAFile=rds-combined-ca-bundle.pem\"\n",
    "    \"&replicaSet=rs0&readPreference=secondaryPreferred&retryWrites=false\"\n",
    ")\n",
    "client = MongoClient(uri_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['documentdbinstanceone-ba0lmvhl0dml.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017',\n",
       " 'documentdbinstancetwo-iulkk0vmfiln.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017',\n",
       " 'documentdbinstancethree-epyakg0eahpb.chuzv8sgxhbr.us-west-2.docdb.amazonaws.com:27017']"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show cluster details for the Amazon DocumentDB cluster, which verifies the connection\n",
    "client[\"admin\"].command(\"ismaster\")[\"hosts\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "db_name = \"db\"  # name the database\n",
    "coll_name = \"coll\"  # name the collection\n",
    "\n",
    "db = client[db_name]  # create a database object\n",
    "coll = db[coll_name]  # create a collection object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: drop existing data in the collection if the collection exists\n",
    "coll.drop()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 
Preview images " ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", "