{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Text detection using Amazon Rekognition"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "***\n",
    "This notebook provides a walkthrough of the [text detection API](https://docs.aws.amazon.com/rekognition/latest/dg/text-detection.html) in Amazon Rekognition. You can quickly identify text in your video and image libraries to catalog footage and photos for marketing, advertising, and media industry use cases.\n",
    "***"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Initialize stuff"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Update boto3 to current version\n",
    "!conda upgrade -y boto3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Check to ensure that current version of boto3 is installed\n",
    "import boto3\n",
    "print(boto3.__version__)\n",
    "\n",
    "import botocore\n",
    "print(botocore.__version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialise Notebook\n",
    "import boto3\n",
    "from IPython.display import HTML, display, Image as IImage\n",
    "from PIL import Image, ImageDraw, ImageFont\n",
    "import time\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Curent AWS Region. Use this to choose corresponding S3 bucket with sample content\n",
    "\n",
    "mySession = boto3.session.Session()\n",
    "awsRegion = mySession.region_name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Init clients\n",
    "rekognition = boto3.client('rekognition')\n",
    "s3 = boto3.client('s3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# S3 bucket that contains sample images and videos\n",
    "\n",
    "# We are providing sample images and videos in this bucket so\n",
    "# you do not have to manually download/upload test images and videos.\n",
    "bucketName = \"aws-workshops-\" + awsRegion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create temporary directory\n",
    "# This directory is not needed to call Rekognition APIs.\n",
    "# We will only use this directory to download images from S3 bucket and draw bounding boxes\n",
    "\n",
    "!mkdir m1tmp\n",
    "tempFolder = 'm1tmp/'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Detect text in image\n",
    "***"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "imageName = \"content-moderation/media/coffee.jpg\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "display(IImage(url=s3.generate_presigned_url('get_object', Params={'Bucket': bucketName, 'Key': imageName})))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Call Rekognition to detect text in the image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call Amazon Rekognition to detect text in the image\n",
    "# https://docs.aws.amazon.com/rekognition/latest/dg/API_DetectText.html\n",
    "\n",
    "detectTextResponse = rekognition.detect_text(\n",
    "    Image={\n",
    "        'S3Object': {\n",
    "            'Bucket': bucketName,\n",
    "            'Name': imageName,\n",
    "        }\n",
    "      },\n",
    "      Filters={\n",
    "        'WordFilter': {\n",
    "            'MinConfidence': 90\n",
    "        }\n",
    "      }\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Review the raw JSON reponse from Rekognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show JSON response returned by Rekognition Text API (Text Detection)\n",
    "# In the JSON response below, you will see detected text, confidence score, and additional information.\n",
    "\n",
    "display(detectTextResponse)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Display list of detected unsafe text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "unsafeWords = [\"crap\", \"darn\", \"damm\"]\n",
    "for textDetection in detectTextResponse[\"TextDetections\"]:\n",
    "    # strip punctuation before checking match\n",
    "    text = textDetection[\"DetectedText\"].translate(str.maketrans('', '', string.punctuation))\n",
    "    if(textDetection[\"Type\"] == \"WORD\" and text in unsafeWords):\n",
    "        print(\"Detected unsafe word: {}\".format(textDetection[\"DetectedText\"]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Show image with bounding boxes around detected objects"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define a function that will display image with bounded boxes around recognized text\n",
    "# We will call this function in next step\n",
    "  \n",
    "def drawBoundingBoxes (sourceImage, boxes):\n",
    "    # blue, green, red, grey\n",
    "    colors = ((255,255,255),(255,255,255),(76,182,252),(52,194,123))\n",
    "    \n",
    "    # Download image locally\n",
    "    imageLocation = tempFolder+os.path.basename(sourceImage)\n",
    "    s3.download_file(bucketName, sourceImage, imageLocation)\n",
    "\n",
    "    # Draws BB on Image\n",
    "    bbImage = Image.open(imageLocation)\n",
    "    draw = ImageDraw.Draw(bbImage)\n",
    "    width, height = bbImage.size\n",
    "    col = 0\n",
    "    maxcol = len(colors)\n",
    "    line= 3\n",
    "    for box in boxes:\n",
    "        x1 = int(box[1]['Left'] * width)\n",
    "        y1 = int(box[1]['Top'] * height)\n",
    "        x2 = int(box[1]['Left'] * width + box[1]['Width'] * width)\n",
    "        y2 = int(box[1]['Top'] * height + box[1]['Height']  * height)\n",
    "        \n",
    "        draw.text((x1,y1),box[0],colors[col])\n",
    "        for l in range(line):\n",
    "            draw.rectangle((x1-l,y1-l,x2+l,y2+l),outline=colors[col])\n",
    "        col = (col+1)%maxcol\n",
    "    \n",
    "    imageFormat = \"PNG\"\n",
    "    ext = sourceImage.lower()\n",
    "    if(ext.endswith('jpg') or ext.endswith('jpeg')):\n",
    "        imageFormat = 'JPEG'\n",
    "\n",
    "    bbImage.save(imageLocation,format=imageFormat)\n",
    "\n",
    "    display(bbImage)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Extract bounding box information from JSON response above and display image with bounding boxes around text.\n",
    "\n",
    "boxes = []\n",
    "textDetections = detectTextResponse['TextDetections']\n",
    "for textDetection in textDetections:\n",
    "        boxes.append ((textDetection['Type'], textDetection[\"Geometry\"]['BoundingBox']))\n",
    "    \n",
    "drawBoundingBoxes(imageName, boxes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Detect text in image using Filters and Regions of Interest\n",
    "***"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "imageName = \"content-moderation/media/coffee.jpg\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "display(IImage(url=s3.generate_presigned_url('get_object', Params={'Bucket': bucketName, 'Key': imageName})))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Call Amazon Rekognition to detect text in the image\n",
    "# https://docs.aws.amazon.com/rekognition/latest/dg/API_DetectText.html\n",
    "\n",
    "detectTextResponse = rekognition.detect_text(\n",
    "    Image={\n",
    "        'S3Object': {\n",
    "            'Bucket': bucketName,\n",
    "            'Name': imageName,\n",
    "        }\n",
    "    },\n",
    "    Filters={\n",
    "        'WordFilter': {\n",
    "            'MinConfidence': 90,\n",
    "            'MinBoundingBoxHeight': 0.05,\n",
    "            'MinBoundingBoxWidth': 0.02\n",
    "        },\n",
    "        'RegionsOfInterest': [\n",
    "            {\n",
    "                'BoundingBox': {\n",
    "                    'Width': 0.1,\n",
    "                    'Height': 0.05,\n",
    "                    'Left': 0.01,\n",
    "                    'Top': 0.01\n",
    "                }\n",
    "            },\n",
    "        ]\n",
    "    }\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show JSON response returned by Rekognition Text API (Text Detection)\n",
    "# In the JSON response below, you will see detected text, confidence score, and additional information.\n",
    "\n",
    "display(detectTextResponse)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for textDetection in detectTextResponse[\"TextDetections\"]:\n",
    "    text = textDetection[\"DetectedText\"]\n",
    "    if(textDetection[\"Type\"] == \"WORD\"):\n",
    "        print(\"Word: {}\".format(textDetection[\"DetectedText\"]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Detect text in video\n",
    " Text detection in video is an async operation. \n",
    "https://docs.aws.amazon.com/rekognition/latest/dg/text-detecting-video-procedure.html.\n",
    "\n",
    "- First we start a text detection job which returns a Job Id.\n",
    "- We can then call `get_text_detection` to get the job status and after job is complete, we can get object metadata.\n",
    "- In production use cases, you would usually use StepFunction or SNS topic to get notified when job is complete.\n",
    "***"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "videoName = \"content-moderation/media/serverless-bytes.mov\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Call Rekognition to start a job for text detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Start video text job\n",
    "startTextDetection = rekognition.start_text_detection(\n",
    "    Video={\n",
    "        'S3Object': {\n",
    "            'Bucket': bucketName,\n",
    "            'Name': videoName,\n",
    "        }\n",
    "    },\n",
    ")\n",
    "\n",
    "textJobId = startTextDetection['JobId']\n",
    "display(\"Job Id: {0}\".format(textJobId))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Wait for text detection job to complete"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wait for text detection job to complete\n",
    "# In production use cases, you would usually use StepFunction or SNS topic to get notified when job is complete.\n",
    "getTextDetection = rekognition.get_text_detection(\n",
    "    JobId=textJobId\n",
    ")\n",
    "\n",
    "while(getTextDetection['JobStatus'] == 'IN_PROGRESS'):\n",
    "    time.sleep(5)\n",
    "    print('.', end='')\n",
    " \n",
    "    getTextDetection = rekognition.get_text_detection(\n",
    "    JobId=textJobId\n",
    "    )\n",
    "    \n",
    "display(getTextDetection['JobStatus'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Review raw JSON reponse from Rekognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show JSON response returned by Rekognition Text Detection API\n",
    "# In the JSON response below, you will see list of detected text.\n",
    "# For each detected object, you will see information like Timestamp\n",
    "\n",
    "display(getTextDetection)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Display recognized text in the video"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flaggedTextInVideo = [\"AWS\", \"Twitter\"]\n",
    "\n",
    "theLines = {}\n",
    "\n",
    "# Display timestamps and objects detected at that time\n",
    "strDetail = \"Text detected in video<br>=======================================<br>\"\n",
    "strOverall = \"Text in the overall video:<br>=======================================<br>\"\n",
    "\n",
    "# Objects detected in each frame\n",
    "for obj in getTextDetection['TextDetections']:\n",
    "    if(obj['TextDetection']['Type'] == 'WORD'):\n",
    "        ts = obj [\"Timestamp\"]\n",
    "        cconfidence = obj['TextDetection'][\"Confidence\"]\n",
    "        oname = obj['TextDetection'][\"DetectedText\"]\n",
    "\n",
    "        if(oname in flaggedTextInVideo):\n",
    "            print(\"Found flagged text at {} ms: {} (Confidence: {})\".format(ts, oname, round(cconfidence,2)))\n",
    "\n",
    "        strDetail = strDetail + \"At {} ms: {} (Confidence: {})<br>\".format(ts, oname, round(cconfidence,2))\n",
    "        if oname in theLines:\n",
    "            cojb = theLines[oname]\n",
    "            theLines[oname] = {\"Text\" : oname, \"Count\": 1+cojb[\"Count\"]}\n",
    "        else:\n",
    "            theLines[oname] = {\"Text\" : oname, \"Count\": 1}\n",
    "\n",
    "# Unique objects detected in video\n",
    "for theLine in theLines:\n",
    "    strOverall = strOverall + \"Name: {}, Count: {}<br>\".format(theLine, theLines[theLine][\"Count\"])\n",
    "\n",
    "# Display results\n",
    "display(HTML(strOverall))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Show video in the player"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show video in a player\n",
    "\n",
    "s3VideoUrl = s3.generate_presigned_url('get_object', Params={'Bucket': bucketName, 'Key': videoName})\n",
    "\n",
    "videoTag = \"<video controls='controls' autoplay width='640' height='360' name='Video' src='{0}'></video>\".format(s3VideoUrl)\n",
    "\n",
    "videoui = \"<table><tr><td style='vertical-align: top'>{}</td></tr></table>\".format(videoTag)\n",
    "\n",
    "display(HTML(videoui))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "listui = \"<table><tr><td style='vertical-align: top'>{}</td></tr></table>\".format(strDetail)\n",
    "display(HTML(listui))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "***\n",
    "### References\n",
    "- https://docs.aws.amazon.com/rekognition/latest/dg/API_DetectText.html\n",
    "- https://docs.aws.amazon.com/rekognition/latest/dg/API_StartTextDetection.html\n",
    "- https://docs.aws.amazon.com/rekognition/latest/dg/API_GetTextDetection.html\n",
    "\n",
    "***"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You have successfully used Amazon Rekognition to identify text in images an videos."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}