In [47]:
%matplotlib inline
import os
from collections import namedtuple
from collections import defaultdict
from collections import Counter
from datetime import datetime
import itertools
import base64
import glob
import json
import random
import time
import imageio
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import shutil
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.metrics import confusion_matrix
import boto3
import botocore
import sagemaker
from urllib.parse import urlparse

In [48]:
role = sagemaker.get_execution_role()
region = boto3.session.Session().region_name
s3 = boto3.client('s3')

## Job parameters

In [76]:
BUCKET="tanmcrae-greengrass-blog"

In [77]:
bucket_region = s3.head_bucket(Bucket=BUCKET)['ResponseMetadata']['HTTPHeaders']['x-amz-bucket-region']
assert bucket_region == region, "Your S3 bucket {} and this notebook need to be in the same region.".format(BUCKET)

In [111]:
MANIFEST = "blue_box_large_job.json"
JOB_NAME = "blue-box-large-job-public"
EXP_NAME = 'blue-box'
print(JOB_NAME)


blue-box-large-job-public


In [112]:
USE_AUTO_LABELING = False
RUN_FULL_AL_DEMO = False 
USE_PRIVATE_WORKFORCE = False

## specifying categories

In [113]:
CLASS_NAME = "storage box"

In [114]:
CLASS_LIST = [CLASS_NAME]
print("Label space is {}".format(CLASS_LIST))

json_body = {
    'labels': [{'label': label} for label in CLASS_LIST]
}
with open('class_labels.json', 'w') as f:
    json.dump(json_body, f)

LABEL_KEY = "ground-truth/{}/class_labels.json".format(EXP_NAME)
s3.upload_file('class_labels.json', BUCKET, LABEL_KEY)
print ("uploaded s3://{}/{}".format(BUCKET, LABEL_KEY))

Label space is ['storage box']
uploaded s3://tanmcrae-greengrass-blog/ground-truth/blue-box/class_labels.json


## Create the instruction template


In [115]:
def make_template(test_template=False, save_fname='instructions.template'):
    template = r"""<script src="https://assets.crowd.aws/crowd-html-elements.js"></script>

<crowd-form>
  <crowd-bounding-box
    name="boundingBox"
    src="{{ task.input.taskObject | grant_read_access }}"
    header="Draw bounding box for the storage boxes in the picture (blue). Each bounding box should fit tight around the box. Only draw one bounding box per storage box, even if part of the box may be occluded."
    labels="['storage box']"
  >
    <full-instructions header="Please annotate storage boxes in the picture">
      <ol>
        <li><strong>Inspect</strong> the image</li>
        <li><strong>Determine</strong> if there are visible blue storage box in the picture.</li>
        <li><strong>Outline</strong> the storage box in the image using the provided “Box” tool. </li>
      </ol>

      <h2><span style="color: rgb(0, 138, 0);">Good Example</span></h2>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-arm.png " style="max-width:450"></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-occlusion.png " style="max-width:450"></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-partial.png " style="max-width:450"></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-standard.png " style="max-width:450"></p>
      <h2><span style="color: rgb(230, 0, 0);">Bad Example</span></h2>

      <p>The bounding boxes below are bad as it didn't cover the entire box. </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-full.png" style="max-width:450"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-full-2.png" style="max-width:450"></p>
      <p>The bounding boxes below are bad as it's not tight around storage box. </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-tight.png" style="max-width:450"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-tight-2.png" style="max-width:450"></p>
      <p>The labeling below are bad as it didn't cover the full </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-occlusion-partial.png" style="max-width:450"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-occlusion-partial-2.png" style="max-width:450"></p>

    </full-instructions>

    <short-instructions>
      <p>Label every blue storage box in the picture. Boxes should fit tight. If the target goes off the screen, label up to the edge of the image. Do not label if it completely cannot be seen. </p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-arm.png " style="max-width:100%"/></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-occlusion.png " style="max-width:100%"/></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-partial.png " style="max-width:100%"/></p>
      <p><img src=" https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/good-exmaples/good-example-standard.png " style="max-width:100%"/></p>
      <p><br/></p>
      <h2><span style="color: rgb(230, 0, 0);">Bad examples</span></h2>
      <p>The bounding boxes below are bad as it didn't cover the entire box. </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-full.png" style="max-width:100%"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-full-2.png" style="max-width:100%"></p>
      <p>The bounding boxes below are bad as it's not tight around storage box. </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-tight.png" style="max-width:100%"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-not-tight-2.png" style="max-width:100%"></p>
      <p>The labeling below are bad as it only labeled part of the storage box </p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-occlusion-partial.png" style="max-width:100%"></p>
      <p><img src="https://s3.amazonaws.com/angelaw-workshop/groundtruth/greengrass-blog/bad-examples/bad-example-occlusion-partial-2.png" style="max-width:100%"></p>

    </short-instructions>
  </crowd-bounding-box>
</crowd-form>
    """
    with open(save_fname, 'w') as f:
        f.write(template)
        
template_name = 'instructions.template'
# make_template(test_template=True, save_fname='instructions.html')
make_template(test_template=False, save_fname=template_name)
s3.upload_file(template_name, BUCKET, EXP_NAME + '/' + template_name)

print("uploaded template to s3://{}/ground-truth/{}/{}".format(BUCKET, EXP_NAME, template_name))

uploaded template to s3://tanmcrae-greengrass-blog/ground-truth/blue-box/instructions.template


In [116]:
private_workteam_arn = "arn:aws:sagemaker:us-east-1:854681337758:workteam/private-crowd/greengrass-blog"


## Create job

In [117]:
task_description = 'Dear Annotator, please draw a box around the yellow or blue storage box in the picture. Thank you!'
task_keywords = ['image', 'object', 'detection', CLASS_NAME]
task_title = 'Draw a box around storage box in the picture'

print("task_title: {}".format(task_title))
print("JOB_NAME: {}".format(JOB_NAME))
task_keywords

task_title: Draw a box around storage box in the picture
JOB_NAME: blue-box-large-job-public


['image', 'object', 'detection', 'storage box']

In [118]:
# Specify ARNs for resources needed to run an object detection job.
ac_arn_map = {'us-west-2': '081040173940',
              'us-east-1': '432418664414',
              'us-east-2': '266458841044',
              'eu-west-1': '568282634449',
              'ap-northeast-1': '477331159723'}

prehuman_arn = 'arn:aws:lambda:{}:{}:function:PRE-BoundingBox'.format(region, ac_arn_map[region])
acs_arn = 'arn:aws:lambda:{}:{}:function:ACS-BoundingBox'.format(region, ac_arn_map[region]) 
labeling_algorithm_specification_arn = 'arn:aws:sagemaker:{}:027400017018:labeling-job-algorithm-specification/object-detection'.format(region)
public_workteam_arn = 'arn:aws:sagemaker:{}:394669845002:workteam/public-crowd/default'.format(region)

In [119]:
human_task_config = {
      "AnnotationConsolidationConfig": {
        "AnnotationConsolidationLambdaArn": acs_arn,
      },
      "PreHumanTaskLambdaArn": prehuman_arn,
      "MaxConcurrentTaskCount": 300, # 200 images will be sent at a time to the workteam.
      "NumberOfHumanWorkersPerDataObject": 1, # We will obtain and consolidate just 1 human annotation for each image.
      "TaskAvailabilityLifetimeInSeconds": 43200, #864000, #43200 # Your workteam has 10 days to complete all pending tasks.
      "TaskDescription": task_description,
      "TaskKeywords": task_keywords,
      "TaskTimeLimitInSeconds": 600, # Each image must be labeled within 10 minutes.
      "TaskTitle": task_title,
      "UiConfig": {
        "UiTemplateS3Uri": 's3://{}/{}/{}'.format(BUCKET, EXP_NAME, template_name),
      }
    }

In [120]:
if not USE_PRIVATE_WORKFORCE:
    human_task_config["PublicWorkforceTaskPrice"] = {
        "AmountInUsd": {
           "Dollars": 0,
           "Cents": 3,
           "TenthFractionsOfACent": 6,
        }
    } 
    human_task_config["WorkteamArn"] = public_workteam_arn
else:
    human_task_config["WorkteamArn"] = private_workteam_arn

In [121]:
print(json.dumps (human_task_config, indent =2 ))

{
  "AnnotationConsolidationConfig": {
    "AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-east-1:432418664414:function:ACS-BoundingBox"
  },
  "PreHumanTaskLambdaArn": "arn:aws:lambda:us-east-1:432418664414:function:PRE-BoundingBox",
  "MaxConcurrentTaskCount": 300,
  "NumberOfHumanWorkersPerDataObject": 1,
  "TaskAvailabilityLifetimeInSeconds": 43200,
  "TaskDescription": "Dear Annotator, please draw a box around the yellow or blue storage box in the picture. Thank you!",
  "TaskKeywords": [
    "image",
    "object",
    "detection",
    "storage box"
  ],
  "TaskTimeLimitInSeconds": 600,
  "TaskTitle": "Draw a box around storage box in the picture",
  "UiConfig": {
    "UiTemplateS3Uri": "s3://tanmcrae-greengrass-blog/blue-box/instructions.template"
  },
  "PublicWorkforceTaskPrice": {
    "AmountInUsd": {
      "Dollars": 0,
      "Cents": 3,
      "TenthFractionsOfACent": 6
    }
  },
  "WorkteamArn": "arn:aws:sagemaker:us-east-1:394669845002:workteam/public-crowd/default"

In [122]:
ground_truth_request = {
        "InputConfig" : {
          "DataSource": {
            "S3DataSource": {
              "ManifestS3Uri": 's3://{}/{}/{}'.format(BUCKET, 'manifests', MANIFEST),
            }
          },
          "DataAttributes": {
            "ContentClassifiers": [
              "FreeOfPersonallyIdentifiableInformation",
              "FreeOfAdultContent"
            ]
          },  
        },
        "OutputConfig" : {
          "S3OutputPath": 's3://{}/ground-truth-output/'.format(BUCKET),
        },
        "HumanTaskConfig" : human_task_config,
        "LabelingJobName": JOB_NAME,
        "RoleArn": role, 
        "LabelAttributeName": "bb",
        "LabelCategoryConfigS3Uri": 's3://{}/{}'.format(BUCKET, LABEL_KEY),
    }


if USE_AUTO_LABELING and RUN_FULL_AL_DEMO:
    ground_truth_request[ "LabelingJobAlgorithmsConfig"] = {
            "LabelingJobAlgorithmSpecificationArn": labeling_algorithm_specification_arn
                                       }

In [123]:
print(json.dumps (ground_truth_request, indent =2 ))

{
  "InputConfig": {
    "DataSource": {
      "S3DataSource": {
        "ManifestS3Uri": "s3://tanmcrae-greengrass-blog/manifests/blue_box_large_job.json"
      }
    },
    "DataAttributes": {
      "ContentClassifiers": [
        "FreeOfPersonallyIdentifiableInformation",
        "FreeOfAdultContent"
      ]
    }
  },
  "OutputConfig": {
    "S3OutputPath": "s3://tanmcrae-greengrass-blog/ground-truth-output/"
  },
  "HumanTaskConfig": {
    "AnnotationConsolidationConfig": {
      "AnnotationConsolidationLambdaArn": "arn:aws:lambda:us-east-1:432418664414:function:ACS-BoundingBox"
    },
    "PreHumanTaskLambdaArn": "arn:aws:lambda:us-east-1:432418664414:function:PRE-BoundingBox",
    "MaxConcurrentTaskCount": 300,
    "NumberOfHumanWorkersPerDataObject": 1,
    "TaskAvailabilityLifetimeInSeconds": 43200,
    "TaskDescription": "Dear Annotator, please draw a box around the yellow or blue storage box in the picture. Thank you!",
    "TaskKeywords": [
      "image",
      "object",
  

In [124]:
sagemaker_client = boto3.client('sagemaker')
sagemaker_client.create_labeling_job(**ground_truth_request)

{'LabelingJobArn': 'arn:aws:sagemaker:us-east-1:854681337758:labeling-job/blue-box-large-job-public',
 'ResponseMetadata': {'RequestId': 'adfaed82-9c2b-4c45-ac5c-b06dfb20572d',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'adfaed82-9c2b-4c45-ac5c-b06dfb20572d',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '100',
   'date': 'Tue, 21 May 2019 21:42:16 GMT'},
  'RetryAttempts': 0}}

## look at output manifest

In [94]:
job_name = 'yellow-box-small-job-public'
OUTPUT_MANIFEST = 's3://{}/ground-truth-output/{}/manifests/output/output.manifest'.format(BUCKET, job_name)

output_file = job_name+'.output.manifest'
!aws s3 cp {OUTPUT_MANIFEST} {output_file}

Completed 15.7 KiB/15.7 KiB (261.1 KiB/s) with 1 file(s) remainingdownload: s3://tanmcrae-greengrass-blog/ground-truth-output/yellow-box-small-job-public/manifests/output/output.manifest to ./yellow-box-small-job-public.output.manifest


In [95]:
with open(output_file, 'r') as f:
    output = [json.loads(line.strip()) for line in f.readlines()]

In [96]:
len(output)

32