{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting pip\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/43/84/23ed6a1796480a6f1a2d38f2802901d078266bda38388954d01d3f2e821d/pip-20.1.1-py2.py3-none-any.whl (1.5MB)\n", "\u001b[K 100% |████████████████████████████████| 1.5MB 22.6MB/s ta 0:00:01\n", "\u001b[?25hInstalling collected packages: pip\n", " Found existing installation: pip 10.0.1\n", " Uninstalling pip-10.0.1:\n", " Successfully uninstalled pip-10.0.1\n", "Successfully installed pip-20.1.1\n", "\u001b[33mYou are using pip version 20.1.1, however version 20.2b1 is available.\n", "You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n", "Collecting boto3\n", " Downloading boto3-1.13.19-py2.py3-none-any.whl (128 kB)\n", "\u001b[K |████████████████████████████████| 128 kB 11.6 MB/s eta 0:00:01\n", "\u001b[?25hCollecting s3transfer<0.4.0,>=0.3.0\n", " Downloading s3transfer-0.3.3-py2.py3-none-any.whl (69 kB)\n", "\u001b[K |████████████████████████████████| 69 kB 10.0 MB/s eta 0:00:01\n", "\u001b[?25hCollecting botocore<1.17.0,>=1.16.19\n", " Downloading botocore-1.16.19-py2.py3-none-any.whl (6.2 MB)\n", "\u001b[K |████████████████████████████████| 6.2 MB 26.6 MB/s eta 0:00:01\n", "\u001b[?25hRequirement already satisfied, skipping upgrade: jmespath<1.0.0,>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from boto3) (0.9.4)\n", "Requirement already satisfied, skipping upgrade: python-dateutil<3.0.0,>=2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.19->boto3) (2.7.3)\n", "Requirement already satisfied, skipping upgrade: docutils<0.16,>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.19->boto3) (0.14)\n", "Requirement already satisfied, skipping upgrade: urllib3<1.26,>=1.20; python_version != \"3.4\" in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore<1.17.0,>=1.16.19->boto3) (1.23)\n", "Requirement already satisfied, skipping upgrade: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from python-dateutil<3.0.0,>=2.1->botocore<1.17.0,>=1.16.19->boto3) (1.11.0)\n", "\u001b[31mERROR: awscli 1.16.283 has requirement botocore==1.13.19, but you'll have botocore 1.16.19 which is incompatible.\u001b[0m\n", "\u001b[31mERROR: awscli 1.16.283 has requirement s3transfer<0.3.0,>=0.2.0, but you'll have s3transfer 0.3.3 which is incompatible.\u001b[0m\n", "Installing collected packages: botocore, s3transfer, boto3\n", " Attempting uninstall: botocore\n", " Found existing installation: botocore 1.13.19\n", " Uninstalling botocore-1.13.19:\n", " Successfully uninstalled botocore-1.13.19\n", " Attempting uninstall: s3transfer\n", " Found existing installation: s3transfer 0.2.1\n", " Uninstalling s3transfer-0.2.1:\n", " Successfully uninstalled s3transfer-0.2.1\n", " Attempting uninstall: boto3\n", " Found existing installation: boto3 1.10.19\n", " Uninstalling boto3-1.10.19:\n", " Successfully uninstalled boto3-1.10.19\n", "Successfully installed boto3-1.13.19 botocore-1.16.19 s3transfer-0.3.3\n", "Requirement already up-to-date: botocore in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (1.16.19)\n", "Requirement already satisfied, skipping upgrade: python-dateutil<3.0.0,>=2.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore) (2.7.3)\n", "Requirement already satisfied, skipping upgrade: urllib3<1.26,>=1.20; python_version != \"3.4\" in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore) (1.23)\n", "Requirement already satisfied, skipping upgrade: jmespath<1.0.0,>=0.7.1 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore) (0.9.4)\n", "Requirement already satisfied, skipping upgrade: docutils<0.16,>=0.10 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from botocore) (0.14)\n", "Requirement already satisfied, skipping upgrade: six>=1.5 in /home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages (from python-dateutil<3.0.0,>=2.1->botocore) (1.11.0)\n" ] } ], "source": [ "# First, let's get the latest installations of our dependencies\n", "!pip install --upgrade pip\n", "!pip install boto3 --upgrade\n", "!pip install -U botocore" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Environment Setup\n", "\n", "We need to set up the following data:\n", "\n", " REGION - Region to call A2I.\n", " BUCKET_NAME - A S3 bucket accessible by the given role\n", " Used to store the input files and output results\n", " Must be within the same region A2I is called from\n", " WORKTEAM_ARN - To create your Private Workteam, visit the instructions here: https://docs.aws.amazon.com/sagemaker/latest/dg/sms-workforce-private.html After you have created your workteam, replace \\ below\n", " ROLE - The IAM role used as part of StartHumanLoop. By default, this notebook will use the execution role. You can learn more about IAM Policies here https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies.html\n", "\n" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "\n", "\n", "REGION = 'us-east-1'\n", "BUCKET_NAME = 'comprehend-data-label'\n", "WORKTEAM_ARN= \"\"\n", "\n" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'arn:aws:iam::820570838999:role/service-role/sgdemo-AmazonSageMaker-ExecutionRole'" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sagemaker import get_execution_role\n", "import sagemaker\n", "\n", "# Setting Role to the default SageMaker Execution Role\n", "ROLE = get_execution_role()\n", "display(ROLE)" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "#Setup Bucket and Paths\n", "import os\n", "import boto3\n", "import botocore\n", "\n", "sess = sagemaker.Session()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Client Setup\n", "\n", "Let's setup the clients for Amazon S3, Amazon SageMaker A2I Runtime and Amazon Comprehend.\n" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import io\n", "import json\n", "import uuid\n", "import botocore\n", "import time\n", "import botocore\n", "\n", "# Amazon SageMaker client\n", "sagemaker = boto3.client('sagemaker', REGION)\n", "\n", "# Amazon Comprehend client\n", "comprehend = boto3.client('comprehend', REGION)\n", "\n", "# S3 client\n", "s3 = boto3.client('s3', REGION)\n", "\n", "# A2I Runtime client\n", "a2i_runtime_client = boto3.client('sagemaker-a2i-runtime', REGION)" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "import pprint\n", "\n", "# Pretty print setup\n", "pp = pprint.PrettyPrinter(indent=2)\n", "\n", "# Function to pretty-print AWS SDK responses\n", "def print_response(response):\n", " if 'ResponseMetadata' in response:\n", " del response['ResponseMetadata']\n", " pp.pprint(response)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Sample Data\n", "\n", "Let's create some sample text that we would test our translation with and store it in S3.\n" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ResponseMetadata': {'RequestId': 'C0DFA7E8C61763A9',\n", " 'HostId': 'ziS04WXFEKDcVcMCPjoR98EeEg35Rv5eOsxzjmBDj6p27NsMMIYEbYXzxif02/mRFuW86OcfI+g=',\n", " 'HTTPStatusCode': 200,\n", " 'HTTPHeaders': {'x-amz-id-2': 'ziS04WXFEKDcVcMCPjoR98EeEg35Rv5eOsxzjmBDj6p27NsMMIYEbYXzxif02/mRFuW86OcfI+g=',\n", " 'x-amz-request-id': 'C0DFA7E8C61763A9',\n", " 'date': 'Wed, 03 Jun 2020 01:21:20 GMT',\n", " 'x-amz-version-id': 'IPcN1E2HLxWLBkgEpAk5JVZVgBezaVH7',\n", " 'etag': '\"ba9b13b50673313a99cee6a1d8fdc1c6\"',\n", " 'content-length': '0',\n", " 'server': 'AmazonS3'},\n", " 'RetryAttempts': 0},\n", " 'ETag': '\"ba9b13b50673313a99cee6a1d8fdc1c6\"',\n", " 'VersionId': 'IPcN1E2HLxWLBkgEpAk5JVZVgBezaVH7'}" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "translation_text = \"\"\"\n", "Just then another visitor entered the drawing room: Prince Andrew Bolkónski, the little princess’ husband. He was a very handsome young man, of medium height, with firm, clearcut features. Everything about him, from his weary, bored expression to his quiet, measured step, offered a most striking contrast to his quiet, little wife. It was evident that he not only knew everyone in the drawing room, but had found them to be so tiresome that it wearied him to look at or listen to them. And among all these faces that he found so tedious, none seemed to bore him so much as that of his pretty wife. He turned away from her with a grimace that distorted his handsome face, kissed Anna Pávlovna’s hand, and screwing up his eyes scanned the whole company.\n", "\"\"\"\n", "\n", "key = \"input/test.txt\"\n", "\n", "s3.put_object(Bucket=BUCKET_NAME, Key=key, Body=translation_text)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Create Control Plane Resources\n", "Create a Worker Task Tempalte\n", "\n", "Create a human task UI resource, giving a UI template in liquid html. This template will be rendered to the human workers whenever human loop is required.\n", "\n", "For over 70 pre built UIs, check: https://github.com/aws-samples/amazon-a2i-sample-task-uis.\n", "\n", "We will be taking translation review and correction UI and filling in the object categories in the labels variable in the template.\n" ] }, { "cell_type": "code", "execution_count": 265, "metadata": {}, "outputs": [], "source": [ "template = \"\"\"\n", "\n", "\n", "\n", "\n", " \n", "
    \n", "
  1. Read the text carefully.
  2. \n", "
  3. Highlight words, phrases, or sections of the text.
  4. \n", "
  5. Choose the label that best matches what you have highlighted.
  6. \n", "
  7. To change a label, choose highlighted text and select a new label.
  8. \n", "
  9. To remove a label from highlighted text, choose the X next to the abbreviated label name on the highlighted text.
  10. \n", "
  11. You can select all of a previously highlighted text, but not a portion of it.
  12. \n", "
\n", "
\n", "\n", " \n", " Apply labels to words or phrases.\n", " \n", "\n", " \n", "\n", "\"\"\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Create a Worker Task Template Creator Function\n", "\n", "This function would be a higher level abstration, on the SageMaker package's method to create the Worker Task Template which we will use in the next step to create a human review workflow.\n" ] }, { "cell_type": "code", "execution_count": 266, "metadata": {}, "outputs": [], "source": [ "def create_task_ui(task_ui_name, template):\n", " '''\n", " Creates a Human Task UI resource.\n", "\n", " Returns:\n", " struct: HumanTaskUiArn\n", " '''\n", " response = sagemaker.create_human_task_ui(\n", " HumanTaskUiName=task_ui_name,\n", " UiTemplate={'Content': template})\n", " return response" ] }, { "cell_type": "code", "execution_count": 268, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "arn:aws:sagemaker:us-east-1:820570838999:human-task-ui/a2i-comprehend-test-12-ue-1\n" ] } ], "source": [ "\n", "\n", "# Task UI name - this value is unique per account and region. You can also provide your own value here.\n", "taskUIName = 'a2i-comprehend-test-12-ue-1'\n", "\n", "# Create task UI\n", "humanTaskUiResponse = create_task_ui(taskUIName, template)\n", "humanTaskUiArn = humanTaskUiResponse['HumanTaskUiArn']\n", "print(humanTaskUiArn)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating the Flow Definition\n", "\n", "In this section, we're going to create a flow definition definition. Flow Definitions allow us to specify:\n", "\n", " The workforce that your tasks will be sent to.\n", " The instructions that your workforce will receive. This is called a worker task template.\n", " Where your output data will be stored.\n", "\n", "This demo is going to use the API, but you can optionally create this workflow definition in the console as well.\n", "\n", "For more details and instructions, see: https://docs.aws.amazon.com/sagemaker/latest/dg/a2i-create-flow-definition.html.\n" ] }, { "cell_type": "code", "execution_count": 269, "metadata": {}, "outputs": [], "source": [ "def create_flow_definition(flow_definition_name):\n", " '''\n", " Creates a Flow Definition resource\n", "\n", " Returns:\n", " struct: FlowDefinitionArn\n", " '''\n", " response = sagemaker.create_flow_definition(\n", " FlowDefinitionName= flow_definition_name,\n", " RoleArn= ROLE,\n", " HumanLoopConfig= {\n", " \"WorkteamArn\": WORKTEAM_ARN,\n", " \"HumanTaskUiArn\": humanTaskUiArn,\n", " \"TaskCount\": 1,\n", " \"TaskDescription\": \"Please review the entities and labels done using Amazon Comprehend and make corrections and improvements.\",\n", " \"TaskTitle\": \"Review and Improve entity.\"\n", " },\n", " OutputConfig={\n", " \"S3OutputPath\" : \"s3://\"+BUCKET_NAME+\"/\"\n", " }\n", " )\n", " \n", " return response['FlowDefinitionArn']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Now we are ready to create our flow definition" ] }, { "cell_type": "code", "execution_count": 270, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "arn:aws:sagemaker:us-east-1:820570838999:flow-definition/comprehend-a2i-6943a9bd-c543-4b52-bd10-2fd9434e9d2f\n" ] } ], "source": [ "\n", "\n", "# Flow definition name - this value is unique per account and region. You can also provide your own value here.\n", "uniqueId = str(uuid.uuid4())\n", "flowDefinitionName = f'comprehend-a2i-{uniqueId}' \n", "\n", "flowDefinitionArn = create_flow_definition(flowDefinitionName)\n", "print(flowDefinitionArn)\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Data Load" ] }, { "cell_type": "code", "execution_count": 271, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test\n" ] } ], "source": [ "# Get file from S3 and load it into a variable\n", "file_contents = s3.get_object(Bucket=BUCKET_NAME, Key=key)['Body'].read().decode(\"utf-8\", 'ignore')\n", "\n", "# Get just the filename without prefix or suffix\n", "fileName = key[key.rindex('/')+1:key.rindex('.')]\n", "print(fileName)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Comprehend Documents\n", "\n", "Now that we have the Human Review Workflow set up, we can comprehend our documents and pass them over to a Human Loop for review.\n" ] }, { "cell_type": "code", "execution_count": 274, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Splitting file and performing translation\n", "{'SourceLanguage': 'English', 'sourceLanguageCode': 'en', 'rowCount': 0, 'labels': [{'label': 'PERSON'}, {'label': ''}, {'label': ''}, {'label': ''}, {'label': ''}, {'label': 'PERSON'}], 'taskObject': [{'originalText': '\\nJust then another visitor entered the drawing room: Prince Andrew Bolkónski, the little princess’ husband.'}, {'originalText': ' He was a very handsome young man, of medium height, with firm, clearcut features.'}, {'originalText': ' Everything about him, from his weary, bored expression to his quiet, measured step, offered a most striking contrast to his quiet, little wife.'}, {'originalText': ' It was evident that he not only knew everyone in the drawing room, but had found them to be so tiresome that it wearied him to look at or listen to them.'}, {'originalText': ' And among all these faces that he found so tedious, none seemed to bore him so much as that of his pretty wife.'}, {'originalText': ' He turned away from her with a grimace that distorted his handsome face, kissed Anna Pávlovna’s hand, and screwing up his eyes scanned the whole company.'}], 'bucketName': 'comprehend-data-label', 'keyName': 'input/test.txt'}\n" ] } ], "source": [ "# Create the human loop input JSON object\n", "humanLoopInput = {\n", " 'SourceLanguage' : 'English',\n", " 'sourceLanguageCode':'en',\n", " 'rowCount': 0,\n", " 'labels' : [],\n", " 'taskObject':[],\n", " 'bucketName': BUCKET_NAME,\n", " 'keyName': key\n", "}\n", "\n", "translatedText = ''\n", "rowCount = 0\n", "\n", "print('Splitting file and performing translation') \n", "textvalues=[]\n", "# split the body by period to get individual sentences\n", "for sentence in file_contents.split('.'):\n", " if len(sentence.lstrip()) > 0:\n", " # call translation\n", " comprehend_response = comprehend.detect_entities(\n", " Text=sentence + '.',\n", " LanguageCode='en')\n", "\n", " entities = comprehend_response['Entities']\n", " \n", " textvalues=[]\n", " for s in entities:\n", " textvalues.append(s.get(\"Type\"))\n", " set(textvalues)\n", " str1 = ';'.join(textvalues)\n", " originalText = sentence + ' '\n", " labels={ 'label':str1}\n", " taskObject = {\n", " 'originalText': sentence + '.'\n", " }\n", " \n", " #humanLoopInput['taskObject'].append(taskObject)\n", " \n", " rowCount+=1\n", " humanLoopInput['taskObject'].append(taskObject) \n", " humanLoopInput['labels'].append(labels) \n", "print(humanLoopInput)\n", " \n", "\n", "humanLoopInput['rowCount'] = rowCount\n", "\n" ] }, { "cell_type": "code", "execution_count": 275, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Starting human loop - Comprehend-A2I-Text1591733581953\n", "Writing translated text to comprehend-data-label/machine_output/MO-test.txt\n" ] }, { "data": { "text/plain": [ "{'ResponseMetadata': {'RequestId': 'A6A1D06641439F92',\n", " 'HostId': 'AHCt+LItmiSaeRoJOx8u52MzlvceXEh8xCv8AmAxYC6ehzLZvpp+XQMZMBDf8THa/YLBDj0bW0s=',\n", " 'HTTPStatusCode': 200,\n", " 'HTTPHeaders': {'x-amz-id-2': 'AHCt+LItmiSaeRoJOx8u52MzlvceXEh8xCv8AmAxYC6ehzLZvpp+XQMZMBDf8THa/YLBDj0bW0s=',\n", " 'x-amz-request-id': 'A6A1D06641439F92',\n", " 'date': 'Tue, 09 Jun 2020 20:13:03 GMT',\n", " 'x-amz-version-id': 'Nk50gmevnCzTROad34n3TBvEmGKhYL8d',\n", " 'etag': '\"d41d8cd98f00b204e9800998ecf8427e\"',\n", " 'content-length': '0',\n", " 'server': 'AmazonS3'},\n", " 'RetryAttempts': 0},\n", " 'ETag': '\"d41d8cd98f00b204e9800998ecf8427e\"',\n", " 'VersionId': 'Nk50gmevnCzTROad34n3TBvEmGKhYL8d'}" ] }, "execution_count": 275, "metadata": {}, "output_type": "execute_result" } ], "source": [ "humanLoopName = 'Comprehend-A2I-Text' + str(int(round(time.time() * 1000)))\n", "print('Starting human loop - ' + humanLoopName)\n", "response = a2i_runtime_client.start_human_loop(\n", " HumanLoopName=humanLoopName,\n", " FlowDefinitionArn= flowDefinitionArn,\n", " HumanLoopInput={\n", " 'InputContent': json.dumps(humanLoopInput)\n", " }\n", " )\n", "\n", "# write the machine translated file to S3 bucket.\n", "targetKey = ('machine_output/MO-{0}.txt').format(fileName)\n", "print ('Writing translated text to '+ BUCKET_NAME + '/' + targetKey)\n", "s3.put_object(Bucket=BUCKET_NAME, Key=targetKey, Body=translatedText.encode('utf-8'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check Status of Human Loop\n", "\n", "Let's define a function that allows us to check the status of Human Loop progress.\n", "\n" ] }, { "cell_type": "code", "execution_count": 276, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "HumanLoop Name: Comprehend-A2I-Text1591733581953\n", "HumanLoop Status: InProgress\n", "HumanLoop Output Destination: {'OutputS3Uri': 's3://comprehend-data-label/comprehend-a2i-6943a9bd-c543-4b52-bd10-2fd9434e9d2f/2020/06/09/20/13/02/Comprehend-A2I-Text1591733581953/output.json'}\n", "\n", "\n" ] } ], "source": [ "\n", "resp = a2i_runtime_client.describe_human_loop(HumanLoopName=humanLoopName)\n", "print(f'HumanLoop Name: {humanLoopName}')\n", "print(f'HumanLoop Status: {resp[\"HumanLoopStatus\"]}')\n", "print(f'HumanLoop Output Destination: {resp[\"HumanLoopOutput\"]}')\n", "print('\\n')\n", "\n", "humanLoopStatus = resp[\"HumanLoopStatus\"]\n", "outputFilePath = resp[\"HumanLoopOutput\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Wait For Work Team to Complete Task" ] }, { "cell_type": "code", "execution_count": 277, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!\n", "https://1ajv1yl4hz.labeling.us-east-1.sagemaker.aws\n" ] } ], "source": [ "workteamName = WORKTEAM_ARN[WORKTEAM_ARN.rfind('/') + 1:]\n", "print(\"Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!\")\n", "print('https://' + sagemaker.describe_workteam(WorkteamName=workteamName)['Workteam']['SubDomain'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Check Status of Human Loop Again and process Task Results\n", "\n", "Once the Human Loop Status has changed to completed, you can post process the results to build the final file, with Human Reviewed corrections, for future use.\n" ] }, { "cell_type": "code", "execution_count": 278, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Output File successfully stored in s3://comprehend-data-label/post_edits/PO-test.txt\n" ] } ], "source": [ "resp = a2i_runtime_client.describe_human_loop(HumanLoopName=humanLoopName)\n", "humanLoopStatus = resp[\"HumanLoopStatus\"]\n", "outputFilePath = resp[\"HumanLoopOutput\"]['OutputS3Uri']\n", "\n", "if humanLoopStatus == \"Completed\":\n", " # Remove s3:// from S3 File Path\n", " outputFilePath = outputFilePath.replace(\"s3://\", \"\")\n", "\n", " # recreate the output text document, including post edits.\n", " tmsFile = s3.get_object(Bucket=outputFilePath.split('/')[0],\n", " Key=\"/\".join(outputFilePath.split('/')[1:]))['Body'].read()\n", "\n", " tmsFile = json.loads(tmsFile.decode('utf-8'))\n", " inputContent = tmsFile['inputContent']\n", " rowcount = inputContent['rowCount']\n", " answerContent = tmsFile['humanAnswers'][0]['answerContent']\n", " editedContent = ''\n", "\n", " # extract the file name\n", " targetKeyName = inputContent['keyName']\n", " targetKeyName = targetKeyName[targetKeyName.index('/') + 1: len(targetKeyName)]\n", "\n", " # save the file.\n", " s3.put_object(Bucket=BUCKET_NAME,\n", " Key='post_edits/PO-{0}'.format(targetKeyName),\n", " Body=editedContent.encode('utf-8'))\n", "\n", " print(\"Output File successfully stored in s3://{0}/post_edits/PO-{1}\".format(BUCKET_NAME,targetKeyName))\n", "elif humanLoopStatus == \"InProgress\":\n", " print(\"Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!\")\n", " print('https://' + sagemaker.describe_workteam(WorkteamName=workteamName)['Workteam']['SubDomain'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "conda_python3", "language": "python", "name": "conda_python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" } }, "nbformat": 4, "nbformat_minor": 2 }