{ "cells": [ { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "import boto3\n", "import time \n", "import os\n", "import json\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from matplotlib import colors\n", "\n", "from collections import namedtuple\n", "from urllib.parse import urlparse\n", "transcribe = boto3.client('transcribe')\n", "s3 = boto3.client('s3')" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "media=[\n", " 's3://broadcast-monitoring-assets/audio/BarackObamaPodcast1.mp3',\n", " 's3://broadcast-monitoring-assets/audio/VOA_U.S._News_in_Russian_90sec.mp3',\n", " 's3://broadcast-monitoring-assets/audio/hongkong-covid-with-audio-scenarios-90sec.mp3'\n", "]\n", "output_bucket='video-processing-angelaw-496010403454-us-east-1'" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "def genereate_job_name(prefix='lang_id'):\n", " timestamp = time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())\n", " job_id = prefix + timestamp\n", " return job_id\n", "\n", "def start_job(media_s3_uri, job_prefix='lang_id'):\n", " job_id = genereate_job_name(prefix=job_prefix)\n", " response = transcribe.start_transcription_job(\n", " TranscriptionJobName=job_id,\n", " IdentifyLanguage = True,\n", " MediaFormat='mp3',\n", " Media={\n", " 'MediaFileUri': media_s3_uri\n", " },\n", " OutputBucketName=output_bucket\n", " )\n", " return job_id\n" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "TranscribeJobRecord = namedtuple ('TranscribeJobRecord', ['JobCompletion','JobStart','LangaugeDetectionTime','LangaugeDetectionScore','LangaugeDetected','LanguageId','JobResponse'])\n" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "def timed_transcribe(media_s3_uri):\n", " completed = False\n", " language_detected = None\n", " \n", " starttime=time.time()\n", " \n", " job_prefix = os.path.splitext(os.path.basename(media_s3_uri))[0]\n", " \n", " job_id = start_job(media_s3_uri, job_prefix)\n", " print('start job {} took {:10.4f} seconds'.format(job_id, time.time() - starttime))\n", " while not completed:\n", " response = transcribe.get_transcription_job(\n", " TranscriptionJobName=job_id\n", " )\n", " status = response['TranscriptionJob']['TranscriptionJobStatus']\n", " if status in [ 'COMPLETED' , 'FAILED' ]:\n", " completed = True\n", " if not language_detected and 'LanguageCode' in response['TranscriptionJob']:\n", " language_detected = response['TranscriptionJob']['LanguageCode']\n", " language_score = response['TranscriptionJob']['IdentifiedLanguageScore']\n", " language_detection_seconds = time.time() - starttime\n", " print('{} detected in {:10.3f} seconds with {} score'.format(language_detected, language_detection_seconds, language_score))\n", " time.sleep(0.5)\n", " if status != 'COMPLETED':\n", " print(f'job {job_id} failed for {media_s3_uri}')\n", " return TranscribeJobRecord(JobCompletion=None, \n", " JobStart=None, \n", " LangaugeDetectionTime=None, \n", " LangaugeDetectionScore=None, \n", " LangaugeDetected=None, \n", " LanguageId=None, \n", " JobResponse =response)\n", " \n", " job_startTime = response['TranscriptionJob']['StartTime']\n", " job_creationTime = response['TranscriptionJob']['CreationTime']\n", " job_completionTime = response['TranscriptionJob']['CompletionTime']\n", " job_start_seconds = (job_startTime-job_creationTime).total_seconds()\n", " job_completion_seconds = (job_completionTime-job_creationTime).total_seconds()\n", " print('job started in {:10.3f} seconds'.format(job_start_seconds))\n", " print('job completed in {:10.3f} seconds'.format(job_completion_seconds))\n", " u = urlparse(response['TranscriptionJob']['Transcript']['TranscriptFileUri'])\n", " bucket = u.path.split('/')[1]\n", " path = '/'.join(u.path.split('/')[2:])\n", " transcript_file = os.path.join('tmp', f'{job_id}.json')\n", " s3.download_file(bucket, path, transcript_file)\n", " with open(transcript_file, 'r') as f:\n", " output = json.load(f)\n", " print('transcript: ' + output['results']['transcripts'][0]['transcript'][:150] + '...')\n", " language_id_alts = output['results']['language_identification']\n", " print('language_id: ' + json.dumps(language_id_alts))\n", "\n", " return TranscribeJobRecord(JobCompletion=job_completion_seconds, \n", " JobStart=job_start_seconds, \n", " LangaugeDetectionTime=language_detection_seconds, \n", " LangaugeDetectionScore=language_score, \n", " LangaugeDetected=language_detected, \n", " LanguageId=language_id_alts, \n", " JobResponse = response)\n" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "start job covid_mandarin000-2020-06-02-17-56-56 took 0.3926 seconds\n", "zh-CN detected in 32.360 seconds with 1.0 score\n", "job started in 0.030 seconds\n", "job completed in 58.250 seconds\n", "transcript: 截至一月二十三日,香港共出现两例新型冠状病毒感染的肺炎个案,其中手...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"es-US\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"es-ES\"}]\n" ] } ], "source": [ "response = timed_transcribe('s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin000.mp3')" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [], "source": [ "def get_audio_s3_uris(s3_bucket, s3_prefix):\n", " s3_paths = []\n", " s3bucket = boto3.resource('s3').Bucket(s3_bucket)\n", "\n", " for i, s3_object in enumerate(s3bucket.objects.filter(Prefix=s3_prefix)):\n", " if s3_object.key.endswith('mp3'):\n", " s3_paths.append(f's3://{s3_bucket}/{s3_object.key}')\n", " return s3_paths" ] }, { "cell_type": "code", "execution_count": 93, "metadata": {}, "outputs": [], "source": [ "s3_paths = get_audio_s3_uris('broadcast-monitoring-assets','audio/')" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "start job BarackObamaPodcast1-2020-06-02-17-59-03 took 0.3116 seconds\n", "en-US detected in 47.140 seconds with 0.5891635417938232 score\n", "job started in 0.024 seconds\n", "job completed in 164.576 seconds\n", "transcript: This is Senator Barack Obama, and today is Thursday, September 8th, 2005. Welcome to my first podcast. You know, in the future we're gonna be discussi...\n", "language_id: [{\"score\": \"0.5892\", \"code\": \"en-US\"}, {\"score\": \"0.2527\", \"code\": \"en-AU\"}, {\"score\": \"0.1537\", \"code\": \"en-GB\"}, {\"score\": \"0.0036\", \"code\": \"en-AB\"}, {\"score\": \"0.0004\", \"code\": \"en-IE\"}]\n", "start job VOA_U.S._News_in_Russian_90sec-2020-06-02-18-01-49 took 0.1820 seconds\n", "ru-RU detected in 54.064 seconds with 0.9999224543571472 score\n", "job started in 0.022 seconds\n", "job completed in 207.216 seconds\n", "transcript: власти США подтвердили, чтобы газ добывают предприятия, захваченного террористами. Приземлился американский военный самолет, который должен вывести ос...\n", "language_id: [{\"score\": \"0.9999\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}]\n", "start job hongkong-covid-with-audio-scenarios-90sec-2020-06-02-18-05-18 took 0.2349 seconds\n", "zh-CN detected in 50.597 seconds with 0.9997016191482544 score\n", "job started in 0.023 seconds\n", "job completed in 161.402 seconds\n", "transcript: 截至一月二十三日,香港共出现两例新型冠状病毒感染的肺炎个案,其中首位病患是来自武汉的游客。第二位患者为香港本地居民,曾于一月十日前往武汉停留,约九日后返港。两者到岗前建议出现发热症状,目前已送往香港玛嘉丽医院传染病中心隔离一致。一月二十三日,记者走访香港都区,大部分香港市民以戴上口罩,严格防范新型冠...\n", "language_id: [{\"score\": \"0.9997\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"en-US\"}, {\"score\": \"0.0001\", \"code\": \"en-AU\"}]\n", "start job covid_mandarin000-2020-06-02-18-08-00 took 0.2085 seconds\n", "zh-CN detected in 36.977 seconds with 1.0 score\n", "job started in 0.028 seconds\n", "job completed in 112.958 seconds\n", "transcript: 截至一月二十三日,香港共出现两例新型冠状病毒感染的肺炎个案,其中手...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"es-US\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"es-ES\"}]\n", "start job covid_mandarin001-2020-06-02-18-09-55 took 0.3045 seconds\n", "zh-CN detected in 45.029 seconds with 1.0 score\n", "job started in 0.155 seconds\n", "job completed in 159.779 seconds\n", "transcript: 病患是来自武汉的游客。第二位患者为香港本地居民,曾于一月十日...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"ja-JP\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"ko-KR\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}]\n", "start job covid_mandarin002-2020-06-02-18-12-35 took 0.2086 seconds\n", "zh-CN detected in 28.588 seconds with 1.0 score\n", "job started in 0.024 seconds\n", "job completed in 99.665 seconds\n", "transcript: 往武汉停留约九日后反等。两者到岗前建议出现发热症状,目前已送往...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"ko-KR\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}, {\"score\": \"0.0001\", \"code\": \"fa-IR\"}]\n", "start job covid_mandarin003-2020-06-02-18-14-17 took 0.2071 seconds\n", "zh-CN detected in 26.015 seconds with 1.0 score\n", "job started in 0.024 seconds\n", "job completed in 100.838 seconds\n", "transcript: 香港玛嘉丽医院传染病中心隔离一致。一月二十三日,记者走访香港都去...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"ko-KR\"}, {\"score\": \"0.0001\", \"code\": \"ja-JP\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}]\n", "start job covid_mandarin004-2020-06-02-18-15-59 took 0.1555 seconds\n", "zh-CN detected in 25.914 seconds with 1.0 score\n", "job started in 0.021 seconds\n", "job completed in 120.037 seconds\n", "transcript: 大部分香港市民以戴上口罩,严格防范新型冠状病毒肺炎在香...\n", "language_id: [{\"score\": \"1\", \"code\": \"zh-CN\"}, {\"score\": \"0.0001\", \"code\": \"fr-CA\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}, {\"score\": \"0.0001\", \"code\": \"fr-FR\"}]\n", "start job covid_mandarin005-2020-06-02-18-18-00 took 0.2182 seconds\n", "job covid_mandarin005-2020-06-02-18-18-00 failed for s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin005.mp3\n", "start job covid_mandarin006-2020-06-02-18-18-22 took 0.1798 seconds\n", "job covid_mandarin006-2020-06-02-18-18-22 failed for s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin006.mp3\n", "start job covid_mandarin007-2020-06-02-18-18-42 took 0.1981 seconds\n", "job covid_mandarin007-2020-06-02-18-18-42 failed for s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin007.mp3\n", "start job covid_mandarin008-2020-06-02-18-19-03 took 0.2009 seconds\n", "job covid_mandarin008-2020-06-02-18-19-03 failed for s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin008.mp3\n", "start job covid_mandarin009-2020-06-02-18-19-22 took 0.2871 seconds\n", "job covid_mandarin009-2020-06-02-18-19-22 failed for s3://broadcast-monitoring-assets/audio/mandarin-6sec/covid_mandarin009.mp3\n", "start job covid_mandarin010-2020-06-02-18-19-38 took 0.4059 seconds\n", "en-US detected in 15.595 seconds with 0.8695744276046753 score\n", "job started in 0.022 seconds\n", "job completed in 101.562 seconds\n", "transcript: This is Senator Barack Obama, and today is Thursday, september 8th 2005....\n", "language_id: [{\"score\": \"0.8696\", \"code\": \"en-US\"}, {\"score\": \"0.0819\", \"code\": \"en-GB\"}, {\"score\": \"0.0485\", \"code\": \"en-AU\"}, {\"score\": \"0.0001\", \"code\": \"it-IT\"}, {\"score\": \"0.0001\", \"code\": \"en-AB\"}]\n", "start job covid_mandarin011-2020-06-02-18-21-21 took 0.1815 seconds\n", "en-US detected in 15.164 seconds with 0.8507449626922607 score\n", "job started in 0.023 seconds\n", "job completed in 87.591 seconds\n", "transcript: Welcome to my first podcast. You in the future, we're gonna be discussing a wide variety of issues and...\n", "language_id: [{\"score\": \"0.8507\", \"code\": \"en-US\"}, {\"score\": \"0.106\", \"code\": \"en-AU\"}, {\"score\": \"0.0394\", \"code\": \"en-GB\"}, {\"score\": \"0.003\", \"code\": \"en-AB\"}, {\"score\": \"0.0004\", \"code\": \"pt-PT\"}]\n", "start job covid_mandarin012-2020-06-02-18-22-50 took 0.1715 seconds\n", "en-US detected in 26.043 seconds with 0.8252528309822083 score\n", "job started in 0.020 seconds\n", "job completed in 70.803 seconds\n", "transcript: sir, questions from constituents. But today I think like most of you, I've been spending a lot....\n", "language_id: [{\"score\": \"0.8253\", \"code\": \"en-US\"}, {\"score\": \"0.1116\", \"code\": \"en-AU\"}, {\"score\": \"0.0618\", \"code\": \"en-GB\"}, {\"score\": \"0.0009\", \"code\": \"en-AB\"}, {\"score\": \"0.0002\", \"code\": \"pt-PT\"}]\n", "start job covid_mandarin013-2020-06-02-18-24-02 took 0.2579 seconds\n", "en-US detected in 24.181 seconds with 0.5824006795883179 score\n", "job started in 0.106 seconds\n", "job completed in 88.914 seconds\n", "transcript: time thinking about Hurricane Katrina, and so that's gonna be the focus of today's....\n", "language_id: [{\"score\": \"0.5824\", \"code\": \"en-US\"}, {\"score\": \"0.3394\", \"code\": \"en-AU\"}, {\"score\": \"0.0706\", \"code\": \"en-GB\"}, {\"score\": \"0.0041\", \"code\": \"pt-PT\"}, {\"score\": \"0.0009\", \"code\": \"id-ID\"}]\n", "start job covid_mandarin014-2020-06-02-18-25-32 took 0.2180 seconds\n", "en-US detected in 26.434 seconds with 0.9942843914031982 score\n", "job started in 0.026 seconds\n", "job completed in 54.911 seconds\n", "transcript: just podcast. On Monday, I had the opportunity to visit the Houston Astrodome in the Reliant....\n", "language_id: [{\"score\": \"0.9943\", \"code\": \"en-US\"}, {\"score\": \"0.0038\", \"code\": \"en-AU\"}, {\"score\": \"0.0017\", \"code\": \"en-GB\"}, {\"score\": \"0.0002\", \"code\": \"en-AB\"}, {\"score\": \"0.0001\", \"code\": \"it-IT\"}]\n", "start job voa_russian000-2020-06-02-18-26-28 took 0.1754 seconds\n", "job voa_russian000-2020-06-02-18-26-28 failed for s3://broadcast-monitoring-assets/audio/voa-russian-6sec/voa_russian000.mp3\n", "start job voa_russian001-2020-06-02-18-26-49 took 0.1720 seconds\n", "ru-RU detected in 17.444 seconds with 0.9998711347579956 score\n", "job started in 0.026 seconds\n", "job completed in 112.074 seconds\n", "transcript: Власти США подтвердили, что вблизи города, вызывающего предприятия, захваченного террористами, приземлился американский...\n", "language_id: [{\"score\": \"0.9999\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"fr-FR\"}, {\"score\": \"0.0001\", \"code\": \"ar-AE\"}]\n", "start job voa_russian002-2020-06-02-18-28-42 took 0.3172 seconds\n", "ru-RU detected in 16.637 seconds with 0.9999451041221619 score\n", "job started in 0.082 seconds\n", "job completed in 110.460 seconds\n", "transcript: военный самолет, который должен вывести освобожденных заложников, которым может понадобиться медицинская помощь....\n", "language_id: [{\"score\": \"0.9999\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}, {\"score\": \"0.0001\", \"code\": \"tr-TR\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}]\n", "start job voa_russian003-2020-06-02-18-30-33 took 0.2949 seconds\n", "ru-RU detected in 18.675 seconds with 0.9999231100082397 score\n", "job started in 0.031 seconds\n", "job completed in 106.681 seconds\n", "transcript: ранее алжирских войны попытались силой освободить заложников, судя по предварительным данным, операция не увенчалась....\n", "language_id: [{\"score\": \"0.9999\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"it-IT\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}]\n", "start job voa_russian004-2020-06-02-18-32-21 took 0.2708 seconds\n", "ru-RU detected in 34.021 seconds with 0.9995741248130798 score\n", "job started in 0.022 seconds\n", "job completed in 132.235 seconds\n", "transcript: успехом, особенно в части заложников, ничего не известно. Американские эксперты прибыли в Японии, чтобы...\n", "language_id: [{\"score\": \"0.9996\", \"code\": \"ru-RU\"}, {\"score\": \"0.0004\", \"code\": \"es-US\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"es-ES\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}]\n", "start job voa_russian005-2020-06-02-18-34-35 took 0.2019 seconds\n", "ru-RU detected in 56.424 seconds with 0.9999998211860657 score\n", "job started in 0.025 seconds\n", "job completed in 164.750 seconds\n", "transcript: помочь выявить причины поломки авиалайнера Боинг семьсот восемьдесят, тем более известного, как Владимир ранее Владимира....\n", "language_id: [{\"score\": \"1\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}]\n", "start job voa_russian006-2020-06-02-18-37-20 took 0.3833 seconds\n", "ru-RU detected in 76.643 seconds with 0.9512348771095276 score\n", "job started in 0.026 seconds\n", "job completed in 171.654 seconds\n", "transcript: в квартирах ряда японских авиакомпании были обнаружены серьезные проблемы с батареями, топливной системой, тормозами....\n", "language_id: [{\"score\": \"0.9512\", \"code\": \"ru-RU\"}, {\"score\": \"0.0444\", \"code\": \"pt-PT\"}, {\"score\": \"0.0035\", \"code\": \"es-US\"}, {\"score\": \"0.0004\", \"code\": \"es-ES\"}, {\"score\": \"0.0003\", \"code\": \"en-GB\"}]\n", "start job voa_russian007-2020-06-02-18-40-13 took 0.2228 seconds\n", "ru-RU detected in 81.305 seconds with 0.9908612370491028 score\n", "job started in 0.038 seconds\n", "job completed in 181.032 seconds\n", "transcript: инцидента привели к тому, что власти США и ряд других странах или запрета на полеты в плане половина старых стран....\n", "language_id: [{\"score\": \"0.9909\", \"code\": \"ru-RU\"}, {\"score\": \"0.0059\", \"code\": \"id-ID\"}, {\"score\": \"0.0031\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}, {\"score\": \"0.0001\", \"code\": \"en-AU\"}]\n", "start job voa_russian008-2020-06-02-18-43-16 took 0.3864 seconds\n", "ru-RU detected in 87.012 seconds with 0.9999921917915344 score\n", "job started in 0.042 seconds\n", "job completed in 201.321 seconds\n", "transcript: легендарный велогонщика Троекратный, победитель гонки Турции Франции, впоследствии лишен всех наград, включая олимпийской медали....\n", "language_id: [{\"score\": \"1\", \"code\": \"ru-RU\"}, {\"score\": \"0.0001\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"ms-MY\"}, {\"score\": \"0.0001\", \"code\": \"en-GB\"}, {\"score\": \"0.0001\", \"code\": \"pt-BR\"}]\n", "start job voa_russian009-2020-06-02-18-46-39 took 0.2608 seconds\n", "ru-RU detected in 75.853 seconds with 0.7539833188056946 score\n", "job started in 0.021 seconds\n", "job completed in 177.092 seconds\n", "transcript: в конце концов признался в том, что использовал рост рынка, сказал об этом ведущий. Привыкли, сказал....\n", "language_id: [{\"score\": \"0.754\", \"code\": \"ru-RU\"}, {\"score\": \"0.2289\", \"code\": \"pt-PT\"}, {\"score\": \"0.0082\", \"code\": \"ms-MY\"}, {\"score\": \"0.0023\", \"code\": \"id-ID\"}, {\"score\": \"0.0019\", \"code\": \"de-DE\"}]\n", "start job voa_russian010-2020-06-02-18-49-37 took 0.1830 seconds\n", "ru-RU detected in 69.093 seconds with 0.9964892268180847 score\n", "job started in 0.021 seconds\n", "job completed in 161.303 seconds\n", "transcript: во время своей спортивной карьеры, когда был готов идти на все ради победы во время интервью Армстронга беспокоят....\n", "language_id: [{\"score\": \"0.9965\", \"code\": \"ru-RU\"}, {\"score\": \"0.0017\", \"code\": \"pt-BR\"}, {\"score\": \"0.0005\", \"code\": \"ar-AE\"}, {\"score\": \"0.0005\", \"code\": \"en-GB\"}, {\"score\": \"0.0003\", \"code\": \"pt-PT\"}]\n", "start job voa_russian011-2020-06-02-18-52-19 took 0.1790 seconds\n", "ru-RU detected in 57.383 seconds with 0.9985325932502747 score\n", "job started in 0.027 seconds\n", "job completed in 165.891 seconds\n", "transcript: практически не проявлял эмоций. В городе Парк Сити, штат Юта, открылся Александр в течение...\n", "language_id: [{\"score\": \"0.9985\", \"code\": \"ru-RU\"}, {\"score\": \"0.0011\", \"code\": \"pt-BR\"}, {\"score\": \"0.0003\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"tr-TR\"}, {\"score\": \"0.0001\", \"code\": \"it-IT\"}]\n", "start job voa_russian012-2020-06-02-18-55-06 took 0.5102 seconds\n", "ru-RU detected in 49.199 seconds with 0.5401967763900757 score\n", "job started in 0.088 seconds\n", "job completed in 158.415 seconds\n", "transcript: если зрители увидят сто девятнадцать из тридцати двух государств мира с Анной придумал и организовал....\n", "language_id: [{\"score\": \"0.5402\", \"code\": \"ru-RU\"}, {\"score\": \"0.4355\", \"code\": \"pt-PT\"}, {\"score\": \"0.0208\", \"code\": \"pt-BR\"}, {\"score\": \"0.0027\", \"code\": \"ko-KR\"}, {\"score\": \"0.0003\", \"code\": \"tr-TR\"}]\n", "start job voa_russian013-2020-06-02-18-57-46 took 0.8469 seconds\n", "ru-RU detected in 50.346 seconds with 0.9994410276412964 score\n", "job started in 0.024 seconds\n", "job completed in 156.141 seconds\n", "transcript: это прославленный актер и режиссер Роберт для помощи независимым производителям....\n", "language_id: [{\"score\": \"0.9994\", \"code\": \"ru-RU\"}, {\"score\": \"0.0002\", \"code\": \"pt-PT\"}, {\"score\": \"0.0001\", \"code\": \"ar-SA\"}, {\"score\": \"0.0001\", \"code\": \"tr-TR\"}, {\"score\": \"0.0001\", \"code\": \"id-ID\"}]\n", "start job voa_russian014-2020-06-02-19-00-23 took 0.1946 seconds\n", "job voa_russian014-2020-06-02-19-00-23 failed for s3://broadcast-monitoring-assets/audio/voa-russian-6sec/voa_russian014.mp3\n" ] } ], "source": [ "records = []\n", "for s3_uri in s3_paths:\n", " records.append(timed_transcribe(s3_uri))" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "TranscribeJobRecord(JobCompletion=164.576, JobStart=0.024, LangaugeDetectionTime=47.14001202583313, LangaugeDetectionScore=0.5891635417938232, LangaugeDetected='en-US', LanguageId=[{'score': '0.5892', 'code': 'en-US'}, {'score': '0.2527', 'code': 'en-AU'}, {'score': '0.1537', 'code': 'en-GB'}, {'score': '0.0036', 'code': 'en-AB'}, {'score': '0.0004', 'code': 'en-IE'}], JobResponse={'TranscriptionJob': {'TranscriptionJobName': 'BarackObamaPodcast1-2020-06-02-17-59-03', 'TranscriptionJobStatus': 'COMPLETED', 'LanguageCode': 'en-US', 'MediaSampleRateHertz': 44100, 'MediaFormat': 'mp3', 'Media': {'MediaFileUri': 's3://broadcast-monitoring-assets/audio/BarackObamaPodcast1.mp3'}, 'Transcript': {'TranscriptFileUri': 'https://s3.amazonaws.com/video-processing-angelaw-496010403454-us-east-1/BarackObamaPodcast1-2020-06-02-17-59-03.json'}, 'StartTime': datetime.datetime(2020, 6, 2, 13, 59, 4, 67000, tzinfo=tzlocal()), 'CreationTime': datetime.datetime(2020, 6, 2, 13, 59, 4, 43000, tzinfo=tzlocal()), 'CompletionTime': datetime.datetime(2020, 6, 2, 14, 1, 48, 619000, tzinfo=tzlocal()), 'Settings': {'ChannelIdentification': False, 'ShowAlternatives': False}, 'IdentifyLanguage': True, 'IdentifiedLanguageScore': 0.5891635417938232}, 'ResponseMetadata': {'RequestId': '7b96a797-e105-4a4b-8e25-f2516d41ef27', 'HTTPStatusCode': 200, 'HTTPHeaders': {'content-type': 'application/x-amz-json-1.1', 'date': 'Tue, 02 Jun 2020 18:01:48 GMT', 'x-amzn-requestid': '7b96a797-e105-4a4b-8e25-f2516d41ef27', 'content-length': '673', 'connection': 'keep-alive'}, 'RetryAttempts': 0}})" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "records[0]" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "58.25" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [ "respons" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "filtered_records = records[4:]" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [], "source": [ "lang_detection_time = np.array([r.LangaugeDetectionTime for r in filtered_records if r.LangaugeDetectionTime is not None])" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "22" ] }, "execution_count": 110, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(lang_detection_time)" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOR0lEQVR4nO3cb6xkd13H8c+Xbgm0ECr0SirleksgJYTYP9xUCNhIESy0gcSglohBgu4T1FYxZDHxASYmJTEIDwhxQ0ES+SMWGkkbCwRKBKPFbluk7VLFskAJ0DbIXxOg5euDOVtuN4U7272z9zed1yu52Ttzz06+v8zMvvfMOfdUdwcARvOI3R4AAB6MQAEwJIECYEgCBcCQBAqAIe1ZxIOeeuqpvbGxsYiHBmDJHThw4J7uXttuu4UEamNjIzfccMMiHhqAJVdVX5pnOx/xATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMaa5AVdUpVXVlVX2+qg5W1XMWPRgAq23eX9R9a5Jru/vlVfXIJCctcCYA2D5QVfW4JOcn+b0k6e4fJvnhYscCYNXNswd1RpK7k7yrqs5KciDJpd39/a0bVdXeJHuTZH19fafn3HUb+67Z7REe4NDlF+32CAALNc8xqD1Jzk3y9u4+J8n3k+w7cqPu3t/dm929uba27TUAAeBnmidQdya5s7uvn25fmVmwAGBhtg1Ud389yVeq6szprhckuW2hUwGw8uY9i++PkrxnOoPvjiSvXtxIADBnoLr75iSbC54FAO7nShIADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEPaM89GVXUoyXeT3Jfk3u7eXORQADBXoCbP7+57FjYJAGzhIz4AhjTvHlQn+WhVdZK/7e79R25QVXuT7E2S9fX1Yx5sY981x/wYACyvefegntfd5yZ5cZLXVtX5R27Q3fu7e7O7N9fW1nZ0SABWz1yB6u6vTn/eleSqJOctcigA2DZQVXVyVT328PdJXpTklkUPBsBqm+cY1BOTXFVVh7d/b3dfu9CpAFh52waqu+9IctZxmAUA7uc0cwCGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQ5g5UVZ1QVTdV1dWLHAgAkqPbg7o0ycFFDQIAW80VqKo6PclFSd6x2HEAYGbePai3JHl9kh//tA2qam9V3VBVN9x99907MhwAq2vbQFXVxUnu6u4DP2u77t7f3Zvdvbm2trZjAwKwmubZg3pukpdW1aEk709yQVX9/UKnAmDlbRuo7n5Dd5/e3RtJLknyie5+5cInA2Cl+T0oAIa052g27u5PJvnkQiYBgC3sQQEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADGnbQFXVo6rqM1X12aq6tareeDwGA2C17Zljmx8kuaC7v1dVJyb5dFX9c3f/+4JnA2CFbRuo7u4k35tunjh99SKHAoB59qBSVSckOZDkqUne1t3XP8g2e5PsTZL19fWdnBGOysa+a3Z7hAc4dPlFuz0CLKW5TpLo7vu6++wkpyc5r6qe+SDb7O/uze7eXFtb2+k5AVgxR3UWX3d/K8l1SS5czDgAMDPPWXxrVXXK9P2jk7wwyecXPRgAq22eY1CnJXn3dBzqEUk+0N1XL3YsAFbdPGfx/WeSc47DLABwP1eSAGBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxp20BV1ZOr6rqquq2qbq2qS4/HYACstj1zbHNvktd1941V9dgkB6rqY91924JnA2CFbbsH1d1f6+4bp++/m+RgkictejAAVttRHYOqqo0k5yS5fhHDAMBh83zElySpqsck+WCSy7r7Ow/y871J9ibJ+vr6jg0Iy25j3zW7PcIDHLr8ot0eYVijPVejOd6vnbn2oKrqxMzi9J7u/tCDbdPd+7t7s7s319bWdnJGAFbQPGfxVZIrkhzs7jcvfiQAmG8P6rlJfjfJBVV18/T1kgXPBcCK2/YYVHd/Okkdh1kA4H6uJAHAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMKRtA1VV76yqu6rqluMxEAAk8+1B/V2SCxc8BwA8wLaB6u5/SfLN4zALANxvz049UFXtTbI3SdbX13fqYfkpNvZds9sjPMChyy/a7RFYUqO9lhnHjp0k0d37u3uzuzfX1tZ26mEBWFHO4gNgSAIFwJDmOc38fUn+LcmZVXVnVb1m8WMBsOq2PUmiu19xPAYBgK18xAfAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMKS5AlVVF1bV7VX1harat+ihAGDbQFXVCUneluTFSZ6R5BVV9YxFDwbAaptnD+q8JF/o7ju6+4dJ3p/kZYsdC4BVt2eObZ6U5Ctbbt+Z5JeP3Kiq9ibZO938XlXdfuzjLdypSe7Z7SGO0RBrqDcd80MMsY4dMPw65nyuhl/HHB4Oa0gGWscxvs+3ruMX5/kL8wRqLt29P8n+nXq846Gqbujuzd2e41g8HNaQWMdoHg7reDisIVntdczzEd9Xkzx5y+3Tp/sAYGHmCdR/JHlaVZ1RVY9MckmSDy92LABW3bYf8XX3vVX1h0k+kuSEJO/s7lsXPtnxsVQfSf4UD4c1JNYxmofDOh4Oa0hWeB3V3YsYBACOiStJADAkgQJgSCsRqKp6clVdV1W3VdWtVXXpdP/jq+pjVfXf058/t9uz/ixV9aiq+kxVfXZaxxun+8+oquunS1H9w3Qyy9Cq6oSquqmqrp5uL+MaDlXV56rq5qq6YbpvqV5TSVJVp1TVlVX1+ao6WFXPWbZ1VNWZ0/Nw+Os7VXXZEq7jT6b39i1V9b7pPb+M741LpzXcWlWXTfcd9XOxEoFKcm+S13X3M5I8O8lrp8s17Uvy8e5+WpKPT7dH9oMkF3T3WUnOTnJhVT07yZuS/E13PzXJ/yZ5zS7OOK9LkxzccnsZ15Akz+/us7f8fseyvaaS5K1Jru3upyc5K7PnZanW0d23T8/D2UmeleT/klyVJVpHVT0pyR8n2ezuZ2Z2UtolWbL3RlU9M8kfZHYVorOSXFxVT81DeS66e+W+kvxTkhcmuT3JadN9pyW5fbdnO4o1nJTkxsyu6nFPkj3T/c9J8pHdnm+b2U+fXqAXJLk6SS3bGqY5DyU59Yj7luo1leRxSb6Y6YSpZV3HEbO/KMm/Lts68pOr9jw+szOsr07y68v23kjym0mu2HL7L5K8/qE8F6uyB3W/qtpIck6S65M8sbu/Nv3o60meuEtjzW36aOzmJHcl+ViS/0nyre6+d9rkzsxe6CN7S2Yv2B9Pt5+Q5VtDknSSj1bVgelSX8nyvabOSHJ3kndNH7m+o6pOzvKtY6tLkrxv+n5p1tHdX03y10m+nORrSb6d5ECW771xS5JfqaonVNVJSV6S2cUejvq5WKlAVdVjknwwyWXd/Z2tP+tZ1oc/57677+vZxxinZ7YL/fRdHumoVNXFSe7q7gO7PcsOeF53n5vZlf5fW1Xnb/3hkrym9iQ5N8nbu/ucJN/PER+9LMk6kiTT8ZmXJvnHI382+jqmYzIvy+w/Db+Q5OQkF+7qUA9Bdx/M7GPJjya5NsnNSe47Ypu5nouVCVRVnZhZnN7T3R+a7v5GVZ02/fy0zPZKlkJ3fyvJdZnt8p9SVYd/6Xr0S1E9N8lLq+pQZlfGvyCzYyDLtIYk9/+PN919V2bHO87L8r2m7kxyZ3dfP92+MrNgLds6Dntxkhu7+xvT7WVax68l+WJ3393dP0ryoczeL8v43riiu5/V3edndtzsv/IQnouVCFRVVZIrkhzs7jdv+dGHk7xq+v5VmR2bGlZVrVXVKdP3j87sONrBzEL18mmzodfR3W/o7tO7eyOzj2I+0d2/kyVaQ5JU1clV9djD32d23OOWLNlrqru/nuQrVXXmdNcLktyWJVvHFq/ITz7eS5ZrHV9O8uyqOmn6N+vwc7FU740kqaqfn/5cT/IbSd6bh/BcrMSVJKrqeUk+leRz+clxjz/P7DjUB5KsJ/lSkt/q7m/uypBzqKpfSvLuzM7ueUSSD3T3X1bVUzLbG3l8kpuSvLK7f7B7k86nqn41yZ9198XLtoZp3qumm3uSvLe7/6qqnpAlek0lSVWdneQdSR6Z5I4kr870+spyrePkzP6Rf0p3f3u6b6mej+lXR347szOPb0ry+5kdc1qa90aSVNWnMju2/KMkf9rdH38oz8VKBAqA5bMSH/EBsHwECoAhCRQAQxIoAIYkUAAMSaAAGJJAATCk/wfqC9FjEHYtiQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(tight_layout=True)\n", "hist = ax.hist(lang_detection_time)\n" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "31.30412745475769" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.median(lang_detection_time)" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([45.0291729 , 28.58757186, 26.01508212, 25.91366768, 15.595222 ,\n", " 15.16394925, 26.04337311, 24.18059015, 26.4344418 , 17.44418979,\n", " 16.63693595, 18.67512417, 34.02068305, 56.42417693, 76.64306378,\n", " 81.30487394, 87.01156592, 75.85345602, 69.09301972, 57.38299012,\n", " 49.19913197, 50.34604192])" ] }, "execution_count": 115, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lang_detection_time" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "completion_time = np.array([r.JobCompletion for r in filtered_records if r.JobCompletion is not None])" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOq0lEQVR4nO3ce4yldX3H8c+3LKCoFS2jtcJ2aKMmxkTBKdVobcReUIz0D//A9KKtzSamNWhMDdakif+hNvaSGM3GS2m1WovaGo1WbbHGpGB3EZBrpQoCYllivDYR0W//OM/isN3ZOStz9vxm5/VKJnvOcx6G7+88c+Y958wzp7o7ADCan1r2AABwOAIFwJAECoAhCRQAQxIoAIa0axGf9LTTTuvV1dVFfGoAjjP79++/p7tXDt2+kECtrq5m3759i/jUABxnquq2w233Eh8AQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJDmClRVnVpVl1XVTVV1Y1U9c9GDAbCzzfuHun+V5BPd/eKqOinJKQucCQA2D1RVPTLJc5K8LEm6+94k9y52LAB2unmeQZ2Z5ECSd1fVU5PsT3JRd39v/U5VtSfJniTZvXv3Vs/JNrB68ceWPcID3HrJ+csegaPg64dDzfM7qF1Jzk7ytu4+K8n3klx86E7dvbe717p7bWXl/73nHwAclXkCdUeSO7r7yun6ZZkFCwAWZtNAdffXk9xeVU+aNj0vyQ0LnQqAHW/es/hemeS90xl8X07y+4sbCQDmDFR3X51kbcGzAMD9vJMEAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQds2zU1XdmuQ7SX6Y5L7uXlvkUAAwV6Amz+3uexY2CQCs4yU+AIY0b6A6ySeran9V7TncDlW1p6r2VdW+AwcObN2EAOxI8wbq2d19dpLnJ/mjqnrOoTt0997uXuvutZWVlS0dEoCdZ65Adfed0793J/lwknMWORQAbBqoqnpYVT3i4OUkv5HkukUPBsDONs9ZfI9N8uGqOrj/33f3JxY6FQA73qaB6u4vJ3nqMZgFAO7nNHMAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkOYOVFWdUFVfqKqPLnIgAEiO7hnURUluXNQgALDeXIGqqtOTnJ/kHYsdBwBm5n0G9ZdJXpvkRxvtUFV7qmpfVe07cODAlgwHwM61aaCq6oVJ7u7u/Ufar7v3dvdad6+trKxs2YAA7EzzPIN6VpIXVdWtSd6f5Nyqes9CpwJgx9s0UN39uu4+vbtXk1yY5N+6+3cWPhkAO5q/gwJgSLuOZufu/kySzyxkEgBYxzMoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhbRqoqnpIVX2+qq6pquur6g3HYjAAdrZdc+zz/STndvd3q+rEJJ+rqo939xULng2AHWzTQHV3J/nudPXE6aMXORQAzPU7qKo6oaquTnJ3kk9195WH2WdPVe2rqn0HDhzY6jkB2GHmClR3/7C7n5bk9CTnVNVTDrPP3u5e6+61lZWVrZ4TgB3mqM7i6+5vJrk8yXmLGQcAZuY5i2+lqk6dLj80ya8nuWnRgwGws81zFt/jklxaVSdkFrQPdPdHFzsWADvdPGfxXZvkrGMwCwDczztJADAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIYkUAAMSaAAGJJAATAkgQJgSAIFwJAECoAhCRQAQxIoAIa0aaCq6oyquryqbqiq66vqomMxGAA726459rkvyWu6+6qqekSS/VX1qe6+YcGzAbCDbfoMqrvv6u6rpsvfSXJjkscvejAAdraj+h1UVa0mOSvJlYsYBgAOmuclviRJVT08yQeTvKq7v32Y2/ck2ZMku3fv3rIB2djqxR9b9ghw3Brt8XXrJecve4Rjbq5nUFV1YmZxem93f+hw+3T33u5e6+61lZWVrZwRgB1onrP4Ksk7k9zY3W9Z/EgAMN8zqGcl+d0k51bV1dPHCxY8FwA73Ka/g+ruzyWpYzALANzPO0kAMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxJoAAYkkABMCSBAmBIAgXAkAQKgCEJFABDEigAhiRQAAxp00BV1buq6u6quu5YDAQAyXzPoP4myXkLngMAHmDTQHX3Z5N84xjMAgD3q+7efKeq1SQf7e6nHGGfPUn2JMnu3buffttttz3o4VYv/tiD/hwALMatl5y/JZ+nqvZ399qh27fsJInu3tvda929trKyslWfFoAdyll8AAxJoAAY0jynmb8vyX8keVJV3VFVL1/8WADsdLs226G7X3IsBgGA9bzEB8CQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwJIECYEgCBcCQBAqAIQkUAEMSKACGJFAADEmgABiSQAEwpLkCVVXnVdXNVXVLVV286KEAYNNAVdUJSd6a5PlJnpzkJVX15EUPBsDONs8zqHOS3NLdX+7ue5O8P8kFix0LgJ1u1xz7PD7J7euu35Hklw/dqar2JNkzXf1uVd384Mc7aqcluWcJ/9+tdDysITk+1mEN4zge1nHcraHeuGWf9+cPt3GeQM2lu/cm2btVn+8nUVX7unttmTM8WMfDGpLjYx3WMI7jYR3WcPTmeYnvziRnrLt++rQNABZmnkD9Z5InVNWZVXVSkguTfGSxYwGw0236El9331dVf5zkX5KckORd3X39wif7ySz1JcYtcjysITk+1mEN4zge1mENR6m6+1j+/wBgLt5JAoAhCRQAQ9rWgaqqU6vqsqq6qapurKpnVtWjq+pTVfWl6d9HLXvOI6mqV1fV9VV1XVW9r6oeMp2QcuX01lL/MJ2cMoyqeldV3V1V163bdtj7vWb+elrLtVV19vImf6AN1vHm6evp2qr6cFWduu62103ruLmqfnM5Uz/Q4daw7rbXVFVX1WnT9SGPxUZrqKpXTsfi+qp607rtwx2HZMOvp6dV1RVVdXVV7auqc6btox6LM6rq8qq6YbrfL5q2L+fx3d3b9iPJpUn+cLp8UpJTk7wpycXTtouTvHHZcx5h/scn+UqSh07XP5DkZdO/F07b3p7kFcue9ZC5n5Pk7CTXrdt22Ps9yQuSfDxJJXlGkiuXPf8m6/iNJLumy29ct44nJ7kmyclJzkzy30lOGHEN0/YzMjux6bYkp418LDY4Ds9N8ukkJ0/XHzPycTjCOj6Z5Pnr7v/PDH4sHpfk7OnyI5L813SfL+XxvW2fQVXVIzP7gnhnknT3vd39zczehunSabdLk/zWciac264kD62qXUlOSXJXknOTXDbdPtwauvuzSb5xyOaN7vcLkvxtz1yR5NSqetyxmfTIDreO7v5kd983Xb0is7/7S2breH93f7+7v5LklszeBmypNjgWSfIXSV6bZP1ZUEMeiw3W8Iokl3T396d97p62D3kckg3X0Ul+err8yCRfmy6Peizu6u6rpsvfSXJjZj9IL+XxvW0DldlPTweSvLuqvlBV76iqhyV5bHffNe3z9SSPXdqEm+juO5P8eZKvZhambyXZn+Sb675J3pHZF8joNrrfD/dWWdthPUnyB5n9dJhso3VU1QVJ7uzuaw65adusIckTk/zK9FL3v1fVL03bt9MakuRVSd5cVbdn9lh/3bR9+HVU1WqSs5JcmSU9vrdzoHZl9nT6bd19VpLvZfbU8349ew467Hn00+u4F2QW259L8rAk5y11qC0w+v0+j6p6fZL7krx32bMcjao6JcmfJvmzZc/yIO1K8ujMXjb6kyQfqKpa7kg/kVckeXV3n5Hk1Zle8RldVT08yQeTvKq7v73+tmP5+N7OgbojyR3dfeV0/bLMgvU/B59iTv/evcF/P4JfS/KV7j7Q3T9I8qEkz8rsafLBP6LeLm8ttdH9vu3eKquqXpbkhUl+e3owJttnHb+Y2Q8811TVrZnNeVVV/Wy2zxqS2eP7Q9NLR59P8qPM3qh0O60hSV6a2eM6Sf4xP345cth1VNWJmcXpvd19cPalPL63baC6++tJbq+qJ02bnpfkhszehuml07aXJvnnJYw3r68meUZVnTL9dHhwDZcnefG0z+hrOGij+/0jSX5vOtvnGUm+te6lguFU1XmZ/e7mRd39v+tu+kiSC6vq5Ko6M8kTknx+GTMeSXd/sbsf092r3b2a2Tf6s6fHy3Y6Fv+U2YkSqaonZnYS1D3ZJsdhna8l+dXp8rlJvjRdHvJYTN+H3pnkxu5+y7qblvP4XsaZIlv1keRpSfYluTazL+hHJfmZJP+a2RfCp5M8etlzbrKGNyS5Kcl1Sf4us7OTfiGzB90tmf3UdfKy5zxk5vdl9juzH2T2DfDlG93vmZ3d89bMzrb6YpK1Zc+/yTpuyew19aunj7ev2//10zpuznRm1rI/DreGQ26/NT8+i2/IY7HBcTgpyXumx8VVSc4d+TgcYR3Pzuz3ytdk9rucpw9+LJ6d2ct31657DLxgWY9vb3UEwJC27Ut8ABzfBAqAIQkUAEMSKACGJFAADEmgABiSQAEwpP8DBldPY85Nl+EAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig, ax = plt.subplots(tight_layout=True)\n", "hist = ax.hist(completion_time)\n" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([159.779, 99.665, 100.838, 120.037, 101.562, 87.591, 70.803,\n", " 88.914, 54.911, 112.074, 110.46 , 106.681, 132.235, 164.75 ,\n", " 171.654, 181.032, 201.321, 177.092, 161.303, 165.891, 158.415,\n", " 156.141])" ] }, "execution_count": 118, "metadata": {}, "output_type": "execute_result" } ], "source": [ "completion_time" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "from scipy import stats" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "DescribeResult(nobs=22, minmax=(54.911, 201.321), mean=131.05222727272727, variance=1620.582215422078, skewness=-0.09564323820390873, kurtosis=-1.1040085470633172)" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stats.describe(completion_time)" ] }, { "cell_type": "code", "execution_count": 121, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "126.13600000000001" ] }, "execution_count": 121, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.median(completion_time)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "broadcast-monitoring", "language": "python", "name": "broadcast-monitoring" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }