#!/usr/bin/env python # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import print_function import json import os import sys import traceback from urllib.parse import urlparse from urllib.request import urlopen from random import randint from time import sleep import boto3 import botocore import jupyter_client.kernelspec as kernelspec import papermill input_var = "PAPERMILL_INPUT" output_var = "PAPERMILL_OUTPUT" params_var = "PAPERMILL_PARAMS" def run_notebook(): try: notebook = os.environ[input_var] output_notebook = os.environ[output_var] params = json.loads(os.environ[params_var]) notebook_dir = os.path.dirname(notebook) notebook_file = os.path.basename(notebook) pause = randint(1,10) print("Waiting {} seconds....".format(pause)) sleep(pause) # If the user specified notebook path in S3, run with that path. if notebook.startswith("s3://"): print("Downloading notebook {}".format(notebook)) o = urlparse(notebook) bucket = o.netloc key = o.path[1:] s3 = boto3.resource("s3") try: s3.Bucket(bucket).download_file(key, "/tmp/" + notebook_file) notebook_dir = "/tmp" except botocore.exceptions.ClientError as e: if e.response["Error"]["Code"] == "404": print("The notebook {} does not exist.".format(notebook)) raise print("Download complete") os.chdir(notebook_dir) kernel = os.environ.get("PAPERMILL_KERNEL", None) if not kernel: nb_kernel = kernel_for(notebook_file) avail_kernels = available_kernels() print("The kernel found in the notebook metadata is: ", nb_kernel) print("The kernels available to execute within jupyter are kernels are: ", ','.join(avail_kernels)) if nb_kernel is None or nb_kernel not in avail_kernels: print("No default notebook kernel found or it is not available in the execution environment, picking from available kernels") kernel = avail_kernels[0] else: print("Using notebook provided kernel: ", nb_kernel) kernel = nb_kernel print( "Executing {} with output to {}{}".format( notebook_file, output_notebook, (" using kernel " + kernel) if kernel else "", ) ) print("Notebook params = {}".format(params)) arg_map = dict(kernel_name=kernel) if kernel else {} papermill.execute_notebook( input_path=notebook_file, output_path=output_notebook, parameters=params, progress_bar=False, log_output=True, cwd=notebook_dir, **arg_map, ) print("Execution complete") except Exception as e: message = str(e) if len(message) > 1024: lines = message.splitlines() ellipsis = "\n\n[...]\n\n" error_message = ellipsis + lines[-1] truncated_length = 1023 - len(error_message) if truncated_length == 0: message = lines[-1] else: message = message[:truncated_length] + error_message # Write to an error file. This will be returned as the failureReason in the # DescribeProcessingJob result. with open("/opt/ml/output/message", "w") as failure: failure.write(message) # Print the stack trace to the Processing job CloudWatch logs. trc = traceback.format_exc() print(trc, file=sys.stderr) # A non-zero exit code causes the Processing job to be marked as Failed. sys.exit(1) if not os.path.exists(output_notebook): print("No output notebook was generated") else: print("Output was written to {}".format(output_notebook)) def available_kernels(): """Return the list of kernels""" mgr = kernelspec.KernelSpecManager() return list(mgr.find_kernel_specs().keys()) def kernel_for(notebook): """Read the notebook and extract the kernel name, if any""" with open(notebook, "r") as f: nb = json.load(f) md = nb.get("metadata") if md: ks = md.get("kernelspec") if ks: return ks["name"] return None if __name__ == "__main__": run_notebook()