""" Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/ or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ import concurrent.futures import datetime import os import re from copy import deepcopy import constants import utils import boto3 import itertools from codebuild_environment import get_codebuild_project_name, get_cloned_folder_path from config import parse_dlc_developer_configs, is_build_enabled from context import Context from metrics import Metrics from image import DockerImage from common_stage_image import CommonStageImage from buildspec import Buildspec from output import OutputFormatter FORMATTER = OutputFormatter(constants.PADDING) build_context = os.getenv("BUILD_CONTEXT") def is_nightly_build_context(): """ Returns True if image builder is running in a nightly context or nightly PR test mode. Otherwise returns False :return: True or False """ return ( build_context == "NIGHTLY" or os.getenv("NIGHTLY_PR_TEST_MODE", "false").lower() == "true" ) def _find_image_object(images_list, image_name): """ Find and return an image object from images_list with a name that matches image_name :param images_list: List of objects :param image_name: Name of image as per buildspec :return: Object with image_name as "name" attribute """ ret_image_object = None for image in images_list: if image.name == image_name: ret_image_object = image break return ret_image_object # TODO: Abstract away to ImageBuilder class def image_builder(buildspec, image_types=[], device_types=[]): """ Builds images using build specification with specified image and device types and export them to ECR image repository An empty image types array indicates all image types. Similarly, an empty device types array indicates all device types :param buildspec: buid specification defining images to be build :param image_types: list of image types :param device_types: list of image device type """ BUILDSPEC = Buildspec() BUILDSPEC.load(buildspec) PRE_PUSH_STAGE_IMAGES = [] COMMON_STAGE_IMAGES = [] if ( "huggingface" in str(BUILDSPEC["framework"]) or "autogluon" in str(BUILDSPEC["framework"]) or "trcomp" in str(BUILDSPEC["framework"]) ): os.system("echo login into public ECR") os.system( "aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-west-2.amazonaws.com" ) for image_name, image_config in BUILDSPEC["images"].items(): # filter by image type if type is specified if image_types and not image_config["image_type"] in image_types: continue # filter by device type if type is specified if device_types and not image_config["device_type"] in device_types: continue ARTIFACTS = deepcopy(BUILDSPEC["context"]) if BUILDSPEC.get("context") else {} extra_build_args = {} labels = {} enable_datetime_tag = parse_dlc_developer_configs("build", "datetime_tag") if image_config.get("version") is not None: if BUILDSPEC["version"] != image_config.get("version"): continue if image_config.get("context") is not None: ARTIFACTS.update(image_config["context"]) image_tag = ( tag_image_with_pr_number(image_config["tag"]) if build_context == "PR" else image_config["tag"] ) additional_image_tags = [] if is_nightly_build_context(): additional_image_tags.append(tag_image_with_date(image_tag)) if enable_datetime_tag or build_context != "PR": image_tag = tag_image_with_datetime(image_tag) additional_image_tags.append(image_tag) image_repo_uri = ( image_config["repository"] if build_context == "PR" else modify_repository_name_for_context(str(image_config["repository"]), build_context) ) base_image_uri = None if image_config.get("base_image_name") is not None: base_image_object = _find_image_object( PRE_PUSH_STAGE_IMAGES, image_config["base_image_name"] ) base_image_uri = base_image_object.ecr_url if image_config.get("download_artifacts") is not None: for artifact_name, artifact in image_config.get("download_artifacts").items(): type = artifact["type"] uri = artifact["URI"] var = artifact["VAR_IN_DOCKERFILE"] try: file_name = utils.download_file(uri, type).strip() except ValueError: FORMATTER.print(f"Artifact download failed: {uri} of type {type}.") ARTIFACTS.update( { f"{artifact_name}": { "source": f"{os.path.join(os.sep, os.path.abspath(os.getcwd()), file_name)}", "target": file_name, } } ) extra_build_args[var] = file_name labels[var] = file_name labels[f"{var}_URI"] = uri transformers_version = image_config.get("transformers_version") if str(BUILDSPEC["framework"]).startswith("huggingface"): if transformers_version: extra_build_args["TRANSFORMERS_VERSION"] = transformers_version else: raise KeyError( f"HuggingFace buildspec.yml must contain 'transformers_version' field for each image" ) if "datasets_version" in image_config: extra_build_args["DATASETS_VERSION"] = image_config.get("datasets_version") elif str(image_config["image_type"]) == "training": raise KeyError( f"HuggingFace buildspec.yml must contain 'datasets_version' field for each image" ) torchserve_version = image_config.get("torch_serve_version") inference_toolkit_version = image_config.get("tool_kit_version") if torchserve_version: extra_build_args["TORCHSERVE_VERSION"] = torchserve_version if inference_toolkit_version: extra_build_args["SM_TOOLKIT_VERSION"] = inference_toolkit_version ARTIFACTS.update( { "dockerfile": { "source": image_config["docker_file"], "target": "Dockerfile", } } ) context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"]) if "labels" in image_config: labels.update(image_config.get("labels")) for label, value in labels.items(): if isinstance(value, bool): labels[label] = str(value) cx_type = utils.get_label_prefix_customer_type(image_tag) # Define label variables label_framework = str(BUILDSPEC["framework"]).replace("_", "-") if image_config.get("framework_version"): label_framework_version = str(image_config["framework_version"]).replace(".", "-") else: label_framework_version = str(BUILDSPEC["version"]).replace(".", "-") label_device_type = str(image_config["device_type"]) if label_device_type == "gpu": label_device_type = f"{label_device_type}.{str(image_config['cuda_version'])}" label_arch = str(BUILDSPEC["arch_type"]) label_python_version = str(image_config["tag_python_version"]) label_os_version = str(image_config.get("os_version")).replace(".", "-") label_contributor = str(BUILDSPEC.get("contributor")) label_transformers_version = str(transformers_version).replace(".", "-") # job_type will be either inference or training, based on the repo URI if "training" in image_repo_uri: label_job_type = "training" elif "inference" in image_repo_uri: label_job_type = "inference" else: raise RuntimeError( f"Cannot find inference or training job type in {image_repo_uri}. " f"This is required to set job_type label." ) if cx_type == "sagemaker": # Adding standard labels to all images labels[ f"com.amazonaws.ml.engines.{cx_type}.dlc.framework.{label_framework}.{label_framework_version}" ] = "true" labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.device.{label_device_type}"] = "true" labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.arch.{label_arch}"] = "true" # python version label will look like py_version.py36, for example labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.python.{label_python_version}"] = "true" labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.os.{label_os_version}"] = "true" labels[f"com.amazonaws.ml.engines.{cx_type}.dlc.job.{label_job_type}"] = "true" if label_contributor: labels[ f"com.amazonaws.ml.engines.{cx_type}.dlc.contributor.{label_contributor}" ] = "true" if transformers_version: labels[ f"com.amazonaws.ml.engines.{cx_type}.dlc.lib.transformers.{label_transformers_version}" ] = "true" if torchserve_version and inference_toolkit_version: labels[ f"com.amazonaws.ml.engines.{cx_type}.dlc.inference-toolkit.{inference_toolkit_version}.torchserve.{torchserve_version}" ] = "true" """ Override parameters from parent in child. """ info = { "account_id": str(BUILDSPEC["account_id"]), "region": str(BUILDSPEC["region"]), "framework": str(BUILDSPEC["framework"]), "version": str(BUILDSPEC["version"]), "root": str(image_config["root"]), "name": str(image_name), "device_type": str(image_config["device_type"]), "python_version": str(image_config["python_version"]), "image_type": str(image_config["image_type"]), "image_size_baseline": int(image_config["image_size_baseline"]), "base_image_uri": base_image_uri, "enable_test_promotion": image_config.get("enable_test_promotion", True), "labels": labels, "extra_build_args": extra_build_args, } # Create pre_push stage docker object pre_push_stage_image_object = DockerImage( info=info, dockerfile=image_config["docker_file"], repository=image_repo_uri, tag=append_tag(image_tag, "pre-push"), to_build=image_config["build"], stage=constants.PRE_PUSH_STAGE, context=context, additional_tags=additional_image_tags, target=image_config.get("target"), ) ##### Create Common stage docker object ##### # If for a pre_push stage image we create a common stage image, then we do not push the pre_push stage image # to the repository. Instead, we just push its common stage image to the repository. Therefore, # inside function get_common_stage_image_object we make pre_push_stage_image_object non pushable. common_stage_image_object = generate_common_stage_image_object( pre_push_stage_image_object, image_tag ) COMMON_STAGE_IMAGES.append(common_stage_image_object) PRE_PUSH_STAGE_IMAGES.append(pre_push_stage_image_object) FORMATTER.separator() FORMATTER.banner("DLC") # Parent images do not inherit from any containers built in this job # Child images use one of the parent images as their base image parent_images = [image for image in PRE_PUSH_STAGE_IMAGES if not image.is_child_image] child_images = [image for image in PRE_PUSH_STAGE_IMAGES if image.is_child_image] ALL_IMAGES = PRE_PUSH_STAGE_IMAGES + COMMON_STAGE_IMAGES IMAGES_TO_PUSH = [image for image in ALL_IMAGES if image.to_push and image.to_build] pushed_images = [] pushed_images += process_images(parent_images, "Parent/Independent") pushed_images += process_images(child_images, "Child/Dependent") assert all( image in pushed_images for image in IMAGES_TO_PUSH ), "Few images could not be pushed." # After the build, display logs/summary for all the images. FORMATTER.banner("Summary") show_build_info(ALL_IMAGES) FORMATTER.banner("Errors") is_any_build_failed, is_any_build_failed_size_limit = show_build_errors(ALL_IMAGES) # From all images, filter the images that were supposed to be built and upload their metrics BUILT_IMAGES = [image for image in ALL_IMAGES if image.to_build] if BUILT_IMAGES: FORMATTER.banner("Upload Metrics") upload_metrics(BUILT_IMAGES, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit) # Set environment variables to be consumed by test jobs test_trigger_job = get_codebuild_project_name() # Tests should only run on images that were pushed to the repository images_to_test = IMAGES_TO_PUSH if not is_build_enabled(): # Ensure we have images populated if do_build is false, so that tests can proceed if needed images_to_test = [image for image in ALL_IMAGES if image.to_push] if images_to_test: FORMATTER.banner("Test Env") utils.set_test_env( images_to_test, use_latest_additional_tag=True, BUILD_CONTEXT=os.getenv("BUILD_CONTEXT"), TEST_TRIGGER=test_trigger_job, ) def process_images(pre_push_image_list, pre_push_image_type="Pre-push"): """ Handles all the tasks related to a particular type of Pre Push images. It takes in the list of pre push images and then builds it. After the pre-push images have been built, it extracts the corresponding common stage images for the pre-push images and builds those common stage images. After the common stage images have been built, it finds outs the docker images that need to be pushed and pushes them accordingly. Note that the common stage images should always be built after the pre-push images of a particular kind. This is because the Common stage images use are built on respective Standard and Example images. :param pre_push_image_list: list[DockerImage], list of pre-push images :param pre_push_image_type: str, used to display the message on the logs :return: list[DockerImage], images that were supposed to be pushed. """ FORMATTER.banner(f"{pre_push_image_type} Build") build_images(pre_push_image_list) FORMATTER.banner(f"{pre_push_image_type} Common Build") common_stage_image_list = [ image.corresponding_common_stage_image for image in pre_push_image_list if image.corresponding_common_stage_image is not None ] build_images(common_stage_image_list, make_dummy_boto_client=True) FORMATTER.banner(f"{pre_push_image_type} Push Images") all_images = pre_push_image_list + common_stage_image_list images_to_push = [image for image in all_images if image.to_push and image.to_build] push_images(images_to_push) FORMATTER.banner(f"{pre_push_image_type} Retagging") retag_and_push_images(images_to_push) return images_to_push def generate_common_stage_image_object(pre_push_stage_image_object, image_tag): """ Creates a common stage image object for a pre_push stage image. If for a pre_push stage image we create a common stage image, then we do not push the pre_push stage image to the repository. Instead, we just push its common stage image to the repository. Therefore, inside the function pre_push_stage_image_object is made NON-PUSHABLE. :param pre_push_stage_image_object: DockerImage, an object of class DockerImage :return: CommonStageImage, an object of class CommonStageImage. CommonStageImage inherits DockerImage. """ common_stage_info = deepcopy(pre_push_stage_image_object.info) common_stage_info["extra_build_args"].update( {"PRE_PUSH_IMAGE": pre_push_stage_image_object.ecr_url} ) common_stage_image_object = CommonStageImage( info=common_stage_info, dockerfile=os.path.join( os.sep, get_cloned_folder_path(), "miscellaneous_dockerfiles", "Dockerfile.common" ), repository=pre_push_stage_image_object.repository, tag=append_tag(image_tag, "multistage-common"), to_build=pre_push_stage_image_object.to_build, stage=constants.COMMON_STAGE, additional_tags=pre_push_stage_image_object.additional_tags, ) pre_push_stage_image_object.to_push = False pre_push_stage_image_object.corresponding_common_stage_image = common_stage_image_object return common_stage_image_object def show_build_info(images): """ Displays the build info for a list of input images. :param images: list[DockerImage] """ if not os.path.isdir("logs"): os.makedirs("logs") for image in images: image_description = f"{image.name}-{image.stage}" FORMATTER.title(image_description) FORMATTER.table(image.info.items()) flattened_logs = list(itertools.chain(*image.log)) with open(f"logs/{image_description}", "w") as fp: fp.write("/n".join(flattened_logs)) image.summary["log"] = f"logs/{image_description}" FORMATTER.table(image.summary.items()) FORMATTER.title(f"Ending Logs for {image_description}") FORMATTER.print_lines(image.log[-1][-2:]) def show_build_errors(images): """ Iterates through each image to check if there is any image that has a failed status. In case an image with a failed status is found, it raises an exception. :param images: list[DockerImage] """ is_any_build_failed = False is_any_build_failed_size_limit = False for image in images: if image.build_status == constants.FAIL: FORMATTER.title(image.name) FORMATTER.print_lines(image.log[-1][-10:]) is_any_build_failed = True else: if image.build_status == constants.FAIL_IMAGE_SIZE_LIMIT: is_any_build_failed_size_limit = True if is_any_build_failed: raise Exception("Build failed") else: if is_any_build_failed_size_limit: FORMATTER.print("Build failed. Image size limit breached.") else: FORMATTER.print("No errors") return is_any_build_failed, is_any_build_failed_size_limit def upload_metrics(images, BUILDSPEC, is_any_build_failed, is_any_build_failed_size_limit): """ Uploads Metrics for a list of images. :param images: list[DockerImage] :param BUILDSPEC: Buildspec :param is_any_build_failed: bool :param is_any_build_failed_size_limit: bool """ metrics = Metrics( context=constants.BUILD_CONTEXT, region=BUILDSPEC["region"], namespace=constants.METRICS_NAMESPACE, ) for image in images: try: metrics.push_image_metrics(image) except Exception as e: if is_any_build_failed or is_any_build_failed_size_limit: raise Exception(f"Build failed.{e}") else: raise Exception(f"Build passed. {e}") if is_any_build_failed_size_limit: raise Exception("Build failed because of file limit") FORMATTER.print("Metrics Uploaded") def build_images(images, make_dummy_boto_client=False): """ Takes a list of images and executes their build process concurrently. :param images: list[DockerImage] :param make_dummy_boto_client: bool, specifies if a dummy client should be declared or not. TODO: The parameter make_dummy_boto_client should be removed when get_dummy_boto_client method is removed. """ THREADS = {} # In the context of the ThreadPoolExecutor each instance of image.build submitted # to it is executed concurrently in a separate thread. with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor: #### TODO: Remove this entire if block when get_dummy_boto_client is removed #### if make_dummy_boto_client: get_dummy_boto_client() for image in images: THREADS[image.name] = executor.submit(image.build) # the FORMATTER.progress(THREADS) function call also waits until all threads have completed FORMATTER.progress(THREADS) #### TODO: Remove this entire method when https://github.com/boto/boto3/issues/1592 is resolved #### def get_dummy_boto_client(): """ Makes a dummy boto3 client to ensure that boto3 clients behave in a thread safe manner. In absence of this method, the behaviour documented in https://github.com/boto/boto3/issues/1592 is observed. Once https://github.com/boto/boto3/issues/1592 is resolved, this method can be removed. :return: BotocoreClientSTS """ return boto3.client("sts", region_name=os.getenv("REGION")) def push_images(images): """ Takes a list of images and PUSHES them to ECR concurrently. :param images: list[DockerImage] """ THREADS = {} with concurrent.futures.ThreadPoolExecutor( max_workers=constants.MAX_WORKER_COUNT_FOR_PUSHING_IMAGES ) as executor: for image in images: THREADS[image.name] = executor.submit(image.push_image) FORMATTER.progress(THREADS) def retag_and_push_images(images): """ Takes a list of images, retags them and pushes to the repository :param images: list[DockerImage] """ THREADS = {} with concurrent.futures.ThreadPoolExecutor( max_workers=constants.MAX_WORKER_COUNT_FOR_PUSHING_IMAGES ) as executor: for image in images: THREADS[image.name] = executor.submit(image.push_image_with_additional_tags) FORMATTER.progress(THREADS) def tag_image_with_pr_number(image_tag): pr_number = os.getenv("PR_NUMBER") return f"{image_tag}-pr-{pr_number}" def tag_image_with_date(image_tag): date_suffix = datetime.datetime.now().strftime("%Y-%m-%d") return f"{image_tag}-{date_suffix}" def tag_image_with_datetime(image_tag): datetime_suffix = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") return f"{image_tag}-{datetime_suffix}" def append_tag(image_tag, append_str): """ Appends image_tag with append_str :param image_tag: str, original image tag :param append_str: str, string to be appended """ return f"{image_tag}-{append_str}" def modify_repository_name_for_context(image_repo_uri, build_context): repo_uri_values = image_repo_uri.split("/") repo_name = repo_uri_values[-1] if build_context == "MAINLINE": repo_uri_values[-1] = repo_name.replace( constants.PR_REPO_PREFIX, constants.MAINLINE_REPO_PREFIX ) elif build_context == "NIGHTLY": repo_uri_values[-1] = repo_name.replace( constants.PR_REPO_PREFIX, constants.NIGHTLY_REPO_PREFIX ) return "/".join(repo_uri_values)