{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Allocate resources in remote container\n",
    "\n",
    "This notebook runs `train()` once locally and then submits the same function as a Fairing `TrainJob` on the Kubeflow AWS backend. The job's pod spec is modified with `get_resource_mutator` so that the container is given explicit CPU and memory resources.\n",
    "\n",
    "**Note:** `multiprocessing.cpu_count()` and `os.sysconf` report what the operating system exposes to the process; inside a container this may be the node's totals rather than the pod's limits, so treat the remote output accordingly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import multiprocessing\n",
    "import os\n",
    "import sys\n",
    "\n",
    "from kubeflow import fairing\n",
    "from kubeflow.fairing import TrainJob\n",
    "from kubeflow.fairing.backends import KubeflowAWSBackend\n",
    "from kubeflow.fairing.kubernetes.utils import get_resource_mutator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train():\n",
    "    \"\"\"Print the CPU count and total physical memory (in GiB) seen by this process.\"\"\"\n",
    "    print(\"CPU count: {}\".format(multiprocessing.cpu_count()))\n",
    "    # Page size in bytes times the number of physical pages gives total bytes.\n",
    "    memory_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')\n",
    "    print(\"Memory: {:.2f} GiB\".format(memory_bytes / (1024. ** 3)))\n",
    "\n",
    "\n",
    "# Run locally first so the output can be compared with the remote job's log.\n",
    "train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings a reader is likely to tune.\n",
    "AWS_REGION = 'us-west-2'\n",
    "# Resources passed to get_resource_mutator for the training container.\n",
    "# NOTE(review): confirm the memory unit (GB vs Gi) against the Fairing docs.\n",
    "CPU_REQUEST = 3\n",
    "MEMORY_REQUEST = 4\n",
    "\n",
    "AWS_ACCOUNT_ID = fairing.cloud.aws.guess_account_id()\n",
    "DOCKER_REGISTRY = '{}.dkr.ecr.{}.amazonaws.com'.format(AWS_ACCOUNT_ID, AWS_REGION)\n",
    "PY_VERSION = \".\".join([str(x) for x in sys.version_info[0:3]])\n",
    "BASE_IMAGE = '{}/python:{}'.format(DOCKER_REGISTRY, PY_VERSION)\n",
    "# TODO: bug to fix. Until then the ECR Python image above is deliberately\n",
    "# overridden with a public TensorFlow image.\n",
    "BASE_IMAGE = 'tensorflow/tensorflow:1.14.0-py3'\n",
    "\n",
    "job = TrainJob(\n",
    "    train,\n",
    "    base_docker_image=BASE_IMAGE,\n",
    "    docker_registry=DOCKER_REGISTRY,\n",
    "    backend=KubeflowAWSBackend(),\n",
    "    pod_spec_mutators=[get_resource_mutator(cpu=CPU_REQUEST, memory=MEMORY_REQUEST)],\n",
    ")\n",
    "job.submit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}