{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "439e2446-33af-409b-b93c-827bc4c59659", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:47:59.463660Z", "iopub.status.busy": "2022-08-06T01:47:59.463430Z", "iopub.status.idle": "2022-08-06T01:48:03.179737Z", "shell.execute_reply": "2022-08-06T01:48:03.178914Z", "shell.execute_reply.started": "2022-08-06T01:47:59.463597Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Defaulting to user installation because normal site-packages is not writeable\n", "Requirement already up-to-date: torch in /home/emr-notebook/.local/lib/python3.7/site-packages (1.12.1)\n", "Requirement already up-to-date: torchvision in /home/emr-notebook/.local/lib/python3.7/site-packages (0.13.1)\n", "Requirement already up-to-date: pandas in /home/emr-notebook/.local/lib/python3.7/site-packages (1.3.5)\n", "Requirement already up-to-date: datasets in /home/emr-notebook/.local/lib/python3.7/site-packages (2.4.0)\n", "Requirement already up-to-date: accelerate in /home/emr-notebook/.local/lib/python3.7/site-packages (0.12.0)\n", "Requirement already up-to-date: scikit-learn in /home/emr-notebook/.local/lib/python3.7/site-packages (1.0.2)\n", "Requirement already up-to-date: mlflow in /home/emr-notebook/.local/lib/python3.7/site-packages (1.27.0)\n", "Requirement already up-to-date: tensorboard in /home/emr-notebook/.local/lib/python3.7/site-packages (2.9.1)\n", "Requirement already up-to-date: ray[all]==2.0.0rc0 in /home/emr-notebook/.local/lib/python3.7/site-packages (2.0.0rc0)\n", "Requirement already satisfied, skipping upgrade: typing-extensions in /home/emr-notebook/.local/lib/python3.7/site-packages (from torch) (4.3.0)\n", "Requirement already satisfied, skipping upgrade: requests in /home/emr-notebook/.local/lib/python3.7/site-packages (from torchvision) (2.28.1)\n", "Requirement already satisfied, skipping upgrade: pillow!=8.3.*,>=5.3.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from torchvision) (9.2.0)\n", "Requirement already satisfied, skipping upgrade: numpy in /usr/local/lib64/python3.7/site-packages (from torchvision) (1.20.0)\n", "Requirement already satisfied, skipping upgrade: python-dateutil>=2.7.3 in /home/emr-notebook/.local/lib/python3.7/site-packages (from pandas) (2.8.2)\n", "Requirement already satisfied, skipping upgrade: pytz>=2017.3 in /usr/local/lib/python3.7/site-packages (from pandas) (2022.1)\n", "Requirement already satisfied, skipping upgrade: tqdm>=4.62.1 in /usr/local/lib64/python3.7/site-packages (from datasets) (4.63.1)\n", "Requirement already satisfied, skipping upgrade: responses<0.19 in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (0.18.0)\n", "Requirement already satisfied, skipping upgrade: aiohttp in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (3.8.1)\n", "Requirement already satisfied, skipping upgrade: fsspec[http]>=2021.11.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (2022.7.1)\n", "Requirement already satisfied, skipping upgrade: huggingface-hub<1.0.0,>=0.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (0.8.1)\n", "Requirement already satisfied, skipping upgrade: packaging in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (21.3)\n", "Requirement already satisfied, skipping upgrade: pyarrow>=6.0.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (6.0.1)\n", "Requirement already satisfied, skipping upgrade: importlib-metadata; python_version < \"3.8\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (4.12.0)\n", "Requirement already satisfied, skipping upgrade: multiprocess in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (0.70.13)\n", "Requirement already satisfied, skipping upgrade: dill<0.3.6 in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (0.3.5.1)\n", "Requirement already satisfied, skipping upgrade: xxhash in /home/emr-notebook/.local/lib/python3.7/site-packages (from datasets) (3.0.0)\n", "Requirement already satisfied, skipping upgrade: pyyaml in /usr/local/lib64/python3.7/site-packages (from accelerate) (5.4.1)\n", "Requirement already satisfied, skipping upgrade: psutil in /home/emr-notebook/.local/lib/python3.7/site-packages (from accelerate) (5.9.1)\n", "Requirement already satisfied, skipping upgrade: joblib>=0.11 in /usr/local/lib64/python3.7/site-packages (from scikit-learn) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: scipy>=1.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-learn) (1.7.3)\n", "Requirement already satisfied, skipping upgrade: threadpoolctl>=2.0.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-learn) (3.1.0)\n", "Requirement already satisfied, skipping upgrade: sqlalchemy>=1.4.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (1.4.39)\n", "Requirement already satisfied, skipping upgrade: gitpython>=2.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (3.1.27)\n", "Requirement already satisfied, skipping upgrade: prometheus-flask-exporter in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (0.20.3)\n", "Requirement already satisfied, skipping upgrade: alembic in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (1.8.1)\n", "Requirement already satisfied, skipping upgrade: docker>=4.0.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (5.0.3)\n", "Requirement already satisfied, skipping upgrade: cloudpickle in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (2.1.0)\n", "Requirement already satisfied, skipping upgrade: querystring-parser in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (1.2.4)\n", "Requirement already satisfied, skipping upgrade: gunicorn; platform_system != \"Windows\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (20.1.0)\n", "Requirement already satisfied, skipping upgrade: click>=7.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (8.0.4)\n", "Requirement already satisfied, skipping upgrade: databricks-cli>=0.8.7 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (0.17.0)\n", "Requirement already satisfied, skipping upgrade: entrypoints in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (0.4)\n", "Requirement already satisfied, skipping upgrade: Flask in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (2.2.1)\n", "Requirement already satisfied, skipping upgrade: protobuf>=3.12.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (3.20.1)\n", "Requirement already satisfied, skipping upgrade: sqlparse>=0.3.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from mlflow) (0.4.2)\n", "Requirement already satisfied, skipping upgrade: tensorboard-plugin-wit>=1.6.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (1.8.1)\n", "Requirement already satisfied, skipping upgrade: tensorboard-data-server<0.7.0,>=0.6.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (0.6.1)\n", "Requirement already satisfied, skipping upgrade: google-auth-oauthlib<0.5,>=0.4.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (0.4.6)\n", "Requirement already satisfied, skipping upgrade: wheel>=0.26 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (0.37.1)\n", "Requirement already satisfied, skipping upgrade: setuptools>=41.0.0 in /usr/lib/python3.7/site-packages (from tensorboard) (49.1.3)\n", "Requirement already satisfied, skipping upgrade: markdown>=2.6.8 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (3.4.1)\n", "Requirement already satisfied, skipping upgrade: werkzeug>=1.0.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (2.2.1)\n", "Requirement already satisfied, skipping upgrade: google-auth<3,>=1.6.3 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (2.10.0)\n", "Requirement already satisfied, skipping upgrade: grpcio>=1.24.3 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (1.43.0)\n", "Requirement already satisfied, skipping upgrade: absl-py>=0.4 in /home/emr-notebook/.local/lib/python3.7/site-packages (from tensorboard) (1.2.0)\n", "Requirement already satisfied, skipping upgrade: attrs in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (22.1.0)\n", "Requirement already satisfied, skipping upgrade: virtualenv in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (20.16.3)\n", "Requirement already satisfied, skipping upgrade: frozenlist in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.3.1)\n", "Requirement already satisfied, skipping upgrade: aiosignal in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.2.0)\n", "Requirement already satisfied, skipping upgrade: msgpack<2.0.0,>=1.0.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.0.4)\n", "Requirement already satisfied, skipping upgrade: filelock in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (3.7.1)\n", "Requirement already satisfied, skipping upgrade: jsonschema in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (4.9.1)\n", "Requirement already satisfied, skipping upgrade: tabulate; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.8.10)\n", "Requirement already satisfied, skipping upgrade: gpustat>=1.0.0b1; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.0.0rc1)\n", "Requirement already satisfied, skipping upgrade: colorful; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.5.4)\n", "Requirement already satisfied, skipping upgrade: urllib3; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.26.11)\n", "Requirement already satisfied, skipping upgrade: aiorwlock; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.3.0)\n", "Requirement already satisfied, skipping upgrade: dm-tree; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.1.7)\n", "Requirement already satisfied, skipping upgrade: tensorboardX>=1.9; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (2.5.1)\n", "Requirement already satisfied, skipping upgrade: ray-cpp==2.0.0rc0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (2.0.0rc0)\n", "Requirement already satisfied, skipping upgrade: aiohttp-cors; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.7.0)\n", "Requirement already satisfied, skipping upgrade: kopf; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.35.6)\n", "Requirement already satisfied, skipping upgrade: scikit-image; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.19.3)\n", "Requirement already satisfied, skipping upgrade: lz4; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (4.0.2)\n", "Requirement already satisfied, skipping upgrade: fastapi; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.79.0)\n", "Requirement already satisfied, skipping upgrade: kubernetes; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (24.2.0)\n", "Requirement already satisfied, skipping upgrade: uvicorn==0.16.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.16.0)\n", "Requirement already satisfied, skipping upgrade: opencensus; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.11.0)\n", "Requirement already satisfied, skipping upgrade: prometheus-client<0.14.0,>=0.7.1; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.13.1)\n", "Requirement already satisfied, skipping upgrade: py-spy>=0.2.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.3.12)\n", "Requirement already satisfied, skipping upgrade: pydantic; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.9.1)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-sdk==1.1.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: smart-open; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (6.0.0)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-api==1.1.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-exporter-otlp==1.1.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: matplotlib!=3.4.3; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (3.5.2)\n", "Requirement already satisfied, skipping upgrade: starlette; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.19.1)\n", "Requirement already satisfied, skipping upgrade: gym<0.24.0,>=0.21.0; extra == \"all\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from ray[all]==2.0.0rc0) (0.23.1)\n", "Requirement already satisfied, skipping upgrade: certifi>=2017.4.17 in /home/emr-notebook/.local/lib/python3.7/site-packages (from requests->torchvision) (2022.6.15)\n", "Requirement already satisfied, skipping upgrade: charset-normalizer<3,>=2 in /home/emr-notebook/.local/lib/python3.7/site-packages (from requests->torchvision) (2.1.0)\n", "Requirement already satisfied, skipping upgrade: idna<4,>=2.5 in /home/emr-notebook/.local/lib/python3.7/site-packages (from requests->torchvision) (3.3)\n", "Requirement already satisfied, skipping upgrade: six>=1.5 in /usr/local/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas) (1.13.0)\n", "Requirement already satisfied, skipping upgrade: multidict<7.0,>=4.5 in /home/emr-notebook/.local/lib/python3.7/site-packages (from aiohttp->datasets) (6.0.2)\n", "Requirement already satisfied, skipping upgrade: asynctest==0.13.0; python_version < \"3.8\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from aiohttp->datasets) (0.13.0)\n", "Requirement already satisfied, skipping upgrade: yarl<2.0,>=1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from aiohttp->datasets) (1.8.1)\n", "Requirement already satisfied, skipping upgrade: async-timeout<5.0,>=4.0.0a3 in /home/emr-notebook/.local/lib/python3.7/site-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied, skipping upgrade: pyparsing!=3.0.5,>=2.0.2 in /home/emr-notebook/.local/lib/python3.7/site-packages (from packaging->datasets) (3.0.9)\n", "Requirement already satisfied, skipping upgrade: zipp>=0.5 in /home/emr-notebook/.local/lib/python3.7/site-packages (from importlib-metadata; python_version < \"3.8\"->datasets) (3.8.1)\n", "Requirement already satisfied, skipping upgrade: greenlet!=0.4.17; python_version >= \"3\" and (platform_machine == \"aarch64\" or (platform_machine == \"ppc64le\" or (platform_machine == \"x86_64\" or (platform_machine == \"amd64\" or (platform_machine == \"AMD64\" or (platform_machine == \"win32\" or platform_machine == \"WIN32\")))))) in /home/emr-notebook/.local/lib/python3.7/site-packages (from sqlalchemy>=1.4.0->mlflow) (1.1.2)\n", "Requirement already satisfied, skipping upgrade: gitdb<5,>=4.0.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from gitpython>=2.1.0->mlflow) (4.0.9)\n", "Requirement already satisfied, skipping upgrade: importlib-resources; python_version < \"3.9\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from alembic->mlflow) (5.9.0)\n", "Requirement already satisfied, skipping upgrade: Mako in /home/emr-notebook/.local/lib/python3.7/site-packages (from alembic->mlflow) (1.2.1)\n", "Requirement already satisfied, skipping upgrade: websocket-client>=0.32.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from docker>=4.0.0->mlflow) (1.3.3)\n", "Requirement already satisfied, skipping upgrade: pyjwt>=1.7.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from databricks-cli>=0.8.7->mlflow) (2.4.0)\n", "Requirement already satisfied, skipping upgrade: oauthlib>=3.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from databricks-cli>=0.8.7->mlflow) (3.2.0)\n", "Requirement already satisfied, skipping upgrade: Jinja2>=3.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from Flask->mlflow) (3.1.2)\n", "Requirement already satisfied, skipping upgrade: itsdangerous>=2.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from Flask->mlflow) (2.1.2)\n", "Requirement already satisfied, skipping upgrade: requests-oauthlib>=0.7.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard) (1.3.1)\n", "Requirement already satisfied, skipping upgrade: MarkupSafe>=2.1.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from werkzeug>=1.0.1->tensorboard) (2.1.1)\n", "Requirement already satisfied, skipping upgrade: pyasn1-modules>=0.2.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard) (0.2.8)\n", "Requirement already satisfied, skipping upgrade: rsa<5,>=3.1.4; python_version >= \"3.6\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard) (4.9)\n", "Requirement already satisfied, skipping upgrade: cachetools<6.0,>=2.0.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from google-auth<3,>=1.6.3->tensorboard) (5.2.0)\n", "Requirement already satisfied, skipping upgrade: platformdirs<3,>=2.4 in /home/emr-notebook/.local/lib/python3.7/site-packages (from virtualenv->ray[all]==2.0.0rc0) (2.5.2)\n", "Requirement already satisfied, skipping upgrade: distlib<1,>=0.3.5 in /home/emr-notebook/.local/lib/python3.7/site-packages (from virtualenv->ray[all]==2.0.0rc0) (0.3.5)\n", "Requirement already satisfied, skipping upgrade: pyrsistent!=0.17.0,!=0.17.1,!=0.17.2,>=0.14.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from jsonschema->ray[all]==2.0.0rc0) (0.18.1)\n", "Requirement already satisfied, skipping upgrade: pkgutil-resolve-name>=1.3.10; python_version < \"3.9\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from jsonschema->ray[all]==2.0.0rc0) (1.3.10)\n", "Requirement already satisfied, skipping upgrade: nvidia-ml-py<=11.495.46,>=11.450.129 in /home/emr-notebook/.local/lib/python3.7/site-packages (from gpustat>=1.0.0b1; extra == \"all\"->ray[all]==2.0.0rc0) (11.495.46)\n", "Requirement already satisfied, skipping upgrade: blessed>=1.17.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from gpustat>=1.0.0b1; extra == \"all\"->ray[all]==2.0.0rc0) (1.19.1)\n", "Requirement already satisfied, skipping upgrade: python-json-logger in /home/emr-notebook/.local/lib/python3.7/site-packages (from kopf; extra == \"all\"->ray[all]==2.0.0rc0) (2.0.4)\n", "Requirement already satisfied, skipping upgrade: iso8601 in /home/emr-notebook/.local/lib/python3.7/site-packages (from kopf; extra == \"all\"->ray[all]==2.0.0rc0) (1.0.2)\n", "Requirement already satisfied, skipping upgrade: networkx>=2.2 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-image; extra == \"all\"->ray[all]==2.0.0rc0) (2.6.3)\n", "Requirement already satisfied, skipping upgrade: PyWavelets>=1.1.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-image; extra == \"all\"->ray[all]==2.0.0rc0) (1.3.0)\n", "Requirement already satisfied, skipping upgrade: imageio>=2.4.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-image; extra == \"all\"->ray[all]==2.0.0rc0) (2.21.0)\n", "Requirement already satisfied, skipping upgrade: tifffile>=2019.7.26 in /home/emr-notebook/.local/lib/python3.7/site-packages (from scikit-image; extra == \"all\"->ray[all]==2.0.0rc0) (2021.11.2)\n", "Requirement already satisfied, skipping upgrade: asgiref>=3.4.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from uvicorn==0.16.0; extra == \"all\"->ray[all]==2.0.0rc0) (3.5.2)\n", "Requirement already satisfied, skipping upgrade: h11>=0.8 in /home/emr-notebook/.local/lib/python3.7/site-packages (from uvicorn==0.16.0; extra == \"all\"->ray[all]==2.0.0rc0) (0.13.0)\n", "Requirement already satisfied, skipping upgrade: opencensus-context>=0.1.3 in /home/emr-notebook/.local/lib/python3.7/site-packages (from opencensus; extra == \"all\"->ray[all]==2.0.0rc0) (0.1.3)\n", "Requirement already satisfied, skipping upgrade: google-api-core<3.0.0,>=1.0.0; python_version >= \"3.6\" in /home/emr-notebook/.local/lib/python3.7/site-packages (from opencensus; extra == \"all\"->ray[all]==2.0.0rc0) (2.8.2)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-semantic-conventions==0.20b0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from opentelemetry-sdk==1.1.0; extra == \"all\"->ray[all]==2.0.0rc0) (0.20b0)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-exporter-otlp-proto-grpc==1.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from opentelemetry-exporter-otlp==1.1.0; extra == \"all\"->ray[all]==2.0.0rc0) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: kiwisolver>=1.0.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from matplotlib!=3.4.3; extra == \"all\"->ray[all]==2.0.0rc0) (1.4.4)\n", "Requirement already satisfied, skipping upgrade: cycler>=0.10 in /home/emr-notebook/.local/lib/python3.7/site-packages (from matplotlib!=3.4.3; extra == \"all\"->ray[all]==2.0.0rc0) (0.11.0)\n", "Requirement already satisfied, skipping upgrade: fonttools>=4.22.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from matplotlib!=3.4.3; extra == \"all\"->ray[all]==2.0.0rc0) (4.34.4)\n", "Requirement already satisfied, skipping upgrade: anyio<5,>=3.4.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from starlette; extra == \"all\"->ray[all]==2.0.0rc0) (3.6.1)\n", "Requirement already satisfied, skipping upgrade: gym-notices>=0.0.4 in /home/emr-notebook/.local/lib/python3.7/site-packages (from gym<0.24.0,>=0.21.0; extra == \"all\"->ray[all]==2.0.0rc0) (0.0.7)\n", "Requirement already satisfied, skipping upgrade: smmap<6,>=3.0.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from gitdb<5,>=4.0.1->gitpython>=2.1.0->mlflow) (5.0.0)\n", "Requirement already satisfied, skipping upgrade: pyasn1<0.5.0,>=0.4.6 in /home/emr-notebook/.local/lib/python3.7/site-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard) (0.4.8)\n", "Requirement already satisfied, skipping upgrade: wcwidth>=0.1.4 in /home/emr-notebook/.local/lib/python3.7/site-packages (from blessed>=1.17.1->gpustat>=1.0.0b1; extra == \"all\"->ray[all]==2.0.0rc0) (0.2.5)\n", "Requirement already satisfied, skipping upgrade: googleapis-common-protos<2.0dev,>=1.56.2 in /home/emr-notebook/.local/lib/python3.7/site-packages (from google-api-core<3.0.0,>=1.0.0; python_version >= \"3.6\"->opencensus; extra == \"all\"->ray[all]==2.0.0rc0) (1.56.4)\n", "Requirement already satisfied, skipping upgrade: backoff~=1.10.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from opentelemetry-exporter-otlp-proto-grpc==1.1.0->opentelemetry-exporter-otlp==1.1.0; extra == \"all\"->ray[all]==2.0.0rc0) (1.10.0)\n", "Requirement already satisfied, skipping upgrade: opentelemetry-proto==1.1.0 in /home/emr-notebook/.local/lib/python3.7/site-packages (from opentelemetry-exporter-otlp-proto-grpc==1.1.0->opentelemetry-exporter-otlp==1.1.0; extra == \"all\"->ray[all]==2.0.0rc0) (1.1.0)\n", "Requirement already satisfied, skipping upgrade: sniffio>=1.1 in /home/emr-notebook/.local/lib/python3.7/site-packages (from anyio<5,>=3.4.0->starlette; extra == \"all\"->ray[all]==2.0.0rc0) (1.2.0)\n" ] } ], "source": [ "!pip3 install -U torch torchvision pandas scikit-learn tensorboard ray[all]==2.0.0rc0\n", " #https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl" ] }, { "cell_type": "code", "execution_count": 2, "id": "0ebe4ab7-e6ef-4e17-b662-884ab34944ce", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:48:03.181153Z", "iopub.status.busy": "2022-08-06T01:48:03.180966Z", "iopub.status.idle": "2022-08-06T01:48:03.191185Z", "shell.execute_reply": "2022-08-06T01:48:03.190522Z", "shell.execute_reply.started": "2022-08-06T01:48:03.181128Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'status': 'ok', 'restart': True}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# restart kernel to pick up the pip installs above\n", "import IPython\n", "\n", "IPython.Application.instance().kernel.do_shutdown(True) #automatically restarts kernel" ] }, { "cell_type": "code", "execution_count": 3, "id": "5bdc429f-15ed-4b54-b462-df5652cb7cd6", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:48:03.192594Z", "iopub.status.busy": "2022-08-06T01:48:03.192406Z", "iopub.status.idle": "2022-08-06T01:48:04.531157Z", "shell.execute_reply": "2022-08-06T01:48:04.530499Z", "shell.execute_reply.started": "2022-08-06T01:48:03.192573Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Usage stats disabled for future clusters. Restart any current running clusters for this to take effect.\n", "\u001b[0m" ] } ], "source": [ "!ray disable-usage-stats" ] }, { "cell_type": "code", "execution_count": 1, "id": "53d8975a-b047-42e0-98f4-ffee1acd5eba", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:48:55.573685Z", "iopub.status.busy": "2022-08-06T01:48:55.573412Z", "iopub.status.idle": "2022-08-06T01:48:58.899275Z", "shell.execute_reply": "2022-08-06T01:48:58.898734Z", "shell.execute_reply.started": "2022-08-06T01:48:55.573613Z" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", "

Ray

\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "
Python version:3.7.10
Ray version: 2.0.0rc0
Dashboard:http://127.0.0.1:8265
\n", "
\n", "
\n" ], "text/plain": [ "ClientContext(dashboard_url='127.0.0.1:8265', python_version='3.7.10', ray_version='2.0.0rc0', ray_commit='a0588094ec52b45a878f59e98258cd5e90f4ec36', protocol_version='2022-07-24', _num_clients=3, _context_to_restore=)" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import ray\n", "\n", "ray.shutdown()\n", "address='ray://localhost:10001'\n", "ray.init(address=address)" ] }, { "cell_type": "code", "execution_count": 2, "id": "ddd9a721-690b-4153-89e0-d0dc11f7a0cc", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:48:58.900416Z", "iopub.status.busy": "2022-08-06T01:48:58.900246Z", "iopub.status.idle": "2022-08-06T01:49:22.186198Z", "shell.execute_reply": "2022-08-06T01:49:22.185288Z", "shell.execute_reply.started": "2022-08-06T01:48:58.900396Z" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2022-08-06 01:49:05,913\tWARNING read_api.py:292 -- ⚠️ The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", "\u001b[2m\u001b[36m(_get_read_tasks pid=7856)\u001b[0m 2022-08-06 01:49:05,903\tWARNING torch_datasource.py:56 -- `SimpleTorchDatasource` doesn't support parallel reads. The `parallelism` argument will be ignored.\n", "2022-08-06 01:49:18,167\tWARNING read_api.py:292 -- ⚠️ The number of blocks in this dataset (1) limits its parallelism to 1 concurrent tasks. This is much less than the number of available CPU slots in the cluster. Use `.repartition(n)` to increase the number of dataset blocks.\n", "\u001b[2m\u001b[36m(_get_read_tasks pid=7856)\u001b[0m 2022-08-06 01:49:18,160\tWARNING torch_datasource.py:56 -- `SimpleTorchDatasource` doesn't support parallel reads. The `parallelism` argument will be ignored.\n" ] } ], "source": [ "import ray\n", "from ray.data.datasource import SimpleTorchDatasource\n", "import torchvision\n", "import torchvision.transforms as transforms\n", "from torchvision.transforms import ToTensor\n", "\n", "ray.shutdown()\n", "ray.init(address='ray://localhost:10001',\n", " runtime_env={\"pip\": [\n", " \"torch\",\n", " \"torchvision\",\n", " \"scikit-learn\",\n", " \"pandas\",\n", " \"scikit-learn\",\n", " \"tensorboardx\"\n", " ]\n", " })\n", "\n", "def train_dataset_factory():\n", " return torchvision.datasets.FashionMNIST(root=\"./data\", download=True, train=True, transform=ToTensor())\n", "\n", "def test_dataset_factory():\n", " return torchvision.datasets.FashionMNIST(root=\"./data\", download=True, train=False, transform=ToTensor())\n", "\n", "train_dataset: ray.data.Dataset = ray.data.read_datasource(SimpleTorchDatasource(), dataset_factory=train_dataset_factory)\n", "test_dataset: ray.data.Dataset = ray.data.read_datasource(SimpleTorchDatasource(), dataset_factory=test_dataset_factory)" ] }, { "cell_type": "code", "execution_count": 3, "id": "bf76f33a-ecf9-4275-aecc-e194ba7b3360", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:49:22.188225Z", "iopub.status.busy": "2022-08-06T01:49:22.187954Z", "iopub.status.idle": "2022-08-06T01:49:22.193247Z", "shell.execute_reply": "2022-08-06T01:49:22.192709Z", "shell.execute_reply.started": "2022-08-06T01:49:22.188190Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Dataset(num_blocks=1, num_rows=60000, schema=)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_dataset" ] }, { "cell_type": "code", "execution_count": 4, "id": "4f51853f-c247-47b8-b8d8-98aaf6339659", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:49:22.194606Z", "iopub.status.busy": "2022-08-06T01:49:22.194289Z", "iopub.status.idle": "2022-08-06T01:49:31.785302Z", "shell.execute_reply": "2022-08-06T01:49:31.784605Z", "shell.execute_reply.started": "2022-08-06T01:49:22.194582Z" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Read->Map_Batches: 100%|██████████| 1/1 [00:08<00:00, 8.42s/it]\n", "Read->Map_Batches: 100%|██████████| 1/1 [00:01<00:00, 1.11s/it]\n" ] } ], "source": [ "from typing import Tuple\n", "import pandas as pd\n", "from ray.data.extensions import TensorArray\n", "import torch\n", "\n", "\n", "def convert_batch_to_pandas(batch: Tuple[torch.Tensor, int]) -> pd.DataFrame:\n", " images = TensorArray([image.numpy() for image, _ in batch])\n", " labels = [label for _, label in batch]\n", "\n", " df = pd.DataFrame({\"image\": images, \"label\": labels})\n", "\n", " return df\n", "\n", "\n", "train_dataset = train_dataset.map_batches(convert_batch_to_pandas)\n", "test_dataset = test_dataset.map_batches(convert_batch_to_pandas)" ] }, { "cell_type": "code", "execution_count": 5, "id": "ecd7549f-eaf3-4e1b-9b5d-5e0c8ad245cb", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:49:31.786753Z", "iopub.status.busy": "2022-08-06T01:49:31.786488Z", "iopub.status.idle": "2022-08-06T01:49:31.791907Z", "shell.execute_reply": "2022-08-06T01:49:31.791211Z", "shell.execute_reply.started": "2022-08-06T01:49:31.786719Z" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Dataset(num_blocks=1, num_rows=60000, schema={image: TensorDtype(shape=(1, 28, 28), dtype=float32), label: int64})" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_dataset" ] }, { "cell_type": "code", "execution_count": 6, "id": "a5c407fd-17a4-4df9-871d-34c6784683be", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:49:31.793438Z", "iopub.status.busy": "2022-08-06T01:49:31.793144Z", "iopub.status.idle": "2022-08-06T01:49:31.800482Z", "shell.execute_reply": "2022-08-06T01:49:31.799773Z", "shell.execute_reply.started": "2022-08-06T01:49:31.793405Z" }, "tags": [] }, "outputs": [], "source": [ "import torch\n", "from torch import nn\n", "\n", "# Define model\n", "class NeuralNetwork(nn.Module):\n", " def __init__(self):\n", " super(NeuralNetwork, self).__init__()\n", " self.flatten = nn.Flatten()\n", " self.linear_relu_stack = nn.Sequential(\n", " nn.Linear(28 * 28, 512),\n", " nn.ReLU(),\n", " nn.Linear(512, 512),\n", " nn.ReLU(),\n", " nn.Linear(512, 10),\n", " nn.ReLU(),\n", " )\n", "\n", " def forward(self, x):\n", " x = self.flatten(x)\n", " logits = self.linear_relu_stack(x)\n", " return logits" ] }, { "cell_type": "code", "execution_count": 7, "id": "4623b066-1f36-4a3b-bfa0-751dff320e31", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T01:49:31.801922Z", "iopub.status.busy": "2022-08-06T01:49:31.801676Z", "iopub.status.idle": "2022-08-06T01:49:31.861398Z", "shell.execute_reply": "2022-08-06T01:49:31.860778Z", "shell.execute_reply.started": "2022-08-06T01:49:31.801890Z" }, "tags": [] }, "outputs": [], "source": [ "from ray import train\n", "from ray.air import session, Checkpoint\n", "import torch.optim as optim\n", "\n", "\n", "def train_loop_per_worker(config):\n", " model = train.torch.prepare_model(NeuralNetwork())\n", "\n", " criterion = nn.CrossEntropyLoss()\n", " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", "\n", " train_dataset_shard: torch.utils.data.Dataset = session.get_dataset_shard(\"train\").to_torch(\n", " feature_columns=[\"image\"],\n", " label_column=\"label\",\n", " batch_size=config[\"batch_size\"],\n", " unsqueeze_feature_tensors=False,\n", " unsqueeze_label_tensor=False\n", " )\n", "\n", " for epoch in range(config[\"epochs\"]):\n", " running_loss = 0.0\n", " for i, data in enumerate(train_dataset_shard):\n", " # get the inputs; data is a list of [inputs, labels]\n", " inputs, labels = data\n", "\n", " # zero the parameter gradients\n", " optimizer.zero_grad()\n", "\n", " # forward + backward + optimize\n", " outputs = model(inputs)\n", " loss = criterion(outputs, labels)\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # print statistics\n", " running_loss += loss.item()\n", " if i % 2000 == 1999: # print every 2000 mini-batches\n", " print(f\"[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}\")\n", " running_loss = 0.0\n", "\n", " session.report(\n", " dict(running_loss=running_loss),\n", " checkpoint=Checkpoint.from_dict(dict(model=model.module.state_dict())),\n", " )" ] }, { "cell_type": "code", "execution_count": 15, "id": "e7102ca0-0411-44e6-a7de-d74aae019c86", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T02:05:00.703608Z", "iopub.status.busy": "2022-08-06T02:05:00.703381Z", "iopub.status.idle": "2022-08-06T02:05:16.188766Z", "shell.execute_reply": "2022-08-06T02:05:16.188176Z", "shell.execute_reply.started": "2022-08-06T02:05:00.703585Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m == Status ==\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Current time: 2022-08-06 02:05:05 (running for 00:00:02.50)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Memory usage on this node: 12.5/62.1 GiB\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Using FIFO scheduling algorithm.\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Resources requested: 3.0/24 CPUs, 0/0 GPUs, 0.0/114.74 GiB heap, 0.0/51.5 GiB objects\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result logdir: /home/hadoop/ray_results/TorchTrainer_2022-08-06_02-05-03\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | Trial name | status | loc |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m |--------------------------+----------+---------------------|\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | TorchTrainer_2f23a_00000 | RUNNING | 172.31.16.116:25299 |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m 2022-08-06 02:05:07,794\tINFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=2]\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m /usr/local/lib64/python3.7/site-packages/pandas/core/indexing.py:1773: SettingWithCopyWarning: \n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m \n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m self._setitem_single_column(ilocs[0], value, pi)\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m /usr/local/lib/python3.7/site-packages/ray/air/_internal/torch_utils.py:67: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:178.)\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25354, ip=172.31.16.116)\u001b[0m return torch.as_tensor(vals, dtype=dtype)\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m 2022-08-06 02:05:10,434\tINFO train_loop_utils.py:300 -- Moving model to device: cpu\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m 2022-08-06 02:05:10,434\tINFO train_loop_utils.py:347 -- Wrapping provided model in DDP.\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m /usr/local/lib64/python3.7/site-packages/pandas/core/indexing.py:1773: SettingWithCopyWarning: \n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m A value is trying to be set on a copy of a slice from a DataFrame.\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m Try using .loc[row_indexer,col_indexer] = value instead\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m \n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m self._setitem_single_column(ilocs[0], value, pi)\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m /usr/local/lib/python3.7/site-packages/ray/air/_internal/torch_utils.py:67: UserWarning: The given NumPy array is not writable, and PyTorch does not support non-writable tensors. This means writing to this tensor will result in undefined behavior. You may want to copy the array to protect its data or make it writable before converting it to a tensor. This type of warning will be suppressed for the rest of this program. (Triggered internally at ../torch/csrc/utils/tensor_numpy.cpp:178.)\n", "\u001b[2m\u001b[36m(RayTrainWorker pid=25353, ip=172.31.16.116)\u001b[0m return torch.as_tensor(vals, dtype=dtype)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m == Status ==\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Current time: 2022-08-06 02:05:10 (running for 00:00:07.50)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Memory usage on this node: 12.5/62.1 GiB\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Using FIFO scheduling algorithm.\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Resources requested: 3.0/24 CPUs, 0/0 GPUs, 0.0/114.74 GiB heap, 0.0/51.5 GiB objects\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result logdir: /home/hadoop/ray_results/TorchTrainer_2022-08-06_02-05-03\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | Trial name | status | loc |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m |--------------------------+----------+---------------------|\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | TorchTrainer_2f23a_00000 | RUNNING | 172.31.16.116:25299 |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result for TorchTrainer_2f23a_00000:\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _time_this_iter_s: 4.841738700866699\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _timestamp: 1659751515\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _training_iteration: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m date: 2022-08-06_02-05-15\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m done: false\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m experiment_id: ee4b52f2bd854ddcbbae568865fdd47e\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m hostname: ip-172-31-16-116\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m iterations_since_restore: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m node_ip: 172.31.16.116\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m pid: 25299\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m running_loss: 1006.1698578596115\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m should_checkpoint: true\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_since_restore: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_this_iter_s: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_total_s: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m timestamp: 1659751515\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m timesteps_since_restore: 0\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m training_iteration: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m trial_id: 2f23a_00000\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m warmup_time: 0.0038607120513916016\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m == Status ==\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Current time: 2022-08-06 02:05:15 (running for 00:00:12.59)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Memory usage on this node: 12.5/62.1 GiB\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Using FIFO scheduling algorithm.\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Resources requested: 3.0/24 CPUs, 0/0 GPUs, 0.0/114.74 GiB heap, 0.0/51.5 GiB objects\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result logdir: /home/hadoop/ray_results/TorchTrainer_2022-08-06_02-05-03\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Number of trials: 1/1 (1 RUNNING)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+--------+------------------+----------------+--------------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | Trial name | status | loc | iter | total time (s) | running_loss | _timestamp | _time_this_iter_s |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m |--------------------------+----------+---------------------+--------+------------------+----------------+--------------+---------------------|\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | TorchTrainer_2f23a_00000 | RUNNING | 172.31.16.116:25299 | 1 | 9.62822 | 1006.17 | 1659751515 | 4.84174 |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+----------+---------------------+--------+------------------+----------------+--------------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result for TorchTrainer_2f23a_00000:\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _time_this_iter_s: 4.841738700866699\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _timestamp: 1659751515\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m _training_iteration: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m date: 2022-08-06_02-05-15\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m done: true\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m experiment_id: ee4b52f2bd854ddcbbae568865fdd47e\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m experiment_tag: '0'\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m hostname: ip-172-31-16-116\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m iterations_since_restore: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m node_ip: 172.31.16.116\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m pid: 25299\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m running_loss: 1006.1698578596115\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m should_checkpoint: true\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_since_restore: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_this_iter_s: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m time_total_s: 9.62822437286377\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m timestamp: 1659751515\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m timesteps_since_restore: 0\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m training_iteration: 1\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m trial_id: 2f23a_00000\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m warmup_time: 0.0038607120513916016\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m == Status ==\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Current time: 2022-08-06 02:05:16 (running for 00:00:12.86)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Memory usage on this node: 12.5/62.1 GiB\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Using FIFO scheduling algorithm.\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Resources requested: 0/24 CPUs, 0/0 GPUs, 0.0/114.74 GiB heap, 0.0/51.5 GiB objects\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Result logdir: /home/hadoop/ray_results/TorchTrainer_2022-08-06_02-05-03\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m Number of trials: 1/1 (1 TERMINATED)\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+------------+---------------------+--------+------------------+----------------+--------------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | Trial name | status | loc | iter | total time (s) | running_loss | _timestamp | _time_this_iter_s |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m |--------------------------+------------+---------------------+--------+------------------+----------------+--------------+---------------------|\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m | TorchTrainer_2f23a_00000 | TERMINATED | 172.31.16.116:25299 | 1 | 9.62822 | 1006.17 | 1659751515 | 4.84174 |\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m +--------------------------+------------+---------------------+--------+------------------+----------------+--------------+---------------------+\n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n", "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m \n" ] } ], "source": [ "from ray.train.torch import TorchTrainer\n", "from ray.air.config import ScalingConfig, RunConfig\n", "from ray.tune import SyncConfig\n", "\n", "s3_checkpoint_prefix=\"s3://dsoaws/ray_output\"\n", "\n", "trainer = TorchTrainer(\n", " train_loop_per_worker=train_loop_per_worker,\n", " train_loop_config={\n", " \"batch_size\": 64,\n", " \"epochs\": 1\n", " },\n", " datasets={\"train\": train_dataset},\n", " scaling_config=ScalingConfig(num_workers=2),\n", " run_config = RunConfig(\n", " sync_config=SyncConfig(\n", " # This will store checkpoints on S3.\n", " upload_dir=s3_checkpoint_prefix\n", " )\n", " )\n", ")\n", " \n", "result = trainer.fit()" ] }, { "cell_type": "code", "execution_count": 16, "id": "50ae0f69-92a2-4132-912d-8ef1ceffdd7b", "metadata": { "execution": { "iopub.execute_input": "2022-08-06T02:05:16.189918Z", "iopub.status.busy": "2022-08-06T02:05:16.189750Z", "iopub.status.idle": "2022-08-06T02:05:16.193491Z", "shell.execute_reply": "2022-08-06T02:05:16.192937Z", "shell.execute_reply.started": "2022-08-06T02:05:16.189898Z" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Your model checkpoint files are here:\n", "s3://dsoaws/ray_output/TorchTrainer_2022-08-06_02-05-03/TorchTrainer_2f23a_00000_0_2022-08-06_02-05-03/checkpoint_000000\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[2m\u001b[36m(TunerInternal pid=11224)\u001b[0m 2022-08-06 02:05:16,173\tINFO tune.py:759 -- Total run time: 13.02 seconds (12.86 seconds for the tuning loop).\n" ] } ], "source": [ "print(\"Your model checkpoint files are here:\")\n", "print(f\"{s3_checkpoint_prefix}/{str(result.log_dir).split('/')[-2]}/{str(result.log_dir).split('/')[-1]}/checkpoint_000000\")" ] }, { "cell_type": "code", "execution_count": null, "id": "6511f77a-7c3e-432c-a940-bdfea6108ecf", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 5 }