{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# XGBoost training with Ray Train in a Glue Ray session\n",
    "\n",
    "This notebook reads the breast cancer CSV from S3 with Ray Data, splits it 70/30 into train and validation sets, fits an `XGBoostTrainer` for 20 boosting rounds, and prints the resulting metrics."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Session setup\n",
    "\n",
    "Select the Ray session type, set the session id prefix, and request the additional Python modules used below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "%glue_ray\n",
    "%session_id_prefix xgboost-ray\n",
    "%additional_python_modules ray[ml],xgboost,xgboost_ray"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Worker-count and object-memory settings for the session."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "%min_workers 1\n",
    "%number_of_workers 2\n",
    "%object_memory_worker 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ray\n",
    "from ray.air.config import RunConfig, ScalingConfig\n",
    "from ray.train.xgboost import XGBoostTrainer"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and split the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the CSV from S3 with Ray Data.\n",
    "dataset = ray.data.read_csv(\"s3://air-example-data/breast_cancer.csv\")\n",
    "\n",
    "# Hold out 30% of the data for validation; the rest is used for training.\n",
    "train_dataset, valid_dataset = dataset.train_test_split(test_size=0.3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configure the trainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): the session above requests `%number_of_workers 2`; confirm it\n",
    "# can schedule the 10 training workers asked for here.\n",
    "scaling_config = ScalingConfig(\n",
    "    num_workers=10,  # workers used for data-parallel training\n",
    "    use_gpu=False,  # GPU acceleration is turned off\n",
    ")\n",
    "run_config = RunConfig(local_dir=\"/tmp/ray_results\")\n",
    "\n",
    "# Parameters specific to XGBoost.\n",
    "xgboost_params = {\n",
    "    \"objective\": \"binary:logistic\",\n",
    "    \"eval_metric\": [\"logloss\", \"error\"],\n",
    "}\n",
    "\n",
    "trainer = XGBoostTrainer(\n",
    "    scaling_config=scaling_config,\n",
    "    run_config=run_config,\n",
    "    label_column=\"target\",\n",
    "    num_boost_round=20,\n",
    "    params=xgboost_params,\n",
    "    datasets={\"train\": train_dataset, \"valid\": valid_dataset},\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and report metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "result = trainer.fit()\n",
    "print(result.metrics)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean up\n",
    "\n",
    "Stop the session once training has finished."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "%stop_session"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Glue Python [PySpark and Ray] (SparkAnalytics 1.0)",
   "language": "python",
   "name": "conda-env-sm_glue_is-glue_pyspark__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-sparkanalytics-v1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "python",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "Python_Glue_Session",
   "pygments_lexer": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}