{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a1c0e5d2-intro-glue-ray-s3-select",
   "metadata": {},
   "source": [
    "# S3 Select with awswrangler on a Glue Ray session\n",
    "\n",
    "Run the cells top to bottom:\n",
    "\n",
    "1. Stop the current session and configure a new Glue Ray session (`%glue_ray`, session id prefix, worker counts, object memory).\n",
    "2. Connect to the Ray cluster with `ray.init(\"auto\")`.\n",
    "3. Import `awswrangler` and print the active execution engine and memory format.\n",
    "4. Run an S3 Select query against a Parquet object and preview the result."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b189b16-04c1-4f3a-bd2a-4b581e4955b4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%stop_session"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5bbe98a-9311-406f-88dd-f61228ebb5ca",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%glue_ray\n",
    "%session_id_prefix aws-sdk-for-pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d08c2e56-c02d-4237-8395-b7763ca342ec",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%min_workers 1\n",
    "%number_of_workers 2\n",
    "%object_memory_worker 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7eb96e2c-d713-4f63-b89a-331bd39df42b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import ray\n",
    "\n",
    "# \"auto\" attaches to the already-running Ray cluster rather than starting a local one.\n",
    "ray.init(\n",
    "    \"auto\",\n",
    "    runtime_env={\n",
    "        # NOTE(review): `local_dir` is not a documented Ray runtime_env field -- confirm it has any effect.\n",
    "        \"local_dir\": \"/tmp/ray_results\",\n",
    "        # Worker environment variables belong under `env_vars`; as a top-level\n",
    "        # runtime_env key the Modin flag would not be exported to the workers.\n",
    "        \"env_vars\": {\"__MODIN_AUTOIMPORT_PANDAS__\": \"1\"},\n",
    "    },\n",
    "    configure_logging=True,\n",
    "    log_to_driver=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c05db94-c7fe-4e25-a742-a1f66b1a0bd8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): imported after ray.init() -- presumably so awswrangler uses the\n",
    "# connection configured above; confirm before reordering these cells.\n",
    "import awswrangler as wr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7437fd2-1ff5-411d-a5dc-312b907f5717",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report which execution engine and memory format awswrangler is using.\n",
    "print(f\"Execution Engine: {wr.engine.get()}\")\n",
    "print(f\"Memory Format: {wr.memory_format.get()}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61535875-9963-4b9d-80ac-c9a92f3d2bae",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# The SQL filter keeps only rows whose star_rating is >= 5.\n",
    "df = wr.s3.select_query(\n",
    "    sql=\"SELECT * FROM s3object s where s.\\\"star_rating\\\" >= 5\",\n",
    "    path=\"s3://amazon-reviews-pds/parquet/product_category=Gift_Card/part-00000-495c48e6-96d6-4650-aa65-3c36a3516ddd.c000.snappy.parquet\",\n",
    "    input_serialization=\"Parquet\",\n",
    "    input_serialization_params={},\n",
    "    use_threads=True,\n",
    ")\n",
    "\n",
    "df.head()"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "Glue Python [PySpark and Ray] (SparkAnalytics 1.0)",
   "language": "python",
   "name": "conda-env-sm_glue_is-glue_pyspark__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-sparkanalytics-v1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "python",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "Python_Glue_Session",
   "pygments_lexer": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}