{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FITS data lake: header table exploration\n",
    "\n",
    "This notebook starts an AWS Glue interactive session, loads a table from the Glue Data Catalog into a DynamicFrame, and inspects a few of its columns.\n",
    "\n",
    "Run the cells from top to bottom on a fresh kernel."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Session setup\n",
    "\n",
    "The `%` magics configure the Glue interactive session (idle timeout, Glue version, worker type and worker count). The rest of the cell creates the Spark context, the Glue context, the Spark session and a `Job` handle."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "vscode": {
     "languageId": "python_glue_session"
    }
   },
   "outputs": [],
   "source": [
    "%idle_timeout 2880\n",
    "%glue_version 3.0\n",
    "%worker_type G.1X\n",
    "%number_of_workers 5\n",
    "\n",
    "import sys\n",
    "from awsglue.transforms import *\n",
    "from awsglue.utils import getResolvedOptions\n",
    "from pyspark.context import SparkContext\n",
    "from awsglue.context import GlueContext\n",
    "from awsglue.job import Job\n",
    "\n",
    "# Reuse the running SparkContext if one exists so this cell can be re-run safely.\n",
    "sc = SparkContext.getOrCreate()\n",
    "glueContext = GlueContext(sc)\n",
    "spark = glueContext.spark_session\n",
    "job = Job(glueContext)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the catalog table\n",
    "\n",
    "Read the table into a DynamicFrame, then print its row count and schema."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "vscode": {
     "languageId": "python_glue_session"
    }
   },
   "outputs": [],
   "source": [
    "# Change these to match your own database and table names.\n",
    "database_name = \"fits_datalake\"\n",
    "table_name = \"my_fits_datalake_fitsstorebucket\"\n",
    "\n",
    "header = glueContext.create_dynamic_frame.from_catalog(\n",
    "    database=database_name,\n",
    "    table_name=table_name,\n",
    ")\n",
    "print(\"Count: \", header.count())\n",
    "header.printSchema()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Keep only the columns of interest\n",
    "\n",
    "Convert the DynamicFrame to a Spark DataFrame and select a subset of its columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "vscode": {
     "languageId": "python_glue_session"
    }
   },
   "outputs": [],
   "source": [
    "short_header_df = header.toDF().select(\n",
    "    \"source_key\",\n",
    "    \"card_name\",\n",
    "    \"card_value\",\n",
    "    \"partition_1\",\n",
    "    \"partition_2\",\n",
    ")\n",
    "short_header_df.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Filter on a single card name\n",
    "\n",
    "Show only the rows whose `card_name` is `ORIGIN`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "trusted": true,
    "vscode": {
     "languageId": "python_glue_session"
    }
   },
   "outputs": [],
   "source": [
    "origin_cards_df = short_header_df.where(\"card_name='ORIGIN'\")\n",
    "origin_cards_df.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Glue PySpark",
   "language": "python",
   "name": "glue_pyspark"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "python",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "Python_Glue_Session",
   "pygments_lexer": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}