{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%env\n", "SCHEMA=https://\n", "OUTPUT=s3://\n", "ETL_CONF_ENV=production" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "{\n", " \"type\": \"DelimitedExtract\",\n", " \"name\": \"extract csv data from nyc_tripdata\",\n", " \"environments\": [\"production\", \"test\"],\n", " \"inputURI\": \"s3a://nyc-tlc/trip*data/green_tripdata_*.csv\",\n", " \"outputView\": \"green_tripdata0_raw\", \n", " \"delimiter\": \"Comma\",\n", " \"quote\" : \"DoubleQuote\",\n", " \"header\": true,\n", " \"persist\": true\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "{\n", " \"type\": \"TypingTransform\",\n", " \"name\": \"apply green_tripdata schema 0 data types\",\n", " \"environments\": [\"production\", \"test\"],\n", " \"schemaURI\": ${SCHEMA},\n", " \"inputView\": \"green_tripdata0_raw\", \n", " \"outputView\": \"green_tripdata0\"\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%sql name=\"aggregate the result by month and year\" outputView=green_trip_summery environments=production,test persist=true\n", "\n", "SELECT \n", " year(lpep_pickup_datetime) AS trip_year\n", " ,month(lpep_pickup_datetime) AS trip_month\n", " ,vendor_id\n", " ,sum(coalesce(trip_distance,0)) AS total_distance\n", " ,sum(coalesce(total_amount,0)) AS total_fee\n", "FROM green_tripdata0\n", "GROUP BY trip_year, trip_month, vendor_id" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "{\n", " \"type\": \"ParquetLoad\",\n", " \"name\": \"write out green_tripdata0 dataset as Parquet\",\n", " \"environments\": [\"production\", \"test\"],\n", " \"inputView\": \"green_trip_summery\",\n", " \"outputURI\": ${OUTPUT},\n", " \"saveMode\": \"Overwrite\"\n", "}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Arc", "language": "javascript", "name": "arc" }, "language_info": { "codemirror_mode": "javascript", "file_extension": ".json", "mimetype": "javascript", "name": "arc", "nbconvert_exporter": "arcexport", "version": "3.12.1" } }, "nbformat": 4, "nbformat_minor": 4 }