{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%conf \n",
    "numRows=5\n",
    "showLog=true"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%env \n",
    "ETL_CONF_DATA_URL=s3a://nyc-tlc/trip*data\n",
    "ETL_CONF_JOB_URL=https://raw.githubusercontent.com/tripl-ai/arc-starter/master/examples/kubernetes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"DelimitedExtract\",\n",
    " \"name\": \"extract data from green_tripdata schema 0\",\n",
    " \"environments\": [\"production\", \"test\"],\n",
    " \"inputURI\": ${ETL_CONF_DATA_URL}\"/green_tripdata_2013-08.csv\",\n",
    " \"outputView\": \"green_tripdata0_raw\", \n",
    " \"delimiter\": \"Comma\",\n",
    " \"quote\" : \"DoubleQuote\",\n",
    " \"header\": true,\n",
    " \"persist\": true\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "{\n",
    " \"type\": \"TypingTransform\",\n",
    " \"name\": \"apply green_tripdata schema 0 data types\",\n",
    " \"environments\": [\"production\", \"test\"],\n",
    " \"schemaURI\": ${ETL_CONF_JOB_URL}\"/green_tripdata0.json\",\n",
    " \"inputView\": \"green_tripdata0_raw\", \n",
    " \"outputView\": \"green_tripdata0\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sqlvalidate name=\"ensure no errors exist after data typing\" environments=production,test\n",
    "SELECT\n",
    " SUM(error) = 0 AS valid\n",
    " ,TO_JSON(\n",
    " NAMED_STRUCT(\n",
    " 'count', COUNT(error), \n",
    " 'errors', SUM(error)\n",
    " )\n",
    " ) AS message\n",
    "FROM (\n",
    " SELECT \n",
    " CASE \n",
    " WHEN SIZE(_errors) > 0 THEN 1 \n",
    " ELSE 0 \n",
    " END AS error \n",
    " FROM green_tripdata0\n",
    ") input_table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%sql name=\"ensure a query can be executed\" environments=production,test persist=true outputView=green_trip_filtered\n",
    "SELECT * \n",
    "FROM green_tripdata0\n",
    "WHERE store_and_fwd_flag = TRUE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Arc",
   "language": "javascript",
   "name": "arc"
  },
  "language_info": {
   "file_extension": "arc",
   "mimetype": "text/arc",
   "name": "arc",
   "nbconvert_exporter": "text",
   "version": "2.2.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}