{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![logo](./finspace_logo.png)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FinSpace Time Series Indicators\n",
    "This notebook reads a FinSpace data view, filters it to a single day of `TRADE NB` events, and applies the FinSpace time series indicator functions to it, collecting the results in one DataFrame.\n",
    "\n",
    "Before running, set `dataset_id` and `view_id` in the configuration cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%local\n",
    "from aws.finspace.cluster import FinSpaceClusterManager\n",
    "\n",
    "# if this was already run, no need to run again\n",
    "if 'finspace_clusters' not in globals():\n",
    "    finspace_clusters = FinSpaceClusterManager()\n",
    "    finspace_clusters.auto_connect()\n",
    "else:\n",
    "    print(f'connected to cluster: {finspace_clusters.get_connected_cluster_id()}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#####----------------------------------------------------------\n",
    "##### REPLACE WITH CORRECT IDS!\n",
    "##### Dataset: \"US Equity Time-Bar Summary - 1 min, 14 Symbols\"\n",
    "#####\n",
    "#####----------------------------------------------------------\n",
    "dataset_id = ''\n",
    "view_id = ''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import needed libraries\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import datetime as dt\n",
    "import pyspark.sql.functions as F\n",
    "import pyspark.sql.types as T\n",
    "\n",
    "from aws.finspace.analytics import FinSpaceAnalyticsManager\n",
    "from aws.finspace.timeseries.spark.util import string_to_timestamp_micros\n",
    "# the star imports below supply the indicator functions and\n",
    "# compute_analytics_on_features used later in this notebook\n",
    "from aws.finspace.timeseries.spark.analytics import *\n",
    "from aws.finspace.timeseries.spark.windows import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load the Data View"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fail early with a clear message if the placeholder IDs were not replaced\n",
    "if not dataset_id or not view_id:\n",
    "    raise ValueError('dataset_id and view_id must be set before reading the data view')\n",
    "\n",
    "finspace = FinSpaceAnalyticsManager(spark = spark)\n",
    "\n",
    "sumDF = finspace.read_data_view(dataset_id = dataset_id, data_view_id = view_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sumDF.show(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# for now, limit the dates involved (start and end are the same day)\n",
    "sDate = dt.datetime(2019, 10, 1)\n",
    "eDate = dt.datetime(2019, 10, 1)\n",
    "\n",
    "# keep only 'TRADE NB' events whose date falls between sDate and eDate\n",
    "df = ( sumDF.filter(sumDF.eventtype == \"TRADE NB\").filter( sumDF.date.between(sDate, eDate) ) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.show(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Available Time Series Features"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "help(moving_average)\n",
    "help(realized_volatility)\n",
    "help(exponential_moving_average)\n",
    "help(moving_average_converge_diverge)\n",
    "help(moving_average_converge_diverge_hist)\n",
    "\n",
    "help(momentum_indicator)\n",
    "help(roc_indicator)\n",
    "help(rsi)\n",
    "help(realized_volatility_spread)\n",
    "help(pairwise_realized_correlation)\n",
    "\n",
    "help(linear_regression)\n",
    "help(realized_correlation_matrix)\n",
    "help(abands)\n",
    "help(bollinger_bands)\n",
    "help(acc_dist_indicator)\n",
    "\n",
    "help(average_true_range)\n",
    "help(commodity_channel_index)\n",
    "help(ichimoku_indicator)\n",
    "help(linear_weighted_moving_average)\n",
    "help(on_balance_volume)\n",
    "\n",
    "help(slow_stock_oscillator)\n",
    "help(fast_stock_oscillator)\n",
    "help(aroon_up_indicator)\n",
    "help(aroon_down_indicator)\n",
    "help(aroon_oscillator)\n",
    "\n",
    "help(will_r_indicator)\n",
    "help(cmo_indicator)\n",
    "help(natr_indicator)\n",
    "help(pvt_indicator)\n",
    "help(keltner_indicator)\n",
    "\n",
    "help(ult_osc_indicator)\n",
    "help(apo_indicator)\n",
    "help(double_exponential_moving_average)\n",
    "help(mf_indicator)\n",
    "help(triple_exponential_moving_average)\n",
    "\n",
    "help(ts_linear_regression)\n",
    "help(weighted_ts_linear_regression)\n",
    "help(weighted_linear_regression)\n",
    "help(percentage_price_oscillator)\n",
    "help(ROC100_indicator)\n",
    "\n",
    "help(ROCP_indicator)\n",
    "help(ROCR_indicator)\n",
    "help(price_channel_indicator)\n",
    "help(trima_indicator)\n",
    "help(trix_indicator)\n",
    "\n",
    "help(midprice_indicator)\n",
    "help(midpoint_indicator)\n",
    "help(min_indicator)\n",
    "help(max_indicator)\n",
    "help(minmax_indicator)\n",
    "\n",
    "help(kama_indicator)\n",
    "help(adx_indicator)\n",
    "help(t3_ema_indicator)\n",
    "help(wilder_smoothing_indicator)\n",
    "help(sar_indicator)\n",
    "\n",
    "help(stddev_indicator)\n",
    "help(dmi_indicator)\n",
    "help(tr_indicator)\n",
    "help(pos_dm_indicator)\n",
    "help(neg_dm_indicator)\n",
    "\n",
    "help(chaiken_money_flow_indicator)\n",
    "help(force_index_indicator)\n",
    "help(ease_of_movement_indicator)\n",
    "help(negative_volume_indicator)\n",
    "help(donchian_channel_indicator)\n",
    "\n",
    "help(mass_index_indicator)\n",
    "help(dpo_indicator)\n",
    "help(coppock_curve_indicator)\n",
    "help(elder_ray_index_indicator)\n",
    "help(fisher_transformation_indicator)\n",
    "\n",
    "help(chaikens_volatility_indicator)\n",
    "help(klinger_oscillator_indicator)\n",
    "help(hull_moving_average_indicator)\n",
    "help(time_series_forecast_indicator)\n",
    "help(stoch_rsi_indicator)\n",
    "\n",
    "help(typical_price_indicator)\n",
    "help(weighted_close_indicator)\n",
    "help(adrx_indicator)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# all functions have help, like this\n",
    "help(time_series_forecast_indicator)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# All Indicators in one DataFrame\n",
    "This is an example of how one can combine any or all of the indicator functions into a very wide DataFrame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# start from the filtered data; tsDF is re-assigned as each indicator is applied\n",
    "tsDF = df\n",
    "\n",
    "# group the sets of values\n",
    "partitionList = [\"ticker\", \"eventtype\"]\n",
    "\n",
    "# shared parameters passed to most of the indicators below\n",
    "tenor = 15\n",
    "numStd = 2\n",
    "\n",
    "# names of the input columns handed to the indicator functions\n",
    "timeCol = 'end'\n",
    "priceCol = 'vwap'\n",
    "highCol = 'high'\n",
    "lowCol = 'low'\n",
    "volCol = 'volume'\n",
    "\n",
    "# maps the name given to compute_analytics_on_features -> configured indicator\n",
    "ts_dict = {\n",
    "    'moving_average' : moving_average( tenor, timeCol, priceCol ),\n",
    "    'realized_volatility' : realized_volatility( tenor, timeCol, priceCol ),\n",
    "    'exponential_moving_average' : exponential_moving_average( tenor, timeCol, priceCol ),\n",
    "    'moving_average_converge_diverge' : moving_average_converge_diverge( 12, 26, timeCol, priceCol ),\n",
    "    'moving_average_converge_diverge_hist' : moving_average_converge_diverge_hist( 12, 26, 9, timeCol, priceCol ),\n",
    "\n",
    "    'momentum_indicator' : momentum_indicator( tenor, timeCol, priceCol ),\n",
    "    'roc_indicator' : roc_indicator( tenor, timeCol, priceCol ),\n",
    "    'rsi' : rsi( tenor, timeCol, priceCol ),\n",
    "    'realized_volatility_spread' : realized_volatility_spread( tenor, timeCol, priceCol, 'open' ),\n",
    "    'pairwise_realized_correlation' : pairwise_realized_correlation( tenor, timeCol, priceCol, 'open' ),\n",
    "\n",
    "    'bollinger_bands' : bollinger_bands( tenor, numStd, timeCol, priceCol, highCol, lowCol ),\n",
    "    'acc_dist_indicator' : acc_dist_indicator( timeCol, priceCol, highCol, lowCol, volCol ),\n",
    "\n",
    "    'average_true_range' : average_true_range( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'commodity_channel_index' : commodity_channel_index( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'ichimoku_indicator' : ichimoku_indicator( timeCol, priceCol, 9, 26, 52 ),\n",
    "    'linear_weighted_moving_average' : linear_weighted_moving_average( tenor, timeCol, priceCol ),\n",
    "    'on_balance_volume' : on_balance_volume( timeCol, priceCol, volCol ),\n",
    "\n",
    "    'slow_stock_oscillator' : slow_stock_oscillator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'fast_stock_oscillator' : fast_stock_oscillator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'aroon_up_indicator' : aroon_up_indicator( tenor, timeCol, priceCol ),\n",
    "    'aroon_down_indicator' : aroon_down_indicator( tenor, timeCol, priceCol ),\n",
    "    'aroon_oscillator' : aroon_oscillator( tenor, timeCol, priceCol ),\n",
    "\n",
    "    'will_r_indicator' : will_r_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'cmo_indicator' : cmo_indicator( tenor, timeCol, priceCol ),\n",
    "    'natr_indicator' : natr_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'pvt_indicator' : pvt_indicator( timeCol, priceCol, volCol ),\n",
    "    'keltner_indicator' : keltner_indicator( timeCol, priceCol, highCol, lowCol, 2, 20, 20),\n",
    "\n",
    "    'ult_osc_indicator' : ult_osc_indicator( timeCol, priceCol, highCol, lowCol ),\n",
    "    'apo_indicator' : apo_indicator( 12, 26, timeCol, priceCol ),\n",
    "    'double_exponential_moving_average' : double_exponential_moving_average( tenor, timeCol, priceCol ),\n",
    "    'mf_indicator' : mf_indicator( tenor, timeCol, priceCol, highCol, lowCol, volCol ),\n",
    "    'triple_exponential_moving_average' : triple_exponential_moving_average( tenor, timeCol, priceCol ),\n",
    "\n",
    "    'ts_linear_regression' : ts_linear_regression( tenor, 'seconds', timeCol, timeCol, priceCol ),\n",
    "    'weighted_ts_linear_regression' : weighted_ts_linear_regression( tenor, 'seconds', timeCol, timeCol, priceCol, volCol ),\n",
    "    'percentage_price_oscillator' : percentage_price_oscillator( 12, 26, 9, timeCol, priceCol ),\n",
    "    'ROC100_indicator' : ROC100_indicator( tenor, timeCol, priceCol ),\n",
    "\n",
    "    'ROCP_indicator' : ROCP_indicator( tenor, timeCol, priceCol ),\n",
    "    'ROCR_indicator' : ROCR_indicator( tenor, timeCol, priceCol ),\n",
    "    'price_channel_indicator' : price_channel_indicator( tenor, timeCol, priceCol ),\n",
    "    'trima_indicator' : trima_indicator( tenor, timeCol, priceCol ),\n",
    "    'trix_indicator' : trix_indicator( tenor, timeCol, priceCol ),\n",
    "\n",
    "    'midprice_indicator' : midprice_indicator( tenor, timeCol, highCol, lowCol ),\n",
    "    'midpoint_indicator' : midpoint_indicator( tenor, timeCol, priceCol ),\n",
    "    'min_indicator' : min_indicator( tenor, timeCol, priceCol ),\n",
    "    'max_indicator' : max_indicator( tenor, timeCol, priceCol ),\n",
    "    'minmax_indicator' : minmax_indicator( tenor, timeCol, priceCol ),\n",
    "\n",
    "    'kama_indicator' : kama_indicator( timeCol, priceCol, 10, 30, 2 ),\n",
    "    'adx_indicator' : adx_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    't3_ema_indicator' : t3_ema_indicator( tenor, timeCol, priceCol ),\n",
    "    'wilder_smoothing_indicator' : wilder_smoothing_indicator( tenor, timeCol, priceCol ),\n",
    "    'sar_indicator' : sar_indicator( timeCol, priceCol, highCol, lowCol ),\n",
    "\n",
    "    'stddev_indicator' : stddev_indicator( tenor, timeCol, priceCol ),\n",
    "    'dmi_indicator' : dmi_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'tr_indicator' : tr_indicator( timeCol, priceCol, highCol, lowCol ),\n",
    "    'pos_dm_indicator' : pos_dm_indicator( timeCol, highCol, lowCol ),\n",
    "    'neg_dm_indicator' : neg_dm_indicator( timeCol, highCol, lowCol ),\n",
    "\n",
    "    'chaiken_money_flow_indicator' : chaiken_money_flow_indicator( tenor, timeCol, priceCol, highCol, lowCol, volCol ),\n",
    "    'force_index_indicator' : force_index_indicator( tenor, timeCol, priceCol, volCol ),\n",
    "    'negative_volume_indicator' : negative_volume_indicator( 1, timeCol, priceCol, volCol ),\n",
    "    'donchian_channel_indicator' : donchian_channel_indicator( tenor, timeCol, highCol, lowCol ),\n",
    "\n",
    "    'mass_index_indicator' : mass_index_indicator( 9, 25, timeCol, highCol, lowCol ),\n",
    "    'dpo_indicator' : dpo_indicator( tenor, timeCol, priceCol ),\n",
    "    'coppock_curve_indicator' : coppock_curve_indicator( 14, 11, 10, timeCol, priceCol ),\n",
    "    'elder_ray_index_indicator' : elder_ray_index_indicator( tenor, timeCol, priceCol ),\n",
    "    'fisher_transformation_indicator' : fisher_transformation_indicator( tenor, timeCol, highCol, lowCol ),\n",
    "\n",
    "    'chaikens_volatility_indicator' : chaikens_volatility_indicator( tenor, timeCol, highCol, lowCol ),\n",
    "    'klinger_oscillator_indicator' : klinger_oscillator_indicator( 34, 55, timeCol, priceCol, highCol, lowCol, volCol ),\n",
    "    'hull_moving_average_indicator' : hull_moving_average_indicator( tenor, timeCol, priceCol ),\n",
    "    'time_series_forecast_indicator' : time_series_forecast_indicator( tenor, 'seconds', timeCol, timeCol, priceCol ),\n",
    "    'stoch_rsi_indicator' : stoch_rsi_indicator( tenor, tenor, timeCol, priceCol ),\n",
    "\n",
    "    'typical_price_indicator' : typical_price_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'weighted_close_indicator' : weighted_close_indicator( tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "    'adrx_indicator' : adrx_indicator( tenor, tenor, timeCol, priceCol, highCol, lowCol ),\n",
    "\n",
    "}\n",
    "\n",
    "# apply the indicators one after another; each call's result is the input to the next\n",
    "for key, fnc in ts_dict.items():\n",
    "    tsDF = compute_analytics_on_features(tsDF, key, fnc, partition_col_list = partitionList)\n",
    "\n",
    "tsDF.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute and Sample some rows\n",
    "# 2*tenor: get past the initial NAs that are computed for less-full windows\n",
    "\n",
    "tsDF.show(2*tenor)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# record when the notebook was last executed (dt is imported in the imports cell)\n",
    "print( f\"Last Run: {dt.datetime.now()}\" )"
   ]
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "FinSpace PySpark (finspace-sparkmagic-84084/latest)",
   "language": "python",
   "name": "pysparkkernel__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:489461498020:image/finspace-sparkmagic-84084"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "python",
    "version": 3
   },
   "mimetype": "text/x-python",
   "name": "pyspark",
   "pygments_lexer": "python3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}