{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "md-intro",
   "metadata": {},
   "source": [
    "# Multi-step forecasting of `rgnp`\n",
    "\n",
    "Two stages, evaluated on the same hold-out dates:\n",
    "\n",
    "1. **Recursive**: a one-step-ahead multi-output model is rolled forward `HORIZON` times.\n",
    "2. **Direct chain**: one LightGBM model per horizon; the model for horizon *h* also receives horizons 1..*h*-1 as features (true values when fitting, the recursive forecasts when predicting).\n",
    "\n",
    "The final cell reports the mean absolute error per horizon."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e72f43f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# -nc: do not download again when Raotbl6.csv is already present\n",
    "!wget -nc https://raw.githubusercontent.com/mlzoo/multi-step-regression-tutorials/main/Raotbl6.csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fa654d28",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import lightgbm as lgb\n",
    "from sklearn.metrics import mean_squared_error\n",
    "from sklearn.multioutput import MultiOutputRegressor\n",
    "\n",
    "# Index the observations by their date; set_index returns a new frame, so\n",
    "# re-running this cell always starts from the file on disk.\n",
    "df = pd.read_csv('Raotbl6.csv')\n",
    "df['date'] = pd.to_datetime(df['date'])\n",
    "df = df.set_index('date')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-recursive",
   "metadata": {},
   "source": [
    "## Stage 1: recursive one-step model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f98cf80",
   "metadata": {},
   "outputs": [],
   "source": [
    "target = 'rgnp'\n",
    "HORIZON = 12          # number of steps forecast ahead\n",
    "TRAIN_FRACTION = 0.8  # leading share of the rows used for fitting\n",
    "SEED = 42             # passed to LightGBM so refits are repeatable\n",
    "\n",
    "# One chronological split point, reused by every later cell so that all\n",
    "# train/test windows and all forecasts refer to the same dates.\n",
    "split_idx = int(len(df) * TRAIN_FRACTION)\n",
    "split_date = df.index[split_idx]\n",
    "\n",
    "train_window = df.iloc[:split_idx]\n",
    "test_window = df.iloc[split_idx:]\n",
    "\n",
    "# One-step pairs: features are row t, labels are row t + 1 (every column).\n",
    "# The last row of each window has no successor inside the window and is dropped.\n",
    "X_train_1step = train_window.iloc[:-1]\n",
    "y_train_1step = train_window.shift(-1).dropna()\n",
    "\n",
    "X_test_1step = test_window.iloc[:-1]\n",
    "y_test_1step = test_window.shift(-1).dropna()\n",
    "\n",
    "model = MultiOutputRegressor(\n",
    "    lgb.LGBMRegressor(objective='regression', random_state=SEED)\n",
    ").fit(X_train_1step, y_train_1step)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28742b07",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Roll the one-step model forward: each pass turns the frame for step k into\n",
    "# the forecast for step k + 1. Every frame keeps the index of the forecast\n",
    "# origin, so results[k].loc[t] is the (k + 1)-step-ahead forecast made from row t.\n",
    "results = []\n",
    "data = X_test_1step\n",
    "for _ in range(HORIZON):\n",
    "    data = pd.DataFrame(model.predict(data), columns=data.columns, index=data.index)\n",
    "    results.append(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-direct",
   "metadata": {},
   "source": [
    "## Stage 2: direct model chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b4ee882",
   "metadata": {},
   "outputs": [],
   "source": [
    "# make future targets\n",
    "# Built as a new frame so `df` keeps every row and the cells above stay\n",
    "# re-runnable. Column target_h holds the target h steps after the row; the\n",
    "# trailing rows without a complete horizon are dropped.\n",
    "horizon_cols = {'{}_{}'.format(target, h): df[target].shift(-h) for h in range(1, HORIZON + 1)}\n",
    "df_direct = df.assign(**horizon_cols).dropna()\n",
    "\n",
    "targets = list(horizon_cols)\n",
    "\n",
    "# Same hold-out dates as stage 1. Training origins stop HORIZON rows before the\n",
    "# split so that none of their targets falls inside the hold-out period.\n",
    "assert split_idx > HORIZON, 'not enough rows before the split for a full horizon'\n",
    "train_end_date = df.index[split_idx - HORIZON]\n",
    "is_train = df_direct.index < train_end_date\n",
    "is_test = df_direct.index >= split_date\n",
    "\n",
    "X_train = df_direct.loc[is_train].drop(columns=targets)\n",
    "y_train = df_direct.loc[is_train, targets]\n",
    "\n",
    "X_test = df_direct.loc[is_test].drop(columns=targets)\n",
    "y_test = df_direct.loc[is_test, targets]\n",
    "\n",
    "assert len(X_test) > 0, 'no hold-out rows with a complete horizon'\n",
    "assert X_test.index.isin(X_test_1step.index).all(), 'recursive forecasts do not cover every hold-out row'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "243ee157",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One model per horizon. The model for horizon h is fitted on the base features\n",
    "# plus the true values of horizons 1..h-1, which are appended after each fit.\n",
    "models = []\n",
    "X_chain_train = X_train.copy()  # grow a copy so X_train is untouched on re-run\n",
    "for col in targets:\n",
    "    # A separate name keeps the stage-1 `model` available to the cells above.\n",
    "    step_model = lgb.LGBMRegressor(objective='regression', random_state=SEED).fit(X_chain_train, y_train[col])\n",
    "    X_chain_train[col] = y_train[col]\n",
    "    models.append(step_model)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "md-eval",
   "metadata": {},
   "source": [
    "## Evaluation: mean absolute error per horizon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95170bbd",
   "metadata": {},
   "outputs": [],
   "source": [
    "final_results = []\n",
    "X_chain_test = X_test.copy()  # grow a copy so X_test is untouched on re-run\n",
    "\n",
    "for step_model, recursive_step, col in zip(models, results, targets):\n",
    "    final_results.append(step_model.predict(X_chain_test))\n",
    "    # At prediction time the true future value is unknown, so the stage-1\n",
    "    # forecast for the same origin stands in for it. Selecting with .loc raises\n",
    "    # a KeyError for a missing date instead of silently inserting NaN.\n",
    "    X_chain_test[col] = recursive_step.loc[X_chain_test.index, target]\n",
    "\n",
    "pred = pd.DataFrame(np.array(final_results).T, columns=y_test.columns, index=y_test.index)\n",
    "(pred - y_test).abs().mean()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_pytorch_p38",
   "language": "python",
   "name": "conda_pytorch_p38"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}