{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a7eeb41e",
   "metadata": {},
   "source": [
    "# mRNA renal cell carcinoma prediction\n",
    "Note: cleaned data from preprocessing script must be input here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4b5fd799",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "scipy version:  1.5.4\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import numpy as np\n",
    "import boto3\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import preprocessing\n",
    "import os\n",
    "import matplotlib.pyplot as plt\n",
    "from cox_functions import categorize_expression_levels, cox_ph_pipeline, normalize_gene_expression\n",
    "from copy import deepcopy\n",
    "\n",
    "from numpy.random import seed\n",
    "seed(1)\n",
    "\n",
    "import scipy\n",
    "print('scipy version: ', scipy.__version__)\n",
    "\n",
    "\n",
    "# !pip install lifelines\n",
    "\n",
    "from lifelines.utils import concordance_index\n",
    "from lifelines import CoxPHFitter\n",
    "\n",
    "import pandas as pd\n",
    "import json\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import roc_auc_score\n",
    "\n",
    "from xgboost import XGBClassifier\n",
    "\n",
    "# from XGBoostPipeline import XGBoostPipeline\n",
    "from XGBoostPipelineLatest import XGBoostPipeline\n",
    "from sklearn.metrics import classification_report"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c2d9ea16",
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data(filename):\n",
    "#     df = pd.read_csv('../data/KIRC_TCGA_microRNA_expression_and_clinical.csv')\n",
    "    df = pd.read_csv(filename)\n",
    "    if 'Unnamed: 0' in df.columns:\n",
    "        df.drop(columns=['Unnamed: 0'], inplace=True)\n",
    "    # DO NOT SET THE INDEX.  \n",
    "    # You will run into an issue where every CoxPH model will return a \n",
    "    # concordance of 0.5 no matter what subset of features you use if you set the index.\n",
    "\n",
    "    # Dropping columns that are not the target or related to microRNA expression\n",
    "\n",
    "    label = \"AJCC_PATHOLOGIC_TUMOR_STAGE\"\n",
    "\n",
    "    cols_to_drop = [\n",
    "        'PATIENT_ID',\n",
    "        \"SEX_male_female\",\n",
    "        \"RACE\", \n",
    "        \"ETHNICITY\",\n",
    "        \"DFS_STATUS\",\n",
    "        \"DFS_MONTHS\",\n",
    "    ]\n",
    "\n",
    "    df.drop(columns=[ci for ci in cols_to_drop if ci in df.columns], inplace=True)\n",
    "\n",
    "    # From dataset joining, the NaN values for tumor stage have been verified to be normal patients\n",
    "    df[label].fillna('normal',inplace=True)\n",
    "#     dfph=dfph.drop(index=np.where(dfph[\"OS_MONTHS\"]==0)[0])\n",
    "    # Map tumor stage categories to numeric categories\n",
    "    tumor_stage_map = {\n",
    "        \"normal\" : 0,\n",
    "        \"STAGE I\" : 1,\n",
    "        \"STAGE II\" : 1,\n",
    "        \"STAGE III\" : 2,\n",
    "        \"STAGE IV\" : 2\n",
    "    }\n",
    "\n",
    "    df[label] = df[label].map(tumor_stage_map)\n",
    "    \n",
    "    \n",
    "    train, test = train_test_split(df, test_size=0.3, random_state=42, stratify=df[\"AJCC_PATHOLOGIC_TUMOR_STAGE\"].values)\n",
    "\n",
    "    train, val = train_test_split(train, test_size=0.2, random_state=32, stratify=train[\"AJCC_PATHOLOGIC_TUMOR_STAGE\"].values)\n",
    "    \n",
    "    dfph = deepcopy(train)\n",
    "    dfphtest = deepcopy(test)\n",
    "    dfphval = deepcopy(val)\n",
    "    \n",
    "    dfph.loc[dfph['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_STATUS\"]=\"0:LIVING\"\n",
    "    dfph.loc[dfph['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_MONTHS\"] = dfph['OS_MONTHS'].median()\n",
    "    \n",
    "    dfphval.loc[dfphval['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_STATUS\"]=\"0:LIVING\"\n",
    "    dfphval.loc[dfphval['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_MONTHS\"] = dfph['OS_MONTHS'].median()\n",
    "\n",
    "    dfphtest.loc[dfphtest['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_STATUS\"]=\"0:LIVING\"\n",
    "    dfphtest.loc[dfphtest['AJCC_PATHOLOGIC_TUMOR_STAGE']==0,\"OS_MONTHS\"] = dfph['OS_MONTHS'].median()\n",
    "\n",
    "    dfph['OS_STATUS'] = dfph['OS_STATUS'].astype('category').cat.codes\n",
    "    dfphval['OS_STATUS'] = dfphval['OS_STATUS'].astype('category').cat.codes\n",
    "    dfphtest['OS_STATUS'] = dfphtest['OS_STATUS'].astype('category').cat.codes\n",
    "    \n",
    "    if 'hsa-mir-4296' in dfph.columns:\n",
    "        dfph.drop(columns=['hsa-mir-4296'], inplace=True)\n",
    "        dfphval.drop(columns=['hsa-mir-4296'], inplace=True)\n",
    "        dfphtest.drop(columns=['hsa-mir-4296'], inplace=True)\n",
    "    \n",
    "    empty_features = []\n",
    "    for col in dfph.columns:\n",
    "        if (dfph[col].sum() == 0):\n",
    "            empty_features.append(col)\n",
    "    \n",
    "    dfph.drop(columns=empty_features,inplace=True)\n",
    "    dfphval.drop(columns=empty_features,inplace=True)\n",
    "    dfphtest.drop(columns=empty_features,inplace=True)\n",
    "\n",
    "    gc = list(dfph.columns)\n",
    "    genes = gc[3:]\n",
    "\n",
    "#     return train, val, test, gc\n",
    "    return dfph, dfphval, dfphtest, genes, label\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "42b089d0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# def normalize_expression(dfph, dfphval, dfphtest, genes):\n",
    "    \n",
    "#     dfph = normalize_gene_expression(dfph, genes);   \n",
    "#     dfphval = normalize_gene_expression(dfphval, genes);\n",
    "#     dfphtest = normalize_gene_expression(dfphtest, genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "8dee3dd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_cox(dfph, dfphval, dfphtest, genes, pvalue=.05, dataset_name=\"final_microRNA_plus_normal_trainvaltest\"):\n",
    "    \n",
    "    dfph = normalize_gene_expression(dfph, genes);   \n",
    "    dfphval = normalize_gene_expression(dfphval, genes);\n",
    "    dfphtest = normalize_gene_expression(dfphtest, genes)\n",
    "    \n",
    "    dfph = categorize_expression_levels(dfph, genes)\n",
    "    dfphval = categorize_expression_levels(dfphval, genes)    \n",
    "    dfphtest = categorize_expression_levels(dfphtest, genes)\n",
    "    \n",
    "    print('Running Cox PH on train set\\n')\n",
    "    info_map, significant_genes = cox_ph_pipeline(\n",
    "                dfph, \n",
    "                genes, \n",
    "                dataset_name=dataset_name, \n",
    "                duration=\"OS_MONTHS\", \n",
    "                event=\"OS_STATUS\",\n",
    "                pvalue=pvalue\n",
    "    )\n",
    "    print('number of significant genes: ',len(significant_genes))\n",
    "    return dfph, dfphval, dfphtest, info_map, significant_genes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "b6531e49",
   "metadata": {},
   "outputs": [],
   "source": [
    "def data_format_xgbpipeline(significant_genes, label = \"AJCC_PATHOLOGIC_TUMOR_STAGE\"):\n",
    "    input_df = deepcopy(dfph[[label] + significant_genes])\n",
    "    input_df_val = deepcopy(dfphval[[label] + significant_genes])\n",
    "    input_df_test = deepcopy(dfphtest[[label] + significant_genes])\n",
    "\n",
    "    all_columns = input_df.columns # Creates list of all column headers\n",
    "    input_df[all_columns[1:]] = input_df[all_columns[1:]].astype('float')\n",
    "    input_df_val[all_columns[1:]] = input_df_val[all_columns[1:]].astype('float')\n",
    "    input_df_test[all_columns[1:]] = input_df_test[all_columns[1:]].astype('float')\n",
    "    \n",
    "    return input_df, input_df_val, input_df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "db190d1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_xgbpipeline(input_df_test, pipeline):\n",
    "    xgb_model = XGBClassifier()\n",
    "    xgb_model.load_model(pipeline.model_filepath)\n",
    "    hr_pred = xgb_model.predict(input_df_test[input_df_test.columns[1:]])\n",
    "    hr_pred_proba = xgb_model.predict_proba(input_df_test[input_df_test.columns[1:]])#,validate_features=True)#, input_df_test[input_df.columns[0]].values-1)\n",
    "    hr_pred_proba_norm = hr_pred_proba[:,np.shape(hr_pred_proba)[1]//2:].T\n",
    "    roc_auc_score_ovo = roc_auc_score(\n",
    "                input_df_test[input_df_test.columns[0]].values,\n",
    "                hr_pred_proba_norm,\n",
    "                multi_class='ovo'\n",
    "            )\n",
    "\n",
    "    roc_auc_score_ovr = roc_auc_score(\n",
    "                input_df_test[input_df_test.columns[0]].values,\n",
    "                hr_pred_proba_norm,\n",
    "                multi_class='ovr'\n",
    "            )\n",
    "    print(classification_report(hr_pred, input_df_test[input_df_test.columns[0]]))\n",
    "    print('roc_auc_ovo: ', roc_auc_score_ovo, 'roc_auc_ovr: ',roc_auc_score_ovr)\n",
    "    return roc_auc_score_ovo, roc_auc_score_ovr\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b46a193f",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "def save_importance_matrix(model, dataset_name='final_microRNA_james'):\n",
    "\n",
    "    print(\"Creating importance matrix\")\n",
    "\n",
    "    feature_importance_dict = {}\n",
    "    importance_types = ['gain', 'cover', 'weight', 'total_gain', 'total_cover']\n",
    "\n",
    "    for metric in importance_types:\n",
    "        feature_importance_dict[metric] = model.get_booster().get_score(importance_type=metric)\n",
    "\n",
    "    importance_matrix = pd.DataFrame(feature_importance_dict)\n",
    "    importance_matrix_filepath = \"../final_results/XGBoost/{}_xgboost_feature_importance_latest.csv\".format(dataset_name)\n",
    "\n",
    "#         print(\"Saving importance matrix to:\", importance_matrix_filepath)\n",
    "#         importance_matrix.to_csv(importance_matrix_filepath)\n",
    "\n",
    "    return importance_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2c782391",
   "metadata": {},
   "source": [
    "# Run"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2dc94e5b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:3263: DtypeWarning: Columns (1,2,3,4,5,7) have mixed types.Specify dtype option on import or set low_memory=False.\n",
      "  if (await self.run_code(code, result,  async_=asy)):\n"
     ]
    }
   ],
   "source": [
    "filename = \"../data/KIRC_TCGA_GTEX_mRNA_expression_and_clinical_normalized.csv\"\n",
    "dfph, dfphval, dfphtest, genes, label = load_data(filename)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d4f64302",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5569"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# cph_map = {} #k: gene_name, v: {info about model}\n",
    "filepath = '../final_results/final_mRNA_plus_normal_trainvaltest_individual_cox_results.json'\n",
    "f = open(filepath, \"r\")\n",
    "cph_map = json.load(f)\n",
    "f.close()\n",
    "\n",
    "all_genes_df = pd.DataFrame(cph_map).T\n",
    "\n",
    "# significant_genes_05 = significant_genes.copy()\n",
    "\n",
    "significant_genes = all_genes_df.loc[all_genes_df.sort_values('p-value')['p-value'].values<.05].index.values\n",
    "\n",
    "significant_genes = [si for si in significant_genes]\n",
    "\n",
    "len([si for si in significant_genes])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "7003c580",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "input_df, input_df_val, input_df_test = data_format_xgbpipeline(significant_genes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "04eb0f84",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initializing pipeline:\n",
      "Initializing test and train data:\n",
      "Running XGBoost pipeline\n",
      "Beginning Bayesian Optimization:\n",
      "\n",
      "|   iter    |  target   |    eta    |   gamma   | max_de... | max_depth |\n",
      "-------------------------------------------------------------------------\n",
      "| \u001b[0m 1       \u001b[0m | \u001b[0m 0.8453  \u001b[0m | \u001b[0m 0.3079  \u001b[0m | \u001b[0m 0.1869  \u001b[0m | \u001b[0m 8.756   \u001b[0m | \u001b[0m 7.66    \u001b[0m |\n",
      "| \u001b[0m 2       \u001b[0m | \u001b[0m 0.8408  \u001b[0m | \u001b[0m 0.5713  \u001b[0m | \u001b[0m 0.3983  \u001b[0m | \u001b[0m 10.11   \u001b[0m | \u001b[0m 3.074   \u001b[0m |\n",
      "| \u001b[95m 3       \u001b[0m | \u001b[95m 0.8454  \u001b[0m | \u001b[95m 0.1787  \u001b[0m | \u001b[95m 0.1234  \u001b[0m | \u001b[95m 17.62   \u001b[0m | \u001b[95m 9.121   \u001b[0m |\n",
      "| \u001b[95m 4       \u001b[0m | \u001b[95m 0.8458  \u001b[0m | \u001b[95m 0.277   \u001b[0m | \u001b[95m 0.1405  \u001b[0m | \u001b[95m 17.65   \u001b[0m | \u001b[95m 9.128   \u001b[0m |\n",
      "| \u001b[95m 5       \u001b[0m | \u001b[95m 0.8487  \u001b[0m | \u001b[95m 0.7922  \u001b[0m | \u001b[95m 0.2426  \u001b[0m | \u001b[95m 17.85   \u001b[0m | \u001b[95m 9.166   \u001b[0m |\n",
      "=========================================================================\n",
      "Best AUC: 0.848742724867725\n",
      "Best parameters: {'eta': 0.7922203053182407, 'gamma': 0.24264067575499107, 'max_delta_step': 17.853711554895643, 'max_depth': 9.166231294401388}\n",
      "{'eta': 0.7922203053182407, 'gamma': 0.24264067575499107, 'max_delta_step': 17.853711554895643, 'max_depth': 9, 'eval_metric': 'auc', 'objective': 'multi:softprob', 'num_class': 3, 'min_child_weight': 1, 'subsample': 1, 'colsample_bytree': 1}\n",
      "Training XGBoost model\n",
      "[0]\tTest-auc:0.76933\n",
      "[1]\tTest-auc:0.79571\n",
      "[2]\tTest-auc:0.80572\n",
      "[3]\tTest-auc:0.82041\n",
      "[4]\tTest-auc:0.82220\n",
      "[5]\tTest-auc:0.82459\n",
      "[6]\tTest-auc:0.82832\n",
      "[7]\tTest-auc:0.83424\n",
      "[8]\tTest-auc:0.83204\n",
      "[9]\tTest-auc:0.83136\n",
      "[10]\tTest-auc:0.83187\n",
      "[11]\tTest-auc:0.83187\n",
      "[12]\tTest-auc:0.83187\n",
      "[13]\tTest-auc:0.83187\n",
      "[14]\tTest-auc:0.83187\n",
      "[15]\tTest-auc:0.83187\n",
      "[16]\tTest-auc:0.83187\n",
      "[17]\tTest-auc:0.83187\n",
      "[18]\tTest-auc:0.83187\n",
      "[19]\tTest-auc:0.83187\n",
      "[20]\tTest-auc:0.83187\n",
      "[21]\tTest-auc:0.83187\n",
      "[22]\tTest-auc:0.83187\n",
      "[23]\tTest-auc:0.83187\n",
      "[24]\tTest-auc:0.83187\n",
      "[25]\tTest-auc:0.83187\n",
      "[26]\tTest-auc:0.83187\n",
      "[27]\tTest-auc:0.83187\n",
      "[28]\tTest-auc:0.83187\n",
      "[29]\tTest-auc:0.83187\n",
      "[30]\tTest-auc:0.83187\n",
      "[31]\tTest-auc:0.83187\n",
      "[32]\tTest-auc:0.83187\n",
      "[33]\tTest-auc:0.83187\n",
      "[34]\tTest-auc:0.83187\n",
      "[35]\tTest-auc:0.83187\n",
      "[36]\tTest-auc:0.83187\n",
      "[37]\tTest-auc:0.83187\n",
      "[38]\tTest-auc:0.83187\n",
      "[39]\tTest-auc:0.83187\n",
      "[40]\tTest-auc:0.83187\n",
      "[41]\tTest-auc:0.83187\n",
      "[42]\tTest-auc:0.83187\n",
      "[43]\tTest-auc:0.83187\n",
      "[44]\tTest-auc:0.83187\n",
      "[45]\tTest-auc:0.83187\n",
      "[46]\tTest-auc:0.83187\n",
      "[47]\tTest-auc:0.83187\n",
      "[48]\tTest-auc:0.83187\n",
      "[49]\tTest-auc:0.83187\n",
      "[50]\tTest-auc:0.83187\n",
      "[51]\tTest-auc:0.83187\n",
      "[52]\tTest-auc:0.83187\n",
      "[53]\tTest-auc:0.83187\n",
      "[54]\tTest-auc:0.83187\n",
      "[55]\tTest-auc:0.83187\n",
      "[56]\tTest-auc:0.83187\n",
      "[57]\tTest-auc:0.83187\n",
      "[58]\tTest-auc:0.83187\n",
      "[59]\tTest-auc:0.83187\n",
      "[60]\tTest-auc:0.83187\n",
      "[61]\tTest-auc:0.83187\n",
      "[62]\tTest-auc:0.83187\n",
      "[63]\tTest-auc:0.83187\n",
      "[64]\tTest-auc:0.83187\n",
      "[65]\tTest-auc:0.83187\n",
      "[66]\tTest-auc:0.83187\n",
      "[67]\tTest-auc:0.83187\n",
      "[68]\tTest-auc:0.83187\n",
      "[69]\tTest-auc:0.83187\n",
      "[70]\tTest-auc:0.83187\n",
      "[71]\tTest-auc:0.83187\n",
      "[72]\tTest-auc:0.83187\n",
      "[73]\tTest-auc:0.83187\n",
      "[74]\tTest-auc:0.83187\n",
      "[75]\tTest-auc:0.83187\n",
      "[76]\tTest-auc:0.83187\n",
      "[77]\tTest-auc:0.83187\n",
      "[78]\tTest-auc:0.83187\n",
      "[79]\tTest-auc:0.83187\n",
      "[80]\tTest-auc:0.83187\n",
      "[81]\tTest-auc:0.83187\n",
      "[82]\tTest-auc:0.83187\n",
      "[83]\tTest-auc:0.83187\n",
      "[84]\tTest-auc:0.83187\n",
      "[85]\tTest-auc:0.83187\n",
      "[86]\tTest-auc:0.83187\n",
      "[87]\tTest-auc:0.83187\n",
      "[88]\tTest-auc:0.83187\n",
      "[89]\tTest-auc:0.83187\n",
      "[90]\tTest-auc:0.83187\n",
      "[91]\tTest-auc:0.83187\n",
      "[92]\tTest-auc:0.83187\n",
      "[93]\tTest-auc:0.83187\n",
      "[94]\tTest-auc:0.83187\n",
      "[95]\tTest-auc:0.83187\n",
      "[96]\tTest-auc:0.83187\n",
      "[97]\tTest-auc:0.83187\n",
      "[98]\tTest-auc:0.83187\n",
      "[99]\tTest-auc:0.83187\n",
      "[100]\tTest-auc:0.83187\n",
      "[101]\tTest-auc:0.83187\n",
      "[102]\tTest-auc:0.83187\n",
      "[103]\tTest-auc:0.83187\n",
      "[104]\tTest-auc:0.83187\n",
      "[105]\tTest-auc:0.83187\n",
      "[106]\tTest-auc:0.83187\n",
      "[107]\tTest-auc:0.83187\n",
      "Saving model to:  ../final_results/XGBoost/final_mRNA_multiclass_post_cox_bayes_opt_xgboost_best_james_ttv2.json\n",
      "Saving XGBoost JSON results to: ../final_results/XGBoost/final_mRNA_multiclass_post_cox_bayes_opt_xgboost_best_output_james_ttv2.json\n",
      "Creating importance matrix\n",
      "Saving importance matrix to: ../final_results/XGBoost/final_mRNA_xgboost_feature_importance.csv\n",
      "Results summary:\n",
      "Parameter bounds: {'max_depth': (3, 10), 'eta': (0.01, 1), 'gamma': (0.0, 1), 'max_delta_step': (1, 25)}\n",
      "Number of boosting rounds: 1000\n",
      "Early stopping rounds: 100\n",
      "Accuracy = 0.691358024691358\n",
      "ROC AUC OVO = 0.8749703228869895\n",
      "ROC AUC OVR = 0.8497237276477043\n",
      "Model filepath = ../final_results/XGBoost/final_mRNA_multiclass_post_cox_bayes_opt_xgboost_best_james_ttv2.json\n",
      "Importance matrix filepath = ../final_results/XGBoost/final_mRNA_xgboost_feature_importance.csv\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<xgboost.core.Booster at 0x7f539a99afd0>"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "pipeline = XGBoostPipeline(\n",
    "    pd.concat([input_df,input_df_val]),\n",
    "    random_state=60, \n",
    "    label_column=label,\n",
    "    num_classes=3, \n",
    "    weighted=False,\n",
    "    n_iter=50,\n",
    "    model_name=\"final_mRNA_multiclass_post_cox_bayes_opt_xgboost_best_james_ttv2.json\",\n",
    "    json_filepath=\"final_mRNA_multiclass_post_cox_bayes_opt_xgboost_best_output_james_ttv2.json\",\n",
    "    dataset_name=\"final_mRNA\",\n",
    "#     X_train=input_df[input_df.columns[1:]],\n",
    "#     y_train=input_df[input_df.columns[0]],\n",
    "#     X_test=input_df_val[input_df.columns[1:]],\n",
    "#     y_test=input_df_val[input_df.columns[0]]\n",
    ")\n",
    "\n",
    "pipeline.run_workflow()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6fa9cb0a",
   "metadata": {},
   "source": [
    "# Final fit"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d8cc13a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "input_df_final = pd.concat([input_df,input_df_val])\n",
    "\n",
    "xgb_model_final = XGBClassifier(pipeline.best_params,objective='multi:softprob')\n",
    "\n",
    "xgb_model_final.fit(X=input_df_final[input_df_final.columns[1:]],y=input_df_final[input_df_final.columns[0]])#,eval_metric='multi:softprob')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "f3488155",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAbUAAAEWCAYAAADhIgmdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAABTqElEQVR4nO2de5yWc/7/n68OKBWSolCKajpOidgNZbfYhMJK7GGsdrHbimTZ9bXit6tFqaTFrkPOWSVZrMOmUSg6zSSl3aWhohClE53evz+u656uuee+Z+6a0z33vJ+Px/WY63p/Pp/rel8fY95dn8PrLTPDcRzHcTKBWlXtgOM4juOUFx7UHMdxnIzBg5rjOI6TMXhQcxzHcTIGD2qO4zhOxuBBzXEcx8kYPKg5TjVH0h8kPVDVfjhOOiDfp+bUZCQVAM2AXRFzWzP7tIz3HGpm/y6bd9UPSaOAY83sJ1Xti1Mz8S81x4GzzaxB5NjngFYeSKpTlc/fV6qr305m4UHNcRIg6SBJD0r6TNIaSX+SVDssayPpdUnrJX0p6QlJB4dljwFHA/+UtFnS7yT1lrQ67v4Fkn4Yno+SNFXS45K+AXJKen4CX0dJejw8byXJJF0qaZWkryVdIekESUskbZB0T6RtjqS3JE2UtFHSB5J+EClvLul5SV9J+p+kX8Y9N+r3FcAfgMHhu+eH9S6VtFzSJkkfSbo8co/eklZLulbS5+H7XhoprydprKSPQ//elFQvLDtJ0tvhO+VL6r0P/6mdDMODmuMk5hFgJ3As0A3oBwwNywSMBpoDWcBRwCgAM/sp8Al7vv7uSPF55wJTgYOBJ0p5fir0BI4DBgPjgRuBHwIdgQslnRZX9yOgCXAz8KykxmHZU8Dq8F0vAG6LBr04vx8EbgOeDt+9a1jnc2AA0Ai4FBgnqXvkHocDBwEtgMuASZIOCcvGAMcD3wMaA78DdktqAbwI/Cm0jwSmSTpsL/rIyUA8qDkOPBf+a3+DpOckNQN+BFxtZlvM7HNgHHARgJn9z8xeM7PvzOwL4C7gtOS3T4m5Zvacme0m+OOf9Pkp8v/M7FszexXYAjxlZp+b2RpgDkGgjPE5MN7MdpjZ08AK4CxJRwG9gOvDe+UBDwA/TeS3mW1L5IiZvWhmH1rAG8CrwCmRKjuAW8PnvwRsBtpJqgX8AhhuZmvMbJeZvW1m3wE/AV4ys5fCZ78GLAD670UfORmIj4E7DgyMLuqQdCJQF/hMUsxcC1gVljcF7ib4w9wwLPu6jD6sipy3LOn5KbIucr4twXWDyPUaK7pi7GOCL7PmwFdmtimurEcSvxMi6UcEX4BtCd6jPvBepMp6M9sZud4a+tcEOAD4MMFtWwI/lnR2xFYXmFWaP05m40HNcYqzCvgOaBL3xzbGaMCALma2XtJA4J5IefyS4i0Ef8gBCOfG4ofJom1Ke35500KSIoHtaOB54FOgsaSGkcB2NLAm0jb+XYtcS9ofmAb8DJhhZjskPUcwhFsaXwLfAm2A/LiyVcBjZvbLYq2cGo0PPzpOHGb2GcEQ2VhJjSTVCheHxIYYGxIMkW0I53aui7vFOqB15Po/wAGSzpJUF/g/YP8yPL+8aQpcJamupB8TzBO+ZGargLeB0ZIOkNSFYM7riRLutQ5oFQ4dAuxH8K5fADvDr7Z+qTgVDsU+BNwVLlipLenkMFA+Dpwt6YzQfkC46OTIvX99J5PwoOY4ifkZwR/kZQRDi1OBI8KyW4DuwEaCxQrPxrUdDfxfOEc30sw2Ar8mmI9aQ/DltpqSKen55c07BItKvgT+DFxgZuvDsiFAK4KvtunAzeH8VTKeCX+ul7Qo/MK7CvgHwXtcTPAVmCojCYYq5wNfAbcDtcKAey7BassvCL7crsP/ptV4fPO149RgJOUQbBTvVdW+OE554P+qcRzHcTIGD2qO4zhOxuDDj47jOE7G4F9qjuM4Tsbg+9TKkYMPPtiOPfbY
qnYjLdmyZQsHHnhgVbuRtnj/lIz3T3IyoW8WLlz4pZmVi8SZB7VypFmzZixYsKCq3UhLcnNz6d27d1W7kbZ4/5SM909yMqFvJH1cXvfy4UfHcRwnY/Cg5jiO42QMHtQcx3GcjMGDmuM4jpMxeFBzHMdxMgYPao7jOE6ZWLVqFX369CErK4uOHTsyYcIEAJ555hk6duxIrVq1kq4MX7VqFUBbScslvS9peHwdSSMlmaQmpflS7YKapF2S8iJHK0nnSZoZqdMrLOsq6T+S6kXKXpR0kaQcSV9IWizpv5JekfS9BM9LuTMdx3FqInXq1GHs2LEsX76cefPmMWnSJJYtW0anTp149tlnOfXUU0tsC6w2syzgJOA3kjrEysMM7H2BT1LxpdoFNWCbmWVHjgIzexb4VtLFkuoAfwV+bWb5BGlBbgQIkznWNbMp4b2eNrNuZnYc8BfgWUlZsQftbWc6juPURI444gi6d+8OQMOGDcnKymLNmjVkZWXRrl27UtsSZDsnTFW0HGgRqTIO+B3FE9ImJJM2X/8W+DfQEZhvZm+H9luBxZKmEgSusxM1NrNZkv4G/Aq4JjTHOnNGKg5s27GLVje8uO9vkMFc23knOd43SfH+KRnvn+SUV98U/OWscvAGCgoKWLx4MT179tzrtpJaAd0Icvwh6RxgjZnlS6kkS6+eQa2epLzwfKWZDQIws48kPQ0MI0j/TmjfKmkkMBu4y8z+W8K9FwGXQ+qdKelXBIGQJk0O44+dd+7zi2UyzeoF//M5ifH+KRnvn+SUV9/k5uaW+R7btm1j+PDhDB06lEWLFhXaN2zYwMKFC9m8eXPStpIaANOAq83sG0n1CUbZUsqUXoiZVasD2JzEXosgO+4a4PgE5Z8ATSPXOcA9cXUGAf8C6hP8S+Gg0F4ANCnNt7Zt25qTmFmzZlW1C2mN90/JeP8kJ136Zvv27davXz8bO3ZssbLTTjvN5s+fn7QtsBB4BRhhe/4edwY+D//+FgA7w7/jh1sJf4er45xaMn4DLAUuAyap+OfV7vAoiW4E47ltgGOAfEkFwJHAIkmHl6vHjuM4GYCZcdlll5GVlcWIESP2ui3QElhuZndF7O+ZWVMza2VmrYDVQHczW1vS/TIiqIXBZgTwOzN7meBrbehe3uM0gmHEv+9rZzqO49RE3nrrLR577DFef/11srOzyc7O5qWXXmL69OkceeSRzJ07l7POOoszzjgDgE8//ZT+/fsXtgUOBU6PrGrvv6++VMc5tUTcBdxhZl+E11cDcyRNM7OvSmg3WFIvguHGlcD5Zra8Yl11HMfJLHr16hX74irGoEGDitmaN2/OSy+9VNgWWGhmPUp6RviBUSrVLqiZWYMEtovjrlcBreJs8deTgckpPrNVqZUcx3GcKicjhh8dx3HSlV/84hc0bdqUTp06FdoGDx5cOEzXqlUrsrOzi7VLptIBcNNNN9GlSxeys7O57rrr+PTTTyvjVaoFVR7UIgohSyX9U9LBceX5kp6Ks50k6Z2w3XJJo0J7jqR7Ejzjz5JWSdocZz9V0iJJOyVdELG3krQtvH++pLcllbyD0HEcJwE5OTm8/PLLRWxPP/00eXl55OXlcf7553PeeecVa5dMpQPguuuuY8mSJeTl5XHSSSdx6623Vsq7VAeqPKixRyGkE/AVwSpGAEJ1j1rAqZKi+cofAX5lZtlAJ+AfpTzjn8CJCeyfECztfzJB2YehX13D5/0htddxHMfZw6mnnkrjxo0TlpkZ//jHPxgyZEixsmQqHQCNGjUqrPftt9+S6sbkmkC6zanNBbpEri8GHgOygHOA2BdbU+AzADPbBSwr6aZmNg8o9h/ezApCe2lL/RsBX5fmvCuKJMcVIUrG+6dkqrp/ykttI545c+bQrFkzjjvuuJKfn0Cl48Ybb+TRRx+lTp06vPvuuxXiX3UkbYKapNrAD4AHI+bBBNqL7QiUQmJBbRywQlIu8DLwiJl9W84utQmVSxoS
rI5MqPniiiKp4YoQJeP9UzJV3T9lVdtYu3YtW7ZsKXafcePGceKJJ5Z4/2QqHX379qVv3748/PDDjBw5kksvvbRMPmYMJe3MrowD2AXkARuAmUDt0H4C8FZ4Xptgr9ghkXZtgCuBN4BcS6ISEvesZGokk4ELItetgKWR68HAy6W9iyuKJCddVA/SFe+fkqnu/bNy5Urr2LFjEduOHTusadOmtmrVqqTtSlLpiPHUU08Vu3d1A1hg5RRT0mZOjWBH+X7smVMbArQPFT0+JBgCPD/WyMw+NLN7Cb7uuko6tAJ9fB5InjvBcRxnL/n3v/9N+/btOfLIIxOWWwkqHf/97x4J27fffpv27dtXqK/ViXQIagCY2UbgKmCkpP2BHwNdbI+qx7kEgQ5JZ0VksI4j+NrbUIHu9SIIrI7jOHvFkCFDOPnkk1mxYgVHHnkkDz4YzLBMmTKl2AKReKWNRCodADfccAOdOnWiS5cuLFiwoMhy/5pO2sypAZjZYkn5wIUECvlrIsWzgQ6SjgB+CoyTtJVA5PISM9sVxrmcMG9ajJMIguXFQH1Jq4EHzGyUpBOA6cAhwNmSbjGzjmG72JyagO3speyW4zgOwFNPPZXQPnny5GK2eKUNS6LSMW3atMLz3NxcWrRokbBeTaTKg5rFKYSYWSzf2WNx9l3AEeHlRUnuNZnEKiG/C4/4+vMJxIrj7QVAvXi74ziOk96kzfCj4zhOJrGvSiLJ2kKw6bp9+/Z06dKFQYMGsWHDhgp8g+pJtQpqkg6NqDivlbQmcn20pBmS/ivpQ0kTJO0XtustaaOkxaECyc2hva+khZLeC3+eHve8bpJM0hlV8b6O41Rf9lVJJFlbCJbxL126lCVLltC2bVtGjx5dIb5XZ6pVUDOz9RaofGQD9wHjwvNuwFTgOTM7DmgLNAD+HGk+x8y6AT2An0g6HvgSONvMOgM/J27Ik2BhypvhT8dxnJTZVyWRktr269ePOnWCWaOTTjqJ1atXl5/DGUKVz6mVE6cD35rZwxDMv0m6BlgZ+yqLYWZbJC0E2phZVF7rfeAASfub2Xfh6soLCDZ/z5F0gJWywdsVRZJT1YoQ6Y73T8lUVf9UtZJISTz00EMMHjy4HL3KDDIlqHUkSAdeiJl9I+kT4NioPdzPdhLw/+LucT6w2My+C6+/D6w0sw9D5ZL+wLPxD3ZFkdSoakWIdMf7p2Sqqn+qUkkkWVuAxx9/nA0bNtCiRQs2b95cZj8zivLaxV3ZBzAKGBmeDwfuSlAnD+gM9AY2AosJgt8VcfU6EuxDaxOxTQJ+GZ6fAzxTmk+uKJKc6q4IUdF4/5RMde2ffVUSSdbWzGzy5Ml20kkn2ZYtW8ys+vZNFMpRUSRTvtTeJ6I2AiCpEXAUQbA6lGBObUB8Q0lHEuxV+5mZfRjaaof3O0fSjQR71Q6V1NDMNlXomziOk9GUpiRSEi+//DK33347b7zxBvXr168A76o/1WqhSAnMJNhY/TMoDEpjgclmtjVZozB324vA783srUjRD4F8MzvKAkWTlsA0YGAF+e84Toaxr0oiJbUdNmwYmzZtom/fvmRnZ3PFFVdU3gtVEzLiS83MTNIg4K+SbiII1i9Reg60YQRzbjeF7QD6Eax2nB5XdxqBgHL8CknHcZxi7KuSSElt//e//xWz+XxaUaptUDOzUXHXq4Czk9TNBXIT2P8E/ClBk5wEdZ8nEDZ2HMdx0pRMGX50HMdxHA9qjuOkRiLppmeeeYaOHTtSq1YtFixYkLDdihUrCqWhsrOzadSoEePHjwdg1KhRtGjRopgKvePsKxkR1PZFPktSw/D6uPAedUO5rJ6SDpD0rqR8Se9LuqWq39FxqppE0k2dOnXi2Wef5dRTk6cbbNeuXaE01MKFC6lfvz6DBg0qLL/mmmsKy6OLJRxnX8iIoGb7IJ8VLs3/PcF+NICRwNtm9g7wHXC6mXUFsoEzJZ1Uia/kOGlHIummrKws2rVrl/I9Zs6c
SZs2bWjZsmV5u+c4QDVeKJIiJcpnmdk/JP1C0u+AKwiCIOFmwM3hPeqGR+LERhFcJis5LgNVMpXRPxUl+bQ3JFrOfs899/Doo4/So0cPxo4dyyGHHFJF3jmZQKYHtdLks5YAVwPLgV+Z2VexeuFet4VhvUnhF1wxXCYrNVwGqmQqo3/KY+l3MummDRs2sHDhQjZv3py4IbBjxw6mTZvGgAEDCtt36dKFBx98EEk89NBDXHzxxVx//fXF2roUVHK8b4qS6UFNJP7CitrPBD4DiiQusiApaXa4QXu6pE5mtjT+Rmb2N+BvAO3atbPfXnJu+XmfQeTm5nJh795V7UbaUl36p6CggAMPPJDecb4efPDBHH/88fTo0SNp2xkzZtCzZ8+k6VZat27NgAEDit0bgv5JZHe8b+LJiDm1EnifINVMIVH5LEnNgauAE4H+krrE38DMNhDscTuzop11nEzmqaeeKjb0+NlnnxWeT58+vVhSTMfZWzI9qJUmnzUOuM3MVgMjgEkKOCz8QkNSPQLZrA+q4gUcJ11IJN00ffp0jjzySObOnctZZ53FGWcE+XTjZZ+2bt3Ka6+9Vuwr7Xe/+x2dO3emS5cuzJo1i3HjxlXqOzmZR0YPP5YknyWpL3A08GBY95+Sfgn8jEDN/5EwCNYC/mFmL1TJSzhOmpBMuim6PD9GvOxT/fr1Wb9+fbF6jz3mqnNO+ZJxQW0v5LNeC49o3XMil93K3TnHcRynQsn04UfHyRgmTJhAp06d6NixY6EiR5Tc3FwOOuigQnWOW2+9FYBVq1bRp08fsrKy6NixIxMmTKhkzx2n8kjrLzVJm82sQXh+HMEcWBawAfgGuNnMZkvKAXqY2bC49r2Au4BGoemucLUikkYBvwNamdnnCZ5XAGwCdgE7zSz5si7HqWCWLl3K3//+d9599132228/zjzzTM466yyOO+64IvVOOeUUXnih6Eh5nTp1GDt2LN27d2fTpk0cf/zx9O3blw4dOlTmKzhOpVAtvtQkHUCQ9+xvZtbGzI4Hfgu0LqHN4cCTBFmu2wO9gMslRXegfglcW8Kj+4RKJR7QnCpl+fLlnHTSSdSvX586depw2mmnMX16fHakxBxxxBF0794dgIYNG5KVlcWaNWsq0l3HqTLS+kstwiXA3DD9CwDhnrFi+8Yi/IZgleOisP6XoXLIKIIACfAQkCPp9ujG633FFUWS44oiZVP06NSpEzfeeCPr16+nXr16vPTSSwn3hM2dO5euXbvSvHlzxowZQ8eOHYv6UFDA4sWL6dmz5z774jjpTHUJah2BRfvQ5pE424LQHmMzQWAbDtwcV9eAVyUZcH9s2DIeVxRJDVcUKVnRIxVViHPPPZeTTz6ZevXq0bJlS9auXVukzZYtW3j88cepV68e8+bN44wzzuDxxx8vLN+2bRvDhw9n6NChLFq0t/87VS2umpEc75s4zCxtD2Bz+PMuYHjEPp3gK+3Z8DoHuCeu7XTgnDjbQcD68HwUgYjxwUABwbzb5kjd5uHPpkA+cGpp/rZt29acxMyaNauqXUhr9rZ/fv/739ukSZNKrNOyZUv74osvzMxs+/bt1q9fPxs7duy+ulil+O9PcjKhb4AFVk5xo1rMqREog3SPXZjZIIJA1jhZAxKoiQDHA8uiBgsUQ54Efh1n/zT8+TlBgDxxnzx3nHLi888/B+CTTz7h2WefLabOsXbt2tg/yHj33XfZvXs3hx56KGbGZZddRlZWFiNGjKh0vx2nMqkuw49PAr+XdI7tmVerX0qbScA7kp41szxJhwK3A7cmqHsXMJ+wPyQdCNQys03heb8k7Ryn0jj//PNZv349devWZdKkSRxyyCHcd999AFxxxRVMnTqVe++9lzp16lCvXj2mTJmCJN58800ee+wxOnfuTHZ2NgC33Xab5y5zMpJqEdTMbJukAcBdksYD6wiW2/8pUi1H0sDI9UnAT4C/S2pIIGI83sz+meD+X0qaDlwTmpoR
iBhD0EdPmtnL8e0cpzKZM2dOMdsVV1xReD5s2DCGDRtWrE6vXr0Kv+AcJ9NJ66Bm4Z6x8PwDIOE/Lc1sMjA5QdFq4IQkbUbFXY8g0H/EzD4Cuu6Dy47jOE4VUl3m1BwnIxg3bhwdO3akU6dODBkyhG+//TZhvfnz51O7dm2mTp0KwIoVKwqVQrKzs2nUqFFCVRHHqelUu6Am6XBJUyR9KGmZpJck/UrSC3H1Jku6IDzPlZRwA7WkoyVtljQyzt5Nkkk6o+LexqlJrFmzhrvvvpsFCxawdOlSdu3axZQpU4rV27VrF9dff32h4j1Au3btyMvLIy8vj4ULF1K/fv2EQsKOU9OpVkFNwSTXdCDXAmWRDsAfCObA9pVxwL8S2IcAb4Y/Hadc2LlzJ9u2bWPnzp1s3bqV5s2bF6szceJEzj//fJo2bZrwHjNnzqRNmza0bNmyot11nGpHWs+pJaAPsMPM7osZwpWNBwN7LZEQLiz5CNgSZxdwAdAXmCPpADNLPE4UwRVFkpMpiiJlUQVp0aIFI0eO5Oijj6ZevXr069ePfv36FamzZs0apk+fzuuvv878+fMT3mfKlCnFlvM7jhNQ3YJaJ2BhedwoXKp/PUHgGhlX/H1gpZl9KCmXYIHKs0nu44oiKZApiiJlUW7YtGkTjzzyCI8//jgNGjRg1KhR3HjjjfTt27dQFWLUqFEMHjyYOXPmsHbtWt5//32aNGlSeI8dO3Ywbdo0BgwYUKNUJFw1IzneN0WpbkEtGcnWK5e0jvkWYJyZbQ6X7kcZAsQmO6YAPyVJULNAPutvAEe3PtbGvpcpXVq+XNt5J5nQNwWX9N7nts888wzdunVj4MCBQJAdet68efTu3Zvc3Fx69+7Nxx9/zB133AHAl19+yaJFi+jatWthmxkzZtCzZ89iGaQznVj/OMXxvilKdfsr8z7BsGA864FD4myNCVT4k9ETuEDSHQRSWbslfQvcC5wPnCPpRoL9bYdKamhmm0pyrl7d2qwow/BUJpObm1umgJAJHH300cybN4+tW7dSr149Zs6cWUyUeOXKlYXnOTk5DBgwoDCgQZB92oceHSc51WqhCPA6sL+kX8YMkk4ADgWaS8oKbS0J9pnlJbuRmZ1iZq3MrBUwHrjNzO4Bfgjkm9lRYXlLYBowsELeyKkx9OzZkwsuuIDu3bvTuXNndu/eza9+9Svuu+8+nn/++VLbb926lddee63GfaU5zt5Qrb7UzMwkDQLGS7oB+JZAjPhqAvWQh8PcazuAoWa2MdL8RUk7wvO5ZvbjJI8ZQrDCMso04ErgsXJ5EafGcsstt3DLLbcUsV1xxRUJ50QmT55c5Lp+/fqsX7++Ar1znOpPtQpqUCg0fGGCov8SSGMlatO7lHuOipznJCh/Hij9n9KO4zhOlVLdhh8dx3EcJyke1BynkthXiaxvv/2WE088ka5du9KxY0duvjk+n63jODGqRVCTtDlyfpykF0KZrIWSZkk6NSxrFpblxyS0QnsrSdsk5UWO/SRdImlJeLwtqWtY/wBJ74b3eV/SLYk9c5zUKItE1v7778/rr79Ofn4+eXl5vPzyy8ybN68y3XecakO1mlMLF4G8CIyM5VWT1IkgGehsgpxnr5nZhLCsS6T5h2aWHXe/lcBpZva1pB8R7DfrCXwHnB7uYasLvCnpX2bmf0mcfSYmkVW3bt1SJbKiaiKSaNAgSFixY8cOduzYQYK9lY7jUM2CGnAJwcrFwkUbZrYUWBpeHgG8GilbUtLNzOztyOU84MjQbkDs67BueJSakMplspKTCTJZVSmRtWvXLo4//nj+97//8Zvf/IaePfdaFc5xagTVLah1BBaVUD4JeFrSMODfwMPhakmANpLywvO3zOw3cW0vIyJsLKk2gSTXscAkM3sn0QNdJis1MkEmq6IksiCQOrrkkktKlMgaP348mzdv5qabbqJ9+/Ycc8wxZX2laoNLQSXH+6Yo1S2oFSHMVn0c8B8zO8/M
XpHUGjgT+BGwOByehATDj5H79CEIar1iNjPbBWSHYsnTJXUKvwqLEJXJateunf32knPL7f0yidzcXC6swVI+JUlkQdA/pUlkxVi4cCHr16/n0ksvrcQ3qFpcCio53jdFqRYLRSK8D3SPXZjZICCHQBIrZvvKzJ40s58C84FTS7phOO/2AHCumRXb2WpmG4BcgkDpOPtEVCLLzJg5cyZZWVlF6qxcuZKCggIKCgq44IIL+Otf/8rAgQP54osv2LBhAwDbtm3j3//+N+3bt6+Ct3Cc9Ke6fak9Cfxe0jmRebX6sUJJpwPzzGyrpIZAG+CTZDeTdDSBUPFPzew/EfthBCluNkiqRyCddXv5v45TU4hKZNWpU4du3boVSmQBJQapzz77jJ///Ofs2rWL3bt3c+GFFzJgwIDKct1xqhXVKqiZ2TZJA4C7JI0H1gGbgD+FVY4H7pG0k+Ar9AEzmy+pVZJb/pFAN/Kv4WqynWbWg2DBySPhvFot4B9m9kKSezhOSiSTyILi83VRiawuXbqwePHiinbPcTKCahHUzKxB5PwDgvxmierdCdyZwF5AkIst3j4UGJrAvgTotu8eO47jOFVBdZtTc5xqRWkqIh988AEnn3wy/fr1Y8yYMXvV1nGc4qR1UJN0uKQpoXrIMkkvSWorqdgqxEibGZLmxtnaScoNlUSWS/pbaO8taaOkxZI+kDQm0iZH0hdhm2XRdDeOkwqpqIg0btyYu+++mwsvvHCv2zqOU5y0HX5UMMk1HXjEzC4KbdlAsxLaHEywOnKzpGPMLJZx8W6CLNczwnqdI83mmNmAcEHIYknTzeytsOxpMxsmqSnwvqTnzWxdOb6mk+GUpiLStGlTmjZtSp06xf9XTEWBxHGcoqRtUAP6EKxAvC9mMLO8EhZ9QJCx+p8EC0guAkaH9iOA1ZH7vBffMFyEkge0SFD2uaQPgZbhvRPiiiLJqa6KIhWtIlIRbR2nJpPOQa0TgaLH3jAEuIUg8ExlT1AbB7wu6W0CGa2Hw/1nhUg6hGAj9+z4m4YbulsD/0tQ5ooiKVBdFUUqUkUkyvbt2/nwww8Ln7c3bWsCrpqRHO+boqRzUNsrJDUjkLR6M8yQvTOmAmJmD0t6hWAD9bnA5TFFfuAUSUuAdsBfzGxt5LaDJfUiEDi+3My+in+uK4qkRk1UFClNRSTK5MmTadOmTWHZ3rStCbhqRnK8b4qSzgtF3ifYd5Yqg4FDgJWSCoBWBEOQQJAx28weMrNzgZ3sWeI/x8y6AJ2BK8N5uxhPm1m2mfU0s+n7/CZOjSQVFZGKaOs4NZl0DmqvA/tHVx1KOoFgXisRQ4AzzayVmbUiCIixBSZnhilkkHQ4wYbrNdHGoaLIaOD6cn4Pp4YSVRHp3Lkzu3fvLlQRiSmJrF27liOPPJJnnnmGP/3pTxx55JF88803Sds6jlMyCrKspCeSmgPjCQLUt0ABcDWwjKILNiaE9iMt8kKSFgFXEnzFnRXeA+BOM3tcUm+C3GwDwvr1CObNegGnAT3MbFiq/rZr185WrFixdy9ZQ/AhkpLx/ikZ75/kZELfSFoYqjmVmbSeUwvTxlyYoKhuAlsiJZGY+PE7wIgE5bkEYsWx623sWf24Epi8N/46juM4VUs6Dz86TrVhxYoVZGdnFx6NGjVi/PjxReps3LiRs88+m65du9KxY0cefvhhAFatWsU111xDVlYWHTt2ZMKECVXwBo6TGaQU1CS1kbR/eN5b0lXhRucyIenQULEjT9JaSWsi1xb+XCrpn/HPk5Qv6ak422RJK8N2iySdXJI9LBsZqoksDe/5s9B+elh3qaRHJKX1V61TtbRr1468vDzy8vJYuHAh9evXZ9CgQUXqTJo0iQ4dOpCfn09ubi7XXnst27dvp06dOlx55ZUsX76cefPmMWnSJJYtW1ZFb+I41ZtUv9SmAbskHQs8CBxDkAamTJjZ+nB1YTZwH4HqR+x6S3jeCfgKKMxULSkr9P1USQfG
3fa6sP0NwP0l2SVdAfQFTgyfc2pgVi3gEeCi0P4x8POyvq9TM5g5cyZt2rShZcuia5oksWnTJsyMzZs307hxY+rUqcMRRxxB27ZtAWjYsCFZWVmsWbMm0a0dxymFVIPabjPbCQwCxpvZNQQqHZXFXIoqfVwMPEawkfqcJG1mE+xbK8n+B+DXZvYNgJltNLNHCFZHfhfJsfYagVqJ45TKlClTGDJkSDH7sGHDWL58Oc2bN6dz585MmDCBWrWK/i9YUFDA4sWL6dmzZ2W56zgZRapDajskDSH4Wjk7tCVarFHuhDnNfkDwhRhjMMEXVjtgGPBUgqZnA8XksGL2MIloQzP7MEGdL4G6knqY2QLgAuCo0nx1mazkVAeZrLJIYsXYvn07zz//PKNHjy5W9sorr5Cdnc3rr7/Ohx9+SN++fTnllFNo1KgREChDnH/++YwfP77Q5jjO3pFqULsUuAL4s5mtlHQM8HjFuQVAvVCLsRWBXNZrULhX7Qsz+1jSauAhSYeY2ddhuzsl/R/wBXBZ5H7xdgEJ9zOEiiQXAePCucRXCTZsF8NlslKjOshklYfU0JtvvskxxxzD8uXLWb58eZGyMWPGcPHFF/PGG28AcMghh/DEE0+QlZXFhg0b6NOnDz179qRx48YuexSHS0Elx/smDjNL6QDqAe1Srb+3BzCKYM9Y7Hpz+PMgYA5wVXh9F7CeYM9aAfANMDQsmwxckODeyeyrgNYp+NaPIPt1ifXatm1rTmJmzZpV1S5UCoMHD7aHHnooYdkVV1xhN998s5mZrV271po3b25ffPGF7d692/r27WvDhw+vPEerGTXl92dfyIS+ARZYOcWSVFc/ng3kAS+H19mSnt+3MLp3mNlG4CpgZPjV9GOgi+1RDjmXQE1kXxgNTJLUCEBSo/DLizDdDOEzrydYyOI4Sdm6dSuvvfYa5513XqEtqh5y00038fbbb9O5c2d+8IMfcPvtt9OkSRPeeustXnvtNV5//fXCLQEvvfRSVb2G41RrUh1+HAWcSLhR2YIUMMdUkE/FMLPFkvIJNmKvMbPo0rDZQAdJ+7Jw5V6gATBf0g5gBzA2LLtO0gCCxTT3mtnr+/4GTk2gfv36rF+/vojtiiuuKDxv3rw5r776arF2vXr1YtasWdVeFcJx0oFUg9pOM9sY5O0spFz1tcxsVNx1g7jr2AKVx+Lsu9izEjMnyb2T2Q24Izziy64DrivVccdxHCdtSDWoLZV0MVBb0nEEw4FvV5xbjuM4jrP3pLpP7bdAR4K8Yk8CGwkEhB2nxpOKRNadd95ZWN6pUydq167NV18F6fk2bNjAzTffTPv27cnKymLu3LlV8BaOkxmUGtTCfWLPm9mNZnZCePyfmX1bWtuyImlQKJfVPrxuFV7/NlLnHkk54flkSReE540lLZZ0adhuW0SCKy8ih1Ug6b2I/Xuh/WVJGyS9UNHv6VRvUpHIuu666wrrjB49mtNOO43GjRsDMHz4cE488UQ++OAD8vPzPW+a45SBUoNaOGe1VdJBleBPPEOAN4kk+wQ+B4ZL2i9Zo9DXV4C/mdnDoflDCyW4wuPRSJM+EXtsWPVO4Kfl9ypOTSCZRFaUp556qlBx5JtvvmH27Nn0798fgP3224+DDz64Mlx1nIwk1Tm1bwlUOF4DtsSMZnZVhXgFSGoAfB/oAzxPsAITgs3TbxGom/w9QdMGwL+AJ83s3n19vpnNDPOtpYwriiQn3RVFykNNBJJLZMXYunUrL7/8Mvfccw8AH330EYcddhi33347I0aM4Pjjj2fChAkceGC8pKnjOKmQalB7MTwqk4HAy2b2H0lfSepOIGwM8BfgX5IeStDuLuABMxsXZ28TKpTE+K2ZzQnPZ0naRaD3uFeie64okhrprihSHooMO3bsYNq0aQwYMCDp/V5//XXat2/PkiVLgGA+buHChdxxxx0cf/zxTJw4kSuvvJJf/OIXZfYnk3DVjOR438RRXru4y/sgCKJ9
w/OrCIYDWwFLQ9ujBMOD9wA5tkc5ZCqwAmgauVdhuwTPKQCaJCnrDbyQqs+uKJKcTFA9KI3nnnvO+vbtW2KdgQMH2hNPPFF4/dlnn1nLli0L+2f27NnWv3//inSzWlITfn/2lUzoG8pRUSSlLzVJK0mwL83MWu9tEE3xeYcCpwOdJBlQO3z+XyPVbiMIYLPjmk8hmId7SVIfM9tUET46TjzRubJEbNy4kTfeeIPHH98jm3r44Ydz1FFH8cknnwDBnFyHDh0q3FfHyVRSHX7sETk/gECqqnH5u1PIBcCjZnZ5zCDpDeDI2LWZfSBpGTAAeDfa2MzGhwoj0yX1r0A/HQfYI5F1//17UvjF5LFiqiLTp0+nX79+xebLJk6cyODBgxkzZgytW7cuzIjtOM7ek1JQM7P1cabxkt4E/lj+LgHBqse/xNmmEeQ/i/JnYHGiG5jZ9ZIeJlAg+T3F59QeMrO7kzkgaQ7QHmgQZgO4zMxe2au3cGoMpUlkAeTk5JCTk1OsbXZ2Nvfff7/LZDlOOZDq8GP3yGUtgi+3hhXiEWBmvRPY7gbujrPlE9mWYHFyWGZ2aeSyXpJntUpiPyVVfx3HcZz0IFVFkbGRYzTQnUBc2HFqJKmoiOTm5nLQQQcV1rn11lsLy1q1akXnzp3Jzs6mR48eOI5TPqQ6p3aZmX0UNVSmSn8qSDocGA+cQCDnVUAg5ZUPfEAwF7gJmGRmj4RtrgMuCW9RB8gCDjOzrySdCUwgWKTygJnFD4c6NZiYigjArl27aNGiRTEVEYBTTjmFF15ILEoza9YsmjRpApTPlgLHcVIPalMJvs7ibceXrzv7hoL0AdOBR8zsotCWDTQjUBLpFtpaA89KqmVmD5vZnQRbBWI5464JA1ptYBLQF1hNkJrmeTNbVtnv5qQ/qaiIOI5TOZQY1ELNxY7AQZLOixQ1IvjySRf6ADvMrDCRpwU531pFK5nZR5JGEAyjxi8xGwI8FZ6fCPwv9nUqaQpBMtISg5oriiQnHRVFKkNFZO7cuXTt2pXmzZszZswYOnbsCIAk+vXrhyQuv/xy2rZtWy6+OE5Np7QvtXYES+YPBs6O2DcBv6wgn/aFTsDCFOsuIljVWIik+sCZwLDQ1AJYFamyGkioNOKKIqmRjooiFa0ismXLFh5//HHq1avHvHnzOOOMMwr3qN155500adKEr7/+mpEjR/LLX6bT/07ph6tmJMf7piglBjUzmwHMkHSymWVKPgwlsJ0NvGVmX5VQJ2FSVDP7G/A3gHbt2tlvLzm3XJzMNHJzc7kwA5esz5gxg549e3LeeeeVWK93797cd999dOrUqXAeLUZ+fj4ff/wxN9xwQ0W6Wq3Jzc31LQ9J8L4pSqpzaosl/YZgKLJw2NHM0kWg7n2CDdup0A1YHme7iD1DjxB8mR0VuT4S+HSfvXMylpJURNauXUuzZs2QxLvvvsvu3bs59NBD2bJlC7t376Zhw4Zs2bKFV199lYEDB1au446ToaS6pP8x4HDgDCCm7JFO8lOvA/tLKhzDkXQCUGTmPpxjGwNMjNgOAk4DZkSqzgeOk3RMmOLmIoJMAY5TSExFJPqVdt999xUqiUydOpVOnTrRtWtXrrrqKqZMmYIk1q1bR69evejatSsnnngiZ511FieeeGJVvYbjZBSpfqkda2Y/lnSumT0i6UmCfGVpgZmZpEEESic3EKTKKSBY0t9G0mL2LOmfaHtyrAEMAl41s2hKnZ2ShhG8Y20C9ZH3K+VlnGpDaSoiw4YNY9iwYfHNaN26Nfn5+UVsPifiOOVDqkFtR/hzg6ROwFoC5fu0wcw+JfGG8IRKIpF2kwnU/ePtLwEvlYdvjuM4TuWQ6vDj3yQdAtxEMAy3DLijwrxynDQiFfWQDz74gJNPPpn999+fMWPG7FVbx3HKj1QFjR8IT98AKiTdTKqEyTzfI/B9OfBzM9saZ18J/NTMNoTzaMsJcqztBywgUEjZEe69+42Z/SC8dy+C
/Gw9gMHA9eFjNwNXhlqTTg0jFfWQxo0bc/fdd/Pcc8/tdVvHccqPlL7UJDWT9KCkf4XXHSRdVrGuJWWbmWWbWSdgO3BFAvtXwG8ibT40s2ygM8EilwsBzOxZ4FtJF0uqQ5Cv7ddmtpMgMJ5mZl2A/0e4bN+p2SRTD2natCknnHACdevW3eu2juOUH6kOP04mWDTRPLz+D8EijKpmDnBsAvtcgg3URTCzXQS516JlvwX+BNwCzDezt8O6b5vZ12GdeURyuTk1l5LUQyqyreM4qZHqQpEmZvYPSb+HwtWBuyrQr1IJv6x+BLwcZ68N/AB4MEGbAwiUQYbHbKF01tMEaiJtkjzuMuBfpfnkMlnJqUqZrPKSw9q+fTvPP/88o0ePrtS2juOkTqpBbYukQwlVNSSdBGysMK9Kpl4k2ecc9gSvmL0VgWTWa5E2sQShxwFTzWxJrEBSLeCHBPNmLYEvow+T1IcgqPVK5IzLZKVGVcpklddy+TfffJNjjjmG5cuXs3x5/P79gIKCAurVq1fsmaW1damjkvH+SY73TRxmVupBoND/FkEge4tg+LFLKm3L+wA2l2QHDiIIdleF162ApeH5EQQLRs6JtPstgbjxmQTDjIqUdQE+BNqm4lvbtm3NScysWbOq2oUyM3jwYHvooYdKrHPzzTfbnXfeuddtM6F/KhLvn+RkQt8AC6ycYkRpKv1Hm9knZrZI0mkEAscCVpjZjpLaVhVmtlHSVQSalffGlX0Wbs7+PfB8mINtBHCimX0RKpIMBf4u6WjgWYJVlP+p5Ndw0oyYesj9999faIsph1xxxRWsXbuWHj168M0331CrVi3Gjx/PsmXLaNSoUcK2juNUDKUNPz7HnjxqT5vZ+RXrTvlgZosl5RPIW82JK34OGCXpFOBK4A4z+yIsuxqYI2ka8EfgUOCvQbo2dpqZpyiuoZSmHnL44YezevXqlNs6jlMxlBbUomr1Vbo/LYaZNUjFbmbRVDmdInYDuoaXc+LarGKPUsrQ8HAcx3GqCaUt6bck547jOI6TdpQW1LpK+kbSJqBLeP6NpE2SvqkMBx2nKimLRNaqVavo06cPWVlZdOzYkQkTJlSy945T8ygxqJlZbTNrZGYNzaxOeB67blRZTsaQtDly3l/SfyUdLWmUpDWS8iQtlXROWGeUpK2Smia6R3g9SJJJah+xZUuaK+l9SUskDa6M93PSj5jMVV5eHgsXLqR+/fpJJbJGjhxZxF6nTh3Gjh3L8uXLmTdvHpMmTWLZsmWV6b7j1DhSVRRJKyT9gCAn2plm9kloHmeBFNaPgYfC/WcQ7Du7toTbDQHeJFhUEmMr8DMz60iw1H+8pIPL7w2c6sjeSmQdccQRdO8erLNq2LAhWVlZrFmzptL8dZyaSKqbr9OGcNXi34H+ZvZhfLmZLZe0E2gSmh4CciTdbmZfxd2rAfB9oA9B9oFR4T3+E7nfp5I+Bw4DNpTkmyuKJCcTFEXKInNVUFDA4sWL6dmzZ7n44jhOYqpbUNufIEN1bzP7IFEFST2B3UBsmf5mgsA2HLg5rvpA4GUz+4+kryR1N7NFcfc7kUDdv1gADctdUSQFqruiyI4dO5g2bRoDBgxIer9kaiLbtm1j+PDhDB06lEWLFiVs66oQJeP9kxzvmzjKaxd3ZRwEw4IvABPi7KOANUAewTL9UyL2kcDBBJmwGxFRJAFeBPqG51cBd8bdN6ZAclIq/rmiSHKqu+rBc889Z3379i2xTiI1ke3bt1u/fv1s7NixJbat7v1T0Xj/JCcT+obKUhRJQ3YTpI35t6Q/mNltkbJxZjYmUSML8qo9Cfw6Zgu1LE8HOkkyoDZgkn5nZiapEUHQ+z8zm1dRL+RUD5566qm9Hno0My677DKysrIYMWJEBXnmOE6U6hbUsCAh6AAC5Y91ZlZMjT8JdwHz2fPOFwCPmtnlsQqS3gB6SXoHmB6WP1OO7jvVkH2VyFqyZAmPPfYYnTt3Jjs7G4DbbruN
/v37V8VrOE6NoNoFNQAz+0rSmcBsSV+W2iBo86Wk6cA1oWkI8Je4atOAiwnU+k8FDpWUE5blmFleWX13qh/7KpHVq1ev2DC24ziVRLUKahaRwrJA0uqY8HJGkvqj4q5HEAgYY2a9E9S/O3L5eNm8dRzHcSqbarlPzXEqgw0bNnDBBRfQvn17srKymDt3bpHyr7/+mkGDBtGlSxdOPPFEli5dmnJbx3EqhrQKaqGyx2OR6zqSvpD0Qly9GZLmxtkSqoqEZRdKWhYqhDwZ2lpJ2iZpsaTlkt6V9PNIm/ahqsh3kopKRTg1guHDh3PmmWfywQcfkJ+fT1ZWVpHy2267jezsbJYsWcKjjz7K8OHDU27rOE7FkG7Dj1sIViPWM7NtQF+CpfqFhMoe3YHNko4xs5WR4nFmNkZSFsFCkqZAG4L8ad83s6+jklnAh2bWLbxva+BZSbXM7GHgK4Jl/gMr5E2dtOabb75h9uzZTJ48GYD99tuP/fbbr0idZcuW8fvf/x6A9u3bU1BQwLp166hXr16pbR3HqRjSLagB/As4C5hKsJjjKeCUSPn5wD+BdQTSVqPjb2BFVUV+CUwys6/Dss8TPdTMPpI0AhgLPBzW+1xSynIUriiSnMpWFCmrishHH33EYYcdxqWXXkp+fj7HH388EyZM4MADDyys07VrV5599ll69erFu+++y8cff8zq1aupXbt2qW0dx6kY0jGoTQH+GA45diFQA4kGtSHALQRBbSoJglqcqkjb0PYWwV60UWb2cpJnLwLaJylLiCuKpEZlK4qUVWFhxYoVLFy4kJycHHJycpg4cSJXXnklv/jFLwrrfP/73+eee+7h2GOPpXXr1hx77LEsXryYXbt2ldo2HleFKBnvn+R438RRXru4y+MgVPsAFgCXArcBvYEXQnszAmUQhdeLgE5WsqrICwR7zuoSrJZcTaAw0gpYGvf8Q4BtcbZRwMhU/HdFkeRUN9WDzz77zFq2bFl4PXv2bOvfv3/S+rt377aWLVvaxo0b97qtWfXrn8rG+yc5mdA3lKOiSFotFInwPDCGYOgxymCCwLNSUgFBYIqq648zs2wzO8XMYlmtVwMzzGyHBfNvK4Djkjy3G7C8fF7Bqc4cfvjhHHXUUaxYsQIIFPo7dOhQpM6GDRvYvn07AA888ACnnnoqjRo1Sqmt4zgVQzoOP0Iw5LjRzN6T1DtiH0KQbmYugKRjgNeA/yvhXs+F7SZLakIwHPkR0DBaSVIrgkA6sVzewKn2TJw4kUsuuYTt27fTunVrHn744SJKIsuXL+dnP/sZtWvXpkOHDjz44IMltnUcp+JJy6BmZquBImmCw6BzNDAvUm9lmIm7pHwerwD9JC0DdgHXmdl6SQ2BNpIWAwcAm4CJFqx8RNLhBMOgjYDdkq4GOpiZZ/yuIWRnZ7NgwYIitqiSyMknn8x///vflNs6jlPxpFVQs4hiSMSWC+SGly0SlHcPT99Jck8jUBEZEWcvAOqV4Mta4MjSvXYcx3HShXSdU3Mcx3GcvcaDmuPEUZrE1caNGzn77LPp2rUrHTt2LJwvW7VqFX369CErK4uOHTsyYcKERLd3HKcCqbZBTdKNoezVklAaq6ekAaHsVX4oi3V5WDcqofWBpHsl1QrLJktaGZYtknRyaP9xeP/dknpU5bs6lUtpEleTJk2iQ4cO5Ofnk5uby7XXXsv27dupU6cOY8eOZfny5cybN49JkyaxbNmyKnoLx6mZpNWcWqqEgWcA0N3MvgtXNR5IsB/tRDNbLWl/giX/McZZIKFVC5gNnAbMCsuuM7OpkvoB9xNs+l4KnBdeOzWEVOSxJLFp0ybMjM2bN9O4cWPq1KnDEUccwRFHHAFAw4YNycrKYs2aNb6c33EqkWoZ1IAjgC/N7DsozJW2m+B91oe27wj2pMWzH8Fqx68TlM0Gjg3bL4fgD1iquExWcipLJqsy5LGGDRvGOeecQ/Pmzdm0aRNPP/00tWoVHfQoKChg
8eLF9OxZ0sJcx3HKm+oa1F4lkNL6D/Bv4Gkze0PS88DHkmYSKIk8ZWa7wzbXSPoJQQLQf1nihJ9nA+/tjSMuk5UalSWTVRnyWG+88QZNmjThySef5NNPP2Xo0KE88MADhYFv27ZtDB8+nKFDh7Jo0aKUnutSRyXj/ZMc75s4ykuapLIPAh3H3gQ6kGsJMlMDdCbIbr0YmGxxUlcEclkzgIvC68nASgJ5rdcIZbciz8kFeqTik8tkJae6SPmkInHVv39/mz17duF1nz597J133jEzs+3bt1u/fv1s7Nixe/Xc6tI/VYX3T3IyoW+oATJZpWJmu8ws18xuBoYRqPdjZu+Z2TiCtDXnJ2i3A3gZODVivs4Cea2+ZrY0vo1Tc0hF4uroo49m5syZAKxbt44VK1bQunVrzIzLLruMrKwsRowYUezejuNUPNVy+FFSO2C3mcXkHLKBdZJ6W7BZO2b7OEFbAd8j+DJznGKUJo910003kZOTQ+fOnTEzbr/9dpo0acKbb77JY489RufOncnOzgaCRKL9+/evwrdxnJpFtQxqQANgYpgwdCfwP2A4cL+k+4FtBAlHcyJtYnNqdYElwF9LeoCkQQQ6kIcBL0rKM7Mzyvk9nDSkNHms5s2b8+qrrxZr16tXr9iQteM4VUS1DGpmtpDgayuehP8kNrNRBPNqicpyktinE2wRcBzHcaoJ1XZOzXHKSmnKITHmz59P7dq1mTp1ahH7rl276NatGwMGDKgMdx3HSYG0DWqSBkkySe3D61bh9W8jde6RlBO5HhEqhrwXqorcJaluWFYQ2vPC4+7QPlnSBQme/7KkDWEGbicDKU05BILAdf3113PGGcVHnidMmJCwjeM4VUfaBjWCHGhvUjQJ6OfAcEn7xVeWdAXQDzjJzDoDJ4T1o0r8fcJVjtlmdlUpz78T+GlZXsBJX2LKIZdddhkQKIccfPDBxepNnDiR888/n6ZNmxaxr169mhdffJGhQ4dWhruO46RIWs6pSWoAfB/oQ5AFe1RY9AXwFvBz4O9xzW4ETjWzDQBmth34y776YGYz4xKUlooriiSnIhRFyqIekopyyJo1a5g+fTqvv/468+fPL9L+6quv5o477mDTpk377IPjOOVPWgY1YCDwspn9R9JXkroDX4VlfwH+JemhWOUw4WcDM1tZyn1nSdoVnj8S7mcrE64okhoVoShSFhWFVJRDRo0axeDBg5kzZw5r167l/fffp0mTJsydO5cdO3awadMm8vLyWL9+fZkVHVwVomS8f5LjfRNHee3iLs8DeBHoG55fRTAU2ApYGtoeJRgavIdg2X4j4KtI+zMI9qEVAN8LbQVAkwTPmgxckMSP3sALqfrtiiLJSTfVg1SUQ1q1amUtW7a0li1b2oEHHmiHHXaYTZ8+3W644QZr0aKFtWzZ0po1a2b16tWzSy65pEz+pFv/pBveP8nJhL6hHBVF0u5LTdKhwOlAJ0lGIIdlFN1XdhswlUCAGDP7RtIWSceY2UozewV4JVzkUWz+zXGiyiHt2rVLqByycuWeD/+cnBwGDBjAwIEDGThwIKNHjwaCr8UxY8bw+OOPV6r/juMkJu2CGnAB8KiZXR4zSHoDODJ2bWYfSFpGkH7m3dA8GrhX0kVmtiFUDjmgEv12qhmlKYc4jlP9SMegNoTiCzymAX+Is/2ZQLQ4xr1AfeAdSd8BmwkWlUTrROfUlpjZz8Lz+yWND89XmdnJkuYA7YEGklYDl4VfgE6GUJpySJRYfrV4evfuTe/evcvZM8dx9pW0C2pm1juB7W7g7jhbPpEtCeG47JjwSHTfVknsOUnsp6TosuM4jpMmpPM+Nccpd0pTEXniiSfo0qULXbp04Xvf+x75+fmFZa1atSoUK+7Ro0dlu+44TgqkVVCTdKOk9yUtCVU/ekrKlbQiogQyNaw7KlQYOTbS/prQ1iNi6xbazojYDpD0bqg68r6kW+L8SKpM4lRvSlMROeaYY3jjjTdYsmQJ
N910E7/61a+KlM+aNYu8vLxiw5aO46QHaTP8KOlkgoUf3c3sO0lN2LNy8RIzS/RX5D0CxZE/hdcXAMvi6sSUSYYAsTmx74DTzWxzGKzelPQvM5sXp0yyIVQvGUGgTLKjXF7WqRJiKiKx+bH99tuP/fYrujj2e9/bo5N90kknsXr16sp00XGcMpI2QQ04AvjSzL4DMLMvAYJFjEl5DjgX+JOk1sBGIoEnXAF5AUHC0DmSDjCzb8P5t81htbrhEcsZss/KJK4okpzyUhSpaBWRKA8++CA/+tGPCq8l0a9fPyRx+eWXF/uKcxyn6pGlSf6nUBrrTYIVjP8GnjazNyTlEgS8bWHV18zsOkmjCALT94A/EgS31cClwEgzWyCpF3CLmf1A0pPAVDN7NnxebWAhcCwwycyuD5VJPjGzQ/bC76iiyPF/HB+v3uVAoCiyblvp9Uqjc4uD9rntihUr+PWvf83EiRPp0KEDEydO5MADDyyiIhJj8eLFjB8/nrvvvpuDDgqe+eWXX9KkSRO+/vprRo4cyVVXXUXXrl332Z8omzdvpkGDBuVyr0zE+yc5mdA3ffr0WWhm5TNRXV67uMvjINho3Ru4BVhLoBaSC/RIUHcUMBK4kGD4cT6BskhhfWAS8Mvw/BzgmQT3ORiYBXQiBWWSkg5XFElOOqgepKIiYmaWn59vrVu3thUrViS9180332x33nlnufmWDv2Tznj/JCcT+oZyVBRJq4UiZrbLzHLN7GZgGHB+Cs3+SSCZ9YmZfRMzhl9i5wN/lFRAkMX6R+HXWPSZGwgC4Zlh+y2SjgnLXjGzbGAprkxS7YmqiAAJVUQ++eQTzjvvPB577DHatm1baN+yZUuhePGWLVt49dVX6dSpU+U57zhOSqTNnJqkdsBuM/tvaMoGPib4gkqKmW2TdD3wn7iiHwL5ZhZd9fgIMFDSy8AOCxaC1Avr3h5Wc2WSDKY0FZFbb72V9evX8+tf/xqAOnXqsGDBAtatW8egQYMA2LlzJxdffDFnnnlmlb2H4ziJSZugBjQAJko6GNgJ/I9grmoq8ISk2IzMl2b2w2hDM5uS4H5DgOlxtmnAlUA+8Ej4NVcL+IeZxZKBpqJM4lRTSlMReeCBB3jggQeKtWvdunWRPWuO46QnaRPUzGwhwaKPeHonqT8qiT1WPydB2fME+dkAuiVpX6IyieM4jpO+pNWcmuM4juOUBQ9qTo2hNImsDz74gJNPPpn999+fMWP2fKivWLGC7OzswqNRo0aMHz++kr13HCcV0mb4MYakw4HxwAkEyh8FwNVh8XigLcEG6/eA3wJZwAzgI4K5sHXAHZE5MiT9DPgdoPB4yMzGRMpHEiQiPczMvpR0CXBdxK0uBEoneeX5rk7lEpPImjp1Ktu3b2fr1q1Fyhs3bszdd9/Nc889V8Terl078vLyANi1axctWrQoXDTiOE56kVZBLVxpOB14xMwuCm3ZQDPgIWCEmf0ztPcBDgubzjGzAZH6z0naZmYzJf2IICj2M7NPJR1AsAUg9syjCBRHPonZzOwJ4ImwvDMwwwNa9SYViaymTZvStGlTXnwxufLJzJkzadOmDS1btqxIdx3H2UfSKqgBfQiW2t8XM5hZnqRfAHNjAS20zwKQ1Dt6g7D+rQT73GYCvydQGPk0LP8WiMp+jCP4ipuRxKchwFOpOO8yWckpD5msypTISsaUKVMYMmTIPvvhOE7Fkm5BrROBdFWq9mQsYs/wYdK2ks4B1phZfgkak4MJJLgSEieTxR8779wLN2sOzeoFga0s5Obm7nPbFStWsHDhQnJycsjJyWHixIlceeWVCSWyCgoKqFevXrHn7dixg2nTpjFgwIAy+ZKIzZs3l/s9Mwnvn+R43xQl3YJaeVGiCjKApPoE4sX9SqjTE9hqZkuT1TGzvwF/A2jXrp399pKk8a9Gk5uby4VVmCG6ffv2jB49unBTde3atfnLX/6SMGt1bm4uDRo0KFY2Y8YMevbsyXnn
nVfu/uXm5noG7RLw/kmO901R0m314/vA8XthT0Y3YHkpbdsAxwD5oYzWkcCicKFKjItIcejRSW9SkcgqjaeeesqHHh0nzUm3L7XXgdsk/dLM/g4g6QQCdZHfSzrLzF4M7WcCa+JvIKkLcBMwNDSNBu6QNMDM1kraH7jczO4GmkbaFRAIIcdS3tQCfgycWjGv6lQ2pUlkrV27lh49evDNN99Qq1Ytxo8fz7Jly2jUqBFbt27ltdde4/7776/it3AcpyTSKqiZmUkaBIyXdAPwLXuW9A8I7eMJlvQvAYYDhwKnSFpMsKT/c+AqM5sZ3vMlSc2Af4erK41gJWVpnAqsNrOPyu8NnaqkNImsww8/PGlS0Pr167N+/foK9c9xnLKTVkENIFyleGGS4kQKsuuAEpNsmdnDwMOl1GkVd50LnFRSG8dxHCe9SLc5NcepMPZVUSTGrl276NatGwMGDKgslx3H2UvSIqhJaibpSUkfSVooaa6kQZJ6S9ooabGkDySNiWt3mKQdki6PsxdImhNny5O0NDyP3neFpNmSBkTqjpC0TNISSTMl+U7bDCCmKPLBBx+Qn59PVlZWkfKYosjIkSMTtp8wYUKxNo7jpBdVHtTCea7ngNlm1trMjidYdXhkWGWOmXUjWNE4QNL3I81/DMwj2CAdT8NQLQRJif4SzTGzbmbWDrgKuEfSD8KyxQSLRroQpL65o0wv6VQ5MUWRyy67DAgURQ4++OAidZo2bcoJJ5xA3bp1i7VfvXo1L774IkOHDi1W5jhO+pAOc2qnA9vjVEQ+Jsit1jti2yYpD2gRaTsEuBZ4UlILM4uuhvwHwcbpMexRBfkpCYhXIYmplYTMA36Syou4okhyqruiyNVXX80dd9xRmP3acZz0JB2CWkcCBZASkXQIcBwwO7w+CjjczN6VFAtgd0WaTAUmEwS1s4FLSBLUQqIqJFEuA/5Vgl+uKJIC1VlRZO7cuezYsYNNmzaRl5fH+vXrXVGkkvH+SY73TRxmVqUHwdDfuMj1JILM1PMJEoRuJFi+/x1wS6TedcCfw/MuwPxIWQHQBHiRYCjzSaAVsDQs7w28EOdHN2B5nO0nBF9q+6fyLm3btjUnMbNmzarS53/22WfWsmXLwuvZs2db//79E9a9+eab7c477yy8vuGGG6xFixbWsmVLa9asmdWrV88uueSScvWvqvsn3fH+SU4m9A2wwMopplT5nBqB4kf32IWZ/Qb4AUUV+LsAnYErQxV+CIYUc8JN088DXSUdF3fvpwmCZCqqIFEVEiT9kEBG6xwz+24v38lJM8qiKDJ69GhWr15NQUEBU6ZM4fTTT+fxxx+vSHcdx9lH0mH4MaYicqWZ3Rva6sdXMrP/SBoNXC9pFHCgmRXOr0m6heCr7P9Fmk0HjgBeAZoncyBehURSN+B+4Ewz+7wM7+akEWVRFHEcp3pQ5UHNzEzSQGCcpN8BXwBbgOsTVL8PGAn8gSBgRZkGTCES1MxsE3A7QAIV/qQqJAQJQxsAz4TtPjGzc/bxFZ00oSyKIjF69+7t4rGOk8ZUeVADMLPPCL6yEpEbqbeNoqsfo/dYAnQIz1slKC8gSEMTUwtJqkJiZj9MxW/HcRwnvUiHOTXHKXdatWpF586dyc7OpkePHsXKN27cyNlnn03Xrl3p2LEjDz9cooqa4zjVhAoLapJ2xVQ8JP1T0sGhvZUkk/TbSN17JOVErutI+jKcQ4vec0CoApIfKn5cHtpHSVoTPi92HBwqh7wQd4/Jki4Iz58IFUWWSnpIUt24ujMkFdVScqoNs2bNIi8vr9iQI8CkSZPo0KED+fn55Obmcu2117J9+/Yq8NJxnPKkIr/UtplZtpl1Ar4CfhMp+xwYLmm/JG37ASuAC0PFEcKA8zfgbDPrSrBaMTfSZlz4vNixIQUfnwDaE6ysrMeedDWEQbg7cLCkY1K4l1ONkMSmTZswMzZv3kzjxo2pUyctRuMdxykDlfV/8VyCvWQxvgDe
An4O/D1B/SHABOBKAqX8uUBDAn/XA4TL7FeUxSkzeyl2Luld9khzAZwP/JMgC8BFBHnZSsQVRZKzt4oiZVEPgSBo9evXD0lcfvnl/OpXvypSPmzYMM455xyaN2/Opk2bePrpp6lVy0fjHae6U+FBTVJtgn1nD8YV/QX4l6SH4urXC+tfDhxMEODmmtlXkp4HPpY0E3gBeMrMdodNr5EUk7P62sz6hOenhPJaMY4O20afWZdAbWR4xDwEuIUgqE0lSVBzRZHU2FtFkbIqJNx55500adKEr7/+mpEjR7Jt2za6du1aWP7GG2/QpEkTnnzyST799FOGDh3KAw88kLJsVnnjqhAl4/2THO+bOMprF3f8AewC8oANwEygdmhvxR5lj0cJgsk9QE5o+zHwRHh+KLAq1ja0dQauIRAdnhzaRgEjE/jQm+LKIZOBC+JsfwfGR66bEaiSKLxeBHQq7Z1dUSQ5Val6EK8QYmbWv39/mz17duF1nz597J133qls1wrJBFWIisT7JzmZ0DdUE0WRbWaWDbQE9qPonFqM2wj2o0X9GAL8MFQKWUgQ2GJfXZjZe2Y2DuhLMERYJiTdTKBeMiJiHgwcAqwM/WhF8i0HTpqxZcuWQuHhLVu28Oqrr9KpU6cidY4++mhmzgy2Ja5bt44VK1bQunXrSvfVcZzypcKHH81so6SrgBmS7o0r+0DSMmAA8K6kRkAv4CgLpakkXQoMkTSPIB1Mbtg8G/i4LL5JGgqcAfzA9gxjQhBYzzSzuWG9Y4DXgP8ry/OcymHdunUMGjQIgJ07d3LxxRdz5plnFlEPuemmm8jJyaFz586YGbfffjtNmjSpSrcdxykHKmWhiJktlpRP8LUzJ674zwRDiQDnAa9bUa3FGQT5zEYAv5N0P7CNQHUkJ1IvOqcGMDAF1+4jCIxzw0WWzxIMiR5NIGQc83+lpG8k9TSzd1K4r1OFtG7dmvz8/GL2qHpI8+bNefXVVyvTLcdxKoEKC2pm1iDu+uzIZaeIPZ+iw4+T49p9xR5x4/5JnjWKYF4tngKKLvvHzHIi58nev5hqiZl1T1TRcRzHSR98DbPjOI6TMXhQcxzHcTIGD2qO4zhOxuBBzXEcx8kYPKg5juM4GUNMMcMpByRtoox6lBlME+DLqnYijfH+KRnvn+RkQt+0NLPDSq9WOi5LXr6sMLPiybscJC3wvkmO90/JeP8kx/umKD786DiO42QMHtQcx3GcjMGDWvnyt6p2II3xvikZ75+S8f5JjvdNBF8o4jiO42QM/qXmOI7jZAwe1BzHcZyMwYNaOSDpTEkrJP1P0g1V7U+6IalA0nuS8iQtqGp/qhpJD0n6XNLSiK2xpNck/Tf8eUhV+lhVJOmbUZLWhL8/eZISZuuoCUg6StIsScslvS9peGj3358QD2plRFJtYBLwI6ADQULTDlXrVVrSx8yyfT8NEKRXOjPOdgMw08yOA2aG1zWRyRTvG4Bx4e9Ptpm9VMk+pRM7gWvNLAs4CfhN+PfGf39CPKiVnROB/5nZR2a2HZgCnFvFPjlpjJnNBr6KM58LPBKeP0JqSW4zjiR944SY2Wdmtig83wQsJ8j/6L8/IR7Uyk4LYFXkejUJkozWcAx4VdJCSb+qamfSlGZm9hkEf7iAplXsT7oxTNKScHiyxg6tRZHUCugGvIP//hTiQa3sKIHN90kU5fth5vAfEQyXnFrVDjnVinuBNkA28Bkwtkq9SQMkNQCmAVeb2TdV7U864UGt7KwGjopcHwl8WkW+pCVm9mn483NgOsGQrVOUdZKOAAh/fl7F/qQNZrbOzHaZ2W7g79Tw3x9JdQkC2hNm9mxo9t+fEA9qZWc+cJykYyTtB1wEPF/FPqUNkg6U1DB2DvQDlpbcqkbyPPDz8PznwIwq9CWtiP2xDhlEDf79kSTgQWC5md0VKfLfnxBXFCkHwiXG44HawENm9ueq9Sh9kNSa4OsMgqwQT9b0/pH0FNCbIGXIOuBm4Dng
H8DRwCfAj82sxi2YSNI3vQmGHg0oAC6PzR/VNCT1AuYA7wG7Q/MfCObVavzvD3hQcxzHcTIIH350HMdxMgYPao7jOE7G4EHNcRzHyRg8qDmO4zgZgwc1x3EcJ2OoU9UOOI5TMpJ2ESzhjjHQzAqqyB3HSWt8Sb/jpDmSNptZg0p8Xh0z21lZz3Oc8sSHHx2nmiPpCEmzw1xjSyWdEtrPlLRIUr6kmaGtsaTnQnHgeZK6hPZRkv4m6VXgUUmHSZomaX54fL8KX9FxUsaHHx0n/aknKS88X2lmg+LKLwZeMbM/h/n96ks6jEAn8VQzWympcVj3FmCxmQ2UdDrwKIFaB8DxQC8z2ybpSYIcZm9KOhp4BciqsDd0nHLCg5rjpD/bzCy7hPL5wEOh0O1zZpYnqTcw28xWAkQkk3oB54e21yUdKumgsOx5M9sWnv8Q6BBIDQLQSFLDMIeX46QtHtQcp5pjZrPDdD5nAY9JuhPYQOIUSCWlStoSsdUCTo4EOcepFvicmuNUcyS1BD43s78TKLh3B+YCp0k6JqwTG36cDVwS2noDXybJx/UqMCzyjOwKct9xyhX/UnOc6k9v4DpJO4DNwM/M7Iswy/izkmoR5NfqC4wCHpa0BNjKnnQl8VwFTArr1SEIhldU6Fs4TjngS/odx3GcjMGHHx3HcZyMwYOa4ziOkzF4UHMcx3EyBg9qjuM4TsbgQc1xHMfJGDyoOY7jOBmDBzXHcRwnY/j/Q6syJcluD6QAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from xgboost import plot_importance\n",
    "\n",
    "# Pull the underlying Booster out of the fitted XGBoost model.\n",
    "booster = xgb_model_final.get_booster()\n",
    "\n",
    "# Per-feature gain scores, rounded to 2 decimals so the bar annotations stay readable.\n",
    "importance = {\n",
    "    feature: round(score, 2)\n",
    "    for feature, score in booster.get_score(importance_type=\"gain\").items()\n",
    "}\n",
    "\n",
    "# A dict of scores is plotted as-is (importance_type is only consulted when a model is\n",
    "# passed), so name the metric on the axis/title instead of relying on the generic defaults.\n",
    "ax = plot_importance(\n",
    "    importance,\n",
    "    max_num_features=20,\n",
    "    show_values=True,\n",
    "    title='Feature importance (gain)',\n",
    "    xlabel='Gain',\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "820971b9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "81d78d55",
   "metadata": {},
   "source": [
    "# AutoGluon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "aa8cff7f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from autogluon.tabular import TabularPredictor as task\n",
    "from sklearn.model_selection import train_test_split\n",
    "import json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "cc1b93fc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the training and validation rows into a single frame; this pooled frame is\n",
    "# what gets handed to AutoGluon, which makes its own holdout split during fit().\n",
    "input_df_final = pd.concat([input_df, input_df_val], axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "6c20a62e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Beginning AutoGluon training ... Time limit = 180s\n",
      "AutoGluon will save models to \"../final_results/AutoGluon/models/mRNA/\"\n",
      "AutoGluon Version:  0.3.1\n",
      "Train Data Rows:    405\n",
      "Train Data Columns: 5569\n",
      "Preprocessing data ...\n",
      "AutoGluon infers your prediction problem is: 'multiclass' (because dtype of label-column == int, but few unique label-values observed).\n",
      "\t3 unique label values:  [0, 1, 2]\n",
      "\tIf 'multiclass' is not the correct problem_type, please manually specify the problem_type argument in fit() (You may specify problem_type as one of: ['binary', 'multiclass', 'regression'])\n",
      "Train Data Class Count: 3\n",
      "Using Feature Generators to preprocess the data ...\n",
      "Fitting AutoMLPipelineFeatureGenerator...\n",
      "\tAvailable Memory:                    126346.32 MB\n",
      "\tTrain Data (Original)  Memory Usage: 18.04 MB (0.0% of available memory)\n",
      "\tInferring data type of each feature based on column values. Set feature_metadata_in to manually specify special dtypes of the features.\n",
      "\tStage 1 Generators:\n",
      "\t\tFitting AsTypeFeatureGenerator...\n",
      "\t\t\tNote: Converting 11 features to boolean dtype as they only contain 2 unique values.\n",
      "\tStage 2 Generators:\n",
      "\t\tFitting FillNaFeatureGenerator...\n",
      "\tStage 3 Generators:\n",
      "\t\tFitting IdentityFeatureGenerator...\n",
      "\tStage 4 Generators:\n",
      "\t\tFitting DropUniqueFeatureGenerator...\n",
      "\tTypes of features in original data (raw dtype, special dtypes):\n",
      "\t\t('float', []) : 5569 | ['BLOC1S4', 'TBC1D17', 'CARD8', 'KIF12', 'SPECC1L', ...]\n",
      "\tTypes of features in processed data (raw dtype, special dtypes):\n",
      "\t\t('float', [])     : 5558 | ['BLOC1S4', 'TBC1D17', 'CARD8', 'KIF12', 'SPECC1L', ...]\n",
      "\t\t('int', ['bool']) :   11 | ['DUX4L3', 'CT47A7', 'GAGE13', 'DUX4L6', 'CT47A8', ...]\n",
      "\t8.7s = Fit runtime\n",
      "\t5569 features in original data used to generate 5569 features in processed data.\n",
      "\tTrain Data (Processed) Memory Usage: 18.01 MB (0.0% of available memory)\n",
      "Data preprocessing and feature engineering runtime = 9.39s ...\n",
      "AutoGluon will gauge predictive performance using evaluation metric: 'roc_auc_ovo_macro'\n",
      "\tThis metric expects predicted probabilities rather than predicted class labels, so you'll need to use predict_proba() instead of predict()\n",
      "\tTo change this, specify the eval_metric argument of fit()\n",
      "Automatically generating train/validation split with holdout_frac=0.2, Train Rows: 324, Val Rows: 81\n",
      "Fitting 13 L1 models ...\n",
      "Fitting model: KNeighborsUnif ... Training model for up to 170.61s of the 170.43s of remaining time.\n",
      "\t0.7639\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t1.44s\t = Training   runtime\n",
      "\t0.12s\t = Validation runtime\n",
      "Fitting model: KNeighborsDist ... Training model for up to 168.85s of the 168.66s of remaining time.\n",
      "\t0.773\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t1.43s\t = Training   runtime\n",
      "\t0.12s\t = Validation runtime\n",
      "Fitting model: NeuralNetFastAI ... Training model for up to 167.09s of the 166.9s of remaining time.\n",
      "\tWarning: Exception caused NeuralNetFastAI to fail during training... Skipping this model.\n",
      "\t\tfuture feature annotations is not defined (dispatch.py, line 4)\n",
      "Detailed Traceback:\n",
      "Traceback (most recent call last):\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/tabular/trainer/abstract_trainer.py\", line 962, in _train_and_save\n",
      "    model = self._train_single(X, y, model, X_val, y_val, **model_fit_kwargs)\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/tabular/trainer/abstract_trainer.py\", line 934, in _train_single\n",
      "    model = model.fit(X=X, y=y, X_val=X_val, y_val=y_val, **model_fit_kwargs)\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/core/models/abstract/abstract_model.py\", line 522, in fit\n",
      "    out = self._fit(**kwargs)\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/tabular/models/fastainn/tabular_nn_fastai.py\", line 163, in _fit\n",
      "    try_import_fastai()\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/core/utils/try_import.py\", line 107, in try_import_fastai\n",
      "    import autogluon.tabular.models.fastainn.imports_helper\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/autogluon/tabular/models/fastainn/imports_helper.py\", line 1, in <module>\n",
      "    from fastai.tabular.all import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastai/tabular/all.py\", line 1, in <module>\n",
      "    from ..basics import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastai/basics.py\", line 1, in <module>\n",
      "    from .data.all import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastai/data/all.py\", line 1, in <module>\n",
      "    from ..torch_basics import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastai/torch_basics.py\", line 9, in <module>\n",
      "    from .imports import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastai/imports.py\", line 30, in <module>\n",
      "    from fastcore.all import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastcore/all.py\", line 3, in <module>\n",
      "    from .dispatch import *\n",
      "  File \"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/fastcore/dispatch.py\", line 4\n",
      "    from __future__ import annotations\n",
      "                                     ^\n",
      "SyntaxError: future feature annotations is not defined\n",
      "Fitting model: LightGBMXT ... Training model for up to 164.99s of the 164.81s of remaining time.\n",
      "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "\t0.8521\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t9.55s\t = Training   runtime\n",
      "\t0.03s\t = Validation runtime\n",
      "Fitting model: LightGBM ... Training model for up to 155.22s of the 155.04s of remaining time.\n",
      "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "\t0.8472\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t10.64s\t = Training   runtime\n",
      "\t0.03s\t = Validation runtime\n",
      "Fitting model: RandomForestGini ... Training model for up to 144.37s of the 144.19s of remaining time.\n",
      "\t0.8311\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t3.46s\t = Training   runtime\n",
      "\t0.11s\t = Validation runtime\n",
      "Fitting model: RandomForestEntr ... Training model for up to 140.61s of the 140.43s of remaining time.\n",
      "\t0.844\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t3.79s\t = Training   runtime\n",
      "\t0.11s\t = Validation runtime\n",
      "Fitting model: CatBoost ... Training model for up to 136.52s of the 136.34s of remaining time.\n",
      "Metric roc_auc_ovo_macro is not supported by this model - using AUC:type=Mu instead\n",
      "\tMany features detected (5569), dynamically setting 'colsample_bylevel' to 0.1795654516071108 to speed up training (Default = 1).\n",
      "\tTo disable this functionality, explicitly specify 'colsample_bylevel' in the model hyperparameters.\n",
      "\t0.8562\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t55.52s\t = Training   runtime\n",
      "\t0.08s\t = Validation runtime\n",
      "Fitting model: ExtraTreesGini ... Training model for up to 80.72s of the 80.54s of remaining time.\n",
      "\t0.818\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t3.45s\t = Training   runtime\n",
      "\t0.11s\t = Validation runtime\n",
      "Fitting model: ExtraTreesEntr ... Training model for up to 76.97s of the 76.79s of remaining time.\n",
      "\t0.8356\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t3.54s\t = Training   runtime\n",
      "\t0.11s\t = Validation runtime\n",
      "Fitting model: XGBoost ... Training model for up to 73.13s of the 72.95s of remaining time.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[20:12:48] WARNING: ../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'multi:softprob' was changed from 'merror' to 'mlogloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\t0.8291\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t46.16s\t = Training   runtime\n",
      "\t0.04s\t = Validation runtime\n",
      "Fitting model: NeuralNetMXNet ... Training model for up to 26.72s of the 26.54s of remaining time.\n",
      "\t0.8911\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t13.25s\t = Training   runtime\n",
      "\t2.69s\t = Validation runtime\n",
      "Fitting model: LightGBMLarge ... Training model for up to 10.56s of the 10.38s of remaining time.\n",
      "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/lightgbm/engine.py:239: UserWarning: 'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. Pass 'log_evaluation()' callback via 'callbacks' argument instead.\n",
      "  _log_warning(\"'verbose_eval' argument is deprecated and will be removed in a future release of LightGBM. \"\n",
      "\tRan out of time, early stopping on iteration 45. Best iteration is:\n",
      "\t[21]\ttrain_set's multi_logloss: 0.390576\ttrain_set's roc_auc_ovo_macro: 1\tvalid_set's multi_logloss: 0.809444\tvalid_set's roc_auc_ovo_macro: 0.805259\n",
      "\t0.8053\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t12.15s\t = Training   runtime\n",
      "\t0.03s\t = Validation runtime\n",
      "Fitting model: WeightedEnsemble_L2 ... Training model for up to 170.61s of the -6.05s of remaining time.\n",
      "\t0.8962\t = Validation score   (roc_auc_ovo_macro)\n",
      "\t3.39s\t = Training   runtime\n",
      "\t0.0s\t = Validation runtime\n",
      "AutoGluon training complete, total runtime = 189.52s ...\n",
      "TabularPredictor saved. To load, use: predictor = TabularPredictor.load(\"../final_results/AutoGluon/models/mRNA/\")\n"
     ]
    }
   ],
   "source": [
    "# Training budget (seconds) and the model-selection metric (one-vs-one macro AUC).\n",
    "time_limit = 180\n",
    "metric = 'roc_auc_ovo_macro'\n",
    "save_path = '../final_results/AutoGluon/models/mRNA'  # specifies folder to store trained models\n",
    "\n",
    "# The label is an integer class id, so AutoGluon would otherwise have to guess the task\n",
    "# from the dtype; state it explicitly so it always matches the multiclass-only metric.\n",
    "predictor = task(\n",
    "    label=label,\n",
    "    problem_type='multiclass',\n",
    "    path=save_path,\n",
    "    eval_metric=metric,\n",
    ").fit(input_df_final, time_limit=time_limit)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "09398471",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Evaluation: roc_auc_ovo_macro on test data: 0.8759131103421761\n",
      "Evaluations on test data:\n",
      "{\n",
      "    \"roc_auc_ovo_macro\": 0.8759131103421761,\n",
      "    \"accuracy\": 0.735632183908046,\n",
      "    \"balanced_accuracy\": 0.7572755417956657,\n",
      "    \"mcc\": 0.6009272578354864\n",
      "}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'roc_auc_ovo_macro': 0.8759131103421761,\n",
       " 'accuracy': 0.735632183908046,\n",
       " 'balanced_accuracy': 0.7572755417956657,\n",
       " 'mcc': 0.6009272578354864}"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Score the fitted predictor on input_df_test (presumably the held-out split built\n",
    "# earlier in the notebook); the returned metrics dict is displayed as the cell output.\n",
    "predictor.evaluate(data=input_df_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "511d2656",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>model</th>\n",
       "      <th>score_val</th>\n",
       "      <th>pred_time_val</th>\n",
       "      <th>fit_time</th>\n",
       "      <th>pred_time_val_marginal</th>\n",
       "      <th>fit_time_marginal</th>\n",
       "      <th>stack_level</th>\n",
       "      <th>can_infer</th>\n",
       "      <th>fit_order</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>WeightedEnsemble_L2</td>\n",
       "      <td>0.896249</td>\n",
       "      <td>2.724961</td>\n",
       "      <td>26.194055</td>\n",
       "      <td>0.003094</td>\n",
       "      <td>3.394044</td>\n",
       "      <td>2</td>\n",
       "      <td>True</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NeuralNetMXNet</td>\n",
       "      <td>0.891115</td>\n",
       "      <td>2.692266</td>\n",
       "      <td>13.250540</td>\n",
       "      <td>2.692266</td>\n",
       "      <td>13.250540</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>CatBoost</td>\n",
       "      <td>0.856244</td>\n",
       "      <td>0.079102</td>\n",
       "      <td>55.515267</td>\n",
       "      <td>0.079102</td>\n",
       "      <td>55.515267</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>LightGBMXT</td>\n",
       "      <td>0.852060</td>\n",
       "      <td>0.029601</td>\n",
       "      <td>9.549471</td>\n",
       "      <td>0.029601</td>\n",
       "      <td>9.549471</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>LightGBM</td>\n",
       "      <td>0.847163</td>\n",
       "      <td>0.028573</td>\n",
       "      <td>10.635069</td>\n",
       "      <td>0.028573</td>\n",
       "      <td>10.635069</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>RandomForestEntr</td>\n",
       "      <td>0.844047</td>\n",
       "      <td>0.108880</td>\n",
       "      <td>3.789447</td>\n",
       "      <td>0.108880</td>\n",
       "      <td>3.789447</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>ExtraTreesEntr</td>\n",
       "      <td>0.835574</td>\n",
       "      <td>0.108996</td>\n",
       "      <td>3.537303</td>\n",
       "      <td>0.108996</td>\n",
       "      <td>3.537303</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>RandomForestGini</td>\n",
       "      <td>0.831093</td>\n",
       "      <td>0.109040</td>\n",
       "      <td>3.459631</td>\n",
       "      <td>0.109040</td>\n",
       "      <td>3.459631</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>XGBoost</td>\n",
       "      <td>0.829060</td>\n",
       "      <td>0.039434</td>\n",
       "      <td>46.158945</td>\n",
       "      <td>0.039434</td>\n",
       "      <td>46.158945</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>ExtraTreesGini</td>\n",
       "      <td>0.818020</td>\n",
       "      <td>0.108950</td>\n",
       "      <td>3.449311</td>\n",
       "      <td>0.108950</td>\n",
       "      <td>3.449311</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>LightGBMLarge</td>\n",
       "      <td>0.805259</td>\n",
       "      <td>0.028821</td>\n",
       "      <td>12.145864</td>\n",
       "      <td>0.028821</td>\n",
       "      <td>12.145864</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>KNeighborsDist</td>\n",
       "      <td>0.772985</td>\n",
       "      <td>0.116131</td>\n",
       "      <td>1.433261</td>\n",
       "      <td>0.116131</td>\n",
       "      <td>1.433261</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>KNeighborsUnif</td>\n",
       "      <td>0.763874</td>\n",
       "      <td>0.118167</td>\n",
       "      <td>1.438675</td>\n",
       "      <td>0.118167</td>\n",
       "      <td>1.438675</td>\n",
       "      <td>1</td>\n",
       "      <td>True</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  model  score_val  pred_time_val   fit_time  \\\n",
       "0   WeightedEnsemble_L2   0.896249       2.724961  26.194055   \n",
       "1        NeuralNetMXNet   0.891115       2.692266  13.250540   \n",
       "2              CatBoost   0.856244       0.079102  55.515267   \n",
       "3            LightGBMXT   0.852060       0.029601   9.549471   \n",
       "4              LightGBM   0.847163       0.028573  10.635069   \n",
       "5      RandomForestEntr   0.844047       0.108880   3.789447   \n",
       "6        ExtraTreesEntr   0.835574       0.108996   3.537303   \n",
       "7      RandomForestGini   0.831093       0.109040   3.459631   \n",
       "8               XGBoost   0.829060       0.039434  46.158945   \n",
       "9        ExtraTreesGini   0.818020       0.108950   3.449311   \n",
       "10        LightGBMLarge   0.805259       0.028821  12.145864   \n",
       "11       KNeighborsDist   0.772985       0.116131   1.433261   \n",
       "12       KNeighborsUnif   0.763874       0.118167   1.438675   \n",
       "\n",
       "    pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  \\\n",
       "0                 0.003094           3.394044            2       True   \n",
       "1                 2.692266          13.250540            1       True   \n",
       "2                 0.079102          55.515267            1       True   \n",
       "3                 0.029601           9.549471            1       True   \n",
       "4                 0.028573          10.635069            1       True   \n",
       "5                 0.108880           3.789447            1       True   \n",
       "6                 0.108996           3.537303            1       True   \n",
       "7                 0.109040           3.459631            1       True   \n",
       "8                 0.039434          46.158945            1       True   \n",
       "9                 0.108950           3.449311            1       True   \n",
       "10                0.028821          12.145864            1       True   \n",
       "11                0.116131           1.433261            1       True   \n",
       "12                0.118167           1.438675            1       True   \n",
       "\n",
       "    fit_order  \n",
       "0          13  \n",
       "1          11  \n",
       "2           7  \n",
       "3           3  \n",
       "4           4  \n",
       "5           6  \n",
       "6           9  \n",
       "7           5  \n",
       "8          10  \n",
       "9           8  \n",
       "10         12  \n",
       "11          2  \n",
       "12          1  "
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Leaderboard of the trained models (validation score, prediction/fit times, stack level).\n",
    "# silent=True is passed so only the returned frame is shown as the cell result.\n",
    "predictor.leaderboard(silent=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a883e033",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Computing feature importance via permutation shuffling for 5569 features using 174 rows with 3 shuffle sets...\n",
      "\t50177.81s\t= Expected runtime (16725.94s per shuffle set)\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# AutoGluon permutation feature importance, computed on input_df_test.\n",
    "# Slow cell: the recorded run logged an expected runtime of roughly 50,000 s.\n",
    "ag_feature_importance_test = predictor.feature_importance(data=input_df_test)\n",
    "ag_feature_importance_test.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "f25001d7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>importance</th>\n",
       "      <th>stddev</th>\n",
       "      <th>p_value</th>\n",
       "      <th>n</th>\n",
       "      <th>p99_high</th>\n",
       "      <th>p99_low</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>GNG7</th>\n",
       "      <td>0.002113</td>\n",
       "      <td>0.000841</td>\n",
       "      <td>0.024476</td>\n",
       "      <td>3</td>\n",
       "      <td>0.006931</td>\n",
       "      <td>-0.002705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>BCR</th>\n",
       "      <td>0.002016</td>\n",
       "      <td>0.001607</td>\n",
       "      <td>0.080939</td>\n",
       "      <td>3</td>\n",
       "      <td>0.011225</td>\n",
       "      <td>-0.007192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>B3GALT1</th>\n",
       "      <td>0.001641</td>\n",
       "      <td>0.000733</td>\n",
       "      <td>0.030286</td>\n",
       "      <td>3</td>\n",
       "      <td>0.005842</td>\n",
       "      <td>-0.002560</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SPTSSB</th>\n",
       "      <td>0.001590</td>\n",
       "      <td>0.000669</td>\n",
       "      <td>0.027155</td>\n",
       "      <td>3</td>\n",
       "      <td>0.005426</td>\n",
       "      <td>-0.002246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GLS2</th>\n",
       "      <td>0.001577</td>\n",
       "      <td>0.000893</td>\n",
       "      <td>0.046195</td>\n",
       "      <td>3</td>\n",
       "      <td>0.006695</td>\n",
       "      <td>-0.003541</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         importance    stddev   p_value  n  p99_high   p99_low\n",
       "GNG7       0.002113  0.000841  0.024476  3  0.006931 -0.002705\n",
       "BCR        0.002016  0.001607  0.080939  3  0.011225 -0.007192\n",
       "B3GALT1    0.001641  0.000733  0.030286  3  0.005842 -0.002560\n",
       "SPTSSB     0.001590  0.000669  0.027155  3  0.005426 -0.002246\n",
       "GLS2       0.001577  0.000893  0.046195  3  0.006695 -0.003541"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First five rows of the importance table computed on input_df_test.\n",
    "ag_feature_importance_test.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f260929b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Computing feature importance via permutation shuffling for 5569 features using 405 rows with 3 shuffle sets...\n",
      "\t55531.05s\t= Expected runtime (18510.35s per shuffle set)\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# AutoGluon permutation feature importance, computed on input_df_final (the data the predictor was fit on).\n",
    "# Slow cell: the recorded run logged an expected runtime of roughly 55,000 s.\n",
    "ag_feature_importance = predictor.feature_importance(data=input_df_final)\n",
    "ag_feature_importance.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "aedfaccd",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>importance</th>\n",
       "      <th>stddev</th>\n",
       "      <th>p_value</th>\n",
       "      <th>n</th>\n",
       "      <th>p99_high</th>\n",
       "      <th>p99_low</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TMCO2</th>\n",
       "      <td>0.001268</td>\n",
       "      <td>0.000169</td>\n",
       "      <td>0.002920</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002234</td>\n",
       "      <td>0.000302</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NKX2-3</th>\n",
       "      <td>0.000731</td>\n",
       "      <td>0.000261</td>\n",
       "      <td>0.020054</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002229</td>\n",
       "      <td>-0.000767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SNAP91</th>\n",
       "      <td>0.000665</td>\n",
       "      <td>0.000097</td>\n",
       "      <td>0.003536</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001224</td>\n",
       "      <td>0.000107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CYP1A2</th>\n",
       "      <td>0.000642</td>\n",
       "      <td>0.000243</td>\n",
       "      <td>0.022298</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002036</td>\n",
       "      <td>-0.000751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CTAG1A</th>\n",
       "      <td>0.000542</td>\n",
       "      <td>0.000219</td>\n",
       "      <td>0.025103</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001795</td>\n",
       "      <td>-0.000711</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        importance    stddev   p_value  n  p99_high   p99_low\n",
       "TMCO2     0.001268  0.000169  0.002920  3  0.002234  0.000302\n",
       "NKX2-3    0.000731  0.000261  0.020054  3  0.002229 -0.000767\n",
       "SNAP91    0.000665  0.000097  0.003536  3  0.001224  0.000107\n",
       "CYP1A2    0.000642  0.000243  0.022298  3  0.002036 -0.000751\n",
       "CTAG1A    0.000542  0.000219  0.025103  3  0.001795 -0.000711"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First five rows of the importance table computed on input_df_final.\n",
    "ag_feature_importance.head(n=5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "fed8babd",
   "metadata": {},
   "outputs": [],
   "source": [
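    "# Optional CSV export of the feature-importance table (currently disabled).\n",
    "# To save, uncomment one of the lines below and adjust the hard-coded absolute path.\n",
    "# ag_feature_importance.to_csv('/home/ec2-user/SageMaker/daniel/mRNA_train_james.csv')\n",
    "\n",
    "# ag_feature_importance.to_csv('/home/ec2-user/SageMaker/daniel/final_results/AutoGluon/models/mRNA/mRNA_james.csv')"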
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "574b6f8e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>importance</th>\n",
       "      <th>stddev</th>\n",
       "      <th>p_value</th>\n",
       "      <th>n</th>\n",
       "      <th>p99_high</th>\n",
       "      <th>p99_low</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>TMCO2</th>\n",
       "      <td>0.001268</td>\n",
       "      <td>0.000169</td>\n",
       "      <td>0.002920</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002234</td>\n",
       "      <td>0.000302</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NKX2-3</th>\n",
       "      <td>0.000731</td>\n",
       "      <td>0.000261</td>\n",
       "      <td>0.020054</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002229</td>\n",
       "      <td>-0.000767</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SNAP91</th>\n",
       "      <td>0.000665</td>\n",
       "      <td>0.000097</td>\n",
       "      <td>0.003536</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001224</td>\n",
       "      <td>0.000107</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CYP1A2</th>\n",
       "      <td>0.000642</td>\n",
       "      <td>0.000243</td>\n",
       "      <td>0.022298</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002036</td>\n",
       "      <td>-0.000751</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CTAG1A</th>\n",
       "      <td>0.000542</td>\n",
       "      <td>0.000219</td>\n",
       "      <td>0.025103</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001795</td>\n",
       "      <td>-0.000711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LHFPL5</th>\n",
       "      <td>0.000512</td>\n",
       "      <td>0.000438</td>\n",
       "      <td>0.090087</td>\n",
       "      <td>3</td>\n",
       "      <td>0.003020</td>\n",
       "      <td>-0.001996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RNASE13</th>\n",
       "      <td>0.000505</td>\n",
       "      <td>0.000180</td>\n",
       "      <td>0.019996</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001539</td>\n",
       "      <td>-0.000529</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>WIF1</th>\n",
       "      <td>0.000505</td>\n",
       "      <td>0.000452</td>\n",
       "      <td>0.096271</td>\n",
       "      <td>3</td>\n",
       "      <td>0.003095</td>\n",
       "      <td>-0.002085</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SPACA5B</th>\n",
       "      <td>0.000500</td>\n",
       "      <td>0.000113</td>\n",
       "      <td>0.008351</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001149</td>\n",
       "      <td>-0.000149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RXFP4</th>\n",
       "      <td>0.000494</td>\n",
       "      <td>0.000058</td>\n",
       "      <td>0.002314</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000829</td>\n",
       "      <td>0.000159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TTLL2</th>\n",
       "      <td>0.000477</td>\n",
       "      <td>0.000257</td>\n",
       "      <td>0.042331</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001949</td>\n",
       "      <td>-0.000995</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AMELY</th>\n",
       "      <td>0.000461</td>\n",
       "      <td>0.000103</td>\n",
       "      <td>0.008057</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001050</td>\n",
       "      <td>-0.000127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RAD21L1</th>\n",
       "      <td>0.000459</td>\n",
       "      <td>0.000085</td>\n",
       "      <td>0.005630</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000947</td>\n",
       "      <td>-0.000029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AC007919.2</th>\n",
       "      <td>0.000443</td>\n",
       "      <td>0.000144</td>\n",
       "      <td>0.016700</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001269</td>\n",
       "      <td>-0.000382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MC4R</th>\n",
       "      <td>0.000441</td>\n",
       "      <td>0.000070</td>\n",
       "      <td>0.004101</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000840</td>\n",
       "      <td>0.000042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GOLGA8G</th>\n",
       "      <td>0.000432</td>\n",
       "      <td>0.000048</td>\n",
       "      <td>0.002017</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000705</td>\n",
       "      <td>0.000159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SMIM18</th>\n",
       "      <td>0.000432</td>\n",
       "      <td>0.000264</td>\n",
       "      <td>0.052641</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001945</td>\n",
       "      <td>-0.001081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LSMEM2</th>\n",
       "      <td>0.000429</td>\n",
       "      <td>0.000114</td>\n",
       "      <td>0.011357</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001082</td>\n",
       "      <td>-0.000224</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RBP2</th>\n",
       "      <td>0.000421</td>\n",
       "      <td>0.000240</td>\n",
       "      <td>0.046576</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001794</td>\n",
       "      <td>-0.000952</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>MYOD1</th>\n",
       "      <td>0.000419</td>\n",
       "      <td>0.000308</td>\n",
       "      <td>0.071522</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002185</td>\n",
       "      <td>-0.001348</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>HIST1H2AB</th>\n",
       "      <td>0.000416</td>\n",
       "      <td>0.000121</td>\n",
       "      <td>0.013549</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001111</td>\n",
       "      <td>-0.000278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CCDC166</th>\n",
       "      <td>0.000415</td>\n",
       "      <td>0.000075</td>\n",
       "      <td>0.005370</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000845</td>\n",
       "      <td>-0.000015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>XAGE1B</th>\n",
       "      <td>0.000411</td>\n",
       "      <td>0.000128</td>\n",
       "      <td>0.015470</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001145</td>\n",
       "      <td>-0.000324</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>EFCAB8</th>\n",
       "      <td>0.000410</td>\n",
       "      <td>0.000192</td>\n",
       "      <td>0.032787</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001508</td>\n",
       "      <td>-0.000687</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NLRP4</th>\n",
       "      <td>0.000403</td>\n",
       "      <td>0.000142</td>\n",
       "      <td>0.019443</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001216</td>\n",
       "      <td>-0.000410</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AC104057.1</th>\n",
       "      <td>0.000403</td>\n",
       "      <td>0.000165</td>\n",
       "      <td>0.025844</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001348</td>\n",
       "      <td>-0.000543</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>FABP2</th>\n",
       "      <td>0.000397</td>\n",
       "      <td>0.000135</td>\n",
       "      <td>0.018113</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001169</td>\n",
       "      <td>-0.000374</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>PLK5</th>\n",
       "      <td>0.000391</td>\n",
       "      <td>0.000203</td>\n",
       "      <td>0.039790</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001557</td>\n",
       "      <td>-0.000775</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LCT</th>\n",
       "      <td>0.000384</td>\n",
       "      <td>0.000248</td>\n",
       "      <td>0.057471</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001803</td>\n",
       "      <td>-0.001035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>INSM2</th>\n",
       "      <td>0.000377</td>\n",
       "      <td>0.000255</td>\n",
       "      <td>0.062174</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001839</td>\n",
       "      <td>-0.001084</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RP11-293I14.2</th>\n",
       "      <td>0.000375</td>\n",
       "      <td>0.000257</td>\n",
       "      <td>0.063754</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001851</td>\n",
       "      <td>-0.001100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>S100G</th>\n",
       "      <td>0.000375</td>\n",
       "      <td>0.000055</td>\n",
       "      <td>0.003504</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000689</td>\n",
       "      <td>0.000062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>AL117190.3</th>\n",
       "      <td>0.000374</td>\n",
       "      <td>0.000043</td>\n",
       "      <td>0.002161</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000619</td>\n",
       "      <td>0.000129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CT45A3</th>\n",
       "      <td>0.000373</td>\n",
       "      <td>0.000073</td>\n",
       "      <td>0.006187</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000790</td>\n",
       "      <td>-0.000043</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>GJA8</th>\n",
       "      <td>0.000372</td>\n",
       "      <td>0.000078</td>\n",
       "      <td>0.007125</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000818</td>\n",
       "      <td>-0.000074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SLC39A12</th>\n",
       "      <td>0.000370</td>\n",
       "      <td>0.000206</td>\n",
       "      <td>0.044851</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001552</td>\n",
       "      <td>-0.000811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OR4F17</th>\n",
       "      <td>0.000368</td>\n",
       "      <td>0.000142</td>\n",
       "      <td>0.023006</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001180</td>\n",
       "      <td>-0.000444</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TBATA</th>\n",
       "      <td>0.000363</td>\n",
       "      <td>0.000036</td>\n",
       "      <td>0.001606</td>\n",
       "      <td>3</td>\n",
       "      <td>0.000568</td>\n",
       "      <td>0.000158</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>CDH19</th>\n",
       "      <td>0.000360</td>\n",
       "      <td>0.000382</td>\n",
       "      <td>0.122104</td>\n",
       "      <td>3</td>\n",
       "      <td>0.002552</td>\n",
       "      <td>-0.001831</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>TCEAL6</th>\n",
       "      <td>0.000360</td>\n",
       "      <td>0.000167</td>\n",
       "      <td>0.032492</td>\n",
       "      <td>3</td>\n",
       "      <td>0.001317</td>\n",
       "      <td>-0.000598</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               importance    stddev   p_value  n  p99_high   p99_low\n",
       "TMCO2            0.001268  0.000169  0.002920  3  0.002234  0.000302\n",
       "NKX2-3           0.000731  0.000261  0.020054  3  0.002229 -0.000767\n",
       "SNAP91           0.000665  0.000097  0.003536  3  0.001224  0.000107\n",
       "CYP1A2           0.000642  0.000243  0.022298  3  0.002036 -0.000751\n",
       "CTAG1A           0.000542  0.000219  0.025103  3  0.001795 -0.000711\n",
       "LHFPL5           0.000512  0.000438  0.090087  3  0.003020 -0.001996\n",
       "RNASE13          0.000505  0.000180  0.019996  3  0.001539 -0.000529\n",
       "WIF1             0.000505  0.000452  0.096271  3  0.003095 -0.002085\n",
       "SPACA5B          0.000500  0.000113  0.008351  3  0.001149 -0.000149\n",
       "RXFP4            0.000494  0.000058  0.002314  3  0.000829  0.000159\n",
       "TTLL2            0.000477  0.000257  0.042331  3  0.001949 -0.000995\n",
       "AMELY            0.000461  0.000103  0.008057  3  0.001050 -0.000127\n",
       "RAD21L1          0.000459  0.000085  0.005630  3  0.000947 -0.000029\n",
       "AC007919.2       0.000443  0.000144  0.016700  3  0.001269 -0.000382\n",
       "MC4R             0.000441  0.000070  0.004101  3  0.000840  0.000042\n",
       "GOLGA8G          0.000432  0.000048  0.002017  3  0.000705  0.000159\n",
       "SMIM18           0.000432  0.000264  0.052641  3  0.001945 -0.001081\n",
       "LSMEM2           0.000429  0.000114  0.011357  3  0.001082 -0.000224\n",
       "RBP2             0.000421  0.000240  0.046576  3  0.001794 -0.000952\n",
       "MYOD1            0.000419  0.000308  0.071522  3  0.002185 -0.001348\n",
       "HIST1H2AB        0.000416  0.000121  0.013549  3  0.001111 -0.000278\n",
       "CCDC166          0.000415  0.000075  0.005370  3  0.000845 -0.000015\n",
       "XAGE1B           0.000411  0.000128  0.015470  3  0.001145 -0.000324\n",
       "EFCAB8           0.000410  0.000192  0.032787  3  0.001508 -0.000687\n",
       "NLRP4            0.000403  0.000142  0.019443  3  0.001216 -0.000410\n",
       "AC104057.1       0.000403  0.000165  0.025844  3  0.001348 -0.000543\n",
       "FABP2            0.000397  0.000135  0.018113  3  0.001169 -0.000374\n",
       "PLK5             0.000391  0.000203  0.039790  3  0.001557 -0.000775\n",
       "LCT              0.000384  0.000248  0.057471  3  0.001803 -0.001035\n",
       "INSM2            0.000377  0.000255  0.062174  3  0.001839 -0.001084\n",
       "RP11-293I14.2    0.000375  0.000257  0.063754  3  0.001851 -0.001100\n",
       "S100G            0.000375  0.000055  0.003504  3  0.000689  0.000062\n",
       "AL117190.3       0.000374  0.000043  0.002161  3  0.000619  0.000129\n",
       "CT45A3           0.000373  0.000073  0.006187  3  0.000790 -0.000043\n",
       "GJA8             0.000372  0.000078  0.007125  3  0.000818 -0.000074\n",
       "SLC39A12         0.000370  0.000206  0.044851  3  0.001552 -0.000811\n",
       "OR4F17           0.000368  0.000142  0.023006  3  0.001180 -0.000444\n",
       "TBATA            0.000363  0.000036  0.001606  3  0.000568  0.000158\n",
       "CDH19            0.000360  0.000382  0.122104  3  0.002552 -0.001831\n",
       "TCEAL6           0.000360  0.000167  0.032492  3  0.001317 -0.000598"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The 40 features with the highest permutation importance, largest first.\n",
    "# Sorting descending directly (rather than reversing an ascending sort) keeps any\n",
    "# NaN importances at the bottom instead of letting them lead the list.\n",
    "ag_feature_importance.sort_values(\"importance\", ascending=False).head(40)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a986a9a5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}