{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression\n",
    "In this example we are building a model that predicts house prices in Boston  \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dan0nchik/SAP-HANA-AutoML/blob/dev/docs/source/regression.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Install modules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install Cython\n",
    "# !pip3 install hana_automl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    from hana_automl.automl import AutoML\n",
    "    import pandas as pd\n",
    "    from hana_ml.dataframe import ConnectionContext\n",
    "    from hana_automl.storage import Storage\n",
    "except ImportError:\n",
    "    sys.exit(\"\"\"You need to install hana_automl and pandas. Uncomment cell above\"\"\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's get used to the dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>crim</th>\n",
       "      <th>zn</th>\n",
       "      <th>indus</th>\n",
       "      <th>chas</th>\n",
       "      <th>nox</th>\n",
       "      <th>rm</th>\n",
       "      <th>age</th>\n",
       "      <th>dis</th>\n",
       "      <th>rad</th>\n",
       "      <th>tax</th>\n",
       "      <th>ptratio</th>\n",
       "      <th>black</th>\n",
       "      <th>lstat</th>\n",
       "      <th>medv</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>0.15876</td>\n",
       "      <td>0.0</td>\n",
       "      <td>10.81</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.413</td>\n",
       "      <td>5.961</td>\n",
       "      <td>17.5</td>\n",
       "      <td>5.2873</td>\n",
       "      <td>4.0</td>\n",
       "      <td>305.0</td>\n",
       "      <td>19.2</td>\n",
       "      <td>376.94</td>\n",
       "      <td>9.88</td>\n",
       "      <td>21.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>0.10328</td>\n",
       "      <td>25.0</td>\n",
       "      <td>5.13</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.453</td>\n",
       "      <td>5.927</td>\n",
       "      <td>47.2</td>\n",
       "      <td>6.9320</td>\n",
       "      <td>8.0</td>\n",
       "      <td>284.0</td>\n",
       "      <td>19.7</td>\n",
       "      <td>396.90</td>\n",
       "      <td>9.22</td>\n",
       "      <td>19.6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>0.34940</td>\n",
       "      <td>0.0</td>\n",
       "      <td>9.90</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.544</td>\n",
       "      <td>5.972</td>\n",
       "      <td>76.7</td>\n",
       "      <td>3.1025</td>\n",
       "      <td>4.0</td>\n",
       "      <td>304.0</td>\n",
       "      <td>18.4</td>\n",
       "      <td>396.24</td>\n",
       "      <td>9.97</td>\n",
       "      <td>20.3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2.73397</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19.58</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.871</td>\n",
       "      <td>5.597</td>\n",
       "      <td>94.9</td>\n",
       "      <td>1.5257</td>\n",
       "      <td>5.0</td>\n",
       "      <td>403.0</td>\n",
       "      <td>14.7</td>\n",
       "      <td>351.85</td>\n",
       "      <td>21.45</td>\n",
       "      <td>15.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>0.04337</td>\n",
       "      <td>21.0</td>\n",
       "      <td>5.64</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.439</td>\n",
       "      <td>6.115</td>\n",
       "      <td>63.0</td>\n",
       "      <td>6.8147</td>\n",
       "      <td>4.0</td>\n",
       "      <td>243.0</td>\n",
       "      <td>16.8</td>\n",
       "      <td>393.97</td>\n",
       "      <td>9.43</td>\n",
       "      <td>20.5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   ID     crim    zn  indus  chas    nox     rm   age     dis  rad    tax  \\\n",
       "0   0  0.15876   0.0  10.81   0.0  0.413  5.961  17.5  5.2873  4.0  305.0   \n",
       "1   1  0.10328  25.0   5.13   0.0  0.453  5.927  47.2  6.9320  8.0  284.0   \n",
       "2   2  0.34940   0.0   9.90   0.0  0.544  5.972  76.7  3.1025  4.0  304.0   \n",
       "3   3  2.73397   0.0  19.58   0.0  0.871  5.597  94.9  1.5257  5.0  403.0   \n",
       "4   4  0.04337  21.0   5.64   0.0  0.439  6.115  63.0  6.8147  4.0  243.0   \n",
       "\n",
       "   ptratio   black  lstat  medv  \n",
       "0     19.2  376.94   9.88  21.7  \n",
       "1     19.7  396.90   9.22  19.6  \n",
       "2     18.4  396.24   9.97  20.3  \n",
       "3     14.7  351.85  21.45  15.4  \n",
       "4     16.8  393.97   9.43  20.5  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "test_df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/boston_test_data.csv')\n",
    "df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/boston_data.csv')\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Pass credentials to the database."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace with your credentials\n",
    "cc = ConnectionContext(address='address', \n",
    "                       port=39015, # default for most databases. Details here: https://help.sap.com/viewer/0eec0d68141541d1b07893a39944924e/2.0.03/en-US/b250e7fef8614ea0a0973d58eb73bda8.html\n",
    "                       user='user',\n",
    "                       password='password')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "automl = AutoML(connection_context=cc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "automl.fit(\n",
    "    df=df,\n",
    "    task=None, # library will try to determine task\n",
    "    steps=10,\n",
    "    target='medv',\n",
    "    table_name='REGRESSION', # optional\n",
    "    id_column='ID', # pass None if no ID column in dataset\n",
    "    verbose=1\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Save model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "storage = Storage(connection_context=cc, schema='DEVELOPER')\n",
    "automl.model.name = \"boston\" # don't forget to specify the name\n",
    "storage.save_model(automl=automl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>NAME</th>\n",
       "      <th>VERSION</th>\n",
       "      <th>LIBRARY</th>\n",
       "      <th>CLASS</th>\n",
       "      <th>JSON</th>\n",
       "      <th>TIMESTAMP</th>\n",
       "      <th>MODEL_STORAGE_VER</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>boston</td>\n",
       "      <td>1</td>\n",
       "      <td>PAL</td>\n",
       "      <td>hana_ml.algorithms.pal.trees.HybridGradientBoo...</td>\n",
       "      <td>{\"model_attributes\": {\"n_estimators\": 541, \"ra...</td>\n",
       "      <td>2021-05-29 17:19:09</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     NAME  VERSION LIBRARY                                              CLASS  \\\n",
       "0  boston        1     PAL  hana_ml.algorithms.pal.trees.HybridGradientBoo...   \n",
       "\n",
       "                                                JSON           TIMESTAMP  \\\n",
       "0  {\"model_attributes\": {\"n_estimators\": 541, \"ra... 2021-05-29 17:19:09   \n",
       "\n",
       "   MODEL_STORAGE_VER  \n",
       "0                  1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "storage.list_models()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load model and predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Creating table with name: AUTOML6b526c36-6c5e-459b-b5ec-92cf44f78b15\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1/1 [00:00<00:00,  6.94it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preprocessor settings: median\n",
      "Prediction results (first 20 rows): \n",
      "     ID               SCORE CONFIDENCE\n",
      "0    1   35.29534448792848       None\n",
      "1    2  22.489733948983936       None\n",
      "2    3  13.713093051897628       None\n",
      "3    4  24.172307331613972       None\n",
      "4    5  20.248966896747383       None\n",
      "5    6   23.39085017433158       None\n",
      "6    7  20.549891594531058       None\n",
      "7    8   16.66822914049735       None\n",
      "8    9  18.383664287056316       None\n",
      "9   10  46.003539073294455       None\n",
      "10  11   40.11289274119828       None\n",
      "11  12   11.69618986227771       None\n",
      "12  13   12.88755355977079       None\n",
      "13  14  35.486779348478116       None\n",
      "14  15  20.358130558843374       None\n",
      "15  16   9.478611548228256       None\n",
      "16  17   20.84342879503455       None\n",
      "17  18  20.269377197301793       None\n",
      "18  19  20.410181244909623       None\n",
      "19  20  10.344656992114738       None\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>SCORE</th>\n",
       "      <th>CONFIDENCE</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>35.29534448792848</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>22.489733948983936</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>13.713093051897628</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>24.172307331613972</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>20.248966896747383</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>98</td>\n",
       "      <td>23.80490001240718</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>99</td>\n",
       "      <td>22.24186449851454</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>100</td>\n",
       "      <td>18.38892297314412</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>100</th>\n",
       "      <td>101</td>\n",
       "      <td>38.51662147490824</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>101</th>\n",
       "      <td>102</td>\n",
       "      <td>46.306857840422325</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>102 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      ID               SCORE CONFIDENCE\n",
       "0      1   35.29534448792848       None\n",
       "1      2  22.489733948983936       None\n",
       "2      3  13.713093051897628       None\n",
       "3      4  24.172307331613972       None\n",
       "4      5  20.248966896747383       None\n",
       "..   ...                 ...        ...\n",
       "97    98   23.80490001240718       None\n",
       "98    99   22.24186449851454       None\n",
       "99   100   18.38892297314412       None\n",
       "100  101   38.51662147490824       None\n",
       "101  102  46.306857840422325       None\n",
       "\n",
       "[102 rows x 3 columns]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_model = storage.load_model('boston', version=1)\n",
    "new_model.predict(df=test_df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Cleanup storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "storage.clean_up()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For more information, visit AutoML class and Storage class in documentation"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}