{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Classification\n",
"In this example we are building a model that classifies iris flowers. "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dan0nchik/SAP-HANA-AutoML/blob/dev/docs/source/classification.ipynb)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Uncomment to install modules"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# !pip3 install Cython\n",
"# !pip3 install hana_automl"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" from hana_automl.automl import AutoML\n",
" import pandas as pd\n",
" from hana_ml.dataframe import ConnectionContext\n",
" from hana_automl.storage import Storage\n",
"except ImportError:\n",
" sys.exit(\"\"\"You need to install hana_automl and pandas. Uncomment cell above\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" sepal_length | \n",
" sepal_width | \n",
" petal_length | \n",
" petal_width | \n",
" species | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 4.8 | \n",
" 3.1 | \n",
" 1.6 | \n",
" 0.2 | \n",
" setosa | \n",
"
\n",
" \n",
" 1 | \n",
" 31 | \n",
" 5.4 | \n",
" 3.4 | \n",
" 1.5 | \n",
" 0.4 | \n",
" setosa | \n",
"
\n",
" \n",
" 2 | \n",
" 32 | \n",
" 5.2 | \n",
" 4.1 | \n",
" 1.5 | \n",
" 0.1 | \n",
" setosa | \n",
"
\n",
" \n",
" 3 | \n",
" 33 | \n",
" 5.5 | \n",
" 4.2 | \n",
" 1.4 | \n",
" 0.2 | \n",
" setosa | \n",
"
\n",
" \n",
" 4 | \n",
" 34 | \n",
" 4.9 | \n",
" 3.1 | \n",
" 1.5 | \n",
" 0.1 | \n",
" setosa | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID sepal_length sepal_width petal_length petal_width species\n",
"0 30 4.8 3.1 1.6 0.2 setosa\n",
"1 31 5.4 3.4 1.5 0.4 setosa\n",
"2 32 5.2 4.1 1.5 0.1 setosa\n",
"3 33 5.5 4.2 1.4 0.2 setosa\n",
"4 34 4.9 3.1 1.5 0.1 setosa"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test_df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/test_iris.csv', index_col='Unnamed: 0')\n",
"df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/iris.csv', index_col='Unnamed: 0')\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Pass credentials to the database."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Replace with your credentials\n",
"cc = ConnectionContext(address='address', \n",
" port=39015, # default for most databases. Details here: https://help.sap.com/viewer/0eec0d68141541d1b07893a39944924e/2.0.03/en-US/b250e7fef8614ea0a0973d58eb73bda8.html\n",
" user='user',\n",
" password='password')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"automl = AutoML(connection_context=cc)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"automl.fit(\n",
" df=df,\n",
" task='cls', # if task = None, we'll determine it for you\n",
" steps=10,\n",
" target='species',\n",
" table_name='CLASSIFICATION', # optional\n",
" categorical_features=['species'],\n",
" id_column='ID', # optional\n",
" verbose=False\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Save model"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" NAME | \n",
" VERSION | \n",
" LIBRARY | \n",
" CLASS | \n",
" JSON | \n",
" TIMESTAMP | \n",
" MODEL_STORAGE_VER | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" iris | \n",
" 1 | \n",
" PAL | \n",
" hana_ml.algorithms.pal.neural_network.MLPClass... | \n",
" {\"model_attributes\": {\"activation\": \"sin_asymm... | \n",
" 2021-05-29 17:33:15 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" NAME VERSION LIBRARY CLASS \\\n",
"0 iris 1 PAL hana_ml.algorithms.pal.neural_network.MLPClass... \n",
"\n",
" JSON TIMESTAMP \\\n",
"0 {\"model_attributes\": {\"activation\": \"sin_asymm... 2021-05-29 17:33:15 \n",
"\n",
" MODEL_STORAGE_VER \n",
"0 1 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"storage = Storage(connection_context=cc, schema='DEVELOPER')\n",
"automl.model.name = \"iris\" # don't forget to specify the name\n",
"storage.save_model(automl=automl)\n",
"storage.list_models()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load model and predict"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Creating table with name: AUTOML2af7880c-467f-437c-b3be-b1c519a7678e\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 1/1 [00:00<00:00, 6.17it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Preprocessor settings: mean\n",
"Prediction results (first 20 rows): \n",
" ID TARGET VALUE\n",
"0 0 setosa 0.577740\n",
"1 1 setosa 0.580586\n",
"2 2 setosa 0.580305\n",
"3 3 setosa 0.580340\n",
"4 4 setosa 0.576891\n",
"5 5 setosa 0.578495\n",
"6 6 setosa 0.580451\n",
"7 7 setosa 0.579999\n",
"8 8 setosa 0.579468\n",
"9 9 setosa 0.580387\n",
"10 10 setosa 0.574936\n",
"11 11 setosa 0.580524\n",
"12 12 setosa 0.580249\n",
"13 13 setosa 0.579009\n",
"14 14 setosa 0.542972\n",
"15 15 setosa 0.562199\n",
"16 16 setosa 0.574057\n",
"17 17 setosa 0.579634\n",
"18 18 setosa 0.577878\n",
"19 19 setosa 0.577438\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" ID | \n",
" TARGET | \n",
" VALUE | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" setosa | \n",
" 0.577740 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" setosa | \n",
" 0.580586 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" setosa | \n",
" 0.580305 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" setosa | \n",
" 0.580340 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" setosa | \n",
" 0.576891 | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" setosa | \n",
" 0.578495 | \n",
"
\n",
" \n",
" 6 | \n",
" 6 | \n",
" setosa | \n",
" 0.580451 | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" setosa | \n",
" 0.579999 | \n",
"
\n",
" \n",
" 8 | \n",
" 8 | \n",
" setosa | \n",
" 0.579468 | \n",
"
\n",
" \n",
" 9 | \n",
" 9 | \n",
" setosa | \n",
" 0.580387 | \n",
"
\n",
" \n",
" 10 | \n",
" 10 | \n",
" setosa | \n",
" 0.574936 | \n",
"
\n",
" \n",
" 11 | \n",
" 11 | \n",
" setosa | \n",
" 0.580524 | \n",
"
\n",
" \n",
" 12 | \n",
" 12 | \n",
" setosa | \n",
" 0.580249 | \n",
"
\n",
" \n",
" 13 | \n",
" 13 | \n",
" setosa | \n",
" 0.579009 | \n",
"
\n",
" \n",
" 14 | \n",
" 14 | \n",
" setosa | \n",
" 0.542972 | \n",
"
\n",
" \n",
" 15 | \n",
" 15 | \n",
" setosa | \n",
" 0.562199 | \n",
"
\n",
" \n",
" 16 | \n",
" 16 | \n",
" setosa | \n",
" 0.574057 | \n",
"
\n",
" \n",
" 17 | \n",
" 17 | \n",
" setosa | \n",
" 0.579634 | \n",
"
\n",
" \n",
" 18 | \n",
" 18 | \n",
" setosa | \n",
" 0.577878 | \n",
"
\n",
" \n",
" 19 | \n",
" 19 | \n",
" setosa | \n",
" 0.577438 | \n",
"
\n",
" \n",
" 20 | \n",
" 20 | \n",
" setosa | \n",
" 0.580425 | \n",
"
\n",
" \n",
" 21 | \n",
" 21 | \n",
" setosa | \n",
" 0.579509 | \n",
"
\n",
" \n",
" 22 | \n",
" 22 | \n",
" setosa | \n",
" 0.569415 | \n",
"
\n",
" \n",
" 23 | \n",
" 23 | \n",
" setosa | \n",
" 0.570043 | \n",
"
\n",
" \n",
" 24 | \n",
" 24 | \n",
" setosa | \n",
" 0.578701 | \n",
"
\n",
" \n",
" 25 | \n",
" 25 | \n",
" setosa | \n",
" 0.579957 | \n",
"
\n",
" \n",
" 26 | \n",
" 26 | \n",
" setosa | \n",
" 0.578694 | \n",
"
\n",
" \n",
" 27 | \n",
" 27 | \n",
" setosa | \n",
" 0.578688 | \n",
"
\n",
" \n",
" 28 | \n",
" 28 | \n",
" setosa | \n",
" 0.578416 | \n",
"
\n",
" \n",
" 29 | \n",
" 29 | \n",
" setosa | \n",
" 0.580325 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" ID TARGET VALUE\n",
"0 0 setosa 0.577740\n",
"1 1 setosa 0.580586\n",
"2 2 setosa 0.580305\n",
"3 3 setosa 0.580340\n",
"4 4 setosa 0.576891\n",
"5 5 setosa 0.578495\n",
"6 6 setosa 0.580451\n",
"7 7 setosa 0.579999\n",
"8 8 setosa 0.579468\n",
"9 9 setosa 0.580387\n",
"10 10 setosa 0.574936\n",
"11 11 setosa 0.580524\n",
"12 12 setosa 0.580249\n",
"13 13 setosa 0.579009\n",
"14 14 setosa 0.542972\n",
"15 15 setosa 0.562199\n",
"16 16 setosa 0.574057\n",
"17 17 setosa 0.579634\n",
"18 18 setosa 0.577878\n",
"19 19 setosa 0.577438\n",
"20 20 setosa 0.580425\n",
"21 21 setosa 0.579509\n",
"22 22 setosa 0.569415\n",
"23 23 setosa 0.570043\n",
"24 24 setosa 0.578701\n",
"25 25 setosa 0.579957\n",
"26 26 setosa 0.578694\n",
"27 27 setosa 0.578688\n",
"28 28 setosa 0.578416\n",
"29 29 setosa 0.580325"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_model = storage.load_model('iris')\n",
"new_model.predict(df=test_df, id_column='ID')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Cleanup storage"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"storage.clean_up()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For more information, visit AutoML class and Storage class in documentation"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}