{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Classification\n", "In this example we are building a model that classifies iris flowers. " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dan0nchik/SAP-HANA-AutoML/blob/dev/docs/source/classification.ipynb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Uncomment to install modules" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# !pip3 install Cython\n", "# !pip3 install hana_automl" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "try:\n", " from hana_automl.automl import AutoML\n", " import pandas as pd\n", " from hana_ml.dataframe import ConnectionContext\n", " from hana_automl.storage import Storage\n", "except ImportError:\n", " sys.exit(\"\"\"You need to install hana_automl and pandas. Uncomment cell above\"\"\")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDsepal_lengthsepal_widthpetal_lengthpetal_widthspecies
0304.83.11.60.2setosa
1315.43.41.50.4setosa
2325.24.11.50.1setosa
3335.54.21.40.2setosa
4344.93.11.50.1setosa
\n", "
" ], "text/plain": [ " ID sepal_length sepal_width petal_length petal_width species\n", "0 30 4.8 3.1 1.6 0.2 setosa\n", "1 31 5.4 3.4 1.5 0.4 setosa\n", "2 32 5.2 4.1 1.5 0.1 setosa\n", "3 33 5.5 4.2 1.4 0.2 setosa\n", "4 34 4.9 3.1 1.5 0.1 setosa" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/test_iris.csv', index_col='Unnamed: 0')\n", "df = pd.read_csv('https://raw.githubusercontent.com/dan0nchik/SAP-HANA-AutoML/dev/docs/source/datasets/iris.csv', index_col='Unnamed: 0')\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Pass credentials to the database." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# Replace with your credentials\n", "cc = ConnectionContext(address='address', \n", " port=39015, # default for most databases. Details here: https://help.sap.com/viewer/0eec0d68141541d1b07893a39944924e/2.0.03/en-US/b250e7fef8614ea0a0973d58eb73bda8.html\n", " user='user',\n", " password='password')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "automl = AutoML(connection_context=cc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "automl.fit(\n", " df=df,\n", " task='cls', # if task = None, we'll determine it for you\n", " steps=10,\n", " target='species',\n", " table_name='CLASSIFICATION', # optional\n", " categorical_features=['species'],\n", " id_column='ID', # optional\n", " verbose=False\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Save model" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NAMEVERSIONLIBRARYCLASSJSONTIMESTAMPMODEL_STORAGE_VER
0iris1PALhana_ml.algorithms.pal.neural_network.MLPClass...{\"model_attributes\": {\"activation\": \"sin_asymm...2021-05-29 17:33:151
\n", "
" ], "text/plain": [ " NAME VERSION LIBRARY CLASS \\\n", "0 iris 1 PAL hana_ml.algorithms.pal.neural_network.MLPClass... \n", "\n", " JSON TIMESTAMP \\\n", "0 {\"model_attributes\": {\"activation\": \"sin_asymm... 2021-05-29 17:33:15 \n", "\n", " MODEL_STORAGE_VER \n", "0 1 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "storage = Storage(connection_context=cc, schema='DEVELOPER')\n", "automl.model.name = \"iris\" # don't forget to specify the name\n", "storage.save_model(automl=automl)\n", "storage.list_models()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load model and predict" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Creating table with name: AUTOML2af7880c-467f-437c-b3be-b1c519a7678e\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1/1 [00:00<00:00, 6.17it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Preprocessor settings: mean\n", "Prediction results (first 20 rows): \n", " ID TARGET VALUE\n", "0 0 setosa 0.577740\n", "1 1 setosa 0.580586\n", "2 2 setosa 0.580305\n", "3 3 setosa 0.580340\n", "4 4 setosa 0.576891\n", "5 5 setosa 0.578495\n", "6 6 setosa 0.580451\n", "7 7 setosa 0.579999\n", "8 8 setosa 0.579468\n", "9 9 setosa 0.580387\n", "10 10 setosa 0.574936\n", "11 11 setosa 0.580524\n", "12 12 setosa 0.580249\n", "13 13 setosa 0.579009\n", "14 14 setosa 0.542972\n", "15 15 setosa 0.562199\n", "16 16 setosa 0.574057\n", "17 17 setosa 0.579634\n", "18 18 setosa 0.577878\n", "19 19 setosa 0.577438\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDTARGETVALUE
00setosa0.577740
11setosa0.580586
22setosa0.580305
33setosa0.580340
44setosa0.576891
55setosa0.578495
66setosa0.580451
77setosa0.579999
88setosa0.579468
99setosa0.580387
1010setosa0.574936
1111setosa0.580524
1212setosa0.580249
1313setosa0.579009
1414setosa0.542972
1515setosa0.562199
1616setosa0.574057
1717setosa0.579634
1818setosa0.577878
1919setosa0.577438
2020setosa0.580425
2121setosa0.579509
2222setosa0.569415
2323setosa0.570043
2424setosa0.578701
2525setosa0.579957
2626setosa0.578694
2727setosa0.578688
2828setosa0.578416
2929setosa0.580325
\n", "
" ], "text/plain": [ " ID TARGET VALUE\n", "0 0 setosa 0.577740\n", "1 1 setosa 0.580586\n", "2 2 setosa 0.580305\n", "3 3 setosa 0.580340\n", "4 4 setosa 0.576891\n", "5 5 setosa 0.578495\n", "6 6 setosa 0.580451\n", "7 7 setosa 0.579999\n", "8 8 setosa 0.579468\n", "9 9 setosa 0.580387\n", "10 10 setosa 0.574936\n", "11 11 setosa 0.580524\n", "12 12 setosa 0.580249\n", "13 13 setosa 0.579009\n", "14 14 setosa 0.542972\n", "15 15 setosa 0.562199\n", "16 16 setosa 0.574057\n", "17 17 setosa 0.579634\n", "18 18 setosa 0.577878\n", "19 19 setosa 0.577438\n", "20 20 setosa 0.580425\n", "21 21 setosa 0.579509\n", "22 22 setosa 0.569415\n", "23 23 setosa 0.570043\n", "24 24 setosa 0.578701\n", "25 25 setosa 0.579957\n", "26 26 setosa 0.578694\n", "27 27 setosa 0.578688\n", "28 28 setosa 0.578416\n", "29 29 setosa 0.580325" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_model = storage.load_model('iris')\n", "new_model.predict(df=test_df, id_column='ID')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Cleanup storage" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "storage.clean_up()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For more information, visit AutoML class and Storage class in documentation" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 2 }