Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bootstrap/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ To use this existing project structure and scripts for your new ML project, you

Bootstrapping will prepare a directory structure for your project which includes:

* renaming files and folders from the base project name `diabetes` to your project name
* renaming files and folders from the base project name `diabetes_regression` to your project name
* fixing imports and absolute paths based on your project name
* deleting and cleaning up some directories

Expand Down
2 changes: 1 addition & 1 deletion diabetes_regression/training/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def test_train_model():
reg_model = train_model(data, {"alpha": 1.2})

preds = reg_model.predict([[1], [2]])
np.testing.assert_equal(preds, [9.93939393939394, 9.03030303030303])
np.testing.assert_almost_equal(preds, [9.93939393939394, 9.03030303030303])


def test_get_model_metrics():
Expand Down
10 changes: 6 additions & 4 deletions docs/custom_model.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

This document provides steps to follow when using this repository as a template to train models and deploy the models with real-time inference in Azure ML with your own scripts and data.

1. Follow the MLOpsPython [Getting Started](https://github.com/microsoft/MLOpsPython/blob/master/docs/getting_started.md) guide
1. Follow the MLOpsPython [bootstrap instructions](https://github.com/microsoft/MLOpsPython/blob/master/bootstrap/README.md) to create your project starting point
1. Follow the MLOpsPython [Getting Started](getting_started.md) guide
1. Follow the MLOpsPython [bootstrap instructions](../bootstrap/README.md) to create your project starting point
1. Configure training data
1. [If necessary] Convert your ML experimental code into production-ready code
1. Replace the training code
Expand All @@ -13,11 +13,13 @@ This document provides steps to follow when using this repository as a template

## Follow the Getting Started guide

Follow the [Getting Started](https://github.com/microsoft/MLOpsPython/blob/master/docs/getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython.
Follow the [Getting Started](getting_started.md) guide to set up the infrastructure and pipelines to execute MLOpsPython.

Take a look at the [Repo Details](code_description.md) document for a description of the structure of this repository.

## Follow the Bootstrap instructions

The [Bootstrap from MLOpsPython repository](https://github.com/microsoft/MLOpsPython/blob/master/bootstrap/README.md) guide will help you to quickly prepare the repository for your project.
The [Bootstrap from MLOpsPython repository](../bootstrap/README.md) guide will help you to quickly prepare the repository for your project.

**Note:** Since the bootstrap script will rename the `diabetes_regression` folder to the project name of your choice, we'll refer to your project as `[project name]` when paths are involved.

Expand Down
162 changes: 89 additions & 73 deletions experimentation/Diabetes Ridge Regression Training.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.datasets import load_diabetes\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.model_selection import train_test_split\n",
"import joblib"
"import joblib\n",
"import pandas as pd"
]
},
{
Expand All @@ -36,16 +37,21 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"X, y = load_diabetes(return_X_y=True)"
"sample_data = load_diabetes()\n",
"\n",
"df = pd.DataFrame(\n",
" data=sample_data.data,\n",
" columns=sample_data.feature_names)\n",
"df['Y'] = sample_data.target"
]
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -57,29 +63,12 @@
}
],
"source": [
"print(X.shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(442,)\n"
]
}
],
"source": [
"print(y.shape)"
"print(df.shape)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -103,16 +92,17 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>bmi</th>\n",
" <th>bp</th>\n",
" <th>s1</th>\n",
" <th>s2</th>\n",
" <th>s3</th>\n",
" <th>s4</th>\n",
" <th>s5</th>\n",
" <th>s6</th>\n",
" <th>Y</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
Expand All @@ -128,19 +118,21 @@
" <td>4.420000e+02</td>\n",
" <td>4.420000e+02</td>\n",
" <td>4.420000e+02</td>\n",
" <td>442.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>mean</td>\n",
" <td>-3.639623e-16</td>\n",
" <td>1.309912e-16</td>\n",
" <td>-8.013951e-16</td>\n",
" <td>1.289818e-16</td>\n",
" <td>-9.042540e-17</td>\n",
" <td>1.301121e-16</td>\n",
" <td>-4.563971e-16</td>\n",
" <td>3.863174e-16</td>\n",
" <td>-3.848103e-16</td>\n",
" <td>-3.398488e-16</td>\n",
" <td>-3.634285e-16</td>\n",
" <td>1.308343e-16</td>\n",
" <td>-8.045349e-16</td>\n",
" <td>1.281655e-16</td>\n",
" <td>-8.835316e-17</td>\n",
" <td>1.327024e-16</td>\n",
" <td>-4.574646e-16</td>\n",
" <td>3.777301e-16</td>\n",
" <td>-3.830854e-16</td>\n",
" <td>-3.412882e-16</td>\n",
" <td>152.133484</td>\n",
" </tr>\n",
" <tr>\n",
" <td>std</td>\n",
Expand All @@ -154,6 +146,7 @@
" <td>4.761905e-02</td>\n",
" <td>4.761905e-02</td>\n",
" <td>4.761905e-02</td>\n",
" <td>77.093005</td>\n",
" </tr>\n",
" <tr>\n",
" <td>min</td>\n",
Expand All @@ -167,6 +160,7 @@
" <td>-7.639450e-02</td>\n",
" <td>-1.260974e-01</td>\n",
" <td>-1.377672e-01</td>\n",
" <td>25.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>25%</td>\n",
Expand All @@ -180,6 +174,7 @@
" <td>-3.949338e-02</td>\n",
" <td>-3.324879e-02</td>\n",
" <td>-3.317903e-02</td>\n",
" <td>87.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>50%</td>\n",
Expand All @@ -193,6 +188,7 @@
" <td>-2.592262e-03</td>\n",
" <td>-1.947634e-03</td>\n",
" <td>-1.077698e-03</td>\n",
" <td>140.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>75%</td>\n",
Expand All @@ -206,6 +202,7 @@
" <td>3.430886e-02</td>\n",
" <td>3.243323e-02</td>\n",
" <td>2.791705e-02</td>\n",
" <td>211.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <td>max</td>\n",
Expand All @@ -219,42 +216,52 @@
" <td>1.852344e-01</td>\n",
" <td>1.335990e-01</td>\n",
" <td>1.356118e-01</td>\n",
" <td>346.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 \\\n",
" age sex bmi bp s1 \\\n",
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
"mean -3.639623e-16 1.309912e-16 -8.013951e-16 1.289818e-16 -9.042540e-17 \n",
"mean -3.634285e-16 1.308343e-16 -8.045349e-16 1.281655e-16 -8.835316e-17 \n",
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
"min -1.072256e-01 -4.464164e-02 -9.027530e-02 -1.123996e-01 -1.267807e-01 \n",
"25% -3.729927e-02 -4.464164e-02 -3.422907e-02 -3.665645e-02 -3.424784e-02 \n",
"50% 5.383060e-03 -4.464164e-02 -7.283766e-03 -5.670611e-03 -4.320866e-03 \n",
"75% 3.807591e-02 5.068012e-02 3.124802e-02 3.564384e-02 2.835801e-02 \n",
"max 1.107267e-01 5.068012e-02 1.705552e-01 1.320442e-01 1.539137e-01 \n",
"\n",
" 5 6 7 8 9 \n",
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
"mean 1.301121e-16 -4.563971e-16 3.863174e-16 -3.848103e-16 -3.398488e-16 \n",
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
"min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n",
"25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n",
"50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n",
"75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n",
"max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 "
" s2 s3 s4 s5 s6 \\\n",
"count 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 4.420000e+02 \n",
"mean 1.327024e-16 -4.574646e-16 3.777301e-16 -3.830854e-16 -3.412882e-16 \n",
"std 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 4.761905e-02 \n",
"min -1.156131e-01 -1.023071e-01 -7.639450e-02 -1.260974e-01 -1.377672e-01 \n",
"25% -3.035840e-02 -3.511716e-02 -3.949338e-02 -3.324879e-02 -3.317903e-02 \n",
"50% -3.819065e-03 -6.584468e-03 -2.592262e-03 -1.947634e-03 -1.077698e-03 \n",
"75% 2.984439e-02 2.931150e-02 3.430886e-02 3.243323e-02 2.791705e-02 \n",
"max 1.987880e-01 1.811791e-01 1.852344e-01 1.335990e-01 1.356118e-01 \n",
"\n",
" Y \n",
"count 442.000000 \n",
"mean 152.133484 \n",
"std 77.093005 \n",
"min 25.000000 \n",
"25% 87.000000 \n",
"50% 140.500000 \n",
"75% 211.500000 \n",
"max 346.000000 "
]
},
"execution_count": 8,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"features = pd.DataFrame(X)\n",
"features.describe()"
"# All data in a single dataframe\n",
"df.describe()"
]
},
{
Expand All @@ -266,11 +273,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n",
"X = df.drop('Y', axis=1).values\n",
"y = df['Y'].values\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=0)\n",
"data = {\"train\": {\"X\": X_train, \"y\": y_train},\n",
" \"test\": {\"X\": X_test, \"y\": y_test}}"
]
Expand All @@ -284,7 +295,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand All @@ -294,16 +305,19 @@
" normalize=False, random_state=None, solver='auto', tol=0.001)"
]
},
"execution_count": 4,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"alpha = 0.5\n",
"# experiment parameters\n",
"args = {\n",
" \"alpha\": 0.5\n",
"}\n",
"\n",
"reg = Ridge(alpha=alpha)\n",
"reg.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
"reg_model = Ridge(**args)\n",
"reg_model.fit(data[\"train\"][\"X\"], data[\"train\"][\"y\"])"
]
},
{
Expand All @@ -315,20 +329,22 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"mse: 3298.9096058070622\n"
"{'mse': 3298.9096058070622}\n"
]
}
],
"source": [
"preds = reg.predict(data[\"test\"][\"X\"])\n",
"print(\"mse: \", mean_squared_error(preds, y_test))"
"preds = reg_model.predict(data[\"test\"][\"X\"])\n",
"mse = mean_squared_error(preds, y_test)\n",
"metrics = {\"mse\": mse}\n",
"print(metrics)"
]
},
{
Expand Down Expand Up @@ -363,9 +379,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python (storedna)",
"display_name": "Python 3",
"language": "python",
"name": "storedna"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -377,7 +393,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.7.4"
}
},
"nbformat": 4,
Expand Down