Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 23 additions & 12 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,32 +1,43 @@
# Azure Subscription Variables
WORKSPACE_NAME = ''
RESOURCE_GROUP = ''
SUBSCRIPTION_ID = ''
LOCATION = ''
TENANT_ID = ''
BASE_NAME = ''
SP_APP_ID = ''
SP_APP_SECRET = ''

# Mock build/release ID for local testing - update ReleaseID each "release"
BUILD_BUILDID = '001'
RELEASE_RELEASEID = '001'

# Azure ML Workspace Variables
EXPERIMENT_NAME = ''
SCRIPT_FOLDER = './'
BLOB_STORE_NAME = ''
# Remote VM Config
REMOTE_VM_NAME = ''
REMOTE_VM_USERNAME = ''
REMOTE_VM_PASSWORD = ''
REMOTE_VM_IP = ''

# AML Compute Cluster Config
AML_CLUSTER_NAME = ''
AML_CLUSTER_VM_SIZE = ''
AML_COMPUTE_CLUSTER_NAME = ''
AML_COMPUTE_CLUSTER_CPU_SKU = ''
AML_CLUSTER_MAX_NODES = ''
AML_CLUSTER_MIN_NODES = ''
AML_CLUSTER_PRIORITY = 'lowpriority'
# Training Config
MODEL_NAME = ''
MODEL_VERSION = ''
MODEL_NAME = 'sklearn_regression_model.pkl'
MODEL_VERSION = '1'
TRAIN_SCRIPT_PATH = 'training/train.py'
# AML Pipeline Config
TRAINING_PIPELINE_NAME = ''
PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml'
MODEL_PATH = ''
EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py'
REGISTER_SCRIPT_PATH = 'register/register_model.py'
SOURCES_DIR_TRAIN = 'code'

# These are not mandatory for the core workflow
# Remote VM Config
REMOTE_VM_NAME = ''
REMOTE_VM_USERNAME = ''
REMOTE_VM_PASSWORD = ''
REMOTE_VM_IP = ''
# Image config
IMAGE_NAME = ''
IMAGE_DESCRIPTION = ''
Expand Down
4 changes: 2 additions & 2 deletions .pipelines/azdo-ci-build-train.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ steps:
failOnStderr: 'false'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'
displayName: 'Train model using AML with Remote Compute'
displayName: 'Publish Azure Machine Learning Pipeline'
enabled: 'true'

- task: CopyFiles@2
Expand All @@ -32,7 +32,7 @@ steps:
SourceFolder: '$(Build.SourcesDirectory)'
TargetFolder: '$(Build.ArtifactStagingDirectory)'
Contents: |
ml_service/pipelines/?(run_train_pipeline.py|*.json)
ml_service/pipelines/?(run_train_pipeline.py|*.json)
code/scoring/**


Expand Down
62 changes: 25 additions & 37 deletions code/evaluate/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,53 +24,45 @@
POSSIBILITY OF SUCH DAMAGE.
"""
import os
import json
from azureml.core.model import Model
from azureml.core import Run
from azureml.core import Model, Run
import argparse


# Get workspace
# ws = Workspace.from_config()
run = Run.get_context()
exp = run.experiment
ws = run.experiment.workspace


parser = argparse.ArgumentParser("evaluate")
parser.add_argument(
"--config_suffix", type=str, help="Datetime suffix for json config files"
"--release_id",
type=str,
help="The ID of the release triggering this pipeline run",
)
parser.add_argument(
"--json_config",
"--model_name",
type=str,
help="Directory to write all the intermediate json configs",
help="Name of the Model",
default="sklearn_regression_model.pkl",
)
args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)
print("Argument 1: %s" % args.release_id)
print("Argument 2: %s" % args.model_name)
model_name = args.model_name
release_id = args.release_id

if not (args.json_config is None):
os.makedirs(args.json_config, exist_ok=True)
print("%s created" % args.json_config)
# Parameterize the metrics on which the models should be compared
# Add golden data set on which all the model performance can be evaluated

# Get the latest run_id
# with open("aml_config/run_id.json") as f:
# config = json.load(f)

train_run_id_json = "run_id_{}.json".format(args.config_suffix)
train_output_path = os.path.join(args.json_config, train_run_id_json)
with open(train_output_path) as f:
config = json.load(f)


new_model_run_id = config["run_id"] # args.train_run_id
experiment_name = config["experiment_name"]
# exp = Experiment(workspace=ws, name=experiment_name)

all_runs = exp.get_runs(
properties={"release_id": release_id, "run_type": "train"},
include_children=True
)
new_model_run = next(all_runs)
new_model_run_id = new_model_run.id
print(f'New Run found with Run ID of: {new_model_run_id}')

try:
# Get most recently registered model, we assume that
Expand Down Expand Up @@ -110,16 +102,12 @@
print("This is the first model to be trained, \
thus nothing to evaluate for now")

run_id = {}
run_id["run_id"] = ""

# Writing the run id to /aml_config/run_id.json
if promote_new_model:
run_id["run_id"] = new_model_run_id
# register new model
# new_model_run.register_model(model_name='',model_path='outputs/sklearn_regression_model.pkl')

run_id["experiment_name"] = experiment_name
filename = "run_id_{}.json".format(args.config_suffix)
output_path = os.path.join(args.json_config, filename)
with open(output_path, "w") as outfile:
json.dump(run_id, outfile)
model_path = os.path.join('outputs', model_name)
new_model_run.register_model(
model_name=model_name,
model_path=model_path,
properties={"release_id": release_id})
print("Registered new model!")
27 changes: 8 additions & 19 deletions code/training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,13 @@
from sklearn.model_selection import train_test_split
from sklearn.externals import joblib
import numpy as np
import json


parser = argparse.ArgumentParser("train")
parser.add_argument(
"--config_suffix", type=str, help="Datetime suffix for json config files"
)
parser.add_argument(
"--json_config",
"--release_id",
type=str,
help="Directory to write all the intermediate json configs",
help="The ID of the release triggering this pipeline run",
)
parser.add_argument(
"--model_name",
Expand All @@ -53,14 +49,11 @@

args = parser.parse_args()

print("Argument 1: %s" % args.config_suffix)
print("Argument 2: %s" % args.json_config)
print("Argument 1: %s" % args.release_id)
print("Argument 2: %s" % args.model_name)

model_name = args.model_name

if not (args.json_config is None):
os.makedirs(args.json_config, exist_ok=True)
print("%s created" % args.json_config)
release_id = args.release_id

run = Run.get_context()
exp = run.experiment
Expand Down Expand Up @@ -102,12 +95,8 @@
print("Following files are uploaded ")
print(run.get_file_names())

run_id = {}
run_id["run_id"] = run.id
run_id["experiment_name"] = run.experiment.name
filename = "run_id_{}.json".format(args.config_suffix)
output_path = os.path.join(args.json_config, filename)
with open(output_path, "w") as outfile:
json.dump(run_id, outfile)
# Add properties to identify this specific training run
run.add_properties({"release_id": release_id, "run_type": "train"})
print(f"added properties: {run.properties}")

run.complete()
4 changes: 2 additions & 2 deletions docs/code_description.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
### Code

- `code/training/train.py` : a training step of an ML training pipeline.
- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline.
- `code/evaluate/register_model.py` : registers a new trained model if evaluation shows the new model is more performant than the previous one.
- `code/evaluate/evaluate_model.py` : an evaluating step of an ML training pipeline which registers a new trained model if evaluation shows the new model is more performant than the previous one.
- `code/register/register_model.py` : (LEGACY) registers a new trained model if evaluation shows the new model is more performant than the previous one.

### Scoring
- `code/scoring/score.py` : a scoring script that is packaged into a Docker image together with a model when it is deployed to the QA/Prod environment.
Expand Down
10 changes: 7 additions & 3 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ The variable group should contain the following variables:
| SUBSCRIPTION_ID | |
| TENANT_ID | |
| TRAIN_SCRIPT_PATH | training/train.py |
| TRAINING_PIPELINE_NAME | training-pipeline |

Mark the **SP_APP_SECRET** variable as secret.

Expand Down Expand Up @@ -88,6 +89,7 @@ Check out created resources in the [Azure Portal](portal.azure.com):

Alternatively, you can use the [cleaning pipeline](../environment_setup/iac-remove-environment.yml), which removes the resources created for this project, or you can simply delete the resource group in the [Azure Portal](https://portal.azure.com).

Once this resource group is created, be sure that the Service Principal you have created has access to this resource group.

### 6. Set up Build Pipeline

Expand Down Expand Up @@ -127,9 +129,11 @@ Rename the default "Stage 1" to **Invoke Training Pipeline** and make sure that
Add a **Command Line Script** step, rename it to **Run Training Pipeline**, and give it the following script:

```bash
docker run -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
-w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
-e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
docker run -v $(System.DefaultWorkingDirectory)/_ci-build/mlops-pipelines/ml_service/pipelines:/pipelines \
-w=/pipelines -e MODEL_NAME=$MODEL_NAME -e EXPERIMENT_NAME=$EXPERIMENT_NAME \
-e TENANT_ID=$TENANT_ID -e SP_APP_ID=$SP_APP_ID -e SP_APP_SECRET=$(SP_APP_SECRET) \
-e SUBSCRIPTION_ID=$SUBSCRIPTION_ID -e RELEASE_RELEASEID=$RELEASE_RELEASEID \
-e BUILD_BUILDID=$BUILD_BUILDID -e BASE_NAME=$BASE_NAME \
mcr.microsoft.com/mlops/python:latest python run_train_pipeline.py
```

Expand Down
3 changes: 2 additions & 1 deletion environment_setup/arm-templates/cloud-environment.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@
"southeastasia",
"westcentralus",
"westeurope",
"westus2"
"westus2",
"centralus"
],
"metadata": {
"description": "Specifies the location for all resources."
Expand Down
2 changes: 1 addition & 1 deletion environment_setup/iac-create-environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ steps:
location: $(LOCATION)
templateLocation: 'Linked artifact'
csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json'
overrideParameters: '-baseName $(BASE_NAME)'
overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)'
deploymentMode: 'Incremental'
displayName: 'Deploy MLOps resources to Azure'

Expand Down
1 change: 0 additions & 1 deletion environment_setup/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
pytest==4.3.0
requests>=2.22
azureml>=0.2
azureml-sdk>=1.0
python-dotenv>=0.10.3
flake8
Expand Down
Empty file added ml_service/__init__.py
Empty file.
Loading