Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
f69a2ca
Merge pull request #2 from microsoft/master
algattik Nov 28, 2019
7b370c1
Merge remote-tracking branch 'upstream/master'
algattik Jan 19, 2020
3ab3230
Merge branch 'master' of https://github.com/microsoft/MLOpsPython
algattik Jan 29, 2020
c840623
.
algattik Jan 29, 2020
c2b953f
.
algattik Jan 30, 2020
bceeba6
Update code_test.py
algattik Jan 30, 2020
897b7d4
.
algattik Jan 30, 2020
e304fd2
Update Dockerfile
algattik Jan 30, 2020
41c2499
Do not use conda-merge
algattik Jan 31, 2020
963b36a
Merge remote-tracking branch 'upstream/master' into algattik/conda-envs
algattik Jan 31, 2020
921b65f
PR review fixes
algattik Jan 31, 2020
2956ada
Merge remote-tracking branch 'upstream/master'
algattik Jan 31, 2020
b05c3b7
Update Dockerfile
algattik Jan 31, 2020
dd04d4b
Merge branch 'algattik/conda-envs' into algattik/conda-envs-aml-env
algattik Jan 31, 2020
e312ae6
.
algattik Jan 31, 2020
7124f5c
Merge branch 'master' into algattik/conda-envs
algattik Jan 31, 2020
23d48ce
Merge branch 'algattik/conda-envs' into algattik/conda-envs-aml-env
algattik Jan 31, 2020
4b9323f
Update deploy_web_service.py
algattik Jan 31, 2020
a12bcb2
Update deploy_web_service.py
algattik Jan 31, 2020
732761d
Merge remote-tracking branch 'upstream/master' into algattik/conda-en…
algattik Jan 31, 2020
1cc78b7
Merge remote-tracking branch 'upstream/master' into algattik/conda-envs
algattik Jan 31, 2020
5f81ea1
PR review fixes
algattik Jan 31, 2020
3b6974b
PR review fixes
algattik Jan 31, 2020
de72bde
PR review fixes
algattik Jan 31, 2020
568bdee
Update training_dependencies.yml
algattik Jan 31, 2020
b719003
Merge branch 'algattik/conda-envs' into algattik/conda-envs-aml-env
algattik Jan 31, 2020
1ddc19e
Linting fixes
algattik Jan 31, 2020
08509c8
Merge remote-tracking branch 'upstream/master' into algattik/conda-en…
algattik Jan 31, 2020
2fa64b8
Fixed merge
algattik Jan 31, 2020
a0052dd
Update code_test.py
algattik Jan 31, 2020
2e1192c
Merge branch 'master' of https://github.com/algattik/MLOpsPython
algattik Feb 1, 2020
f0a5900
Merge branch 'master' into algattik/conda-envs-aml-env
algattik Feb 1, 2020
a80e64a
Simplified environment management, restored deploy task
algattik Feb 2, 2020
2214ad0
Simplified environment management, restored deploy task
algattik Feb 2, 2020
15c7cad
Merge branch 'master' into algattik/conda-envs-aml-env
algattik Feb 3, 2020
d4a589a
Fixed doc for new file path
algattik Feb 3, 2020
fe35f83
Added comments
algattik Feb 3, 2020
85238c3
Merge branch 'master' into algattik/conda-envs-aml-env
sudivate Feb 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions diabetes_regression/azureml_environment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"name": "diabetes_regression_sklearn",
"version": null,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is this file used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In Environment.load_from_directory(). Added comment to script for clarity fe35f83

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

azureml_environment.json is not pointing to any conda specification file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is the file as generated by az ml environment scaffold.

I assume your concern is about the null values in this section of the file:

    "python": {
        "userManagedDependencies": false,
        "interpreterPath": "python",
        "condaDependenciesFile": null,
        "baseCondaEnvironment": null
    },

If you look at the AML SDK source code, load_from_directory() actually fills conda_dependencies from a file named conda_dependencies.yml if one exists in the passed directory:

    def load_from_directory(path):
        """Load an environment definition from the files in a directory.

        The directory must contain the environment definition file
        (``_DEFINITION_FILE_NAME``). When a Conda dependencies file
        (``_CONDA_DEPENDENCIES_FILE_NAME``) or a base Dockerfile
        (``_BASE_DOCKERFILE_FILE_NAME``) is also present in the same directory,
        its content is loaded into the environment and overrides the
        corresponding value read from the definition file.

        :param path: Path to the source directory.
        :type path: str
        :return: The environment object built from the files in the directory.
        :raises FileNotFoundError: If the definition file does not exist in
            the directory.
        """
        definition_path = os.path.join(path, _DEFINITION_FILE_NAME)
        if not os.path.isfile(definition_path):
            raise FileNotFoundError(definition_path)

        with open(definition_path, "r") as definition:
            environment_dict = json.load(definition)
        env = Environment._deserialize_and_add_to_object(environment_dict)

        # The Conda file is optional and is looked up by its fixed file name
        # in the directory, not through a path stored in the definition file.
        conda_file_path = os.path.join(path, _CONDA_DEPENDENCIES_FILE_NAME)
        if os.path.isfile(conda_file_path):
            # Create the python section only if the definition left it empty.
            env.python = env.python or PythonSection(_skip_defaults=True)
            env.python.conda_dependencies = CondaDependencies(conda_file_path)

        # The base Dockerfile is optional as well; its text is stored inline.
        base_dockerfile_path = os.path.join(path, _BASE_DOCKERFILE_FILE_NAME)
        if os.path.isfile(base_dockerfile_path):
            with open(base_dockerfile_path, "r") as base_dockerfile:
                # Create the docker section only if the definition left it empty.
                env.docker = env.docker or DockerSection(_skip_defaults=True)
                env.docker.base_dockerfile = base_dockerfile.read()

        # Callers assign the result of this function and use it as the
        # environment, so the populated object has to be handed back.
        return env

I've tested that this works as expected: when I uncomment r-essentials in the conda file and run ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py, training succeeds, as shown by the R output in the logs.

"environmentVariables": {
"EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
},
"python": {
"userManagedDependencies": false,
"interpreterPath": "python",
"condaDependenciesFile": null,
"baseCondaEnvironment": null
},
"docker": {
"enabled": true,
"baseImage": "mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04",
"baseDockerfile": null,
"sharedVolumes": true,
"shmSize": "2g",
"arguments": [],
"baseImageRegistry": {
"address": null,
"username": null,
"password": null
}
},
"spark": {
"repositories": [],
"packages": [],
"precachePackages": true
},
"databricks": {
"mavenLibraries": [],
"pypiLibraries": [],
"rcranLibraries": [],
"jarLibraries": [],
"eggLibraries": []
},
"inferencingStackVersion": null
}
9 changes: 6 additions & 3 deletions diabetes_regression/ci_dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ dependencies:
# The python interpreter version.
- python=3.7.5

- r=3.6.0
- r-essentials=3.6.0
# dependencies with versions aligned with conda_dependencies.yml.
- numpy=1.18.1
- pandas=1.0.0
- scikit-learn=0.22.1

# dependencies for MLOps with R.
- r=3.6.0
- r-essentials=3.6.0

- pip=20.0.2
- pip:

# dependencies shared with other environment .yml files.
# dependencies with versions aligned with conda_dependencies.yml.
- azureml-sdk==1.0.79

# Additional pip dependencies for the CI environment.
Expand Down
33 changes: 33 additions & 0 deletions diabetes_regression/conda_dependencies.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Details about the Conda environment file format:
# https://conda.io/docs/using/envs.html#create-environment-file-by-hand

name: diabetes_regression_sklearn

dependencies:

# The python interpreter version.
- python=3.7.5

# Required by azureml-defaults, installed separately through Conda to
# get a prebuilt version and not require build tools for the install.
- psutil=5.6 #latest

- numpy=1.18.1
- pandas=1.0.0
- scikit-learn=0.22.1
#- r-essentials
#- tensorflow
#- keras

- pip=20.0.2
- pip:

# Dependencies for training environment.

- azureml-core==1.0.79

# Dependencies for scoring environment.

# You must list azureml-defaults as a pip dependency
- azureml-defaults==1.0.79
- inference-schema[numpy-support]==1.0.1
3 changes: 1 addition & 2 deletions diabetes_regression/scoring/deployment_config_aci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
---
computeType: ACI
containerResourceRequirements:
cpu: 1
memoryInGB: 4
computeType: ACI
2 changes: 1 addition & 1 deletion diabetes_regression/scoring/inference_config.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
entryScript: score.py
runtime: python
condaFile: ../scoring_dependencies.yml
condaFile: ../conda_dependencies.yml
extraDockerfileSteps:
schemaFile:
sourceDirectory:
Expand Down
36 changes: 0 additions & 36 deletions diabetes_regression/scoring_dependencies.yml

This file was deleted.

17 changes: 0 additions & 17 deletions diabetes_regression/training_dependencies.yml

This file was deleted.

4 changes: 2 additions & 2 deletions docs/code_description.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@

### Environment Definitions

- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run).
- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run).
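- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including the base Docker image. The `conda_dependencies.yml` Conda environment file located in the same directory is picked up automatically when the environment is loaded with `Environment.load_from_directory()`.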
- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run).
- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment.

### Code
Expand Down
6 changes: 3 additions & 3 deletions docs/getting_started.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ Great, you now have the build pipeline set up which automatically triggers every
pipelines using R to train a model. This is enabled
by changing the `build-train-script` pipeline variable to either of:
* `diabetes_regression_build_train_pipeline_with_r.py` to train a model
with R on Azure ML Compute. You will also need to add the
`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml`
and `diabetes_regression/training_dependencies.yml`.
with R on Azure ML Compute. You will also need to uncomment (i.e. include) the
`r-essentials` Conda packages in the environment definition
`diabetes_regression/conda_dependencies.yml`.
* `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py`
to train a model with R on Databricks. You will need
to manually create a Databricks cluster and attach it to the ML Workspace as a
Expand Down
20 changes: 10 additions & 10 deletions ml_service/pipelines/diabetes_regression_build_train_pipeline.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from azureml.pipeline.core.graph import PipelineParameter
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core import Workspace
from azureml.core.runconfig import RunConfiguration, CondaDependencies
from azureml.core import Workspace, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.core import Dataset, Datastore
from ml_service.util.attach_compute import get_compute
from ml_service.util.env_variables import Env
Expand All @@ -28,17 +28,17 @@ def main():
print("aml_compute:")
print(aml_compute)

# Create a run configuration environment
conda_deps_file = "diabetes_regression/training_dependencies.yml"
conda_deps = CondaDependencies(conda_deps_file)
run_config = RunConfiguration(conda_dependencies=conda_deps)
run_config.environment.docker.enabled = True
config_envvar = {}
# Create a reusable run configuration environment
# Read definition from diabetes_regression/azureml_environment.json
environment = Environment.load_from_directory(e.sources_directory_train)
if (e.collection_uri is not None and e.teamproject_name is not None):
builduri_base = e.collection_uri + e.teamproject_name
builduri_base = builduri_base + "/_build/results?buildId="
config_envvar["BUILDURI_BASE"] = builduri_base
run_config.environment.environment_variables = config_envvar
environment.environment_variables["BUILDURI_BASE"] = builduri_base
environment.register(aml_workspace)

run_config = RunConfiguration()
run_config.environment = environment

model_name_param = PipelineParameter(
name="model_name", default_value=e.model_name)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline
from azureml.core import Workspace
from azureml.core.runconfig import RunConfiguration, CondaDependencies
from azureml.core import Workspace, Environment
from azureml.core.runconfig import RunConfiguration
from ml_service.util.attach_compute import get_compute
from ml_service.util.env_variables import Env

Expand All @@ -26,11 +26,19 @@ def main():
print("aml_compute:")
print(aml_compute)

# Create a run configuration environment
conda_deps_file = "diabetes_regression/training_dependencies.yml"
conda_deps = CondaDependencies(conda_deps_file)
run_config = RunConfiguration(conda_dependencies=conda_deps)
run_config.environment.docker.enabled = True
# Create a reusable run configuration environment
# Read definition from diabetes_regression/azureml_environment.json
# Make sure to include `r-essentials`
# in diabetes_regression/conda_dependencies.yml
environment = Environment.load_from_directory(e.sources_directory_train)
if (e.collection_uri is not None and e.teamproject_name is not None):
builduri_base = e.collection_uri + e.teamproject_name
builduri_base = builduri_base + "/_build/results?buildId="
environment.environment_variables["BUILDURI_BASE"] = builduri_base
environment.register(aml_workspace)

run_config = RunConfiguration()
run_config.environment = environment

train_step = PythonScriptStep(
name="Train Model",
Expand Down