Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 10 additions & 18 deletions bigframes/ml/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"distance_type": "distanceType",
"max_iter": "maxIterations",
"tol": "minRelativeProgress",
"warm_start": "warmStart",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add unit tests for all of these newly added params. The same applies to all other BQML models, but feel free to address that in a follow-up PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will do in a follow-up.

}


Expand Down Expand Up @@ -67,27 +68,18 @@ def __init__(
self._bqml_model_factory = globals.bqml_model_factory()

@classmethod
def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> KMeans:
assert model.model_type == "KMEANS"
def _from_bq(cls, session: bigframes.Session, bq_model: bigquery.Model) -> KMeans:
assert bq_model.model_type == "KMEANS"

kwargs: dict = {}

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]
dummy_kmeans = cls()
for bf_param, bf_value in dummy_kmeans.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param in last_fitting:
# Convert types
kwargs[bf_param] = (
str(last_fitting[bqml_param])
if bf_param in ["init"]
else type(bf_value)(last_fitting[bqml_param])
)

new_kmeans = cls(**kwargs)
new_kmeans._bqml_model = core.BqmlModel(session, model)
return new_kmeans
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> dict:
Expand Down
23 changes: 12 additions & 11 deletions bigframes/ml/decomposition.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from bigframes.ml import base, core, globals, utils
import bigframes.pandas as bpd

_BQML_PARAMS_MAPPING = {"svd_solver": "pcaSolver"}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one isn't a new addition; it's an existing piece that was moved.



@log_adapter.class_logger
class PCA(
Expand All @@ -47,23 +49,22 @@ def __init__(
self._bqml_model_factory = globals.bqml_model_factory()

@classmethod
def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> PCA:
assert model.model_type == "PCA"
def _from_bq(cls, session: bigframes.Session, bq_model: bigquery.Model) -> PCA:
assert bq_model.model_type == "PCA"

kwargs: dict = {}
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]
last_fitting = bq_model.training_runs[-1]["trainingOptions"]
if "numPrincipalComponents" in last_fitting:
kwargs["n_components"] = int(last_fitting["numPrincipalComponents"])
if "pcaExplainedVarianceRatio" in last_fitting:
elif "pcaExplainedVarianceRatio" in last_fitting:
kwargs["n_components"] = float(last_fitting["pcaExplainedVarianceRatio"])
if "pcaSolver" in last_fitting:
kwargs["svd_solver"] = str(last_fitting["pcaSolver"])

new_pca = cls(**kwargs)
new_pca._bqml_model = core.BqmlModel(session, model)
return new_pca
model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> dict:
Expand Down
119 changes: 47 additions & 72 deletions bigframes/ml/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@

_BQML_PARAMS_MAPPING = {
"booster": "boosterType",
"dart_normalized_type": "dartNormalizeType",
"tree_method": "treeMethod",
"colsample_bytree": "colsampleBylevel",
"colsample_bylevel": "colsampleBytree",
"colsample_bytree": "colsampleBytree",
"colsample_bylevel": "colsampleBylevel",
"colsample_bynode": "colsampleBynode",
"gamma": "minSplitLoss",
"subsample": "subsample",
Expand All @@ -44,6 +45,8 @@
"min_tree_child_weight": "minTreeChildWeight",
"max_depth": "maxTreeDepth",
"max_iterations": "maxIterations",
"enable_global_explain": "enableGlobalExplain",
"xgboost_version": "xgboostVersion",
}


Expand Down Expand Up @@ -99,24 +102,17 @@ def __init__(

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> XGBRegressor:
assert model.model_type == "BOOSTED_TREE_REGRESSOR"
assert bq_model.model_type == "BOOSTED_TREE_REGRESSOR"

kwargs = {}

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]

dummy_regressor = cls()
for bf_param, bf_value in dummy_regressor.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param in last_fitting:
kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param])
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

new_xgb_regressor = cls(**kwargs)
new_xgb_regressor._bqml_model = core.BqmlModel(session, model)
return new_xgb_regressor
model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
Expand Down Expand Up @@ -255,24 +251,17 @@ def __init__(

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> XGBClassifier:
assert model.model_type == "BOOSTED_TREE_CLASSIFIER"
assert bq_model.model_type == "BOOSTED_TREE_CLASSIFIER"

kwargs = {}

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]

dummy_classifier = XGBClassifier()
for bf_param, bf_value in dummy_classifier.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param is not None:
kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param])
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

new_xgb_classifier = cls(**kwargs)
new_xgb_classifier._bqml_model = core.BqmlModel(session, model)
return new_xgb_classifier
model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
Expand Down Expand Up @@ -370,16 +359,16 @@ def __init__(
*,
tree_method: Literal["auto", "exact", "approx", "hist"] = "auto",
min_tree_child_weight: int = 1,
colsample_bytree=1.0,
colsample_bylevel=1.0,
colsample_bynode=0.8,
gamma=0.00,
colsample_bytree: float = 1.0,
colsample_bylevel: float = 1.0,
colsample_bynode: float = 0.8,
gamma: float = 0.0,
max_depth: int = 15,
subsample=0.8,
reg_alpha=0.0,
reg_lambda=1.0,
tol=0.01,
enable_global_explain=False,
subsample: float = 0.8,
reg_alpha: float = 0.0,
reg_lambda: float = 1.0,
tol: float = 0.01,
enable_global_explain: bool = False,
xgboost_version: Literal["0.9", "1.1"] = "0.9",
):
self.n_estimators = n_estimators
Expand All @@ -401,24 +390,17 @@ def __init__(

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> RandomForestRegressor:
assert model.model_type == "RANDOM_FOREST_REGRESSOR"

kwargs = {}

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]
assert bq_model.model_type == "RANDOM_FOREST_REGRESSOR"

dummy_model = cls()
for bf_param, bf_value in dummy_model.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param in last_fitting:
kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param])
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

new_random_forest_regressor = cls(**kwargs)
new_random_forest_regressor._bqml_model = core.BqmlModel(session, model)
return new_random_forest_regressor
model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
Expand Down Expand Up @@ -542,7 +524,7 @@ def __init__(
reg_alpha: float = 0.0,
reg_lambda: float = 1.0,
tol: float = 0.01,
enable_global_explain=False,
enable_global_explain: bool = False,
xgboost_version: Literal["0.9", "1.1"] = "0.9",
):
self.n_estimators = n_estimators
Expand All @@ -564,24 +546,17 @@ def __init__(

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> RandomForestClassifier:
assert model.model_type == "RANDOM_FOREST_CLASSIFIER"

kwargs = {}
assert bq_model.model_type == "RANDOM_FOREST_CLASSIFIER"

# See https://cloud.google.com/bigquery/docs/reference/rest/v2/models#trainingrun
last_fitting = model.training_runs[-1]["trainingOptions"]

dummy_model = RandomForestClassifier()
for bf_param, bf_value in dummy_model.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param is not None:
kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param])
kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

new_random_forest_classifier = cls(**kwargs)
new_random_forest_classifier._bqml_model = core.BqmlModel(session, model)
return new_random_forest_classifier
model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> Dict[str, str | int | bool | float | List[str]]:
Expand Down
42 changes: 13 additions & 29 deletions bigframes/ml/forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
"auto_arima_min_order": "autoArimaMinOrder",
"order": "nonSeasonalOrder",
"data_frequency": "dataFrequency",
"include_drift": "includeDrift",
"holiday_region": "holidayRegion",
"clean_spikes_and_dips": "cleanSpikesAndDips",
"adjust_step_changes": "adjustStepChanges",
Expand Down Expand Up @@ -131,35 +132,18 @@ def __init__(
self._bqml_model_factory = globals.bqml_model_factory()

@classmethod
def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> ARIMAPlus:
assert model.model_type == "ARIMA_PLUS"

kwargs: dict = {}
last_fitting = model.training_runs[-1]["trainingOptions"]

dummy_arima = cls()
for bf_param, bf_value in dummy_arima.__dict__.items():
bqml_param = _BQML_PARAMS_MAPPING.get(bf_param)
if bqml_param in last_fitting:
# Convert types
if bf_param in ["time_series_length_fraction"]:
kwargs[bf_param] = float(last_fitting[bqml_param])
elif bf_param in [
"auto_arima_max_order",
"auto_arima_min_order",
"min_time_series_length",
"max_time_series_length",
"trend_smoothing_window_size",
]:
kwargs[bf_param] = int(last_fitting[bqml_param])
elif bf_param in ["holiday_region"]:
kwargs[bf_param] = str(last_fitting[bqml_param])
else:
kwargs[bf_param] = type(bf_value)(last_fitting[bqml_param])

new_arima_plus = cls(**kwargs)
new_arima_plus._bqml_model = core.BqmlModel(session, model)
return new_arima_plus
def _from_bq(
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> ARIMAPlus:
assert bq_model.model_type == "ARIMA_PLUS"

kwargs = utils.retrieve_params_from_bq_model(
cls, bq_model, _BQML_PARAMS_MAPPING
)

model = cls(**kwargs)
model._bqml_model = core.BqmlModel(session, bq_model)
return model

@property
def _bqml_options(self) -> dict:
Expand Down
32 changes: 17 additions & 15 deletions bigframes/ml/imported.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ def _create_bqml_model(self):

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> TensorFlowModel:
assert model.model_type == "TENSORFLOW"
assert bq_model.model_type == "TENSORFLOW"

tf_model = cls(session=session, model_path="")
tf_model._bqml_model = core.BqmlModel(session, model)
return tf_model
model = cls(session=session, model_path="")
model._bqml_model = core.BqmlModel(session, bq_model)
return model

def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
"""Predict the result from input DataFrame.
Expand Down Expand Up @@ -134,12 +134,14 @@ def _create_bqml_model(self):
)

@classmethod
def _from_bq(cls, session: bigframes.Session, model: bigquery.Model) -> ONNXModel:
assert model.model_type == "ONNX"
def _from_bq(
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> ONNXModel:
assert bq_model.model_type == "ONNX"

onnx_model = cls(session=session, model_path="")
onnx_model._bqml_model = core.BqmlModel(session, model)
return onnx_model
model = cls(session=session, model_path="")
model._bqml_model = core.BqmlModel(session, bq_model)
return model

def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
"""Predict the result from input DataFrame.
Expand Down Expand Up @@ -249,13 +251,13 @@ def _create_bqml_model(self):

@classmethod
def _from_bq(
cls, session: bigframes.Session, model: bigquery.Model
cls, session: bigframes.Session, bq_model: bigquery.Model
) -> XGBoostModel:
assert model.model_type == "XGBOOST"
assert bq_model.model_type == "XGBOOST"

xgboost_model = cls(session=session, model_path="")
xgboost_model._bqml_model = core.BqmlModel(session, model)
return xgboost_model
model = cls(session=session, model_path="")
model._bqml_model = core.BqmlModel(session, bq_model)
return model

def predict(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame:
"""Predict the result from input DataFrame.
Expand Down
Loading