Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docs/pacemaker/inputfile.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,8 +332,12 @@ fit:
## will not be added until the list of functions of the previous body-order is exhausted
## power_order - the order of adding new basis functions is defined by the "power rank" p of a function.
## p = len(ns) + sum(ns) + sum(ls). Functions with the smallest p are added first
#ladder_type: body_order

#ladder_type: body_order

# early stopping
## min_relative_train_loss_per_iter: 5e-5
## min_relative_test_loss_per_iter: 1e-5
## early_stopping_patience: 200

## callbacks during the fitting. Module quick_validation.py should be available for import
## see example/pacemaker_with_callback for more details and examples
Expand Down
2 changes: 1 addition & 1 deletion lib/pybind11/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.

cmake_minimum_required(VERSION 3.4)
cmake_minimum_required(VERSION 3.7)

# The `cmake_minimum_required(VERSION 3.4...3.22)` syntax does not work with
# some versions of VS that have a patched CMake 3.11. This forces us to emulate
Expand Down
5 changes: 5 additions & 0 deletions src/pyace/data/input_template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ fit:
# ladder_step: 100
# ladder_type: power_order

# Early stopping
# min_relative_train_loss_per_iter: 5e-5
# min_relative_test_loss_per_iter: 1e-5
# early_stopping_patience: 200

#################################################################
## Backend specification section
#################################################################
Expand Down
95 changes: 92 additions & 3 deletions src/pyace/generalfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ def save_dataset(dataframe, fname):
log.info("Dataset saved into {}".format(fname))


class TestLossChangeTooSmallException(StopIteration):
    """Signals early stopping: the relative loss change per iteration stayed
    above (i.e. slower than) the configured negative threshold for the whole
    patience window.

    NOTE(review): despite the name, this is raised for both TRAIN and TEST
    loss stagnation (see ``detect_early_stopping``). It subclasses
    ``StopIteration`` — presumably so the fitting backend can catch it and
    terminate minimization gracefully; confirm in the backend adapter.
    """
    pass


class GeneralACEFit:
"""
Main fitting wrapper class
Expand All @@ -150,6 +154,7 @@ def __init__(self,
seed=None,
callbacks=None
):
self.early_stopping_occured = None
self.seed = seed
if self.seed is not None:
log.info("Set numpy random seed to {}".format(self.seed))
Expand Down Expand Up @@ -293,11 +298,11 @@ def __init__(self,

self.data_config = data_config
self.weighting_policy_spec = self.fit_config.get(FIT_WEIGHTING_KW)
display_step = backend_config.get('display_step', 20)
self.display_step = backend_config.get('display_step', 20)
if self.ladder_scheme:
self.metrics_aggregator = MetricsAggregator(extended_display_step=display_step)
self.metrics_aggregator = MetricsAggregator(extended_display_step=self.display_step)
else:
self.metrics_aggregator = MetricsAggregator(extended_display_step=display_step,
self.metrics_aggregator = MetricsAggregator(extended_display_step=self.display_step,
ladder_metrics_filename=None)
self.fit_backend = FitBackendAdapter(backend_config,
fit_metrics_callback=self.fit_metric_callback,
Expand Down Expand Up @@ -354,6 +359,24 @@ def __init__(self,

self.loss_spec = LossFunctionSpecification(**loss_spec_dict)

# attributes for early stopping
self.train_loss_list = []
self.test_loss_list = []
self.early_stopping_occured = False
self.early_stopping_patience = fit_config.get("early_stopping_patience", 200)
self.min_relative_train_loss_per_iter = fit_config.get('min_relative_train_loss_per_iter')
self.min_relative_test_loss_per_iter = fit_config.get('min_relative_test_loss_per_iter')
if self.min_relative_train_loss_per_iter:
self.min_relative_train_loss_per_iter=-abs(self.min_relative_train_loss_per_iter)
log.info(
f"Slowest relative change of TRAIN loss is set to {self.min_relative_train_loss_per_iter :+1.2e}/iter, " +
f"patience = {self.early_stopping_patience} iters")
if self.min_relative_test_loss_per_iter:
self.min_relative_test_loss_per_iter = -abs(self.min_relative_test_loss_per_iter)
log.info(
f"Slowest relative change of TEST loss is set to {self.min_relative_test_loss_per_iter :+1.2e}/iter, " +
f"patience = {self.early_stopping_patience} iters")

def set_core_rep(self, basis_conf):
# automatic repulsion selection
if "repulsion" in self.fit_config and self.fit_config["repulsion"] == "auto":
Expand All @@ -372,11 +395,71 @@ def fit_metric_callback(self, metrics_dict, extended_display_step=None):
metrics_dict["cycle_step"] = self.current_fit_cycle
metrics_dict["ladder_step"] = self.current_ladder_step
self.metrics_aggregator.fit_metric_callback(metrics_dict, extended_display_step=extended_display_step)
self.train_loss_list.append(metrics_dict['loss'])
self.log_d_rel_loss(metrics_dict["iter_num"], mode='train')
if self.min_relative_train_loss_per_iter is not None:
self.detect_early_stopping(mode='train')

def test_metric_callback(self, metrics_dict, extended_display_step=None):
    """Forward per-evaluation TEST metrics to the aggregator and record the
    TEST loss for early-stopping detection.

    :param metrics_dict: metrics of the current evaluation; must contain
        'loss' and 'iter_num'; 'cycle_step'/'ladder_step' are added here
    :param extended_display_step: forwarded to the MetricsAggregator
    :raises TestLossChangeTooSmallException: indirectly, via
        ``detect_early_stopping`` when the TEST loss stagnates
    """
    # tag the metrics with the current fit cycle / ladder step for aggregation
    metrics_dict["cycle_step"] = self.current_fit_cycle
    metrics_dict["ladder_step"] = self.current_ladder_step
    self.metrics_aggregator.test_metric_callback(metrics_dict, extended_display_step=extended_display_step)
    # record the TEST loss trajectory (sampled every `display_step` iterations,
    # judging by compute_d_rel_loss_d_step — TODO confirm against the backend)
    self.test_loss_list.append(metrics_dict['loss'])
    self.log_d_rel_loss(metrics_dict["iter_num"], mode='test')
    # early stopping on TEST loss only when a threshold was configured
    if self.min_relative_test_loss_per_iter is not None:
        self.detect_early_stopping(mode='test')

def compute_d_rel_loss_d_step(self, loss_list, mode):
    """Return the relative loss change per iteration over the patience window.

    :param loss_list: recorded loss trajectory (train or test)
    :param mode: 'train' or 'test'; TEST losses are sampled only every
        `display_step` iterations, so the step size differs
    :return: numpy array of (L[i+1]-L[i]) / L[i] / iter_step — normally
        small negative numbers while the fit is still improving
    """
    # TEST metrics arrive once per display_step iterations; TRAIN every iteration
    iter_step = 1 if mode != 'test' else self.display_step
    window = int(np.ceil(self.early_stopping_patience / iter_step))
    # keep only the last `window` samples of the trajectory
    recent = np.asarray(loss_list[-window:])
    # finite difference, normalized to a per-iteration relative change
    return np.diff(recent) / recent[:-1] / iter_step

def log_d_rel_loss(self, iter_num, mode):
    """Periodically log the most recent relative loss change per iteration.

    Logs only every `display_step` iterations, never at iteration 0, and
    stays silent once early stopping has already been triggered.

    :param iter_num: current iteration number
    :param mode: 'train' or 'test'
    """
    # guard clauses: off-step iterations, iteration 0, or already stopped
    if iter_num <= 0 or iter_num % self.display_step != 0 or self.early_stopping_occured:
        return
    rel_changes = self.compute_d_rel_loss_d_step(self.get_loss_list(mode), mode)
    if len(rel_changes) == 0:
        # trajectory too short to form a finite difference yet
        return
    log.info(f"Last relative {mode.upper()} loss change {rel_changes[-1] :+1.2e}/iter")

def get_loss_list(self, mode):
    """Return the recorded loss trajectory for the given mode.

    :param mode: 'train' or 'test'
    :return: list of loss values accumulated by the metric callbacks
    :raises AssertionError: for any other mode value
    """
    assert mode in ('train', 'test'), f"Unsupported {mode=}"
    return self.train_loss_list if mode == 'train' else self.test_loss_list

def detect_early_stopping(self, mode):
    """Check the recent loss trajectory and trigger early stopping if the
    loss has stagnated.

    Compares the relative per-iteration loss changes over (roughly) the last
    `early_stopping_patience` iterations against the configured negative
    threshold; if even the best (most negative) recent change is slower than
    the threshold, fitting is stopped by raising an exception.

    :param mode: 'train' or 'test' — which loss trajectory to inspect
    :raises TestLossChangeTooSmallException: when the loss stagnates;
        subclasses StopIteration so the optimizer loop can catch it
    """
    loss_list = self.get_loss_list(mode)
    if self.early_stopping_occured:
        # early stopping already occurred
        return

    # TEST loss is recorded only every `display_step` iterations
    iter_step = self.display_step if mode == 'test' else 1
    min_loss_depth = int(np.ceil(self.early_stopping_patience / iter_step))

    if len(loss_list) - 1 < min_loss_depth:  # -1 because test loss is written at it=0
        # trajectory is not long enough
        return

    d_rel_loss_d_step = self.compute_d_rel_loss_d_step(loss_list, mode)
    if len(d_rel_loss_d_step) == 0:
        # BUGFIX: when early_stopping_patience <= iter_step the window holds a
        # single sample and there are no finite differences; min() below would
        # raise ValueError on the empty array. Nothing to decide on yet.
        return

    min_relative_loss_per_iter = self.min_relative_test_loss_per_iter if mode == 'test' else self.min_relative_train_loss_per_iter
    min_d_rel_loss_d_step = min(d_rel_loss_d_step)  # best (most negative) recent change; computed once
    if min_d_rel_loss_d_step > min_relative_loss_per_iter:
        # even the best recent change is slower than the threshold -> early stopping
        last_d_rel_loss_d_step = d_rel_loss_d_step[-1]
        msg = f"EARLY STOPPING: Too small or even positive {mode.upper()} loss change (best={min_d_rel_loss_d_step:+1.2e} / iter, " + \
              f"last={last_d_rel_loss_d_step:+1.2e}/iter, " + \
              f"threshold = {min_relative_loss_per_iter :+1.2e}/iter) " + \
              f"within last {self.early_stopping_patience} iterations. Stopping"
        log.info(msg)
        self.early_stopping_occured = True
        raise TestLossChangeTooSmallException(msg)

def fit(self) -> BBasisConfiguration:
gc.collect()
Expand Down Expand Up @@ -474,6 +557,7 @@ def cycle_fitting(self, bbasisconfig: BBasisConfiguration) -> BBasisConfiguratio
num_of_parameters))
log.info("Running fit backend")
self.current_fit_iteration = 0
self.reset_early_stopping()
current_bbasisconfig = self.fit_backend.fit(
current_bbasisconfig,
dataframe=self.fitting_data, loss_spec=self.loss_spec, fit_config=self.fit_config,
Expand Down Expand Up @@ -535,6 +619,11 @@ def cycle_fitting(self, bbasisconfig: BBasisConfiguration) -> BBasisConfiguratio
save_interim_potential(current_best_bbasisconfig, potential_filename="interim_potential_best_cycle.yaml")
return current_best_bbasisconfig

def reset_early_stopping(self):
    """Clear early-stopping state before starting a new fit cycle."""
    # drop both loss trajectories and re-arm the early-stopping detector
    self.train_loss_list = []
    self.test_loss_list = []
    self.early_stopping_occured = False

@staticmethod
def apply_gaussian_noise(current_bbasisconfig, trainable_parameters_dict, noise_abs_sigma, noise_rel_sigma):
cur_bbasis = ACEBBasisSet(current_bbasisconfig)
Expand Down
2 changes: 1 addition & 1 deletion src/pyace/metrics_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ def print_detailed_metrics(fit_metrics_dict, title='Iteration:'):
log.info('{:<12}'.format(title) +
"#{iter_num:<5}".format(iter_num=iter_num) +
'{:<14}'.format('({numeval} evals):'.format(numeval=fit_metrics_dict["eval_count"])) +
'{:>10}'.format('Loss: ') + "{loss: >3.6f}".format(loss=total_loss) +
'{:>10}'.format('Loss: ') + "{loss: >1.4e}".format(loss=total_loss) +
'{str1:>21}{rmse_epa:>.2f} ({low_rmse_e:>.2f}) meV/at' \
.format(str1=" | RMSE Energy(low): ",
rmse_epa=1e3 * fit_metrics_dict["rmse_epa"],
Expand Down
15 changes: 12 additions & 3 deletions src/pyace/preparedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,9 +669,18 @@ def prepare_datasets(self):
self.fitting_data, self.test_data = train_test_split(self.fitting_data, test_size=test_size)
self.test_data = self.process_dataset(self.test_data)

# apply weights (TODO: for joint train+test?)
self.fitting_data = apply_weights(self.fitting_data, self.weighting_policy_spec, self.ignore_weights)
self.test_data = apply_weights(self.test_data, self.weighting_policy_spec, self.ignore_weights)
# apply weights
if self.test_data is not None:
# for joint train+test
self.fitting_data["train"] = True
self.test_data["train"] = False
joint_df = pd.concat([self.fitting_data, self.test_data], axis=0)
joint_df = apply_weights(joint_df, self.weighting_policy_spec, self.ignore_weights)
self.fitting_data = joint_df.query("train").reset_index(drop=True)
self.test_data = joint_df.query("~train").reset_index(drop=True)
# self.test_data = apply_weights(self.test_data, self.weighting_policy_spec, self.ignore_weights)
else:
self.fitting_data = apply_weights(self.fitting_data, self.weighting_policy_spec, self.ignore_weights)

# decrease augmented weights
aug_factor = self.data_config.get("aug_factor", 1e-4)
Expand Down
8 changes: 6 additions & 2 deletions tests/test-CLI/Cu-I/input.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,19 @@ fit:

## scipy.minimze algorithm: BFGS / L-BFGS-B / Nelder-Mead / etc...
optimizer: BFGS
repulsion: auto

## maximum number of scipy.minimize iterations
maxiter: 20

# early stopping
min_relative_train_loss_per_iter: 5e-5
min_relative_test_loss_per_iter: 1e-5
early_stopping_patience: 10

#################################################################
## Backend specification section
#################################################################
backend:
evaluator: tensorpot
batch_size: 100
display_step: 50
display_step: 10