diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8e131aed..76eba228 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -1,6 +1,7 @@
 Changelog
 =========
 
+- Add RobustGPyRegression model that can handle non-finite output values
 - Update surrogate model initialisation to use all initial evidence
 - Update BOLFI and BOLFIRE to use a shared sample class that returns individual chains in the arviz inference data
 - Use kernel copy to avoid pickle issue and allow BOLFI parallelisation with non-default kernel
diff --git a/elfi/__init__.py b/elfi/__init__.py
index 0e0a17b9..b6e75989 100644
--- a/elfi/__init__.py
+++ b/elfi/__init__.py
@@ -26,7 +26,7 @@ from elfi.visualization.visualization import nx_draw as draw
 from elfi.visualization.visualization import plot_params_vs_node
 from elfi.visualization.visualization import plot_predicted_summaries
 
-from elfi.methods.bo.gpy_regression import GPyRegression
+from elfi.methods.bo.gpy_regression import GPyRegression, RobustGPyRegression
 
 __author__ = 'ELFI authors'
 __email__ = 'elfi-support@hiit.fi'
diff --git a/elfi/methods/bo/acquisition.py b/elfi/methods/bo/acquisition.py
index 0f44c656..17bbcacd 100644
--- a/elfi/methods/bo/acquisition.py
+++ b/elfi/methods/bo/acquisition.py
@@ -435,7 +435,7 @@ def evaluate_gradient(self, theta_new, t=None):
         a = (self.eps - mean) / scale
         b = np.sqrt(sigma2_n) / np.sqrt(sigma2_n + 2 * var)
         grad_a = (-1. / scale) * grad_mean - \
-            ((self.eps - mean) / (2. * (sigma2_n + var)**(1.5))) * grad_var
+            np.nan_to_num((self.eps - mean) / (2. * (sigma2_n + var)**(1.5))) * grad_var
         grad_b = (-np.sqrt(sigma2_n) / (sigma2_n + 2 * var)**(1.5)) * grad_var
 
         _phi_a = phi(a)
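
Note on the nan_to_num guard above: once the surrogate may report a non-finite mean (as RobustGPyRegression does at inputs classified as failed), (self.eps - mean) becomes -inf while the masked grad_var is zero, and -inf * 0 would propagate NaN into grad_a. A minimal sketch of the mechanism, with illustrative values only:

    import numpy as np

    eps, mean, grad_var = 0.1, np.inf, 0.0   # mean is inf at a masked point, its grad_var is 0
    raw = (eps - mean) / 2.0                 # -inf
    print(raw * grad_var)                    # nan: -inf * 0
    print(np.nan_to_num(raw) * grad_var)     # -0.0: clipped to a large finite value first
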
diff --git a/elfi/methods/bo/gpy_regression.py b/elfi/methods/bo/gpy_regression.py
index a44d63be..d921f811 100644
--- a/elfi/methods/bo/gpy_regression.py
+++ b/elfi/methods/bo/gpy_regression.py
@@ -41,7 +41,7 @@ def __init__(self,
             Alternatives: "scg", "fmin_tnc", "simplex", "lbfgsb", "lbfgs", "sgd"
             See also: paramz.Model.optimize()
         max_opt_iters : int, optional
-        gp : GPy.model.GPRegression instance, optional
+        gp : GPy.models.GPRegression instance, optional
         **gp_params
             kernel : GPy.Kern
             noise_var : float
@@ -362,3 +362,250 @@ def copy(self):
             kopy.gp_params['mean_function'] = self.gp_params['mean_function'].copy()
 
         return kopy
+
+
+class RobustGPyRegression(GPyRegression):
+    """Robust Gaussian Process regression using the GPy library.
+
+    Restricts the regression model to evidence with finite output values. Simulations
+    that produce non-finite output are considered failed.
+    """
+
+    def __init__(self,
+                 parameter_names=None,
+                 bounds=None,
+                 train_classifier=False,
+                 thd=0,
+                 clf=None,
+                 clf_kernel=None,
+                 **kwargs):
+        """Initialize RobustGPyRegression.
+
+        Parameters
+        ----------
+        parameter_names : list of str, optional
+            Names of parameter nodes. If None, sets dimension to 1.
+        bounds : dict, optional
+            The region where to estimate the posterior for each parameter in
+            model.parameters.
+            `{'parameter_name':(lower, upper), ... }`
+            If not supplied, defaults to (0, 1) bounds for all dimensions.
+        train_classifier : boolean, optional
+            Train a classifier to predict finite output probabilities.
+        thd : float, optional
+            Success probability threshold: predictions at inputs whose estimated
+            probability of producing finite output falls below this value are
+            reported as failed. Setting thd > 0 implies train_classifier.
+        clf : GPy.models.GPClassification instance, optional
+            Differentiates between inputs that return finite (1) and non-finite (0) outputs.
+        clf_kernel : GPy.Kern, optional
+            Kernel function, defaults to RBF.
+        **kwargs : see GPyRegression
+
+        """
+        super().__init__(parameter_names, bounds, **kwargs)
+
+        self._X = np.zeros((0, self.input_dim))
+        self._Y = np.zeros((0, 1))
+
+        self.thd = thd
+        self.train_classifier = self.thd > 0 or train_classifier
+
+        self._clf = clf
+        self._clf_kernel = clf_kernel or GPy.kern.RBF(self.input_dim, ARD=True)
+
+        self.FAILED_OUTPUT = np.inf
+        self.FAILED_VAR = 0.00001
+
+    def success_proba(self, x):
+        """Return predicted finite output probabilities at x.
+
+        Parameters
+        ----------
+        x : np.array
+            numpy compatible (n, input_dim) array of points to evaluate
+
+        Returns
+        -------
+        np.array
+            with shape (x.shape[0], 1)
+
+        """
+        # Ensure it's 2d for GPy
+        x = np.asanyarray(x).reshape((-1, self.input_dim))
+
+        if self._clf is not None:
+            return self._clf.predict(x)[0]
+        else:
+            return np.ones((len(x), 1))
+
+    def success_proba_gradients(self, x):
+        """Return predicted finite output probability gradients at x.
+
+        Parameters
+        ----------
+        x : np.array
+            numpy compatible (n, input_dim) array of points to evaluate
+
+        Returns
+        -------
+        np.array
+            with shape (x.shape[0], input_dim)
+
+        """
+        # Ensure it's 2d for GPy
+        x = np.asanyarray(x).reshape((-1, self.input_dim))
+
+        if self._clf is not None:
+            return self._clf.predictive_gradients(x)[0][:, :, 0]
+        else:
+            return np.zeros_like(x)
+
+    def predict(self, x, **kwargs):
+        """Return predicted mean and variance at x.
+
+        Parameters
+        ----------
+        x : np.array
+            numpy compatible (n, input_dim) array of points to evaluate
+            if len(x.shape) == 1 will be cast to 2D with x[None, :]
+        **kwargs
+            Passed on to GPyRegression.predict.
+
+        Returns
+        -------
+        tuple
+            GP (mean, var) at x where
+                mean : np.array
+                       with shape (x.shape[0], 1)
+                var : np.array
+                      with shape (x.shape[0], 1)
+
+        """
+        # Ensure it's 2d for GPy
+        x = np.asanyarray(x).reshape((-1, self.input_dim))
+        mean, var = super().predict(x, **kwargs)
+
+        if self.thd > 0 and self._clf is not None:
+            mask = (self.success_proba(x) < self.thd).reshape(-1)
+            mean[mask] = self.FAILED_OUTPUT
+            var[mask] = self.FAILED_VAR
+
+        return mean, var
+
+    def predictive_gradients(self, x):
+        """Return the gradients of predicted mean and variance at x.
+
+        Parameters
+        ----------
+        x : np.array
+            numpy compatible (n, input_dim) array of points to evaluate
+            if len(x.shape) == 1 will be cast to 2D with x[None, :]
+
+        Returns
+        -------
+        tuple
+            GP (grad_mean, grad_var) at x where
+                grad_mean : np.array
+                            with shape (x.shape[0], input_dim)
+                grad_var : np.array
+                           with shape (x.shape[0], input_dim)
+
+        """
+        # Ensure it's 2d for GPy
+        x = np.asanyarray(x).reshape((-1, self.input_dim))
+        grad_mean, grad_var = super().predictive_gradients(x)
+
+        if self.thd > 0 and self._clf is not None:
+            mask = (self.success_proba(x) < self.thd).reshape(-1)
+            grad_mean[mask] = 0
+            grad_var[mask] = 0
+
+        return grad_mean, grad_var
+
+    def _make_classifier_instance(self, x, y, kernel):
+        return GPy.models.GPClassification(x, y, kernel=kernel)
+
+    def update(self, x, y, optimize=False):
+        """Update the surrogate model with new data.
+
+        Parameters
+        ----------
+        x : np.array
+        y : np.array
+        optimize : bool, optional
+            Whether to optimize hyperparameters.
+
+        """
+        # Must cast these as 2d for GPy
+        x = x.reshape((-1, self.input_dim))
+        y = y.reshape((-1, 1))
+
+        self._X = np.r_[self._X, x]
+        self._Y = np.r_[self._Y, y]
+
+        # Update regression model without failed simulations
+        mask = np.isfinite(y).reshape(-1)
+        if mask.any():
+            super().update(x[mask], y[mask], optimize=False)
+        elif self._gp is None:
+            raise RuntimeError("No finite outputs available for model initialisation.")
+
+        # Update classification model
+        if self.train_classifier:
+            labels = np.isfinite(self._Y).astype(int)
+            if self._clf is not None:
+                # Reconstruct with new data
+                self._clf = self._make_classifier_instance(self._X, labels, self._clf.kern.copy())
+            elif not mask.all():
+                # Initialise
+                self._clf = self._make_classifier_instance(self._X, labels, self._clf_kernel)
+
+        if optimize:
+            self.optimize()
+
+    def optimize(self):
+        """Optimize GP hyperparameters."""
+        super().optimize()
+        if self._clf is not None:
+            try:
+                self._clf.optimize(self.optimizer, max_iters=self.max_opt_iters)
+            except np.linalg.LinAlgError:
+                logger.warning("Numerical error in GP optimization. Stopping optimization")
+
+    @property
+    def n_evidence(self):
+        """Return the number of observed samples."""
+        return len(self._Y)
+
+    @property
+    def n_valid_evidence(self):
+        """Return the number of valid observed samples."""
+        return np.sum(np.isfinite(self._Y))
+
+    @property
+    def X(self):
+        """Return input evidence."""
+        return self._X
+
+    @property
+    def Y(self):
+        """Return output evidence."""
+        Y = self._Y.copy()
+        Y[~np.isfinite(Y)] = self.FAILED_OUTPUT
+        return Y
+
+    @property
+    def classifier_instance(self):
+        """Return the classifier instance."""
+        return self._clf
+
+    def copy(self):
+        """Return a copy of current instance."""
+        kopy = super().copy()
+
+        if self._clf is not None:
+            x = self._clf.X.copy()
+            y = self._clf.Y.copy()
+            kopy._clf = self._make_classifier_instance(x, y, self._clf.kern.copy())
+
+        if self._clf_kernel is not None:
+            kopy._clf_kernel = self._clf_kernel.copy()
+
+        return kopy
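
A minimal usage sketch of the class above (toy data; the 0.5 threshold and parameter names are illustrative, API calls as defined in the patch):

    import numpy as np
    import elfi

    rng = np.random.RandomState(0)
    X = rng.rand(30, 2) * 5                          # inputs in [0, 5)^2
    Y = np.sum((X - 3) ** 2, axis=1, keepdims=True)  # toy discrepancies
    Y[np.linalg.norm(X, axis=1) < 2.5] = np.nan      # simulations near the origin fail

    bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
    model = elfi.RobustGPyRegression(['mu_1', 'mu_2'], bounds=bounds, thd=0.5)
    model.update(X, Y, optimize=True)

    print(model.n_evidence, model.n_valid_evidence)  # 30 vs. the number of finite outputs
    print(model.success_proba([[1, 1], [4, 4]]))     # predicted probabilities of finite output
    mean, var = model.predict([1, 1])                # FAILED_OUTPUT/FAILED_VAR below the threshold
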
diff --git a/elfi/visualization/visualization.py b/elfi/visualization/visualization.py
index d6c29eca..9ca870c2 100644
--- a/elfi/visualization/visualization.py
+++ b/elfi/visualization/visualization.py
@@ -450,8 +450,10 @@ def plot_gp(gp, parameter_names, axes=None, resol=50,
     shape = (n_plots, n_plots)
     axes, kwargs = _create_axes(axes, shape, **kwargs)
 
-    x_evidence = gp.X
-    y_evidence = gp.Y
+    valid_inds = np.isfinite(gp.Y).squeeze()
+    x_evidence = gp.X[valid_inds]
+    y_evidence = gp.Y[valid_inds]
+    x_evidence_failed = gp.X[~valid_inds]
     if const is None:
         const = x_evidence[np.argmin(y_evidence), :]
     bounds = bounds or gp.bounds
@@ -488,6 +490,13 @@
                                      color="red",
                                      alpha=0.7,
                                      s=5)
+                if len(x_evidence_failed) > 0:
+                    axes[jy, ix].scatter(x_evidence_failed[:, ix],
+                                         x_evidence_failed[:, jy],
+                                         marker="^",
+                                         color="blue",
+                                         alpha=0.5,
+                                         s=7)
 
                 if true_params is not None:
                     axes[jy, ix].plot([true_params[parameter_names[ix]],
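
With the failed-evidence markers in place, plotting needs no new arguments; a sketch, assuming a fitted RobustGPyRegression instance `model` as in the example above:

    import matplotlib.pyplot as plt
    from elfi.visualization.visualization import plot_gp

    plot_gp(model, ['mu_1', 'mu_2'])  # failed evidence now drawn as blue triangles
    plt.show()
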
diff --git a/tests/functional/test_failure_robust.py b/tests/functional/test_failure_robust.py
new file mode 100644
index 00000000..e2d5292b
--- /dev/null
+++ b/tests/functional/test_failure_robust.py
@@ -0,0 +1,54 @@
+from functools import partial
+
+import numpy as np
+import pytest
+
+import elfi
+from elfi.examples.gauss import euclidean_multidim, gauss_nd_mean
+
+
+def toy_sim(*mu, cov, batch_size=1, random_state=None):
+    out = gauss_nd_mean(*mu, cov_matrix=cov, batch_size=batch_size, random_state=random_state)
+    failed = np.sqrt(np.sum(np.power(mu, 2), axis=0)) < 2.5
+    out[failed] = np.nan
+    return out
+
+
+def get_model(true_params, seed):
+    cov = (0.1 * np.diag(np.ones((2,))) + 0.5 * np.ones((2, 2)))
+    sim = partial(toy_sim, cov=cov)
+
+    random_state = np.random.RandomState(seed)
+    obs = sim(*true_params, random_state=random_state)
+
+    m = elfi.ElfiModel()
+    mu_1 = elfi.Prior('uniform', 0, 5, model=m)
+    mu_2 = elfi.Prior('uniform', 0, 5, model=m)
+    y = elfi.Simulator(sim, mu_1, mu_2, observed=obs)
+    mean = elfi.Summary(partial(np.mean, axis=1), y)
+    d = elfi.Discrepancy(euclidean_multidim, mean)
+    return m
+
+
+@pytest.mark.slowtest
+def test_failure_robust_BOLFI():
+    seed = 123
+    true_params = [3, 3]
+    m = get_model(true_params, seed)
+    bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
+    target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds, thd=0.5)
+    bolfi = elfi.BOLFI(m['d'], initial_evidence=20, target_model=target_model, seed=seed)
+    post = bolfi.fit(n_evidence=100)
+
+    # check that optimisation avoided infeasible parameter combinations
+    assert bolfi.target_model.n_valid_evidence > 90
+
+    # check model minimum
+    res_1 = bolfi.extract_result()
+    assert np.isclose(res_1.x_min['mu_1'], 3, atol=0.35)
+    assert np.isclose(res_1.x_min['mu_2'], 3, atol=0.35)
+
+    # check posterior mean
+    res_2 = bolfi.sample(500)
+    assert np.isclose(np.mean(res_2.samples['mu_1']), 3, atol=0.35)
+    assert np.isclose(np.mean(res_2.samples['mu_2']), 3, atol=0.35)
diff --git a/tests/unit/test_failure_robust_model.py b/tests/unit/test_failure_robust_model.py
new file mode 100644
index 00000000..c0b4b47c
--- /dev/null
+++ b/tests/unit/test_failure_robust_model.py
@@ -0,0 +1,132 @@
+from functools import partial
+
+import numpy as np
+import pytest
+
+import elfi
+from elfi.examples.gauss import euclidean_multidim, gauss_nd_mean
+
+
+def toy_sim(*mu, cov, batch_size=1, random_state=None):
+    out = gauss_nd_mean(*mu, cov_matrix=cov, batch_size=batch_size, random_state=random_state)
+    failed = np.sqrt(np.sum(np.power(mu, 2), axis=0)) < 2.5
+    out[failed] = np.nan
+    return out
+
+
+def get_model(true_params, seed):
+    cov = (0.1 * np.diag(np.ones((2,))) + 0.5 * np.ones((2, 2)))
+    sim = partial(toy_sim, cov=cov)
+
+    random_state = np.random.RandomState(seed)
+    obs = sim(*true_params, random_state=random_state)
+
+    m = elfi.ElfiModel()
+    mu_1 = elfi.Prior('uniform', 0, 5, model=m)
+    mu_2 = elfi.Prior('uniform', 0, 5, model=m)
+    y = elfi.Simulator(sim, mu_1, mu_2, observed=obs)
+    mean = elfi.Summary(partial(np.mean, axis=1), y)
+    d = elfi.Discrepancy(euclidean_multidim, mean)
+    return m
+
+
+@pytest.fixture(scope='module')
+def model_1():
+    seed = 123
+    true_params = [3, 3]
+    m = get_model(true_params, seed)
+    data = m.generate(20, seed=seed)
+    X = np.column_stack((data['mu_1'], data['mu_2']))
+    Y = data['d']
+    bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
+    target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds)
+    target_model.update(X, Y)
+    return target_model
+
+
+@pytest.fixture(scope='module')
+def model_2():
+    seed = 123
+    true_params = [3, 3]
+    m = get_model(true_params, seed)
+    data = m.generate(20, seed=seed)
+    X = np.column_stack((data['mu_1'], data['mu_2']))
+    Y = data['d']
+    bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
+    target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds, thd=0.5)
+    target_model.update(X, Y)
+    return target_model
+
+
+def test_update():
+    X = np.random.rand(1, 2)
+    Y = np.ones((1, 1))
+    target_model = elfi.RobustGPyRegression(['param_1', 'param_2'], thd=0.5)
+    target_model.update(X, Y)
+    assert np.all(target_model.X == X)
+    assert np.all(target_model.Y == Y)
+
+
+def test_cannot_update():
+    X = np.random.rand(1, 2)
+    Y = np.nan * np.ones((1, 1))
+    target_model = elfi.RobustGPyRegression(['param_1', 'param_2'], thd=0.5)
+    with pytest.raises(RuntimeError):
+        target_model.update(X, Y)
+
+
+@pytest.mark.parametrize('model', ['model_1', 'model_2'])
+def test_n_evidence(model, request):
+    target_model = request.getfixturevalue(model)
+    assert target_model.n_evidence == 20
+    assert target_model.n_valid_evidence < target_model.n_evidence
+
+
+@pytest.mark.parametrize('model', ['model_1', 'model_2'])
+def test_predict(model, request):
+    target_model = request.getfixturevalue(model)
+    mu1, _ = target_model.predict([3, 3])
+    mu2, _ = target_model.predict([4, 4])
+    assert mu1 < mu2
+
+
+def test_predict_infeasible(model_2):
+    target_model = model_2
+    pred = target_model.predict([0, 0])
+    assert pred[0] == target_model.FAILED_OUTPUT
+    assert pred[1] == target_model.FAILED_VAR
+
+
+def test_predict_gradients_infeasible(model_2):
+    target_model = model_2
+    grad = target_model.predictive_gradients([0, 0])
+    assert grad[0].shape == (1, 2)
+    assert grad[1].shape == (1, 2)
+    assert np.all(grad[0] == 0)
+    assert np.all(grad[1] == 0)
+
+
+def test_success_proba(model_2):
+    target_model = model_2
+    prob1 = target_model.success_proba([4, 4])
+    prob2 = target_model.success_proba([2, 2])
+    assert prob1 > prob2
+
+
+def test_success_proba_default(model_1):
+    target_model = model_1
+    prob = target_model.success_proba([3, 3])
+    assert float(prob) == 1
+
+
+def test_success_proba_gradients(model_2):
+    target_model = model_2
+    grad = target_model.success_proba_gradients([0, 0])
+    assert grad.shape == (1, 2)
+
+
+def test_success_proba_gradients_default(model_1):
+    target_model = model_1
+    grad = target_model.success_proba_gradients([0, 0])
+    assert grad.shape == (1, 2)
+    assert np.all(grad == 0)
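
The functional test above doubles as an end-to-end recipe; condensed, and assuming an ELFI model m with discrepancy node 'd' such as the one built by get_model:

    bounds = {'mu_1': (0, 5), 'mu_2': (0, 5)}
    target_model = elfi.RobustGPyRegression(m.parameter_names, bounds=bounds, thd=0.5)
    bolfi = elfi.BOLFI(m['d'], initial_evidence=20, target_model=target_model, seed=123)
    bolfi.fit(n_evidence=100)   # acquisition avoids the predicted failure region
    result = bolfi.sample(500)  # posterior samples for mu_1, mu_2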