Source code for swiftemulator.emulators.gaussian_process_one_dim

"""
Gaussian Process Emulator for emulating single values
"""

import attr
import copy
import numpy as np
import george

from typing import Hashable, List, Optional, Dict

from swiftemulator.emulators.base import BaseEmulator

from swiftemulator.backend.model_parameters import ModelParameters
from swiftemulator.backend.model_specification import ModelSpecification
from swiftemulator.backend.model_values import ModelValues
from swiftemulator.mean_models import MeanModel

from scipy.optimize import minimize


[docs]@attr.s class GaussianProcessEmulator1D(BaseEmulator): """ Generator for emulators for individual values. In this case no independent values are used. The prediction is based on the model parameters and model values only. Parameters ---------- kernel, george.kernels.Kernel, optional The ``george`` kernel to use. The GPE here uses a copy of this instance. By default, this is the ``ExpSquaredKernel`` in George mean_model, MeanModel, optional A mean model conforming to the ``swiftemulator`` mean model protocol (several pre-made models are available in the :mod:`swiftemulator.mean_models` module). """ kernel: Optional[george.kernels.Kernel] = attr.ib(default=None) mean_model: Optional[MeanModel] = attr.ib(default=None) model_specification: Optional[ModelSpecification] = None model_parameters: Optional[ModelParameters] = None model_values: Optional[ModelValues] = None ordering: Optional[List[Hashable]] = None parameter_order: Optional[List[str]] = None independent_variables: Optional[np.array] = None dependent_variables: Optional[np.array] = None dependent_variable_errors: Optional[np.array] = None emulator: Optional[george.GP] = None def _build_arrays( self, model_specification: ModelSpecification, model_parameters: ModelParameters, model_values: ModelValues, ): """ Builds the arrays for passing to `george`. Parameters ---------- model_specification: ModelSpecification Full instance of the model specification. model_parameters: ModelParameters Full instance of the model parameters. model_values: ModelValues Full instance of the model values describing this individual scaling relation. """ self.model_specification = model_specification self.model_parameters = model_parameters self.model_values = model_values unique_identifiers = model_values.model_values.keys() number_of_independents = model_values.number_of_variables number_of_model_parameters = model_specification.number_of_parameters model_parameters = model_parameters.model_parameters independent_variables = np.empty( (number_of_independents, number_of_model_parameters), dtype=np.float32 ) dependent_variables = np.empty((number_of_independents), dtype=np.float32) dependent_variable_errors = np.empty((number_of_independents), dtype=np.float32) self.parameter_order = model_specification.parameter_names self.ordering = [] filled_lines = 0 for unique_identifier in unique_identifiers: self.ordering.append(unique_identifier) # Unpack model parameters into an array model_parameter_array = np.array( [ model_parameters[unique_identifier][parameter] for parameter in self.parameter_order ] ) this_model = model_values.model_values[unique_identifier] model_dependent = this_model["dependent"] model_error = this_model.get( "dependent_error", np.zeros(len(model_dependent)) ) if np.ndim(model_error) != 1: raise AttributeError( "Multiple dimensional errors are not currently supported in GPE mode" ) for line in range(len(model_dependent)): independent_variables[filled_lines] = model_parameter_array dependent_variables[filled_lines] = model_dependent[line] dependent_variable_errors[filled_lines] = model_error[line] filled_lines += 1 assert filled_lines == number_of_independents self.independent_variables = independent_variables self.dependent_variables = dependent_variables self.dependent_variable_errors = dependent_variable_errors
[docs] def fit_model( self, model_specification: ModelSpecification, model_parameters: ModelParameters, model_values: ModelValues, ): """ Fits the gaussian process model, as determined by the initialiser variables of the class (i.e. the kernel and the mean model). Parameters ---------- model_specification: ModelSpecification Full instance of the model specification. model_parameters: ModelParameters Full instance of the model parameters. model_values: ModelValues Full instance of the model values describing this individual scaling relation. Notes ----- This method uses copies of the internal kernel and mean model objects, as those objects contain slightly unhelpful state information. """ if self.independent_variables is None: # Creates independent_variables, dependent_variables. self._build_arrays( model_specification=model_specification, model_parameters=model_parameters, model_values=model_values, ) if self.kernel is None: number_of_kernel_dimensions = model_specification.number_of_parameters self.kernel = 1**2 * george.kernels.ExpSquaredKernel( np.ones(number_of_kernel_dimensions), ndim=number_of_kernel_dimensions ) if self.mean_model is not None: self.mean_model.train( independent=self.independent_variables, dependent=self.dependent_variables, ) gaussian_process = george.GP( copy.deepcopy(self.kernel), fit_kernel=True, mean=self.mean_model.george_model, fit_mean=False, ) else: gaussian_process = george.GP( copy.deepcopy(self.kernel), ) # TODO: Figure out how to include non-symmetric errors. gaussian_process.compute( x=self.independent_variables, yerr=self.dependent_variable_errors, ) def negative_log_likelihood(p): gaussian_process.set_parameter_vector(p) return -gaussian_process.log_likelihood(self.dependent_variables) def grad_negative_log_likelihood(p): gaussian_process.set_parameter_vector(p) return -gaussian_process.grad_log_likelihood(self.dependent_variables) # Optimize the hyperparameter values in the emulator result = minimize( fun=negative_log_likelihood, x0=gaussian_process.get_parameter_vector(), jac=grad_negative_log_likelihood, ) # Load in the optimal hyperparameters gaussian_process.set_parameter_vector(result.x) self.emulator = gaussian_process return
[docs] def predict_values(self, model_parameters: Dict[str, float]) -> np.array: """ Predict a value from the trained emulator contained within this object. returns the value at the input model parameters. Parameters ---------- model_parameters: Dict[str, float] The point in model parameter space to create predicted values at. Returns ------- dependent_prediction, float Value of predictions, if the emulator is a function f, this is the predicted value of f(independent) evaluted at the position of the input model_parameters. dependent_prediction_error, float Error on the model prediction. """ if self.emulator is None: raise AttributeError( "Please train the emulator with fit_model before attempting " "to make predictions." ) model_parameter_array = np.array( [model_parameters[parameter] for parameter in self.parameter_order] ) # Create a fake duplicate as george always needs two points to predict t = np.empty((2, len(model_parameter_array)), dtype=np.float32) t[0] = model_parameter_array t[1] = model_parameter_array model, errors = self.emulator.predict( y=self.dependent_variables, t=t, return_cov=False, return_var=True ) return model[0], errors[0]