"""
Gaussian Process Emulator for emulating single values
"""
import attr
import copy
import numpy as np
import george
from typing import Hashable, List, Optional, Dict
from swiftemulator.emulators.base import BaseEmulator
from swiftemulator.backend.model_parameters import ModelParameters
from swiftemulator.backend.model_specification import ModelSpecification
from swiftemulator.backend.model_values import ModelValues
from swiftemulator.mean_models import MeanModel
from scipy.optimize import minimize
[docs]@attr.s
class GaussianProcessEmulator1D(BaseEmulator):
"""
Generator for emulators for individual values. In this case
no independent values are used. The prediction is based on
the model parameters and model values only.
Parameters
----------
kernel, george.kernels.Kernel, optional
The ``george`` kernel to use. The GPE here uses a copy
of this instance. By default, this is the
``ExpSquaredKernel`` in George
mean_model, MeanModel, optional
A mean model conforming to the ``swiftemulator`` mean model
protocol (several pre-made models are available in the
:mod:`swiftemulator.mean_models` module).
"""
kernel: Optional[george.kernels.Kernel] = attr.ib(default=None)
mean_model: Optional[MeanModel] = attr.ib(default=None)
model_specification: Optional[ModelSpecification] = None
model_parameters: Optional[ModelParameters] = None
model_values: Optional[ModelValues] = None
ordering: Optional[List[Hashable]] = None
parameter_order: Optional[List[str]] = None
independent_variables: Optional[np.array] = None
dependent_variables: Optional[np.array] = None
dependent_variable_errors: Optional[np.array] = None
emulator: Optional[george.GP] = None
def _build_arrays(
self,
model_specification: ModelSpecification,
model_parameters: ModelParameters,
model_values: ModelValues,
):
"""
Builds the arrays for passing to `george`.
Parameters
----------
model_specification: ModelSpecification
Full instance of the model specification.
model_parameters: ModelParameters
Full instance of the model parameters.
model_values: ModelValues
Full instance of the model values describing
this individual scaling relation.
"""
self.model_specification = model_specification
self.model_parameters = model_parameters
self.model_values = model_values
unique_identifiers = model_values.model_values.keys()
number_of_independents = model_values.number_of_variables
number_of_model_parameters = model_specification.number_of_parameters
model_parameters = model_parameters.model_parameters
independent_variables = np.empty(
(number_of_independents, number_of_model_parameters), dtype=np.float32
)
dependent_variables = np.empty((number_of_independents), dtype=np.float32)
dependent_variable_errors = np.empty((number_of_independents), dtype=np.float32)
self.parameter_order = model_specification.parameter_names
self.ordering = []
filled_lines = 0
for unique_identifier in unique_identifiers:
self.ordering.append(unique_identifier)
# Unpack model parameters into an array
model_parameter_array = np.array(
[
model_parameters[unique_identifier][parameter]
for parameter in self.parameter_order
]
)
this_model = model_values.model_values[unique_identifier]
model_dependent = this_model["dependent"]
model_error = this_model.get(
"dependent_error", np.zeros(len(model_dependent))
)
if np.ndim(model_error) != 1:
raise AttributeError(
"Multiple dimensional errors are not currently supported in GPE mode"
)
for line in range(len(model_dependent)):
independent_variables[filled_lines] = model_parameter_array
dependent_variables[filled_lines] = model_dependent[line]
dependent_variable_errors[filled_lines] = model_error[line]
filled_lines += 1
assert filled_lines == number_of_independents
self.independent_variables = independent_variables
self.dependent_variables = dependent_variables
self.dependent_variable_errors = dependent_variable_errors
[docs] def fit_model(
self,
model_specification: ModelSpecification,
model_parameters: ModelParameters,
model_values: ModelValues,
):
"""
Fits the gaussian process model, as determined by the
initialiser variables of the class (i.e. the kernel and
the mean model).
Parameters
----------
model_specification: ModelSpecification
Full instance of the model specification.
model_parameters: ModelParameters
Full instance of the model parameters.
model_values: ModelValues
Full instance of the model values describing
this individual scaling relation.
Notes
-----
This method uses copies of the internal kernel and mean model
objects, as those objects contain slightly unhelpful state information.
"""
if self.independent_variables is None:
# Creates independent_variables, dependent_variables.
self._build_arrays(
model_specification=model_specification,
model_parameters=model_parameters,
model_values=model_values,
)
if self.kernel is None:
number_of_kernel_dimensions = model_specification.number_of_parameters
self.kernel = 1**2 * george.kernels.ExpSquaredKernel(
np.ones(number_of_kernel_dimensions), ndim=number_of_kernel_dimensions
)
if self.mean_model is not None:
self.mean_model.train(
independent=self.independent_variables,
dependent=self.dependent_variables,
)
gaussian_process = george.GP(
copy.deepcopy(self.kernel),
fit_kernel=True,
mean=self.mean_model.george_model,
fit_mean=False,
)
else:
gaussian_process = george.GP(
copy.deepcopy(self.kernel),
)
# TODO: Figure out how to include non-symmetric errors.
gaussian_process.compute(
x=self.independent_variables,
yerr=self.dependent_variable_errors,
)
def negative_log_likelihood(p):
gaussian_process.set_parameter_vector(p)
return -gaussian_process.log_likelihood(self.dependent_variables)
def grad_negative_log_likelihood(p):
gaussian_process.set_parameter_vector(p)
return -gaussian_process.grad_log_likelihood(self.dependent_variables)
# Optimize the hyperparameter values in the emulator
result = minimize(
fun=negative_log_likelihood,
x0=gaussian_process.get_parameter_vector(),
jac=grad_negative_log_likelihood,
)
# Load in the optimal hyperparameters
gaussian_process.set_parameter_vector(result.x)
self.emulator = gaussian_process
return
[docs] def predict_values(self, model_parameters: Dict[str, float]) -> np.array:
"""
Predict a value from the trained emulator contained within this object.
returns the value at the input model parameters.
Parameters
----------
model_parameters: Dict[str, float]
The point in model parameter space to create predicted
values at.
Returns
-------
dependent_prediction, float
Value of predictions, if the emulator is a function f, this
is the predicted value of f(independent) evaluted at the position
of the input model_parameters.
dependent_prediction_error, float
Error on the model prediction.
"""
if self.emulator is None:
raise AttributeError(
"Please train the emulator with fit_model before attempting "
"to make predictions."
)
model_parameter_array = np.array(
[model_parameters[parameter] for parameter in self.parameter_order]
)
# Create a fake duplicate as george always needs two points to predict
t = np.empty((2, len(model_parameter_array)), dtype=np.float32)
t[0] = model_parameter_array
t[1] = model_parameter_array
model, errors = self.emulator.predict(
y=self.dependent_variables, t=t, return_cov=False, return_var=True
)
return model[0], errors[0]