Source code for swiftemulator.sensitivity.cross_check

"""
Basic test for any emulator. Leave one simulation out
and test how well the emulator fits those values.
"""

import attr
import numpy as np
import matplotlib.pyplot as plt
import george
import copy

from tqdm import tqdm
from pathlib import Path

from swiftemulator.backend.model_parameters import ModelParameters
from swiftemulator.backend.model_values import ModelValues
from swiftemulator.backend.model_specification import ModelSpecification

from swiftemulator.emulators.gaussian_process import GaussianProcessEmulator
from swiftemulator.emulators.gaussian_process_mcmc import GaussianProcessEmulatorMCMC
from swiftemulator.emulators.linear_model import LinearModelEmulator

from swiftemulator.mean_models import MeanModel


from typing import List, Dict, Tuple, Union, Optional, Hashable


[docs]@attr.s class CrossCheck(object): """ Generator for emulators for leave one out checks. Parameters ---------- kernel, george.kernels The ``george`` kernel to use. The GPE here uses a copy of this instance. By default, this is the ``ExpSquaredKernel`` in George mean_model, MeanModel, optional A mean model conforming to the ``swiftemulator`` mean model protocol (several pre-made models are available in the :mod:`swiftemulator.mean_models` module). hide_progress: bool Option to display a tqdm bar when creating the emulators, Default is to hide progress bar. """ kernel: Optional[george.kernels.Kernel] = attr.ib(default=None) mean_model: Optional[MeanModel] = attr.ib(default=None) hide_progress: bool = attr.ib(default=True) model_specification: ModelSpecification model_parameters: ModelParameters model_values: ModelValues leave_out_order: Optional[List[int]] = None cross_emulators: Optional[Dict[Hashable, george.GP]] = None
[docs] def build_emulators( self, model_specification: ModelSpecification, model_parameters: ModelParameters, model_values: ModelValues, ): """ Build a dictonary with an emulator for each simulation where the data of that simulation is left out Note: this can take a long time Parameters ---------- model_specification: ModelSpecification Full instance of the model specification. model_parameters: ModelParameters Full instance of the model parameters. model_values: ModelValues Full instance of the model values describing this individual scaling relation. """ self.model_specification = model_specification self.model_parameters = model_parameters self.model_values = model_values self.leave_out_order = list(model_values.model_values.keys()) emulators = {} for unique_identifier in tqdm(self.leave_out_order, disable=self.hide_progress): left_out_data = model_values.model_values.pop(unique_identifier) emulator = GaussianProcessEmulator( kernel=self.kernel, mean_model=self.mean_model, ) emulator.fit_model( model_specification=model_specification, model_parameters=model_parameters, model_values=model_values, ) emulators[unique_identifier] = emulator model_values.model_values[unique_identifier] = left_out_data self.cross_emulators = emulators return
[docs] def build_mocked_model_values_original_independent(self) -> ModelValues: """ " Builds a mocked :class:`ModelValues` container, using the cross emulators. The emulators are evaluated at the same independent variables that were 'left out'. Returns ------- model_values: ModelValues The model values container with each leave-one-out scaling relation predicted. This is also set as ``cross_model_values``. """ if self.cross_emulators is None: raise AttributeError( "You need to build the emulators before the prediction step." ) cross_model_values = {} for unique_identifier in self.model_values.keys(): independent = self.model_values[unique_identifier]["independent"] emulated, emulated_error = self.cross_emulators[ unique_identifier ].predict_values( independent, model_parameters=self.model_parameters[unique_identifier], ) cross_model_values[unique_identifier] = { "independent": independent, "dependent": emulated, "dependent_error": np.sqrt(emulated_error), } cross_model_values = ModelValues(cross_model_values) return cross_model_values
[docs] def build_mocked_model_values(self, emulate_at: np.array) -> ModelValues: """ Builds a mocked :class:`ModelValues` container, using the cross emulators. Similar to ``build_mocked_model_value_original_independent`` but evaluates all emulators at a consistent set of independent variables. Parameters ---------- emulate_at: np.array independent array where the emulator is evaluated. Returns ------- model_values: ModelValues The model values container with each leave-one-out scaling relation predicted. This is also set as ``cross_model_values``. """ if self.cross_emulators is None: raise AttributeError( "You need to build the emulators before the prediction step." ) cross_model_values = {} for unique_identifier in self.model_values.keys(): emulated, emulated_error = self.cross_emulators[ unique_identifier ].predict_values( emulate_at, model_parameters=self.model_parameters[unique_identifier], ) cross_model_values[unique_identifier] = { "independent": emulate_at, "dependent": emulated, "dependent_error": np.sqrt(emulated_error), } cross_model_values = ModelValues(cross_model_values) return cross_model_values
[docs] def plot_results( self, emulate_at: np.array, output_path: Optional[Union[str, Path]] = None, xlabel: Optional[str] = None, ylabel: Optional[str] = None, ): """ Make a plot of each of the leave_out emulators vs the original data. Parameters ---------- emulate_at: np.array independent array where the emulator is evaluated. output_path: Union[str, Path], optional Optional, name of the folder where you want to save the figures. xlabel: str, optional Label for horizontal axis on the resultant figure. ylabel: str, optional Label for vertical axis on the resultant figure. """ cross_model_values = self.build_mocked_model_values(emulate_at=emulate_at) for unique_identifier in self.cross_emulators.keys(): fig, ax = plt.subplots() emulated = cross_model_values[unique_identifier]["dependent"] emulated_error = cross_model_values[unique_identifier]["dependent_error"] ax.fill_between( emulate_at, emulated - emulated_error, emulated + emulated_error, color="C1", alpha=0.3, linewidth=0.0, ) ax.errorbar( self.model_values.model_values[unique_identifier]["independent"], self.model_values.model_values[unique_identifier]["dependent"], yerr=self.model_values.model_values[unique_identifier][ "dependent_error" ], label="True", marker=".", linestyle="none", color="C0", ) ax.plot(emulate_at, emulated, label="Emulated", color="C1") ax.set_xlabel("Independent Variable" if xlabel is None else xlabel) ax.set_ylabel("Dependent Variable" if ylabel is None else ylabel) ax.legend() ax.set_title(f"Leave Out Run {unique_identifier}") if output_path is None: plt.show() else: fig.savefig(Path(output_path) / f"leave_out_{unique_identifier}.png")
[docs] def get_mean_squared( self, use_dependent_error: bool = False, use_y_as_error: bool = False, use_squared_difference: bool = True, ): """ Calculates the mean squared per simulation and the total mean squared of the entire set of left-out simulations. Parameters ---------- use_dependent_error: boolean Use the simulation errors as weights for the mean squared calulation. Default is false. use_y_as_error: boolean Use the model y values as the weights for the calculation. use_squared_difference: boolean Use the simulation errors as weights for the mean squared calulation. Default is false. Returns ------- total_square_mean: float Mean (square) error across the bins. mean_squared_dict: Dict[Hashable, float] Error per unique identifier. """ mean_squared_dict = {} total_mean_squared = [] for unique_identifier in self.cross_emulators.keys(): x_model = self.model_values.model_values[unique_identifier]["independent"] y_model = self.model_values.model_values[unique_identifier]["dependent"] emulated, _ = self.cross_emulators[unique_identifier].predict_values( independent=x_model, model_parameters=self.model_parameters.model_parameters[ unique_identifier ], ) if use_y_as_error: y_model_error = y_model else: y_model_error = self.model_values.model_values[unique_identifier][ "dependent_error" ] if use_dependent_error: uniq_mean_squared = (y_model - emulated) / y_model_error else: uniq_mean_squared = y_model - emulated if use_squared_difference: uniq_mean_squared = uniq_mean_squared**2 mean_squared_dict[unique_identifier] = uniq_mean_squared total_mean_squared.extend(uniq_mean_squared) return np.mean(total_mean_squared), mean_squared_dict