# Source code for swiftemulator.emulators.linear_model

"""
Linear Model Emulator
"""

import attr
import copy
import numpy as np
import sklearn.linear_model as lm

from typing import Hashable, List, Optional, Dict

from swiftemulator.emulators.base import BaseEmulator

from swiftemulator.backend.model_parameters import ModelParameters
from swiftemulator.backend.model_specification import ModelSpecification
from swiftemulator.backend.model_values import ModelValues


@attr.s
class LinearModelEmulator(BaseEmulator):
    """
    Emulator that builds an internal linear model (either a basic linear
    model or a Lasso model), and fits it to the data provided in the
    model specification, parameters, and values containers.

    Parameters
    ----------

    lasso_model_alpha: float
        Alpha for the Lasso model. If this is 0.0 (the default) basic
        linear regression is used.
    """

    lasso_model_alpha: float = attr.ib(default=0.0)

    # Unique identifiers in the order their rows were written to the arrays.
    ordering: Optional[List[Hashable]] = None
    # Parameter names, in the column order used for the design matrix.
    parameter_order: Optional[List[str]] = None

    # Flattened training arrays, filled in by ``_build_arrays``.
    independent_variables: Optional[np.array] = None
    dependent_variables: Optional[np.array] = None
    dependent_variable_errors: Optional[np.array] = None

    # The containers that the arrays above were built from.
    model_specification: Optional[ModelSpecification] = None
    model_parameters: Optional[ModelParameters] = None
    model_values: Optional[ModelValues] = None

    # The fitted scikit-learn model; ``None`` until ``fit_model`` is called.
    emulator: Optional[lm.LinearRegression] = None

    def _build_arrays(
        self,
        model_specification: ModelSpecification,
        model_parameters: ModelParameters,
        model_values: ModelValues,
    ):
        """
        Builds the arrays for passing to the linear model.

        Every (model, independent value) pair becomes one row. Column 0 of
        ``independent_variables`` holds the independent value and the
        remaining columns hold that model's parameters, ordered as in
        ``model_specification.parameter_names``. The three containers and
        the resulting arrays are stored on the instance.

        Parameters
        ----------

        model_specification: ModelSpecification
            Provides ``number_of_parameters`` and ``parameter_names``.

        model_parameters: ModelParameters
            Provides ``model_parameters``, indexed by unique identifier and
            then by parameter name.

        model_values: ModelValues
            Provides ``model_values`` (indexed by unique identifier, each
            entry holding ``"independent"``, ``"dependent"`` and optionally
            ``"dependent_error"``) and ``number_of_variables``.

        Raises
        ------

        AttributeError
            If a model's ``"dependent_error"`` is not one dimensional.

        AssertionError
            If the number of rows written does not match
            ``model_values.number_of_variables``.
        """

        self.model_specification = model_specification
        self.model_parameters = model_parameters
        self.model_values = model_values

        number_of_independents = model_values.number_of_variables
        number_of_model_parameters = model_specification.number_of_parameters
        parameters_by_identifier = model_parameters.model_parameters

        independent_variables = np.empty(
            (number_of_independents, number_of_model_parameters + 1),
            dtype=np.float32,
        )
        dependent_variables = np.empty((number_of_independents), dtype=np.float32)
        dependent_variable_errors = np.empty(
            (number_of_independents), dtype=np.float32
        )

        self.parameter_order = model_specification.parameter_names
        self.ordering = []

        filled_lines = 0

        for unique_identifier, this_model in model_values.model_values.items():
            self.ordering.append(unique_identifier)

            # Unpack model parameters into an array
            model_parameter_array = np.array(
                [
                    parameters_by_identifier[unique_identifier][parameter]
                    for parameter in self.parameter_order
                ]
            )

            model_independent = this_model["independent"]
            model_dependent = this_model["dependent"]
            model_error = this_model.get(
                "dependent_error", np.zeros(len(model_independent))
            )

            if np.ndim(model_error) != 1:
                raise AttributeError(
                    "Multiple dimensional errors are not currently supported in LM mode"
                )

            # Write this model's rows in one go. The number of rows is set
            # by the independent variable; the parameter vector broadcasts
            # across every one of those rows.
            number_of_lines = len(model_independent)
            rows = slice(filled_lines, filled_lines + number_of_lines)

            independent_variables[rows, 0] = model_independent
            independent_variables[rows, 1:] = model_parameter_array
            dependent_variables[rows] = np.asarray(model_dependent)[:number_of_lines]
            dependent_variable_errors[rows] = np.asarray(model_error)[
                :number_of_lines
            ]

            filled_lines += number_of_lines

        # The arrays come from np.empty, so any unfilled row would hold
        # garbage. Raise explicitly (rather than ``assert``) so the check
        # still runs when Python is started with -O.
        if filled_lines != number_of_independents:
            raise AssertionError(
                f"Filled {filled_lines} rows but expected {number_of_independents}"
            )

        self.independent_variables = independent_variables
        self.dependent_variables = dependent_variables
        self.dependent_variable_errors = dependent_variable_errors

    def fit_model(
        self,
        model_specification: ModelSpecification,
        model_parameters: ModelParameters,
        model_values: ModelValues,
    ):
        """
        Fits the linear model, given the specification, parameters, and
        values of the space.

        The training arrays are only built if they are not already present
        on the instance; when they are present, the containers passed here
        are not read and the stored arrays are reused.

        Parameters
        ----------

        model_specification: ModelSpecification
            Full instance of the model specification.

        model_parameters: ModelParameters
            Full instance of the model parameters.

        model_values: ModelValues
            Full instance of the model values describing
            this individual scaling relation.
        """

        if self.independent_variables is None:
            # Creates independent_variables, dependent_variables.
            self._build_arrays(
                model_specification=model_specification,
                model_parameters=model_parameters,
                model_values=model_values,
            )

        if self.lasso_model_alpha == 0.0:
            # The ``normalize`` keyword was deprecated in scikit-learn 1.0
            # and removed in 1.2, so passing it raises TypeError on current
            # releases. With an intercept fitted, ordinary least squares
            # gives the same predictions with or without feature
            # normalisation (up to round-off), so it is simply dropped.
            linear_model = lm.LinearRegression(fit_intercept=True)
        else:
            linear_model = lm.Lasso(alpha=self.lasso_model_alpha)

        # Conform the model to the modelling protocol
        linear_model.fit(self.independent_variables, self.dependent_variables)

        self.emulator = linear_model

        return

    def predict_values(
        self, independent: np.array, model_parameters: Dict[str, float]
    ) -> np.array:
        """
        Predict values from the trained emulator contained within this object.

        Parameters
        ----------

        independent, np.array
            Independent continuous variables to evaluate the emulator
            at.

        model_parameters: Dict[str, float]
            The point in model parameter space to create predicted
            values at.

        Returns
        -------

        dependent_predictions, np.array
            Array of predictions, if the emulator is a function f, these
            are the predicted values of f(independent) evaluated at the
            position of the input model_parameters.

        dependent_prediction_errors, np.array
            Errors on the model predictions. For the linear model these are
            all zeroes, as the errors are unconstrained.

        Raises
        ------

        AttributeError
            If the emulator has not been trained with ``fit_model``.

        Notes
        -----

        The two arrays above are returned together as a tuple.
        """

        if self.emulator is None:
            raise AttributeError(
                "Please train the emulator with fit_model before attempting "
                "to make predictions."
            )

        model_parameter_array = np.array(
            [model_parameters[parameter] for parameter in self.parameter_order]
        )

        # Same layout as the training data: independent value in column 0,
        # the (constant) model parameters in the remaining columns.
        t = np.empty(
            (len(independent), len(model_parameter_array) + 1), dtype=np.float32
        )
        t[:, 0] = independent
        t[:, 1:] = model_parameter_array

        model = self.emulator.predict(X=t)

        return model, np.zeros_like(model)