
# -*- Mode: python3; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
#
# Copyright (c) 2023 Authors and contributors
# (see the AUTHORS.rst file for the full list of names)
#
# Released under the BSD 3-Clause "New" or "Revised" License
# SPDX-License-Identifier: BSD-3-Clause

from typing import Optional, Union

import metatensor
import numpy as np
import scipy.linalg
from metatensor import Labels, TensorBlock, TensorMap

from ...module import NumpyModule, _Estimator
from ...utils.metrics import rmse
from ..utils import array_from_block


class Ridge(_Estimator):
    r"""Linear least squares with l2 regularization for
    :class:`metatensor.TensorMap`'s.

    Weights :math:`w` are calculated according to

    .. math::

        w = X^T \left( X \cdot X^T + α I \right)^{-1} \cdot y \,,

    where :math:`X` is the training data, :math:`y` the target data and
    :math:`α` is the regularization strength.

    Ridge will regress a model for each block in X. If a block contains
    components, the component values will be stacked along the sample
    dimension for the fit. Therefore, the corresponding weights will be the
    same for each component.
    """

    def __init__(self) -> None:
        self._weights = None

    def _validate_data(self, X: TensorMap, y: Optional[TensorMap] = None) -> None:
        """Validate :class:`metatensor.TensorMap`'s for the usage in models.

        :param X: training data to check
        :param y: target data to check
        """
        if y is not None and not metatensor.equal_metadata(
            X, y, check=["samples", "components"]
        ):
            raise ValueError(
                "Metadata (samples, components) of X and y does not agree!"
            )

    def _validate_params(
        self,
        X: TensorBlock,
        alpha: TensorBlock,
        sample_weight: Optional[TensorBlock] = None,
    ) -> None:
        """Check that the regularizer and sample weights are consistent with ``X``.

        :param X: training data for reference
        :param alpha: regularization strengths to check
        :param sample_weight: sample weights to check
        """
        if not metatensor.equal_metadata(X, alpha, check=["components", "properties"]):
            raise ValueError(
                "Metadata (components, properties) of X and alpha does not agree!"
            )

        if sample_weight is not None and not metatensor.equal_metadata(
            X, sample_weight, check=["samples", "components"]
        ):
            raise ValueError(
                "Metadata (samples, components) of X and sample_weight does not agree!"
            )

        for key, alpha_block in alpha.items():
            if len(alpha_block.samples) != 1:
                raise ValueError(
                    "Only one sample is allowed for regularization. Given "
                    f"`alpha` contains {len(alpha_block.samples)} samples."
                )

            if sample_weight is not None:
                sw_block = sample_weight.block(key)
                if len(sw_block.properties) != 1:
                    raise ValueError(
                        "Only one property is allowed for sample weights. Given "
                        f"`sample_weight` contains {len(sw_block.properties)} "
                        "properties."
                    )

    def _solve(
        self,
        X: TensorBlock,
        y: TensorBlock,
        alpha: TensorBlock,
        sample_weight: TensorBlock,
        cond: float,
    ) -> TensorBlock:
        """A regularized solver based on :mod:`scipy.linalg` routines."""
        self._used_auto_solver = None

        # Convert TensorBlocks into arrays for processing them with NumPy.
        # X_arr has shape (n_samples, n_properties)
        X_arr = array_from_block(X)
        # y_arr has length n_samples
        y_arr = array_from_block(y)
        # sw_arr has shape (n_samples, 1)
        sw_arr = array_from_block(sample_weight)
        # alpha_arr has shape (1, n_properties)
        alpha_arr = array_from_block(alpha)

        # Flatten into 1d arrays
        y_arr = y_arr.ravel()
        sw_arr = sw_arr.ravel()
        alpha_arr = alpha_arr.ravel()

        num_properties = X_arr.shape[1]

        sw_arr = sw_arr.reshape(-1, 1)
        sqrt_sw_arr = np.sqrt(sw_arr)

        if self._solver == "auto":
            n_samples, n_features = X_arr.shape

            if n_features > n_samples:
                # solves the linear system (K * sw + alpha * Id) @ dual_w = y * sw
                dual_alpha_arr = X_arr @ alpha_arr
                X_eff = sqrt_sw_arr * X_arr
                y_eff = y_arr * sw_arr.flatten()
                K = X_eff @ X_eff.T + np.diag(dual_alpha_arr)
                try:
                    # change to cholesky (issue #36)
                    dual_w = scipy.linalg.solve(K, y_eff, assume_a="pos").ravel()
                    self._used_auto_solver = "cholesky_dual"
                except np.linalg.LinAlgError:
                    # scipy.linalg.pinv default rcond value
                    # https://github.com/scipy/scipy/blob/c1ed5ece8ffbf05356a22a8106affcd11bd3aee0/scipy/linalg/_basic.py#L1341
                    rcond = max(X_arr.shape) * np.finfo(X_arr.dtype.char.lower()).eps
                    dual_w = scipy.linalg.lstsq(
                        K, y_eff, cond=rcond, overwrite_a=True
                    )[0].ravel()
                    self._used_auto_solver = "lstsq_dual"
                w = X_arr.T @ dual_w
            else:
                XtX = X_arr.T @ (sw_arr * X_arr) + np.diag(alpha_arr)
                Xt_y = X_arr.T @ (sw_arr.flatten() * y_arr)
                try:
                    # change to cholesky (issue #36)
                    # solves the linear system
                    # (X.T @ X * sw + alpha * Id) @ w = X.T @ sw * y
                    w = scipy.linalg.solve(XtX, Xt_y, assume_a="pos")
                    self._used_auto_solver = "cholesky"
                except np.linalg.LinAlgError:
                    s, U = scipy.linalg.eigh(XtX)
                    # scipy.linalg.pinv default rcond value
                    # https://github.com/scipy/scipy/blob/c1ed5ece8ffbf05356a22a8106affcd11bd3aee0/scipy/linalg/_basic.py#L1341
                    rcond = max(X_arr.shape) * np.finfo(X_arr.dtype.char.lower()).eps
                    eigval_cutoff = np.sqrt(s[-1]) * rcond
                    n_dim = sum(s > eigval_cutoff)
                    w = (U[:, -n_dim:] / s[-n_dim:]) @ U[:, -n_dim:].T @ Xt_y
                    w = w.ravel()
                    self._used_auto_solver = "svd_primal"
        elif self._solver == "cholesky":
            # solves the linear system (X.T @ X * sw + alpha * Id) @ w = X.T @ sw * y
            X_eff = np.vstack([sqrt_sw_arr * X_arr, np.diag(np.sqrt(alpha_arr))])
            y_eff = np.hstack(
                [y_arr * sqrt_sw_arr.flatten(), np.zeros(num_properties)]
            )
            XXt = X_eff.T @ X_eff
            Xt_y = X_eff.T @ y_eff
            # change to cholesky (issue #36)
            w = scipy.linalg.solve(XXt, Xt_y, assume_a="pos")
        elif self._solver == "cholesky_dual":
            # solves the linear system (K * sw + alpha * Id) @ dual_w = y * sw
            dual_alpha_arr = X_arr @ alpha_arr
            X_eff = sqrt_sw_arr * X_arr
            y_eff = y_arr * sw_arr.flatten()
            K = X_eff @ X_eff.T + np.diag(dual_alpha_arr)
            # change to cholesky (issue #36)
            dual_w = scipy.linalg.solve(K, y_eff, assume_a="pos").ravel()
            w = X_arr.T @ dual_w
        elif self._solver == "lstsq":
            # We solve a system of the form A x = b, where A is
            # [X * sqrt(sw), Id * sqrt(alpha)] and b is [y * sqrt(sw), 0]
            X_eff = np.vstack([sqrt_sw_arr * X_arr, np.diag(np.sqrt(alpha_arr))])
            y_eff = np.hstack(
                [y_arr * sqrt_sw_arr.flatten(), np.zeros(num_properties)]
            )
            w = scipy.linalg.lstsq(X_eff, y_eff, cond=cond, overwrite_a=True)[0].ravel()
        else:
            raise ValueError(
                f"Unknown solver '{self._solver}'; only 'auto', 'cholesky', "
                "'cholesky_dual' and 'lstsq' are supported."
            )

        # Reshape values into 1 sample + component shape + num_properties.
        # The weights will be the same for each component.
        shape = [1] + [len(c) for c in X.components] + [num_properties]
        values = np.tile(w, np.prod(shape[:-1])).reshape(shape)

        weights_block = TensorBlock(
            values=values,
            samples=y.properties,
            components=X.components,
            properties=X.properties,
        )

        return weights_block
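    # Illustrative sketch, not part of the original module: for a scalar
    # regularizer the primal and dual solutions used above coincide, which is
    # why the "auto" path may pick either branch depending on the shape of X.
    # In plain NumPy:
    #
    #     import numpy as np
    #
    #     rng = np.random.default_rng(0)
    #     X = rng.normal(size=(5, 8))  # n_features > n_samples: dual is cheaper
    #     y = rng.normal(size=5)
    #     alpha = 1e-2
    #
    #     # primal: solve (X.T @ X + alpha * Id) @ w = X.T @ y
    #     w_primal = np.linalg.solve(X.T @ X + alpha * np.eye(8), X.T @ y)
    #     # dual: w = X.T @ (X @ X.T + alpha * Id)^-1 @ y
    #     w_dual = X.T @ np.linalg.solve(X @ X.T + alpha * np.eye(5), y)
    #
    #     np.testing.assert_allclose(w_primal, w_dual)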
    def fit(
        self,
        X: TensorMap,
        y: TensorMap,
        alpha: Union[float, TensorMap] = 1.0,
        sample_weight: Optional[Union[float, TensorMap]] = None,
        solver: str = "auto",
        cond: Optional[float] = None,
    ) -> "Ridge":
        """Fit a regression model to each block in ``X``.

        Ridge takes all available values and gradients in the provided
        TensorMap for the fit. Gradients can be excluded from the fit if
        removed from the TensorMap. See :py:func:`metatensor.remove_gradients`
        for details.

        :param X: training data
        :param y: target values
        :param alpha: Constant α that multiplies the L2 term, controlling
            regularization strength. Values must be non-negative floats, i.e.
            in [0, inf). α can be different for each column in ``X`` to
            regularize each property differently.
        :param sample_weight: Individual weights for each sample. For ``None``
            or a float, every sample will have the same weight of 1 or the
            float, respectively.
        :param solver: Solver to use in the computational routines:

            - **"auto"**: If n_features > n_samples in X, it solves the dual
              problem using cholesky_dual. If this fails, it switches to
              :func:`scipy.linalg.lstsq` to solve the dual problem. If
              n_features <= n_samples in X, it solves the primal problem using
              cholesky. If this fails, it switches to :func:`scipy.linalg.eigh`
              on (X.T @ X) and cuts off eigenvalues below machine precision
              times the maximal shape of X.
            - **"cholesky"**: using :func:`scipy.linalg.solve` on
              (X.T @ X) w = X.T @ y
            - **"cholesky_dual"**: using :func:`scipy.linalg.solve` on the dual
              problem (X @ X.T) w_dual = y; the primal weights are obtained by
              w = X.T @ w_dual
            - **"lstsq"**: using :func:`scipy.linalg.lstsq` on the linear
              system X w = y
        :param cond: Cut-off ratio for small singular values during the fit.
            For the purposes of rank determination, singular values are treated
            as zero if they are smaller than ``cond`` times the largest
            singular value of the weights matrix.
        """
        self._solver = solver

        if type(alpha) is float:
            alpha_tensor = metatensor.ones_like(X)

            samples = Labels(
                names=X.samples_names,
                values=np.zeros([1, len(X.samples_names)], dtype=int),
            )

            alpha_tensor = metatensor.slice(
                alpha_tensor, axis="samples", labels=samples
            )
            alpha = metatensor.multiply(alpha_tensor, alpha)
        elif type(alpha) is not TensorMap:
            raise ValueError("alpha must either be a float or a TensorMap")

        if sample_weight is None:
            sample_weight = metatensor.ones_like(y)
        elif type(sample_weight) is float:
            sample_weight = metatensor.multiply(metatensor.ones_like(y), sample_weight)
        elif type(sample_weight) is not TensorMap:
            raise ValueError("sample_weight must either be a float or a TensorMap.")

        self._validate_data(X, y)
        self._validate_params(X, alpha, sample_weight)

        # Remove all gradients here. This is a workaround until we can exclude
        # gradients from the metadata check (see metatensor issue #285 for details).
        alpha = metatensor.remove_gradients(alpha)

        weights_blocks = []
        for key, X_block in X.items():
            y_block = y.block(key)
            alpha_block = alpha.block(key)
            sw_block = sample_weight.block(key)

            weight_block = self._solve(X_block, y_block, alpha_block, sw_block, cond)

            weights_blocks.append(weight_block)

        self._weights = TensorMap(X.keys, weights_blocks)

        return self
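    # Usage sketch for ``fit`` (illustrative, not original code); ``X`` and
    # ``y`` are assumed to be :class:`metatensor.TensorMap` objects with
    # matching samples and components:
    #
    #     clf = Ridge()
    #     clf.fit(X, y, alpha=1e-3)                  # one alpha for all properties
    #     clf.fit(X, y, alpha=1e-3, solver="lstsq")  # force a specific solver
    #
    # Passing ``alpha`` as a TensorMap with a single sample and the same
    # properties as ``X`` regularizes each property individually.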
    @property
    def weights(self) -> TensorMap:
        """``TensorMap`` containing the weights of the provided training data."""
        if self._weights is None:
            raise ValueError("No weights. Call the fit method first.")
        return self._weights
    def predict(self, X: TensorMap) -> TensorMap:
        """Predict using the linear model.

        :param X: samples
        :returns: predicted values
        """
        return metatensor.dot(X, self.weights)
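    # Illustrative note, not original code: ``predict`` is a plain tensor
    # product with the fitted weights, so for a fitted model ``clf`` the
    # following two lines are equivalent:
    #
    #     y_pred = clf.predict(X)
    #     y_pred = metatensor.dot(X, clf.weights)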
    def forward(self, X: TensorMap) -> TensorMap:
        return self.predict(X)
    def score(self, X: TensorMap, y: TensorMap, parameter_key: str) -> float:
        r"""Return the root mean squared error (RMSE) of the prediction.

        :param X: test samples
        :param y: true values for ``X``
        :param parameter_key: Parameter to score for. Examples are ``"values"``,
            ``"positions"`` or ``"cell"``.

        :returns score: :math:`\mathrm{RMSE}` for each block in
            ``self.predict(X)`` with respect to ``y``.
        """
        y_pred = self.predict(X)
        return rmse(y, y_pred, parameter_key)
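    # Scoring sketch (illustrative, not original code): ``score`` reports an
    # RMSE, so lower is better, unlike scikit-learn's R²-based ``score``:
    #
    #     error_values = clf.score(X, y, parameter_key="values")
    #     error_forces = clf.score(X, y, parameter_key="positions")  # gradients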
    def export_torchscript(self):
        from ... import HAS_METATENSOR_TORCH

        if not HAS_METATENSOR_TORCH:
            raise ImportError(
                "To export your model to TorchScript, torch needs to be installed. "
                "Please install torch, then reimport equisolve or "
                "use equisolve.refresh_global_flags()."
            )
        from ...nn import Linear

        # TODO should be metatensor.torch
        weights = metatensor.to(self.weights, backend="torch")
        return Linear.from_weights(weights)
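
# Minimal end-to-end sketch (an illustrative addition, not part of the
# original module). It builds a single-block TensorMap pair by hand; the
# label names ("sample", "property", "energy") and the helper function are
# assumptions for demonstration only:
#
#     import numpy as np
#     from metatensor import Labels, TensorBlock, TensorMap
#     from equisolve.numpy.models.linear_model import Ridge
#
#     def single_block(array, property_name):
#         return TensorMap(
#             keys=Labels.single(),
#             blocks=[
#                 TensorBlock(
#                     values=array,
#                     samples=Labels.range("sample", array.shape[0]),
#                     components=[],
#                     properties=Labels.range(property_name, array.shape[1]),
#                 )
#             ],
#         )
#
#     values = np.random.rand(20, 3)
#     w_true = np.array([[1.0], [2.0], [3.0]])
#
#     X = single_block(values, "property")
#     y = single_block(values @ w_true, "energy")
#
#     clf = Ridge().fit(X, y, alpha=1e-8)
#     y_pred = clf.predict(X)  # recovers w_true up to regularization error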