# Source code for chemotools.smooth._whittaker_smooth

"""
The :mod:`chemotools.smooth._whittaker_smooth` module implements
the Whittaker smoothing algorithm.
"""

# Authors: Niklas Zell <nik.zoe@web.de>, Pau Cabaneros
# License: MIT

from typing import Literal

import numpy as np
from sklearn.utils._param_validation import Interval, Real, StrOptions

from chemotools.utils._whittaker_solvers import WhittakerSolver

from ._base import _BaseWhittaker


class WhittakerSmooth(_BaseWhittaker):
    """
    Whittaker smoothing for noise reduction and signal trend estimation.

    Whittaker smoothing is a penalized least squares method that estimates
    smooth trends from noisy data by balancing fidelity to the input signal
    with a smoothness constraint. A second-order difference operator is used
    as the penalty term, ensuring that the estimated signal is smooth while
    preserving overall shape.

    The Whittaker smoothing step can be solved using either:

    - a **banded solver** (fast and memory-efficient, recommended for most
      spectra), or
    - a **sparse LU solver** (more stable for ill-conditioned problems).

    Optional weights can be provided to emphasize or downweight certain
    observations during smoothing. If no weights are supplied, all points
    are treated equally.

    Parameters
    ----------
    lam : float, default=1e4
        Regularization parameter controlling smoothness of the fitted signal.
        Larger values yield smoother trends. Must be strictly positive.

    weights : array-like of shape (n_features,), optional, default=None
        Non-negative weights applied to each observation. If None, all
        observations are weighted equally.

    solver_type : Literal["banded", "sparse"], default="banded"
        If "banded", use the banded solver for Whittaker smoothing.
        If "sparse", use a sparse LU decomposition.

    n_jobs : int, default=1
        Forwarded unchanged to the base class constructor. ``transform`` in
        this class solves all rows with a single batched solver call and
        does not split the work into chunks.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the training data.

    weights_ : ndarray of shape (n_features,)
        Weights used by the solver: ``weights`` converted to a float64
        array, or an array of ones when ``weights`` is None.

    References
    ----------
    [1] Eilers, P.H. (2003). "A perfect smoother." Analytical Chemistry
        75 (14), 3631–3636.

    Examples
    --------
    >>> from chemotools.datasets import load_fermentation_train
    >>> from chemotools.smooth import WhittakerSmooth
    >>> # Load sample data
    >>> X, _ = load_fermentation_train()
    >>> # Initialize WhittakerSmooth
    >>> ws = WhittakerSmooth()
    >>> # Fit and transform the data
    >>> X_smoothed = ws.fit_transform(X)
    """

    _parameter_constraints: dict = {
        "lam": [Interval(Real, 0, None, closed="neither")],
        "weights": ["array-like", None],
        "solver_type": [StrOptions({"banded", "sparse"})],
    }

    def __init__(
        self,
        lam: float = 1e4,
        weights: np.ndarray | None = None,
        solver_type: Literal["banded", "sparse"] = "banded",
        n_jobs: int = 1,
    ):
        super().__init__(
            lam=lam, weights=weights, solver_type=solver_type, n_jobs=n_jobs
        )

    def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth":
        """
        Fit the Whittaker smoother to input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix, where rows correspond to samples and
            columns correspond to features (e.g., spectra).

        y : None
            Ignored, present for API consistency with scikit-learn.

        Returns
        -------
        self : WhittakerSmooth
            Fitted estimator.
        """
        return super().fit(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Apply Whittaker smoothing to input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix to smooth.

        y : None
            Ignored, present for API consistency with scikit-learn.

        Returns
        -------
        X_transformed : ndarray of shape (n_samples, n_features)
            The smoothed version of the input data.
        """
        from sklearn.utils.validation import check_is_fitted, validate_data

        check_is_fitted(self, ["DtD_", "solver_", "weights_"])
        # copy=True keeps the caller's array untouched; reset=False makes the
        # feature count be checked against the one seen during fit.
        X_ = validate_data(
            self, X, ensure_2d=True, copy=True, reset=False, dtype=np.float64
        )

        # solve_batch issues a single batched LAPACK call over all rows.
        # Chunking via parallel_apply_by_rows splits that into N smaller calls
        # and adds process/thread overhead without gain — bypass it entirely.
        return self.solver_.solve_batch(X_, self.weights_)

    def _fit_core(
        self,
        X: np.ndarray,
        y=None,
        solver: WhittakerSolver | None = None,
    ) -> "WhittakerSmooth":
        """
        Core fitting logic for Whittaker smoothing.

        Stores the observation weights to be used in subsequent smoothing
        operations. If no custom weights were provided, uniform weights are
        applied.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix.

        y : None
            Ignored.

        solver : WhittakerSolver or None
            Whittaker solver instance, provided by ``_BaseWhittaker.fit``.
            Not used by this implementation.

        Returns
        -------
        self : WhittakerSmooth
            Fitted smoother with stored weights.

        Raises
        ------
        ValueError
            If ``weights`` is given and its shape is not ``(n_features,)``.
        """
        n_features = X.shape[1]

        # Default weights if not provided
        if self.weights is None:
            self.weights_ = np.ones(n_features)
            return self

        # ``weights`` may be any array-like (list, tuple, ...); hand the
        # solver a real float array. The constructor parameter itself is
        # left untouched.
        weights = np.asarray(self.weights, dtype=np.float64)
        if weights.shape != (n_features,):
            raise ValueError(
                f"weights must have shape ({n_features},) to match the "
                f"number of features in X, got shape {weights.shape}."
            )

        self.weights_ = weights
        return self

    def _transform_block(self, X_block: np.ndarray) -> np.ndarray:
        """
        Smooth one block of rows with the fitted solver and weights.

        NOTE(review): presumably the per-chunk hook called by the base class
        when rows are processed in blocks; ``transform`` in this class does
        not call it. Confirm against ``_BaseWhittaker``.

        Parameters
        ----------
        X_block : ndarray
            Rows to smooth.

        Returns
        -------
        ndarray
            Result of ``solver_.solve_batch`` for ``X_block``.
        """
        return self.solver_.solve_batch(X_block, self.weights_)