chemotools.smooth._whittaker_smooth 源代码

"""
The :mod:`chemotools.smooth._whittaker_smooth` module implements
the Whittaker smoothing algorithm.
"""

# Authors: Niklas Zell <nik.zoe@web.de>, Pau Cabaneros
# License: MIT

from typing import Literal

import numpy as np
from sklearn.utils._param_validation import Interval, Real, StrOptions

from chemotools.utils._whittaker_solvers import WhittakerSolver

from ._base import _BaseWhittaker



[文档]
class WhittakerSmooth(_BaseWhittaker):
    """
    Whittaker smoothing for noise reduction and signal trend estimation.

    Whittaker smoothing is a penalized least squares method that estimates
    smooth trends from noisy data by balancing fidelity to the input signal
    with a smoothness constraint. A second-order difference operator is used
    as the penalty term, ensuring that the estimated signal is smooth while
    preserving overall shape.

    The Whittaker smoothing step can be solved using either:

    - a **banded solver** (fast and memory-efficient, recommended for most
      spectra), or
    - a **sparse LU solver** (more stable for ill-conditioned problems).

    Optional weights can be provided to emphasize or downweight certain
    observations during smoothing. If no weights are supplied, all points
    are treated equally.

    Parameters
    ----------
    lam : float, default=1e4
        Regularization parameter controlling smoothness of the fitted signal.
        Larger values yield smoother trends.

    weights : ndarray of shape (n_features,), optional, default=None
        Non-negative weights applied to each observation. If None,
        all observations are weighted equally.

    solver_type : Literal["banded", "sparse"], default="banded"
        If "banded", use the banded solver for Whittaker smoothing.
        If "sparse", use a sparse LU decomposition.

    n_jobs : int, default=1
        Forwarded unchanged to the base class constructor. ``transform`` in
        this class solves all rows in one batched call and does not chunk
        the rows for parallel execution.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the training data.

    weights_ : ndarray of shape (n_features,)
        Weights used when smoothing: a float64 copy of ``weights``, or a
        vector of ones when ``weights`` is None.

    References
    ----------
    [1] Eilers, P.H. (2003).
        "A perfect smoother." Analytical Chemistry 75 (14), 3631–3636.

    Examples
    --------
    >>> from chemotools.datasets import load_fermentation_train
    >>> from chemotools.smooth import WhittakerSmooth
    >>> # Load sample data
    >>> X, _ = load_fermentation_train()
    >>> # Initialize WhittakerSmooth
    >>> ws = WhittakerSmooth()
    >>> # Fit and transform the data
    >>> X_smoothed = ws.fit_transform(X)
    """

    # NOTE(review): ``n_jobs`` has no constraint entry here — confirm whether
    # the base class is expected to validate it.
    _parameter_constraints: dict = {
        "lam": [Interval(Real, 0, None, closed="neither")],
        "weights": ["array-like", None],
        "solver_type": [StrOptions({"banded", "sparse"})],
    }

    def __init__(
        self,
        lam: float = 1e4,
        weights: np.ndarray | None = None,
        solver_type: Literal["banded", "sparse"] = "banded",
        n_jobs: int = 1,
    ):
        # All hyper-parameters are handed to the base class untouched.
        super().__init__(
            lam=lam, weights=weights, solver_type=solver_type, n_jobs=n_jobs
        )

    def fit(self, X: np.ndarray, y=None) -> "WhittakerSmooth":
        """
        Fit the Whittaker smoother to input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix, where rows correspond to samples
            and columns correspond to features (e.g., spectra).

        y : None
            Ignored, present for API consistency with scikit-learn.

        Returns
        -------
        self : WhittakerSmooth
            Fitted estimator.
        """
        # Fitting is delegated to the base class implementation.
        return super().fit(X, y)

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Apply Whittaker smoothing to input data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix to smooth.

        y : None
            Ignored, present for API consistency with scikit-learn.

        Returns
        -------
        X_transformed : ndarray of shape (n_samples, n_features)
            The smoothed version of the input data.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If the estimator has not been fitted yet.
        """
        from sklearn.utils.validation import check_is_fitted, validate_data

        check_is_fitted(self, ["DtD_", "solver_", "weights_"])
        # Validate on a float64 copy so the caller's array is left untouched.
        X_ = validate_data(
            self, X, ensure_2d=True, copy=True, reset=False, dtype=np.float64
        )
        # solve_batch issues a single batched LAPACK call over all rows.
        # Chunking via parallel_apply_by_rows splits that into N smaller calls
        # and adds process/thread overhead without gain — bypass it entirely
        # and treat the whole matrix as one block.
        return self._transform_block(X_)

    def _fit_core(
        self,
        X: np.ndarray,
        y=None,
        solver: WhittakerSolver | None = None,
    ) -> "WhittakerSmooth":
        """
        Core fitting logic for Whittaker smoothing.

        Stores the observation weights to be used in subsequent
        smoothing operations. If no custom weights were provided,
        uniform weights are applied.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The input data matrix.

        y : None
            Ignored.

        solver : WhittakerSolver or None
            Whittaker solver instance, provided by ``_BaseWhittaker.fit``.
            Not used by this implementation.

        Returns
        -------
        self : WhittakerSmooth
            Fitted smoother with stored weights.

        Raises
        ------
        ValueError
            If ``weights`` is given but is not one-dimensional with exactly
            ``n_features`` entries.
        """
        n_features = X.shape[1]

        if self.weights is None:
            # Default: every observation contributes equally.
            self.weights_ = np.ones(n_features)
            return self

        # ``weights`` may be any array-like (e.g. a list). Store an independent
        # float64 array so the fitted state never aliases the caller's object.
        weights = np.array(self.weights, dtype=np.float64)
        if weights.shape != (n_features,):
            raise ValueError(
                f"weights must have shape ({n_features},) to match the number "
                f"of features in X, got shape {weights.shape}."
            )
        self.weights_ = weights
        return self

    def _transform_block(self, X_block: np.ndarray) -> np.ndarray:
        """Smooth every row of ``X_block`` with the fitted solver and weights."""
        return self.solver_.solve_batch(X_block, self.weights_)