Source code for chemotools.smooth._savitzky_golay_filter

"""
The :mod:`chemotools.smooth._savitzky_golay_filter` module implements the Savitzky-Golay Filter (SGF) transformation.
"""

# Authors: Nusret Emirhan Salli <nusret.emirhan.salli@gmail.com>, Pau Cabaneros
# License: MIT

from typing import Literal, Optional
from numbers import Integral

import numpy as np
from sklearn.utils._param_validation import Interval, StrOptions
from ._base import _BaseFIRFilter



[docs]
class SavitzkyGolayFilter(_BaseFIRFilter):
    """
    A transformer that calculates the Savitzky-Golay filter of the input data.

    Parameters
    ----------
    window_size : int, optional
        The size of the window to use for the Savitzky-Golay filter. Must be odd. Default
        is 3.

    polynomial_order : int, optional
        The order of the polynomial to use for the Savitzky-Golay filter. Must be less
        than window_size. Default is 1.

    mode : str, optional
        The mode to use for the Savitzky-Golay filter. Can be "nearest", "constant",
        "reflect", "wrap", "mirror" or "interp". Default is "nearest".

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the training data.

    Examples
    --------
    >>> from chemotools.datasets import load_fermentation_train
    >>> from chemotools.smooth import SavitzkyGolayFilter
    >>> # Load sample data
    >>> X, _ = load_fermentation_train()
    >>> # Initialize SavitzkyGolayFilter
    >>> sgf = SavitzkyGolayFilter()
    SavitzkyGolayFilter()
    >>> # Fit and transform the data
    >>> X_smoothed = sgf.fit_transform(X)

    """

    _parameter_constraints: dict = {
        "window_size": [Interval(Integral, 1, None, closed="left")],
        "polynomial_order": [Interval(Integral, 0, None, closed="left")],
        "mode": [StrOptions({"mirror", "constant", "nearest", "wrap", "interp"})],
        "axis": [Interval(Integral, 0, None, closed="left")],
    }

    def __init__(
        self,
        window_size: int = 3,
        polynomial_order: int = 1,
        mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "nearest",
        axis: int = 1,
    ) -> None:
        super().__init__(window_size=window_size, mode=mode, axis=axis)
        self.polynomial_order = polynomial_order


[docs]
    def fit(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> "SavitzkyGolayFilter":
        """
        Fit the Savitzky-Golay filter to the data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : SavitzkyGolayFilter
            The fitted transformer.
        """
        return super().fit(X, y)



[docs]
    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Transform the input data by applying the Savitzky-Golay filter.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored to align with API.

        Returns
        -------
        X_transformed : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        return super().transform(X, y)


    def _compute_kernel(self) -> np.ndarray:
        if self.polynomial_order >= self.window_size:
            raise ValueError("polynomial_order must be < window_size.")
        # Prefer SciPy's reference coefficients in convolution form
        try:
            from scipy.signal import savgol_coeffs

            k = np.asarray(
                savgol_coeffs(
                    self.window_size, self.polynomial_order, deriv=0, use="conv"
                ),
                dtype=np.float64,
            )
        except Exception:
            # Robust LS fallback (intercept row of (A^T A)^{-1} A^T)
            m = (self.window_size - 1) // 2
            i = np.arange(-m, m + 1, dtype=np.float64)
            A = np.vander(i, N=self.polynomial_order + 1, increasing=True)
            ATA_inv = np.linalg.pinv(A.T @ A)
            k = (ATA_inv @ A.T)[0, :]
            k = 0.5 * (k + k[::-1])
        k /= k.sum()
        return k

    def _get_interp_order(self) -> int:
        """
        Return polynomial order for Savitzky-Golay extrapolation.

        This enables scipy-compatible polynomial extrapolation when mode='interp'.

        Returns
        -------
        int
            The polynomial order used in the Savitzky-Golay filter.
        """
        return self.polynomial_order

    def _apply_filter_1d(self, x: np.ndarray) -> np.ndarray:
        """
        Override to use scipy-compatible edge handling for mode='interp'.

        For Savitzky-Golay with mode='interp', scipy uses a special algorithm:
        - Fits polynomial to edge windows
        - Evaluates polynomial values directly at edge points (no convolution!)
        - Uses standard convolution for middle section
        """
        if self.mode == "interp":
            # Use scipy's exact algorithm for interp mode
            return self._apply_sg_interp(x)
        else:
            # Use base class pad-then-convolve for other modes
            return super()._apply_filter_1d(x)

    def _apply_sg_interp(self, x: np.ndarray) -> np.ndarray:
        """
        Apply Savitzky-Golay filter with scipy-compatible interp mode.

        Replicates scipy's _fit_edges_polyfit behavior:
        1. Fit polynomial to first window_size points, evaluate at first half_window positions
        2. Apply standard convolution to ALL positions (with padding)
        3. Replace first half_window and last half_window positions with polynomial values
        """
        m = self._half_
        n = len(x)
        window_len = self.window_size

        if n < window_len:
            # Signal too short for interp mode, fall back to edge padding
            x_padded = np.pad(x, (m, m), mode="edge")
            return np.convolve(x_padded, self.kernel_, mode="valid")

        # Apply standard convolution to entire signal (with edge padding for the convolution itself)
        x_padded = np.pad(x, (m, m), mode="edge")
        result = np.convolve(x_padded, self.kernel_, mode="valid")

        # Now replace edges with polynomial-fitted values

        # Left edge: fit polynomial to first window and evaluate at edge positions
        x_left_window = x[:window_len]
        poly_left = np.polyfit(
            np.arange(window_len, dtype=np.float64),
            x_left_window,
            self.polynomial_order,
        )
        indices_left = np.arange(m, dtype=np.float64)
        result[:m] = np.polyval(poly_left, indices_left)

        # Right edge: fit polynomial to last window and evaluate at edge positions
        x_right_window = x[-window_len:]
        poly_right = np.polyfit(
            np.arange(window_len, dtype=np.float64),
            x_right_window,
            self.polynomial_order,
        )
        # Evaluate at the last m positions: [window_len-m, ..., window_len-1]
        indices_right = np.arange(window_len - m, window_len, dtype=np.float64)
        result[-m:] = np.polyval(poly_right, indices_right)

        return result