Source code for chemotools.smooth._modified_sinc_smoother

"""
The :mod:chemotools.smooth._modified_sinc_smoother module implements the Modified Sinc Filter (MSF) transformation.
"""

# Authors: Nusret Emirhan Salli <nusret.emirhan.salli@gmail.com>
# License: MIT

from __future__ import annotations
from typing import Literal, Optional
import numpy as np
from sklearn.utils._param_validation import Integral, Interval, StrOptions, Real

from ._base import _BaseFIRFilter



[docs]
class ModifiedSincFilter(_BaseFIRFilter):
    """
    Modified Sinc smoothing (MS) for denoising while preserving peak positions based on the paper "Why and How Savitzky–Golay Filters Should Be Replaced."

    The Modified Sinc smoother is a linear-phase FIR filter: the signal is
    convolved with a fixed, symmetric kernel. The kernel is built from:
        1) a sinc core with argument ((n + 4) / 2) * pi * x (Eq. 3, p. 187),
        2) a special Gaussian-like window w(x) whose value and slope vanish at
            the window ends (Eq. 4, p. 187), and
        3) small optional correction terms that flatten the passband so low
            frequencies are almost unattenuated (Eqs. 7–8 and Table 1, pp. 187–188).

    Parameters
    ----------
    window_size : int, default=21
        Odd number of taps (2*m + 1). Larger values give stronger smoothing.

    n : int, default=6
        Even integer >= 2. Controls how many inner zeros the sinc has within
        the window. The paper discusses n = 2, 4, 6, 8, 10 (Fig. 2).

    alpha : float, default=4.0
        Positive window width parameter. Larger alpha reduces side lobes more
        aggressively (steeper roll-off).

    use_corrections : bool, default=True
        If True and n in {6, 8, 10} and the window is large enough, add the
        small passband-flattening terms from Eqs. 7–8 (coefficients from Table 1).

    mode : {"mirror", "constant", "nearest", "wrap", "interp"}, default="interp"
        Boundary strategy, passed to the base FIR class. "interp" performs
        linear extrapolation (recommended in the paper).

    axis : int, default=1
        Axis along which to smooth for 2D inputs (rows x features). Use 1 to
        smooth within each row.

    Methods
    -------
    fit(X, y=None)
        Inherited from the base class. Validates input and builds the kernel.

    transform(X, y=None)
        Inherited from the base class. Pads, convolves, and returns the smoothed data.

    References
    ----------
    [1] Schmid, M.; Rath, D.; Diebold, U. "Why and How Savitzky–Golay Filters Should Be Replaced."
    ACS measurement science Au 2022, 2 (2), 185-196.

    Examples
    --------
    >>> from chemotools.datasets import load_fermentation_train
    >>> from chemotools.smooth import ModifiedSincFilter
    >>> # Load sample data
    >>> X, _ = load_fermentation_train()
    >>> # Initialize ModifiedSincFilter
    >>> msf = ModifiedSincFilter()
    ModifiedSincFilter()
    >>> # Fit and transform the data
    >>> X_smoothed = msf.fit_transform(X)
    """

    _parameter_constraints: dict = {
        "window_size": [Interval(Integral, 1, None, closed="left")],
        "n": [Interval(Integral, 2, None, closed="left")],
        "alpha": [Interval(Real, 0, None, closed="neither")],
        "use_corrections": ["boolean"],
        "mode": [StrOptions({"mirror", "constant", "nearest", "wrap", "interp"})],
        "axis": [Interval(Integral, 0, None, closed="left")],
    }

    def __init__(
        self,
        window_size: int = 21,
        n: int = 6,
        alpha: float = 4.0,
        use_corrections: bool = True,
        mode: Literal["mirror", "constant", "nearest", "wrap", "interp"] = "interp",
        axis: int = 1,
    ) -> None:
        super().__init__(window_size=window_size, mode=mode, axis=axis)
        self.n = n
        self.alpha = alpha
        self.use_corrections = use_corrections


[docs]
    def fit(
        self, X: np.ndarray, y: Optional[np.ndarray] = None
    ) -> "ModifiedSincFilter":
        """
        Fit the Modified Sinc Filter to the data.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : ModifiedSincFilter
            The fitted transformer.
        """
        return super().fit(X, y)



[docs]
    def transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Transform the input data by applying the Modified Sinc Filter.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored to align with API.

        Returns
        -------
        X_transformed : np.ndarray of shape (n_samples, n_features)
            The transformed data.
        """
        return super().transform(X, y)


    def _compute_kernel(self) -> np.ndarray:
        """
        Build the Modified Sinc kernel h[i] for i = -m ... m.

        Implementation map to the paper:
          1) Map index to x = i / (m + 1)  (Eq. 5).
          2) Base sinc: np.sinc(0.5 * (n + 4) * x)  (Eq. 3).
          3) Solve a 3x3 system for the window coefficients so that
             w(0)=1, w(1)=0, w'(1)=0; then we form w(x)  (Eq. 4).
          4) h = sinc * w.
          5) Optional corrections from based on Eqs. 7–8 (Table 1).
          6) Symmetrize and normalize so sum(h)=1  (Eq. 6).
        """

        # ---- parameter checks ----
        if self.n % 2 != 0 or self.n < 2:
            raise ValueError("n must be an even integer >= 2.")
        if self.alpha <= 0:
            raise ValueError("alpha must be positive.")

        # ---- 1) Eq. 5: index -> normalized coordinate in [-1, 1] ----
        m = (self.window_size - 1) // 2
        i = np.arange(-m, m + 1, dtype=np.float64)
        x = i / (m + 1) if m >= 0 else np.array([0.0])

        # ---- 2) Eq. 3: modified sinc core (note that numpy's sinc uses sin(pi*u)/(pi*u)) ----
        core = np.sinc(0.5 * (self.n + 4) * x)

        # ---- 3) Eq. 4: window with w(0)=1, w(1)=0, w'(1)=0 ----
        # Precompute the exponentials appearing in those constraints.
        E1 = np.exp(
            -self.alpha * 1.0
        )  # exp(-alpha * 1^2)   at x = 1 (central Gaussian)
        Ep = np.exp(-self.alpha * 1.0)  # exp(-alpha * (1-2)^2) = exp(-alpha) for (x-2)
        Em = np.exp(
            -self.alpha * 9.0
        )  # exp(-alpha * (1+2)^2) = exp(-9*alpha) for (x+2)
        e4 = np.exp(-self.alpha * 4.0)  # exp(-alpha * (±2)^2) at x = 0

        # Linear system rows correspond to: w(0)=1, w(1)=0, w'(1)=0
        M = np.array(
            [
                [1.0, 2.0 * e4, 1.0],
                [E1, (Ep + Em), 1.0],
                [-2 * self.alpha * E1, 2 * self.alpha * (Ep - 3 * Em), 0.0],
            ],
            dtype=np.float64,
        )
        # solve the system using linear algebra.
        rhs = np.array([1.0, 0.0, 0.0], dtype=np.float64)

        Acoef, Bcoef, Ccoef = np.linalg.solve(M, rhs)

        # Window values calculated
        window = (
            Acoef * np.exp(-self.alpha * x**2)
            + Bcoef
            * (
                np.exp(-self.alpha * (x - 2.0) ** 2)
                + np.exp(-self.alpha * (x + 2.0) ** 2)
            )
            + Ccoef
        )

        # ---- 4) base kernel = windowed sinc ----
        h = core * window

        # ---- 5) Eqs. 7–8 + Table 1: optional passband-flattening corrections ----
        if (
            self.use_corrections
            and self._has_kappa_table(self.n)
            and (m >= self.n // 2 + 2)
        ):
            # nu = 1 for n/2 odd (n=6,10); nu = 2 for n=8 (Eq. 7)
            nu = 1 if ((self.n // 2) % 2 == 1) else 2
            kappas = self._kappa_coeffs(
                self.n, m
            )  # kappa = a + b / (c - m)^3  (Eq. 8; Table 1)
            corr = 0.0
            # add the correction term according to Eq. 7
            for j, kappa in enumerate(kappas):
                corr += kappa * window * x * np.sin((2 * j + nu) * np.pi * x)
            h = h + corr

        # ---- 6) Eq. 6: Enforce symmetry and Direct Current = 1 ----
        h = 0.5 * (h + h[::-1])  # make it symmetric
        s = h.sum()
        if not np.isfinite(s) or abs(s) < 1e-15:
            raise FloatingPointError(
                "Kernel normalization failed; try different parameters."
            )
        h = h / s  # Normalize so sum = 1
        return h

    # ====== Table 1 (p. 188): kappa fit coefficients for Eq. 8 ======
    @staticmethod
    def _has_kappa_table(n: int) -> bool:
        # The paper provides corrections for n = 6, 8, 10
        return n in (6, 8, 10)

    @staticmethod
    def _kappa_coeffs(n: int, m: int) -> np.ndarray:
        """
        Return [kappa_0] for n=6; [kappa_0, kappa_1] for n=8 or 10.
        Formula: kappa_j^(n)(m) = a_j^(n) + b_j^(n) / (c_j^(n) - m)^3
        (Eq. 8, p. 188; coefficients from Table 1).
        """
        if n == 6:
            ABC = [(0.00172, 0.02437, 1.64375)]
        elif n == 8:
            ABC = [(0.00440, 0.08821, 2.35938), (0.00615, 0.02472, 3.63594)]
        elif n == 10:
            ABC = [(0.00118, 0.04219, 2.74688), (0.00367, 0.12780, 2.77031)]
        else:
            return np.zeros(0, dtype=np.float64)
        return np.asarray([a + b / ((c - m) ** 3) for a, b, c in ABC], dtype=np.float64)