# Source code for chemotools.baseline._rubberband_correction

"""
The :mod:`chemotools.baseline._rubberband_correction` module implements
a rubberband baseline correction transformer.
"""

# Author: Lasse Skjoldborg Krog
# License: MIT

import numpy as np
from sklearn.base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin
from sklearn.utils.validation import check_is_fitted, validate_data

from chemotools._doc_mixin import DocLinkMixin



# [docs]
class RubberbandCorrection(
    DocLinkMixin, TransformerMixin, OneToOneFeatureMixin, BaseEstimator
):
    """
    A transformer that removes a baseline using the rubberband method.

    The rubberband baseline is the lower convex hull of the spectrum — the
    set of straight-line segments a rubber band would form if stretched
    along the underside of the spectrum. The baseline is subtracted from
    each spectrum, leaving the peaks resting on a flat zero background.

    The lower convex hull is computed with Andrew's monotone chain
    algorithm [1]_ [2]_ in feature-index space: the column index of each
    value is used as its x-coordinate, and the actual feature-axis values
    (e.g. wavenumbers) are never read. The feature axis is therefore
    assumed to be sorted; even spacing is not required, but the hull is
    convex with respect to the column index rather than the axis value.
    The method has no parameters.

    Attributes
    ----------
    n_features_in_ : int
        The number of features in the input data.

    References
    ----------
    .. [1] A. M. Andrew, "Another efficient algorithm for convex hulls in two
       dimensions", Information Processing Letters, 9(5), 216-219, 1979.
    .. [2] Monotone chain convex hull, reference implementation:
       https://en.wikibooks.org/wiki/Algorithm_Implementation/Geometry/Convex_hull/Monotone_chain

    Examples
    --------
    >>> from chemotools.baseline import RubberbandCorrection
    >>> from chemotools.datasets import load_fermentation_train
    >>> # Load sample data
    >>> X, _ = load_fermentation_train()
    >>> # Instantiate the transformer
    >>> transformer = RubberbandCorrection()
    >>> transformer.fit(X)
    RubberbandCorrection()
    >>> # Generate baseline-corrected data
    >>> X_corrected = transformer.transform(X)
    """

    # The transformer has no hyper-parameters, so there is nothing to constrain.
    _parameter_constraints: dict = {}

    def fit(self, X: np.ndarray, y=None) -> "RubberbandCorrection":
        """
        Fit the transformer to the input data.

        Nothing is learned from the data; fitting only validates ``X`` and
        records its number of features.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored to align with API.

        Returns
        -------
        self : RubberbandCorrection
            The fitted transformer.
        """
        # Validate the input parameters
        self._validate_params()

        # Check that X is a 2D array and has only finite values. The call is
        # made for its validation and ``n_features_in_`` bookkeeping
        # (``reset=True``); the validated array itself is not needed here.
        validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        return self

    def transform(self, X: np.ndarray, y=None) -> np.ndarray:
        """
        Transform the input data by subtracting the rubberband baseline.

        Parameters
        ----------
        X : np.ndarray of shape (n_samples, n_features)
            The input data to transform.

        y : None
            Ignored to align with API.

        Returns
        -------
        X_transformed : np.ndarray of shape (n_samples, n_features)
            The baseline-corrected data. The input array is not modified.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, "n_features_in_")

        # Check that X is a 2D array and has only finite values. ``copy=True``
        # lets the rows be overwritten below without touching the caller's
        # data; ``reset=False`` checks the feature count against the fit.
        X_ = validate_data(
            self,
            X,
            y="no_validation",
            ensure_2d=True,
            copy=True,
            reset=False,
            dtype=np.float64,
        )

        # Subtract the rubberband baseline from each spectrum (row)
        for i, spectrum in enumerate(X_):
            X_[i] = spectrum - self._rubberband_baseline(spectrum)

        # ``ensure_2d=True`` guarantees a 2D array, so no reshape is needed.
        return X_

    @staticmethod
    def _rubberband_baseline(x: np.ndarray) -> np.ndarray:
        """Return the rubberband (lower convex hull) baseline of one spectrum.

        The lower convex hull of the points ``(i, x[i])`` is found with
        Andrew's monotone chain algorithm and linearly interpolated back
        onto every feature index.

        Parameters
        ----------
        x : np.ndarray of shape (n_features,)
            A single spectrum.

        Returns
        -------
        baseline : np.ndarray of shape (n_features,)
            The baseline evaluated at every feature index. An empty input
            yields an empty baseline.
        """
        values = np.asarray(x, dtype=np.float64)
        n = values.size
        if n == 0:
            # np.interp cannot interpolate from an empty set of hull points.
            return np.empty(0, dtype=np.float64)

        # Convert once to Python floats: indexing the array element by
        # element inside the loop would box a NumPy scalar on every access.
        heights = values.tolist()

        # Andrew's monotone chain — lower hull only. Reference implementation:
        # https://en.wikibooks.org/wiki/Algorithm_Implementation/Geometry/Convex_hull/Monotone_chain
        # Only the indices of the hull vertices are stored; their heights are
        # looked up in ``heights``.
        hull: list[int] = []
        for i, y in enumerate(heights):
            while len(hull) >= 2:
                o, a = hull[-2], hull[-1]
                # z-component of (a - o) x (p - o); positive means the turn
                # o -> a -> p is counter-clockwise, i.e. ``a`` stays on the
                # lower hull.
                cross = (a - o) * (y - heights[o]) - (heights[a] - heights[o]) * (
                    i - o
                )
                if cross > 0:
                    break
                # Clockwise or collinear: ``a`` lies on or above the segment
                # o -> p, so it is not a hull vertex.
                hull.pop()
            hull.append(i)

        hull_idx = np.asarray(hull)
        return np.interp(np.arange(n), hull_idx, values[hull_idx])