# Source code for chemotools.feature_selection._range_cut

"""
The :mod:`chemotools.feature_selection._range_cut` module implements
:class:`RangeCut`, a selector that keeps a contiguous range of features from
spectral data, delimited by start and end indices or x-axis values.
"""

from typing import Optional

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.feature_selection._base import SelectorMixin
from sklearn.utils._param_validation import Integral, Interval
from sklearn.utils.validation import check_is_fitted, validate_data

from chemotools._axis_mixin import XAxisMixin
from chemotools._deprecation import (
    DEPRECATED_PARAMETER,
    deprecated_parameter_constraint,
)


class RangeCut(XAxisMixin, SelectorMixin, BaseEstimator):
    """Select a contiguous spectral region by index or by x-axis value.

    The range can be specified in two ways:

    * By integer indices (``start`` and ``end``)
    * By x-axis values (``start`` and ``end`` interpreted against the provided
      ``x_axis`` array)

    If ``x_axis`` is supplied, the closest indices to the given start / end
    x-axis values are located. Otherwise numeric ``start`` / ``end`` are treated
    directly as indices. X-axis values must be in ascending order.

    The selected region is the half-open slice
    ``[start_index_, end_index_)``: the feature at ``end_index_`` itself is not
    kept. Consequently, when no x-axis is given, the default ``end=-1`` selects
    every feature except the last one.

    Parameters
    ----------
    start : int, default=0
        Index or x-axis value of the start of the range.

    end : int, default=-1
        Index or x-axis value of the end of the range.

    x_axis : array-like, optional
        X-axis values corresponding to columns. Must be ascending if provided.

    wavenumbers : array-like, optional
        Deprecated alias for ``x_axis``. Use ``x_axis`` instead.

    Attributes
    ----------
    start_index_ : int
        Resolved start index.

    end_index_ : int
        Resolved end index.

    x_axis_ : array-like or None
        Selected x-axis values (if provided), else ``None``.

    wavenumbers_ : array-like or None
        Deprecated alias for ``x_axis_``.

    Examples
    --------
    >>> from chemotools.feature_selection import RangeCut
    >>> from chemotools.datasets import load_fermentation_train
    >>> X, _ = load_fermentation_train()
    >>> wavenumbers = X.columns.values
    >>> rc = RangeCut(start=1000, end=2000, x_axis=wavenumbers)
    >>> rc.fit(X)
    RangeCut(start=1000, end=2000, x_axis=wavenumbers)
    >>> X_cut = rc.transform(X)
    >>> X_cut.shape
    (21, 616)
    """

    # Each entry must be a *list* of accepted constraints: scikit-learn's
    # parameter validation iterates over the value of every entry.
    _parameter_constraints: dict = {
        "start": [Interval(Integral, 0, None, closed="left")],
        "end": [Integral],
        "x_axis": ["array-like", None],
        "wavenumbers": ["array-like", None, deprecated_parameter_constraint()],
    }

    def __init__(
        self,
        start: int = 0,
        end: int = -1,
        x_axis: Optional[np.ndarray] = None,
        wavenumbers=DEPRECATED_PARAMETER,
    ):
        # Parameters are stored untouched; they are only interpreted in ``fit``.
        self.start = start
        self.end = end
        self.x_axis = x_axis
        self.wavenumbers = wavenumbers

    def fit(self, X: np.ndarray, y=None) -> "RangeCut":
        """
        Fit the transformer to the input data.

        Resolves ``start`` / ``end`` into ``start_index_`` / ``end_index_``,
        either directly (no x-axis) or by looking them up on the x-axis.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The input data to fit the transformer to.

        y : None
            Ignored to align with API.

        Returns
        -------
        self : RangeCut
            The fitted transformer.
        """
        # Check that X is a 2D array and has only finite values
        X = validate_data(
            self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64
        )

        # Pick the effective axis from ``x_axis`` and its deprecated alias.
        axis_values = self._resolve_x_axis(self.x_axis, self.wavenumbers)

        # Set the start and end indices
        if axis_values is None:
            # No axis available: ``start`` / ``end`` are already column indices.
            self.start_index_ = self.start
            self.end_index_ = self.end
            self.x_axis_ = None
            self.wavenumbers_ = None
        else:
            # ``start`` / ``end`` are x-axis values; map each one to an index.
            axis = np.asarray(axis_values)
            self.start_index_ = self._find_index(self.start, axis)
            self.end_index_ = self._find_index(self.end, axis)
            # The slice is taken from the axis as resolved above, not from the
            # ndarray conversion used for the index lookup.
            self.x_axis_ = axis_values[self.start_index_ : self.end_index_]
            self.wavenumbers_ = self.x_axis_

        return self

    def _get_support_mask(self):
        """
        Get the boolean mask indicating which features are selected.

        Returns
        -------
        mask : np.ndarray of shape (n_features,)
            The boolean mask indicating which features are selected. Entries
            in ``[start_index_, end_index_)`` are ``True``, all others ``False``.
        """
        # Check that the estimator is fitted
        check_is_fitted(self, ["start_index_", "end_index_"])

        # Create the mask
        mask = np.zeros(self.n_features_in_, dtype=bool)  # type: ignore[unresolved-attribute] # sklearn fitted attribute
        mask[self.start_index_ : self.end_index_] = True

        return mask