Source code for chemotools.augmentation._add_noise

"""
The :mod:`chemotools.augmentation._add_noise` module implements the AddNoise
transformer to add random noise from various probability distributions to input data.
"""

# Authors: Pau Cabaneros
# License: MIT

from typing import Literal, Optional

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.utils import check_random_state
from sklearn.utils.validation import check_is_fitted, validate_data
from sklearn.utils._param_validation import Interval, Real, StrOptions


[docs] class AddNoise(TransformerMixin, OneToOneFeatureMixin, BaseEstimator): """Add noise to input data from various probability distributions. This transformer adds random noise from specified probability distributions to the input data. Supported distributions include Gaussian, Poisson, and exponential. Parameters ---------- distribution : {'gaussian', 'poisson', 'exponential'}, default='gaussian' The probability distribution to sample noise from. scale : float, default=0.0 Scale parameter for the noise distribution: - For gaussian: standard deviation - For poisson: multiplication factor for sampled values - For exponential: scale parameter (1/λ) Must be non-negative. random_state : int, optional Random seed for reproducibility. Attributes ---------- n_features_in_ : int Number of features in the training data. Examples -------- >>> from chemotools.augmentation import AddNoise >>> from chemotools.datasets import load_fermentation_train >>> # Load sample data >>> X, _ = load_fermentation_train() >>> # Instantiate the transformer >>> transformer = AddNoise(distribution="gaussian", scale=0.1) AddNoise() >>> transformer.fit(X) >>> # Generate noisy data >>> X_noisy = transformer.transform(X) """ _parameter_constraints: dict = { "distribution": StrOptions({"gaussian", "poisson", "exponential"}), "scale": [Interval(Real, 0, None, closed="both")], "random_state": [None, int, np.random.RandomState], } def __init__( self, distribution: Literal["gaussian", "poisson", "exponential"] = "gaussian", scale: float = 0.0, random_state: Optional[int] = None, ): self.distribution = distribution self.scale = scale self.random_state = random_state
[docs] def fit(self, X: np.ndarray, y=None) -> "AddNoise": """Fit the transformer to the input data. Parameters ---------- X : np.ndarray of shape (n_samples, n_features) Training data. y : None Ignored. Present for API consistency. Returns ------- self : AddNoise Fitted transformer. Raises ------ ValueError If X is not a 2D array or contains non-finite values. """ # Check that X is a 2D array and has only finite values X = validate_data( self, X, y="no_validation", ensure_2d=True, reset=True, dtype=np.float64 ) # Instantiate the random number generator self._rng = check_random_state(self.random_state) return self
[docs] def transform(self, X: np.ndarray, y=None) -> np.ndarray: """Transform the input data by adding random noise. Parameters ---------- X : np.ndarray of shape (n_samples, n_features) Input data to transform. y : None Ignored. Present for API consistency. Returns ------- X_transformed : np.ndarray of shape (n_samples, n_features) Transformed data with added noise. Raises ------ ValueError If X has different number of features than the training data, or if an invalid noise distribution is specified. """ # Check that the estimator is fitted check_is_fitted(self, "n_features_in_") # Check that X is a 2D array and has only finite values X_ = validate_data( self, X, y="no_validation", ensure_2d=True, copy=True, reset=False, dtype=np.float64, ) # Select the noise function based on the selected distribution noise_func = { "gaussian": self._add_gaussian_noise, "poisson": self._add_poisson_noise, "exponential": self._add_exponential_noise, }.get(self.distribution) if noise_func is None: raise ValueError( f"Invalid noise distribution: {self.distribution}. " "Expected one of: gaussian, poisson, exponential" ) return noise_func(X_)
def _add_gaussian_noise(self, X: np.ndarray) -> np.ndarray: """Add Gaussian noise to the input array.""" return X + self._rng.normal(0, self.scale, size=X.shape) def _add_poisson_noise(self, X: np.ndarray) -> np.ndarray: """Add Poisson noise to the input array.""" return X + self._rng.poisson(X, size=X.shape) * self.scale def _add_exponential_noise(self, X: np.ndarray) -> np.ndarray: """Add exponential noise to the input array.""" return X + self._rng.exponential(self.scale, size=X.shape)