Source code for chemotools.inspector._pls_regression_inspector

"""PLS Regression Inspector for model diagnostics and visualization."""

from __future__ import annotations
from dataclasses import asdict
from typing import (
    Dict,
    Optional,
    Sequence,
    Tuple,
    Union,
    Any,
    TYPE_CHECKING,
    Literal,
    List,
)
import numpy as np
from sklearn.cross_decomposition._pls import _PLS
from sklearn.pipeline import Pipeline

if TYPE_CHECKING:
    import matplotlib.figure

from chemotools.outliers import QResiduals, HotellingT2, Leverage, StudentizedResiduals

from .core.base import _BaseInspector, InspectorPlotConfig
from .core.latent import LatentVariableMixin
from .core.regression import RegressionMixin
from .core.spectra import SpectraMixin
from .core.summaries import PLSRegressionSummary
from .core.utils import (
    get_xlabel_for_features,
    get_default_scores_components,
    get_default_loadings_components,
    select_components,
)
from .helpers import _latent as _latent_plots
from .helpers._regression import (
    create_predicted_vs_actual_plot,
    create_y_residual_plot,
    create_qq_plot,
    create_residual_distribution_plot,
    create_regression_distances_plot,
)

SummaryStep = Dict[str, Union[int, str]]
SummaryValue = Union[
    str, int, float, Dict[str, Any], List[SummaryStep], np.ndarray, None
]


class PLSRegressionInspector(
    SpectraMixin, RegressionMixin, LatentVariableMixin, _BaseInspector
):
    """Inspector for PLS Regression model diagnostics and visualization.

    This class provides a unified interface for inspecting PLS regression
    models by creating multiple independent diagnostic plots. Instead of
    complex dashboards with many subplots, each method produces several
    separate figure windows that are easier to customize, save, and interact
    with individually.

    The inspector provides convenience methods that create multiple
    independent plots:

    - ``inspect()``: Creates all diagnostic plots (scores, loadings, explained
      variance, regression diagnostics, and distance plots)
    - ``inspect_spectra()``: Creates raw and preprocessed spectra plots
      (if preprocessing exists)

    Parameters
    ----------
    model : _PLS or Pipeline
        Fitted PLS model or pipeline ending with PLS
    X_train : array-like of shape (n_samples, n_features)
        Training data
    y_train : array-like of shape (n_samples,)
        Training targets (required for supervised PLS)
    X_test : array-like of shape (n_samples, n_features), optional
        Test data
    y_test : array-like of shape (n_samples,), optional
        Test targets
    X_val : array-like of shape (n_samples, n_features), optional
        Validation data
    y_val : array-like of shape (n_samples,), optional
        Validation targets
    x_axis : array-like of shape (n_features,), optional
        Feature names (e.g., wavenumbers for spectroscopy).
        If None, uses feature indices.
    confidence : float, default=0.95
        Confidence level for outlier detection limits (Hotelling's T²,
        Q residuals, leverage, and studentized residuals).
        Must be between 0 and 1.

    Attributes
    ----------
    model : _PLS or Pipeline
        The original model passed to the inspector
    estimator : _PLS
        The PLS estimator
    transformer : Pipeline or None
        Preprocessing pipeline before PLS (if model was a Pipeline)
    n_components : int
        Number of latent variables
    n_features : int
        Number of features in original data
    n_samples : dict
        Number of samples in each dataset
    x_axis : ndarray
        Feature names/indices
    confidence : float
        Confidence level for outlier detection
    RMSE_train : float
        Root mean squared error on training data
    RMSE_test : float or None
        Root mean squared error on test data (if available)
    RMSE_val : float or None
        Root mean squared error on validation data (if available)
    R2_train : float
        R² score on training data
    R2_test : float or None
        R² score on test data (if available)
    R2_val : float or None
        R² score on validation data (if available)
    hotelling_t2_limit : float
        Critical value for Hotelling's T² statistic (computed on training data)
    q_residuals_limit : float
        Critical value for Q residuals statistic (computed on training data)

    Examples
    --------
    >>> from sklearn.cross_decomposition import PLSRegression
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from chemotools.datasets import load_fermentation_train
    >>> from chemotools.inspector import PLSRegressionInspector
    >>>
    >>> # Load data
    >>> X, y = load_fermentation_train()
    >>>
    >>> # Create and fit pipeline
    >>> pipeline = make_pipeline(
    ...     StandardScaler(),
    ...     PLSRegression(n_components=5)
    ... )
    >>> pipeline.fit(X, y)
    >>>
    >>> # Create inspector
    >>> inspector = PLSRegressionInspector(pipeline, X, y, x_axis=X.columns)
    >>>
    >>> # Print summary
    >>> inspector.summary()
    >>>
    >>> # Create all diagnostic plots
    >>> inspector.inspect()  # Creates scores, loadings, variance, regression plots
    >>>
    >>> # Compare preprocessing
    >>> inspector.inspect_spectra()
    >>>
    >>> # Access underlying data for custom analysis
    >>> x_scores = inspector.get_x_scores('train')
    >>> y_scores = inspector.get_y_scores('train')
    >>> x_loadings = inspector.get_x_loadings([0, 1, 2])
    >>> coeffs = inspector.get_regression_coefficients()

    Notes
    -----
    Memory usage scales linearly with dataset size. For very large datasets
    (>100,000 samples), consider subsampling for initial exploration.
    """

    component_label = "LV"

    def __init__(
        self,
        model: Union[_PLS, Pipeline],
        X_train: np.ndarray,
        y_train: np.ndarray,
        X_test: Optional[np.ndarray] = None,
        y_test: Optional[np.ndarray] = None,
        X_val: Optional[np.ndarray] = None,
        y_val: Optional[np.ndarray] = None,
        x_axis: Optional[Sequence] = None,
        confidence: float = 0.95,
    ):
        super().__init__(
            model=model,
            X_train=X_train,
            y_train=y_train,
            X_test=X_test,
            y_test=y_test,
            X_val=X_val,
            y_val=y_val,
            supervised=True,
            feature_names=x_axis,
            confidence=confidence,
        )
        self._x_scores_cache: Dict[str, np.ndarray] = {}
        self._y_scores_cache: Dict[str, np.ndarray] = {}
        self._leverage_detector: Optional[Leverage] = None
        self._studentized_detector: Optional[StudentizedResiduals] = None

    # ==================================================================================
    # Properties (PLS-specific)
    # ==================================================================================
    @property
    def leverage_detector(self) -> Leverage:
        """Return a fitted leverage detector cached for reuse."""
        if self._leverage_detector is None:
            detector = Leverage(self.model, confidence=self.confidence)
            X_train, y_train = self._get_raw_data("train")
            detector.fit(X_train, y_train)
            self._leverage_detector = detector
        return self._leverage_detector

    @property
    def studentized_detector(self) -> StudentizedResiduals:
        """Return a fitted studentized residuals detector cached for reuse."""
        if self._studentized_detector is None:
            detector = StudentizedResiduals(self.model, confidence=self.confidence)
            X_train, y_train = self._get_raw_data("train")
            detector.fit(X_train, y_train)
            self._studentized_detector = detector
        return self._studentized_detector

    # ==================================================================================
    # Private Methods (PLS-specific)
    # ==================================================================================
    def _get_regression_stats(
        self,
        dataset: str,
        target_index: int,
        leverage_detector: Leverage,
    ) -> Dict[str, Any]:
        """Calculate regression statistics for a single dataset.

        This method computes leverage and studentized residuals which require
        the latent space representation from PLS, making it PLS-specific.
        Parameters
        ----------
        dataset : str
            Dataset name ('train', 'test', or 'val')
        target_index : int
            Index of the target variable for multi-target PLS
        leverage_detector : Leverage
            Fitted leverage detector

        Returns
        -------
        dict
            Dictionary containing X, y_true, y_pred, studentized residuals,
            and leverages for the specified dataset
        """
        from chemotools.outliers._studentized_residuals import (
            calculate_studentized_residuals,
        )

        X, y_true = self._get_raw_data(dataset)
        assert y_true is not None, f"y data is required for dataset {dataset}"
        y_pred = self._get_predictions(dataset)

        # Slice Y data for the specific target
        if y_true.ndim > 1:
            y_true_sliced = y_true[:, target_index]
        else:
            y_true_sliced = y_true

        if y_pred.ndim > 1:
            y_pred_sliced = y_pred[:, target_index]
        else:
            y_pred_sliced = y_pred

        # Calculate studentized residuals for the specific target
        y_res = y_true_sliced - y_pred_sliced
        if y_res.ndim == 1:
            y_res = y_res.reshape(-1, 1)

        studentized = calculate_studentized_residuals(
            self.estimator, self._get_preprocessed_data(dataset), y_res
        )
        leverages = leverage_detector.predict_residuals(X)

        return {
            "X": X,
            "y_true": y_true_sliced,
            "y_pred": y_pred_sliced,
            "studentized": studentized,
            "leverages": leverages,
        }
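    # Consumer sketch (internal, illustrative only; keys as returned above).
    # A typical screen for influential samples might use the common
    # |studentized| > 3 rule of thumb:
    #
    #     stats = self._get_regression_stats("train", 0, self.leverage_detector)
    #     flagged = np.abs(stats["studentized"]) > 3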
    # ==================================================================================
    # Public Methods
    # ==================================================================================

    # ------------------------------------------------------------------
    # LatentVariableMixin hooks
    # ------------------------------------------------------------------
    def get_latent_scores(self, dataset: str) -> np.ndarray:
        """Hook for LatentVariableMixin - returns X-scores."""
        return self.get_x_scores(dataset)

    def get_latent_explained_variance(self) -> Optional[np.ndarray]:
        """Hook for LatentVariableMixin - returns explained X variance ratio."""
        return self.get_explained_x_variance_ratio()

    def get_latent_loadings(self) -> np.ndarray:
        """Hook for LatentVariableMixin - returns X-loadings."""
        return self.get_x_loadings()

    # ------------------------------------------------------------------
    # Scores methods
    # ------------------------------------------------------------------
    def get_x_scores(self, dataset: str = "train") -> np.ndarray:
        """Get PLS X-scores for specified dataset.

        Parameters
        ----------
        dataset : {'train', 'test', 'val'}, default='train'
            Which dataset to get scores for

        Returns
        -------
        x_scores : ndarray of shape (n_samples, n_components)
            PLS X-scores (latent variables from X)
        """
        if dataset not in self._x_scores_cache:
            X_preprocessed = self._get_preprocessed_data(dataset)
            x_scores = self.estimator.transform(X_preprocessed)
            self._x_scores_cache[dataset] = x_scores
        return self._x_scores_cache[dataset]
    def get_y_scores(self, dataset: str = "train") -> np.ndarray:
        """Get PLS Y-scores for specified dataset.

        Parameters
        ----------
        dataset : {'train', 'test', 'val'}, default='train'
            Which dataset to get scores for

        Returns
        -------
        y_scores : ndarray of shape (n_samples, n_components)
            PLS Y-scores (latent variables from Y)
        """
        if dataset not in self._y_scores_cache:
            X_preprocessed = self._get_preprocessed_data(dataset)
            _, y = self._get_raw_data(dataset)
            # Use transform with Y to get Y-scores
            _, y_scores = self.estimator.transform(X_preprocessed, y)
            self._y_scores_cache[dataset] = y_scores
        return self._y_scores_cache[dataset]
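    # Usage sketch (assumes a fitted inspector as in the class docstring).
    # Both getters cache per dataset, so repeated calls by plotting code are
    # cheap:
    #
    #     t = inspector.get_x_scores("train")  # (n_samples, n_components)
    #     u = inspector.get_y_scores("train")  # (n_samples, n_components)
    #     # Inner relation of the first latent variable (illustrative):
    #     r_lv1 = np.corrcoef(t[:, 0], u[:, 0])[0, 1]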
    # ------------------------------------------------------------------
    # Loadings and weights methods
    # ------------------------------------------------------------------
    def get_x_loadings(
        self, components: Optional[Union[int, Sequence[int]]] = None
    ) -> np.ndarray:
        """Get PLS X-loadings.

        Parameters
        ----------
        components : int, list of int, or None, default=None
            Which components to return. If None, returns all components.

        Returns
        -------
        x_loadings : ndarray of shape (n_features, n_components_selected)
            PLS X-loadings
        """
        return select_components(self.estimator.x_loadings_, components)

    def get_x_weights(
        self, components: Optional[Union[int, Sequence[int]]] = None
    ) -> np.ndarray:
        """Get PLS X-weights.

        Parameters
        ----------
        components : int, list of int, or None, default=None
            Which components to return. If None, returns all components.

        Returns
        -------
        x_weights : ndarray of shape (n_features, n_components_selected)
            PLS X-weights
        """
        return select_components(self.estimator.x_weights_, components)

    def get_x_rotations(
        self, components: Optional[Union[int, Sequence[int]]] = None
    ) -> np.ndarray:
        """Get PLS X-rotations.

        Parameters
        ----------
        components : int, list of int, or None, default=None
            Which components to return. If None, returns all components.

        Returns
        -------
        x_rotations : ndarray of shape (n_features, n_components_selected)
            PLS X-rotations
        """
        return select_components(self.estimator.x_rotations_, components)
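    # Note on the three accessors (standard scikit-learn PLS algebra): weights
    # are the per-deflation projection directions, loadings regress X onto the
    # scores, and rotations map centered, preprocessed X directly to scores,
    # roughly
    #
    #     scores = X_centered @ inspector.get_x_rotations()
    #
    # which is what ``estimator.transform`` computes internally (up to the
    # estimator's own scaling).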
    # ------------------------------------------------------------------
    # Regression coefficients
    # ------------------------------------------------------------------
    def get_regression_coefficients(self) -> np.ndarray:
        """Get PLS regression coefficients (regression vector).

        Returns
        -------
        coef : ndarray of shape (n_features,) or (n_features, n_targets)
            PLS regression coefficients
        """
        coef = self.estimator.coef_

        # sklearn PLS stores coef_ as (n_targets, n_features);
        # transpose to get (n_features, n_targets) for consistency
        coef = coef.T

        # For univariate targets, flatten to 1D
        if coef.shape[1] == 1:
            coef = coef.ravel()

        return coef
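    # Shape convention sketch (hypothetical shapes): for y_train of shape
    # (n_samples, 2), scikit-learn stores ``coef_`` as (2, n_features), so this
    # accessor returns (n_features, 2); for a single target it returns a flat
    # vector:
    #
    #     coef = inspector.get_regression_coefficients()
    #     coef.shape  # (n_features,) or (n_features, n_targets)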
    # ------------------------------------------------------------------
    # Variance methods
    # ------------------------------------------------------------------
    def get_explained_x_variance_ratio(self) -> Optional[np.ndarray]:
        """Get explained variance ratio in X-space for all components.

        Returns
        -------
        explained_x_variance_ratio : ndarray of shape (n_components,) or None
            Explained variance ratio in X-space, or None if not available
        """
        if hasattr(self.estimator, "explained_x_variance_ratio_"):
            return self.estimator.explained_x_variance_ratio_
        return None

    def get_explained_y_variance_ratio(self) -> Optional[np.ndarray]:
        """Get explained variance ratio in Y-space for all components.

        Returns
        -------
        explained_y_variance_ratio : ndarray of shape (n_components,) or None
            Explained variance ratio in Y-space, or None if not available
        """
        if hasattr(self.estimator, "explained_y_variance_ratio_"):
            return self.estimator.explained_y_variance_ratio_
        return None
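    # Example (hedged sketch): choosing the number of LVs that reaches 95%
    # cumulative Y-variance, when the fitted estimator exposes the ratio:
    #
    #     y_var = inspector.get_explained_y_variance_ratio()
    #     if y_var is not None:
    #         n_lv = int(np.searchsorted(np.cumsum(y_var), 0.95)) + 1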
    # ------------------------------------------------------------------
    # Summary method
    # ------------------------------------------------------------------
    def summary(self) -> PLSRegressionSummary:
        """Get a summary of the PLS regression model.

        Returns
        -------
        summary : PLSRegressionSummary
            Object containing model information
        """
        x_var = self.get_explained_x_variance_ratio()
        y_var = self.get_explained_y_variance_ratio()

        base_summary = self._base_summary()
        latent_summary = self.latent_summary()
        regression_summary = self.regression_summary()

        return PLSRegressionSummary(
            # Base fields
            **base_summary.to_dict(),
            # Latent fields
            **asdict(latent_summary),
            # Regression fields
            train=regression_summary.train,
            test=regression_summary.test,
            val=regression_summary.val,
            # PLS-specific fields
            # TODO: remove when PR approved (#32722 in sklearn)
            explained_x_variance_ratio=x_var.tolist() if x_var is not None else None,
            total_x_variance=float(np.sum(x_var) * 100) if x_var is not None else None,
            explained_y_variance_ratio=y_var.tolist() if y_var is not None else None,
            total_y_variance=float(np.sum(y_var) * 100) if y_var is not None else None,
        )
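    # Usage sketch: the returned dataclass carries the fields assembled above,
    # e.g. (illustrative read-out)
    #
    #     s = inspector.summary()
    #     print(s.train, s.total_y_variance)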
    # ------------------------------------------------------------------
    # Main inspection method
    # ------------------------------------------------------------------
    def inspect(
        self,
        dataset: Union[str, Sequence[str]] = "train",
        components_scores: Optional[
            Union[int, Tuple[int, int], Sequence[Union[int, Tuple[int, int]]]]
        ] = None,
        loadings_components: Optional[Union[int, Sequence[int]]] = None,
        variance_threshold: float = 0.95,
        color_by: Optional[
            Union[str, Dict[str, np.ndarray], Sequence, np.ndarray]
        ] = None,
        annotate_by: Optional[
            Union[str, Dict[str, np.ndarray], Sequence, np.ndarray]
        ] = None,
        plot_config: Optional[InspectorPlotConfig] = None,
        color_mode: Literal["continuous", "categorical"] = "continuous",
        target_index: int = 0,
        **kwargs,
    ) -> Dict[str, matplotlib.figure.Figure]:
        """Create all diagnostic plots for the PLS model.

        Parameters
        ----------
        dataset : str or sequence of str, default='train'
            Dataset(s) to visualize. Can be 'train', 'test', 'val', or a list.
        components_scores : int, tuple, or sequence, optional
            Components to plot for scores.
            - If int: plots first N components against sample index
            - If tuple (i, j): plots component i vs j
            - If sequence: plots multiple specifications
            - If None: defaults to (0, 1) and (1, 2) if enough components exist
        loadings_components : int or sequence of int, optional
            Components to plot for loadings.
            - If int: plots first N components
            - If sequence: plots specified components
            - If None: defaults to first 3 components
        variance_threshold : float, default=0.95
            Cumulative variance threshold for variance plots
        color_by : str or dict, optional
            Coloring specification.
            - "y": Color by target values (default for single dataset)
            - "sample_index": Color by sample index
            - dict: Dictionary mapping dataset names to color arrays
            - None: Color by dataset (for multi-dataset plots) or 'y'
              (for single dataset)
        annotate_by : str or dict, optional
            Annotations for plot points.
            - "sample_index": Annotate with sample indices
            - dict: Dictionary mapping dataset names to annotation arrays
        plot_config : InspectorPlotConfig, optional
            Configuration for plot sizes and styles
        color_mode : str, optional
            Coloring mode ("continuous" or "categorical").
        target_index : int, default=0
            Index of the target variable to inspect (for multi-output PLS).
        **kwargs
            Additional arguments passed to InspectorPlotConfig

        Returns
        -------
        dict
            Dictionary of matplotlib Figures with keys:
            - 'scores_1', 'scores_2', ...: Scores plots
            - 'x_vs_y_scores_1', 'x_vs_y_scores_2', ...: X-scores vs Y-scores
              plots (training set only)
            - 'loadings_x', 'loadings_weights', 'loadings_rotations':
              X-related loadings plots
            - 'regression_coefficients': Regression coefficient traces
              (one per target when multi-output)
            - 'variance_x', 'variance_y': Explained variance plots
              (when available)
            - 'distances_hotelling_q', 'distances_q_y_residuals',
              'distances_leverage_studentized': Distance diagnostics
            - 'predicted_vs_actual', 'residuals', 'qq_plot',
              'residual_distribution': Regression diagnostics
            - 'raw_spectra', 'preprocessed_spectra': Spectra plots
              (when preprocessing exists)
        """
        # ------------------------------------------------------------------
        # Input Validation
        # ------------------------------------------------------------------
        # Validate target_index
        _, y_train_full = self._get_raw_data("train")
        # Validated in __init__, but needed for type narrowing :/
        assert y_train_full is not None, "y_train is required for PLS inspection"

        if y_train_full.ndim > 1:
            n_targets = y_train_full.shape[1]
            if target_index < 0 or target_index >= n_targets:
                raise ValueError(
                    f"target_index {target_index} is out of bounds for "
                    f"y_train with {n_targets} targets"
                )
        elif target_index != 0:
            raise ValueError(
                f"target_index {target_index} is invalid for single-target model"
            )

        # Validate color_mode
        if color_mode not in ["continuous", "categorical"]:
            raise ValueError(
                f"color_mode must be either 'continuous' or 'categorical', got '{color_mode}'"
            )

        # Close previous figures to prevent memory leaks
        self._cleanup_previous_figures()

        # ------------------------------------------------------------------
        # Configs
        # ------------------------------------------------------------------
        # Generate "smart" defaults based on number of components
        if components_scores is None:
            components_scores = get_default_scores_components(self.n_components)
        if loadings_components is None:
            loadings_components = get_default_loadings_components(self.n_components)

        # Handle configuration
        config = plot_config or InspectorPlotConfig()

        # Allow kwargs to override config for convenience
        for key, value in kwargs.items():
            if hasattr(config, key):
                setattr(config, key, value)

        figures = {}

        # Prepare datasets and visual properties.
        # Normalizes inputs (e.g. single string -> list) and resolves
        # color/annotation dictionaries for each dataset
        datasets, color_by, annotate_by = self._prepare_inspection_config(
            dataset, color_by, annotate_by
        )

        # If multiple datasets are being inspected, we append suffixes to keys
        # to distinguish them (e.g. 'scores_train', 'scores_test')
        use_suffix = len(datasets) > 1

        # ------------------------------------------------------------------
        # Plotting Setup
        # ------------------------------------------------------------------
        # For plots that separate datasets (subplots) or show only one dataset,
        # we prefer coloring by target 'y' instead of dataset color (which is
        # uniform)
        separated_color_by = color_by
        if separated_color_by is None:
            separated_color_by = "y"

        xlabel = get_xlabel_for_features(self.feature_names is not None)
        preprocessed_x_axis = self._get_preprocessed_x_axis()

        # ------------------------------------------------------------------
        # Variance plots (X and Y space)
        # ------------------------------------------------------------------
        x_var = self.get_explained_x_variance_ratio()
        if x_var is not None:
            variance_x_fig = self.create_latent_variance_figure(
                variance_threshold=variance_threshold,
                figsize=config.variance_figsize,
            )
            if variance_x_fig is not None:
                variance_x_fig.axes[0].set_title(
                    "Explained Variance in X-space",
                    fontsize=12,
                    fontweight="bold",
                )
                figures["variance_x"] = variance_x_fig

        # Y-space variance
        y_var = self.get_explained_y_variance_ratio()
        if y_var is not None:
            variance_y_fig = _latent_plots.create_variance_plot(
                explained_variance_ratio=y_var,
                variance_threshold=variance_threshold,
                figsize=config.variance_figsize,
            )
            variance_y_fig.axes[0].set_title(
                "Explained Variance in Y-space", fontsize=12, fontweight="bold"
            )
            figures["variance_y"] = variance_y_fig

        # ------------------------------------------------------------------
        # Loadings plots (X-loadings, X-weights, X-rotations, coefficients)
        # ------------------------------------------------------------------
        loadings_x_fig = self.create_latent_loadings_figure(
            loadings_components=loadings_components,
            xlabel=xlabel,
            figsize=config.loadings_figsize,
        )
        loadings_x_fig.axes[0].set_title("X-Loadings", fontsize=12, fontweight="bold")
        figures["loadings_x"] = loadings_x_fig

        figures["loadings_weights"] = _latent_plots.create_loadings_plot(
            loadings=self.get_x_weights(),
            feature_names=preprocessed_x_axis,
            loadings_components=loadings_components,
            xlabel=xlabel,
            figsize=config.loadings_figsize,
            component_label=self.component_label,
        )
        figures["loadings_weights"].axes[0].set_title(
            "X-Weights", fontsize=12, fontweight="bold"
        )

        figures["loadings_rotations"] = _latent_plots.create_loadings_plot(
            loadings=self.get_x_rotations(),
            feature_names=preprocessed_x_axis,
            loadings_components=loadings_components,
            xlabel=xlabel,
            figsize=config.loadings_figsize,
            component_label=self.component_label,
        )
        figures["loadings_rotations"].axes[0].set_title(
            "X-Rotations", fontsize=12, fontweight="bold"
        )

        coef = self.get_regression_coefficients()
        manual_legend = None
        if coef.ndim == 1:
            coef_matrix = coef.reshape(-1, 1)
            coef_components = [0]
            component_label = "Coeff"
            manual_legend = ["Coefficient"]
        else:
            # Plot all targets
            coef_matrix = coef
            coef_components = list(range(coef.shape[1]))
            component_label = "Target "

        coef_fig = _latent_plots.create_loadings_plot(
            loadings=coef_matrix,
            feature_names=preprocessed_x_axis,
            loadings_components=coef_components,
            xlabel=xlabel,
            figsize=config.loadings_figsize,
            component_label=component_label,
        )
        coef_ax = coef_fig.axes[0]
        coef_ax.set_title("Regression Coefficients", fontsize=12, fontweight="bold")
        if manual_legend:
            handles, _ = coef_ax.get_legend_handles_labels()
            if handles:
                coef_ax.legend(handles, manual_legend, loc="best")
        figures["regression_coefficients"] = coef_fig
        # ------------------------------------------------------------------
        # Scores plots (X-scores and X vs Y scores)
        # ------------------------------------------------------------------
        scores_figures = self.create_latent_scores_figures(
            dataset=dataset,
            components=components_scores,
            color_by=color_by,
            annotate_by=annotate_by,
            figsize=config.scores_figsize,
            color_mode=color_mode,
        )
        figures.update(scores_figures)

        # X-scores vs Y-scores plots (training set only)
        x_scores = self.get_x_scores("train")
        y_scores = self.get_y_scores("train")
        _, y_train = self._get_raw_data("train")
        # Validated in __init__, but needed for type narrowing :/
        assert y_train is not None, "y_train is required for PLS inspection"

        # Slice y_train if needed for coloring
        if y_train.ndim > 1:
            y_train = y_train[:, target_index]

        x_y_scores_figures = _latent_plots.create_x_vs_y_scores_plots(
            x_scores=x_scores,
            y_scores=y_scores,
            y_train=y_train,
            components=components_scores,
            color_by=separated_color_by,
            annotate_by=annotate_by,
            figsize=config.scores_figsize,
            component_label=self.component_label,
            color_mode=color_mode,
        )
        figures.update(x_y_scores_figures)

        # ------------------------------------------------------------------
        # Latent Variable Distances (Hotelling T² vs Q residuals)
        # ------------------------------------------------------------------
        # Fit detectors once on training data for consistent limits and
        # efficiency
        X_train, y_train_full = self._get_raw_data("train")
        # Validated in __init__, but needed for type narrowing :/
        assert y_train_full is not None, "y_train is required for PLS inspection"

        # Fit the Hotelling T²
        hotelling_detector = HotellingT2(self.model, confidence=self.confidence)
        hotelling_detector.fit(X_train)

        # Fit the Q residuals
        q_detector = QResiduals(self.model, confidence=self.confidence)
        q_detector.fit(X_train)

        # Q residuals vs Hotelling T² plot
        figures["distances_hotelling_q"] = self.create_latent_distance_figure(
            dataset=dataset,
            color_by=color_by,
            figsize=config.distances_figsize,
            annotate_by=annotate_by,
            color_mode=color_mode,
            hotelling_detector=hotelling_detector,
            q_residuals_detector=q_detector,
        )

        # ------------------------------------------------------------------
        # Regression Diagnostics Setup
        # ------------------------------------------------------------------
        # Prepare leverage detector (needed for stats)
        leverage_detector = Leverage(self.model, confidence=self.confidence)
        leverage_detector.fit(X_train)

        # Calculate stats (needed for all regression plots).
        # We always calculate train stats to determine limits (e.g. studentized)
        train_stats = self._get_regression_stats(
            "train", target_index, leverage_detector
        )

        # Prepare data for regression diagnostics
        datasets_data: Dict[str, Dict[str, Any]] = {}
        for ds in datasets:
            if ds == "train":
                datasets_data[ds] = train_stats
            else:
                datasets_data[ds] = self._get_regression_stats(
                    ds, target_index, leverage_detector
                )

        # ------------------------------------------------------------------
        # Regression Distances: Leverage vs Studentized
        # ------------------------------------------------------------------
        # Calculate studentized residuals for training data to determine limit
        studentized_train = train_stats["studentized"]
        student_limit = np.percentile(np.abs(studentized_train), self.confidence * 100)
        student_detector = StudentizedResiduals(self.model, confidence=self.confidence)
        student_detector.critical_value_ = student_limit

        # Always plot training data for this diagnostic plot.
        # We extract the training data directly from train_stats
        figures["distances_leverage_studentized"] = create_regression_distances_plot(
            X=train_stats["X"],
            y_true=train_stats["y_true"],
            leverage_detector=leverage_detector,
            student_detector=student_detector,
            color_by=separated_color_by,
            figsize=config.distances_figsize,
            annotate_by=annotate_by,
            color_mode=color_mode,
        )

        # ------------------------------------------------------------------
        # Regression Distances: Q vs Y residuals
        # ------------------------------------------------------------------
        figures["distances_q_y_residuals"] = _latent_plots.create_q_vs_y_residuals_plot(
            datasets_data=datasets_data,
            model=self.model,
            confidence=self.confidence,
            color_by=color_by,
            figsize=config.distances_figsize,
            q_residuals_detector=q_detector,
            annotate_by=annotate_by,
            color_mode=color_mode,
        )

        # ------------------------------------------------------------------
        # Regression diagnostic plots
        # ------------------------------------------------------------------
        # Predicted vs Actual
        figures["predicted_vs_actual"] = create_predicted_vs_actual_plot(
            datasets_data=datasets_data,
            color_by=color_by,
            figsize=config.regression_figsize,
            annotate_by=annotate_by,
            color_mode=color_mode,
        )

        # Residual scatter plot
        figures["residuals"] = create_y_residual_plot(
            datasets_data=datasets_data,
            color_by=separated_color_by,
            figsize=config.regression_figsize,
            annotate_by=annotate_by,
            color_mode=color_mode,
        )

        # Q-Q plot
        figures["qq_plot"] = create_qq_plot(
            datasets_data=datasets_data,
            figsize=config.regression_figsize,
            confidence=self.confidence,
        )

        # Residual distribution
        figures["residual_distribution"] = create_residual_distribution_plot(
            datasets_data=datasets_data,
            figsize=config.regression_figsize,
        )

        # ------------------------------------------------------------------
        # Spectra plots (if preprocessing exists)
        # ------------------------------------------------------------------
        if self.transformer is not None:
            spectra_figs = self.inspect_spectra(
                dataset=datasets if use_suffix else datasets[0],
                color_by=color_by,
                figsize=config.spectra_figsize,
                color_mode=color_mode,
            )
            figures.update(spectra_figs)

        return self._track_figures(figures)
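# Usage sketch (assumes the fitted `inspector` from the class docstring; the
# figure keys are those listed in ``inspect``'s docstring):
#
#     figures = inspector.inspect(dataset=["train", "test"], color_by="y")
#     figures["predicted_vs_actual"].savefig("predicted_vs_actual.png", dpi=300)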