Source code for chemotools.plotting._distances

"""Distances plot for visualizing diagnostic measures and outlier detection."""

from typing import Optional, Any, Tuple, Literal

import numpy as np
from matplotlib.figure import Figure
from matplotlib.axes import Axes

from chemotools.plotting._base import BasePlot, ColoringMixin
from chemotools.plotting._utils import (
    annotate_points,
    add_confidence_lines,
    validate_data,
    scatter_with_colormap,
)



[docs]
class DistancesPlot(BasePlot, ColoringMixin):
    """Simple, composable distances plot for a single dataset.

    This class creates scatter plots of distance measures (e.g., Q residuals, Hotelling's T²)
    for outlier detection. Supports plotting one distance vs another or distance vs sample index.
    Multiple datasets can be overlaid by using the render() method on shared axes.

    Parameters
    ----------
    y : np.ndarray
        Y-axis values to plot (required, positional). Accepts 1D arrays only.
    x : np.ndarray, optional
        Explicit x-axis values (keyword-only). Must be 1D and match the length
        of ``y``. When omitted, the sample index (0, 1, ..., n_samples-1) is
        used and the default x-axis label becomes "Sample Index".
    color_by : np.ndarray, optional
        Values for coloring samples. Must have one entry per plotted point.
        Can be either:

        - Continuous (numeric): shows colorbar
        - Categorical (strings/classes): shows legend with discrete colors

    annotations : list[str], optional
        Labels for annotating individual points. Must have one entry per
        plotted point.
    label : str, optional
        Legend label for this dataset (default: "Data").
    color : str, optional
        Color for all points when color_by is None (default: auto-assigned).
    colormap : str, optional
        Colormap name. Colorblind-friendly defaults:

        - "tab10" for categorical data
        - "viridis" for continuous data

    marker : str, optional
        Marker style for scatter points (default: "o"). Examples: "o", "s", "^", "v", "D".
    confidence_lines : bool or tuple[float | None, float | None], optional
        Whether to draw confidence/threshold lines.

        - If tuple: (x_threshold, y_threshold) values for lines; either entry
          may be None to skip that line
        - If True: accepted, but thresholds are not yet derived from the data,
          so no lines are drawn
        - If False or None: no lines (default)

        Examples: (12.5, 5.2), (None, 5.2), (12.5, None)
    color_mode : {"continuous", "categorical"}, optional
        Explicitly specify coloring mode. If None (default), automatically
        detects based on dtype and unique values of color_by.
    colorbar_label : str, optional
        Label for the colorbar when using continuous coloring.
        Default is "Value". Only applies when color_by is continuous.

    Raises
    ------
    ValueError
        If ``x`` or ``y`` is not a 1D array, if ``x`` and ``y`` differ in
        length, or if ``color_by`` / ``annotations`` do not have one entry per
        plotted point.

    Examples
    --------
    **Simple single dataset plot (Q residuals vs sample index):**

    >>> plot = DistancesPlot(q_residuals, confidence_lines=(None, 5.2))
    >>> fig = plot.show(title="Q Residuals with Control Limit")

    **Multiple datasets composed together (T² vs Q):**

    >>> fig, ax = plt.subplots()
    >>> DistancesPlot(
    ...     y=train_q,
    ...     x=train_t2,
    ...     label="Train",
    ...     color="blue",
    ...     confidence_lines=(12.5, 5.2),
    ... ).render(ax)
    >>> DistancesPlot(
    ...     y=test_q,
    ...     x=test_t2,
    ...     label="Test",
    ...     color="red",
    ... ).render(ax)
    >>> ax.set_xlabel("Hotelling's T²")
    >>> ax.set_ylabel("Q Residuals")
    >>> ax.legend()
    >>> plt.show()

    **With categorical coloring:**

    >>> plot = DistancesPlot(
    ...     y=q_residuals,
    ...     x=t2_residuals,
    ...     color_by=classes,
    ...     confidence_lines=(12.5, 5.2),
    ... )
    >>> fig = plot.show(title="Outliers by Class")

    **With annotations for outliers:**

    >>> outliers = [5, 23, 47]
    >>> annotations = [f"S{i}" if i in outliers else "" for i in range(len(q_residuals))]
    >>> plot = DistancesPlot(
    ...     y=q_residuals,
    ...     annotations=annotations,
    ...     confidence_lines=(None, 5.2),
    ... )
    >>> fig = plot.show(title="Annotated Outliers")

    **Explicit x/y arrays:**

    >>> plot = DistancesPlot(
    ...     y=q_residuals,
    ...     x=t2_residuals,
    ...     confidence_lines=(9.35, 12.0),
    ... )
    >>> fig = plot.show(
    ...     title="T² vs Q",
    ...     xlabel="Hotelling's T²",
    ...     ylabel="Q Residuals",
    ... )
    """

    def __init__(
        self,
        y: np.ndarray,
        *,
        x: Optional[np.ndarray] = None,
        color_by: Optional[np.ndarray] = None,
        annotations: Optional[list[str]] = None,
        label: str = "Data",
        color: Optional[str] = None,
        colormap: Optional[str] = None,
        marker: str = "o",
        confidence_lines: Optional[bool | tuple[float | None, float | None]] = None,
        color_mode: Optional[Literal["continuous", "categorical"]] = None,
        colorbar_label: str = "Value",
    ):
        """Store display options, validate the inputs and set up coloring.

        The parameters are described in the class docstring. Construction
        proceeds in this order: plain options are stored, the threshold pair
        is resolved, the arrays go through ``validate_data``, the x/y state is
        initialised, coloring is initialised, and finally the lengths of
        ``color_by`` and ``annotations`` are checked against the data.
        """
        # Plain display options are kept exactly as given.
        self.annotations, self.label = annotations, label
        self.color, self.marker = color, marker

        # Only an explicit (x_threshold, y_threshold) tuple produces
        # thresholds. True, False and None all leave both unset; deriving
        # thresholds from the data for True is not implemented.
        x_limit: Optional[float] = None
        y_limit: Optional[float] = None
        if isinstance(confidence_lines, tuple):
            x_limit, y_limit = confidence_lines
        self.x_threshold = x_limit
        self.y_threshold = y_limit

        # Run every supplied array through the shared validator.
        y = validate_data(y, name="y", ensure_2d=False)
        x = None if x is None else validate_data(x, name="x", ensure_2d=False)
        if color_by is not None:
            color_by = validate_data(
                color_by,
                name="color_by",
                ensure_2d=False,
                numeric=False,
            )

        # Declared here for type checkers; assigned inside _init_from_xy.
        self._default_xlabel: str
        self._default_ylabel: str
        self._init_from_xy(x, y)

        self._init_coloring(
            color_by,
            colormap,
            color_mode=color_mode,
            colorbar_label=colorbar_label,
        )

        # Runs last: it relies on self._y and self.color_by being in place.
        self._validate_color_and_annotations()

    def _init_from_xy(
        self,
        x: Optional[np.ndarray],
        y: np.ndarray,
    ) -> None:
        """Initialize internal state from explicit x/y arrays."""

        if y.ndim != 1:
            raise ValueError("Explicit 'y' must be a 1D array.")

        if x is None:
            self._x = np.arange(y.shape[0])
            auto_xlabel = "Sample Index"
        else:
            if x.ndim != 1:
                raise ValueError("Explicit 'x' must be a 1D array.")
            if x.shape[0] != y.shape[0]:
                raise ValueError("'x' and 'y' must have the same length.")
            self._x = x
            auto_xlabel = "X"

        self._y = y

        auto_ylabel = "Distance"

        self._default_xlabel = auto_xlabel
        self._default_ylabel = auto_ylabel

    def _validate_color_and_annotations(self) -> None:
        """Ensure optional color and annotation arrays align with the data length."""

        n_points = self._y.shape[0]

        if self.color_by is not None and len(self.color_by) != n_points:
            raise ValueError("color_by must have the same length as the plotted data.")

        if self.annotations is not None and len(self.annotations) != n_points:
            raise ValueError(
                "annotations must have the same length as the plotted data."
            )

    def _get_default_labels(self) -> dict[str, str]:
        return {
            "xlabel": self._default_xlabel,
            "ylabel": self._default_ylabel,
        }


[docs]
    def show(
        self,
        *,
        figsize: Optional[Tuple[float, float]] = None,
        title: Optional[str] = None,
        xlabel: Optional[str] = None,
        ylabel: Optional[str] = None,
        xlim: Optional[Tuple[float, float]] = None,
        ylim: Optional[Tuple[float, float]] = None,
        **kwargs: Any,
    ) -> Figure:
        """Build a complete figure containing the distances plot.

        Every argument is forwarded unchanged to the parent class ``show()``,
        which is responsible for creating the figure.

        Parameters
        ----------
        figsize : tuple[float, float], optional
            Figure size in inches (width, height).
        title : str, optional
            Figure title.
        xlabel : str, optional
            Custom x-axis label.
        ylabel : str, optional
            Custom y-axis label.
        xlim : tuple[float, float], optional
            X-axis limits as (xmin, xmax).
        ylim : tuple[float, float], optional
            Y-axis limits as (ymin, ymax).
        **kwargs : Any
            Extra keyword arguments, passed through to the parent ``show()``.

        Returns
        -------
        Figure
            The matplotlib Figure object returned by the parent ``show()``.
        """
        figure_options: dict[str, Any] = {
            "figsize": figsize,
            "title": title,
            "xlabel": xlabel,
            "ylabel": ylabel,
            "xlim": xlim,
            "ylim": ylim,
        }
        return super().show(**figure_options, **kwargs)



[docs]
    def render(
        self,
        ax: Optional[Axes] = None,
        *,
        xlabel: Optional[str] = None,
        ylabel: Optional[str] = None,
        xlim: Optional[tuple[float, float]] = None,
        ylim: Optional[tuple[float, float]] = None,
        **kwargs: Any,
    ) -> tuple[Figure, Axes]:
        """Render the plot on the given axes or create new ones.

        Use this method to compose multiple plots on the same axes. Drawing is
        delegated to the parent class ``render()``; afterwards a colorbar is
        added when needed and a legend is attached if the axes hold at least
        one labelled artist.

        Parameters
        ----------
        ax : Axes, optional
            Matplotlib axes to plot on. If None, creates new figure and axes.
        xlabel : str, optional
            Custom x-axis label. If None, uses existing label or the default label
            configured at initialization.
        ylabel : str, optional
            Custom y-axis label. If None, uses existing label or the default label
            configured at initialization.
        xlim : tuple[float, float], optional
            X-axis limits as (xmin, xmax).
        ylim : tuple[float, float], optional
            Y-axis limits as (ymin, ymax).
        **kwargs : Any
            Additional keyword arguments passed to ax.scatter().

        Returns
        -------
        fig : Figure
            The matplotlib Figure object.
        ax : Axes
            The matplotlib Axes object with the rendered plot.

        Examples
        --------
        Compose multiple datasets:

        >>> fig, ax = plt.subplots()
        >>> DistancesPlot(train_dist, label="Train").render(ax)
        >>> DistancesPlot(test_dist, label="Test").render(ax)
        >>> ax.set_xlabel("Hotelling T²")
        >>> ax.set_ylabel("Q Residuals")
        >>> plt.show()
        """
        fig, ax = super().render(
            ax=ax,
            xlabel=xlabel,
            ylabel=ylabel,
            xlim=xlim,
            ylim=ylim,
            **kwargs,
        )

        # Add colorbar for continuous data
        self._add_colorbar_if_needed(ax)

        # Calling ax.legend() with nothing to show (e.g. an empty or
        # underscore-prefixed label) makes matplotlib warn and attach an empty
        # legend box, so only build the legend when there are entries for it.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()

        return fig, ax


    def _render_plot(self, ax: Axes, **kwargs: Any) -> None:
        """Draw the scatter, optional threshold lines and optional annotations.

        Parameters
        ----------
        ax : Axes
            Axes to draw on.
        **kwargs : Any
            Extra options forwarded to ``scatter_with_colormap``. ``alpha``
            defaults to 0.7, ``s`` to 50 and ``marker`` to ``self.marker``
            unless the caller overrides them.
        """
        # Caller-supplied values win over the defaults listed first.
        scatter_options: dict[str, Any] = {
            "alpha": 0.7,
            "s": 50,
            "marker": self.marker,
            **kwargs,
        }

        x_values, y_values = self._x, self._y

        scatter_with_colormap(
            ax,
            x_values,
            y_values,
            color_by=self.color_by,
            is_categorical=self.is_categorical,
            colormap=self.colormap,
            color=self.color,
            label=self.label,
            **scatter_options,
        )

        # Threshold lines are drawn as soon as at least one threshold is set.
        no_thresholds = self.x_threshold is None and self.y_threshold is None
        if not no_thresholds:
            add_confidence_lines(
                ax,
                x_threshold=self.x_threshold,
                y_threshold=self.y_threshold,
                color="red",
                linestyle="--",
                linewidth=1,
                alpha=0.7,
            )

        # Per-point text labels, when the caller supplied them.
        if self.annotations is None:
            return
        annotate_points(
            ax,
            x_values,
            y_values,
            self.annotations,
            fontsize=8,
            xytext=(5, 5),
            textcoords="offset points",
        )