Source code for chemotools.plotting._scores

"""Scores plot for visualizing model projections and latent space."""

from typing import Literal, Optional, Any, Tuple
import numpy as np
from matplotlib.figure import Figure
from matplotlib.axes import Axes

from chemotools.plotting._base import BasePlot, ColoringMixin
from chemotools.plotting._utils import (
    annotate_points,
    add_confidence_ellipse,
    validate_data,
    scatter_with_colormap,
)



[docs]
class ScoresPlot(BasePlot, ColoringMixin):
    """Simple, composable scores plot for a single dataset.

    This class creates scatter plots of model scores (projections) for one dataset.
    Multiple datasets can be overlaid by using the render() method on shared axes.

    Parameters
    ----------
    scores : np.ndarray
        Score array with shape (n_samples, n_components).
    components : tuple[int, int], optional
        Component indices to plot (default is (0, 1) for PC1 vs PC2).
        Uses 0-based indexing (e.g., (0, 1) plots PC1 vs PC2).
    color_by : np.ndarray, optional
        Values for coloring samples. Can be either:

        - Continuous (numeric): shows colorbar (e.g., concentration, temperature)
        - Categorical (strings/classes): shows legend with discrete colors

    annotations : list[str], optional
        Labels for annotating individual points.
    label : str, optional
        Legend label for this dataset (default: "Data").
    color : str, optional
        Color for all points when color_by is None (default: auto-assigned).
    colormap : str, optional
        Colormap name. Colorblind-friendly defaults:

        - "tab10" for categorical data
        - "viridis" for continuous data

    confidence_ellipse : bool or float, optional
        Whether to draw a confidence ellipse around the data.

        - If True: draws 95% confidence ellipse
        - If float: draws ellipse at specified confidence level (e.g., 0.90, 0.99)
        - If False or None: no ellipse (default)

    color_mode : {"continuous", "categorical"}, optional
        Explicitly specify coloring mode. If None (default), automatically
        detects based on dtype and unique values of color_by.
    colorbar_label : str, optional
        Label for the colorbar when using continuous coloring.
        Default is "Value". Only applies when color_by is continuous.

    Raises
    ------
    ValueError
        If components tuple contains invalid component indices.

    Examples
    --------
    **Simple single dataset plot:**

    >>> plot = ScoresPlot(train_scores)
    >>> fig = plot.show(title="PCA Scores")

    **Multiple datasets composed together:**

    >>> fig, ax = plt.subplots()
    >>> ScoresPlot(train_scores, label="Train", color="blue").render(ax)
    >>> ScoresPlot(test_scores, label="Test", color="red").render(ax)
    >>> ax.legend()
    >>> plt.show()

    **With categorical coloring:**

    >>> plot = ScoresPlot(train_scores, color_by=train_classes)
    >>> fig = plot.show(title="Scores by Class")

    **With continuous coloring:**

    >>> plot = ScoresPlot(train_scores, color_by=concentrations, colormap='viridis')
    >>> fig = plot.show(title="Scores by Concentration")

    **Custom components and labels:**

    >>> plot = ScoresPlot(scores, components=(1, 2))
    >>> fig = plot.show(
    ...     title="PC2 vs PC3",
    ...     xlabel="Second Component",
    ...     ylabel="Third Component"
    ... )

    **With annotations:**

    >>> annotations = [f"S{i}" if i in outliers else "" for i in range(len(scores))]
    >>> plot = ScoresPlot(scores, annotations=annotations)
    >>> fig = plot.show(title="Annotated Scores")

    **With confidence ellipse:**

    >>> plot = ScoresPlot(train_scores, confidence_ellipse=True)
    >>> fig = plot.show(title="Scores with 95% Confidence Ellipse")

    >>> plot = ScoresPlot(train_scores, confidence_ellipse=0.99, color="blue")
    >>> fig = plot.show(title="Scores with 99% Confidence Ellipse")
    """

    def __init__(
        self,
        scores: np.ndarray,
        *,
        components: tuple[int, int] = (0, 1),
        color_by: Optional[np.ndarray] = None,
        annotations: Optional[list[str]] = None,
        label: str = "Data",
        color: Optional[str] = None,
        colormap: Optional[str] = None,
        confidence_ellipse: Optional[bool | float] = None,
        color_mode: Optional[Literal["continuous", "categorical"]] = None,
        colorbar_label: str = "Value",
    ):
        self.scores = validate_data(scores, name="scores", ensure_2d=True)
        self.components = components
        self.annotations = annotations
        self.label = label
        self.color = color

        # Process confidence ellipse parameter
        self.confidence_level: Optional[float]
        if confidence_ellipse is True:
            self.confidence_level = 0.95
        elif isinstance(confidence_ellipse, (int, float)) and confidence_ellipse:
            self.confidence_level = float(confidence_ellipse)
        else:
            self.confidence_level = None

        # Validate inputs
        self._validate_components()

        if color_by is not None:
            color_by = validate_data(
                color_by, name="color_by", ensure_2d=False, numeric=False
            )

        # Initialize coloring
        self._init_coloring(
            color_by, colormap, color_mode=color_mode, colorbar_label=colorbar_label
        )

    def _validate_components(self) -> None:
        """Validate that component indices are valid.

        Raises
        ------
        ValueError
            If components are invalid for the scores array.
        """
        comp1, comp2 = self.components
        n_components = self.scores.shape[1]

        if comp1 < 0 or comp1 >= n_components:
            raise ValueError(
                f"Component index {comp1} is invalid. "
                f"Valid range: 0-{n_components - 1} (have {n_components} components)"
            )

        if comp2 < 0 or comp2 >= n_components:
            raise ValueError(
                f"Component index {comp2} is invalid. "
                f"Valid range: 0-{n_components - 1} (have {n_components} components)"
            )

        if comp1 == comp2:
            raise ValueError(
                f"Component indices must be different, got both as {comp1}"
            )

    def _get_default_labels(self) -> dict[str, str]:
        comp1, comp2 = self.components
        return {
            "xlabel": f"PC{comp1 + 1}",
            "ylabel": f"PC{comp2 + 1}",
        }


[docs]
    def show(
        self,
        *,
        figsize: Optional[Tuple[float, float]] = None,
        title: Optional[str] = None,
        xlabel: Optional[str] = None,
        ylabel: Optional[str] = None,
        xlim: Optional[Tuple[float, float]] = None,
        ylim: Optional[Tuple[float, float]] = None,
        **kwargs: Any,
    ) -> Figure:
        """Create and return a complete figure with the scores plot.

        This method handles figure creation and then delegates to `render()`.

        Parameters
        ----------
        figsize : tuple[float, float], optional
            Figure size in inches (width, height).
        title : str, optional
            Figure title.
        xlabel : str, optional
            Custom x-axis label. If None, uses existing label or default.
        ylabel : str, optional
            Custom y-axis label. If None, uses existing label or default.
        xlim : tuple[float, float], optional
            X-axis limits as (xmin, xmax).
        ylim : tuple[float, float], optional
            Y-axis limits as (ymin, ymax).
        **kwargs : Any
            Additional keyword arguments passed to the render() method.

        Returns
        -------
        Figure
            The matplotlib Figure object containing the plot.
        """
        return super().show(
            figsize=figsize,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
            xlim=xlim,
            ylim=ylim,
            **kwargs,
        )



[docs]
    def render(
        self,
        ax: Optional[Axes] = None,
        *,
        xlabel: Optional[str] = None,
        ylabel: Optional[str] = None,
        xlim: Optional[tuple[float, float]] = None,
        ylim: Optional[tuple[float, float]] = None,
        **kwargs: Any,
    ) -> tuple[Figure, Axes]:
        """Render the plot on the given axes or create new ones.

        Use this method to compose multiple plots on the same axes.

        Parameters
        ----------
        ax : Axes, optional
            Matplotlib axes to plot on. If None, creates new figure and axes.
        xlabel : str, optional
            Custom x-axis label. If None, uses existing label or defaults to "PC{comp1+1}".
        ylabel : str, optional
            Custom y-axis label. If None, uses existing label or defaults to "PC{comp2+1}".
        xlim : tuple[float, float], optional
            X-axis limits as (xmin, xmax).
        ylim : tuple[float, float], optional
            Y-axis limits as (ymin, ymax).
        **kwargs : Any
            Additional keyword arguments passed to ax.scatter().

        Returns
        -------
        fig : Figure
            The matplotlib Figure object.
        ax : Axes
            The matplotlib Axes object with the rendered plot.

        Examples
        --------
        Compose multiple datasets:

        >>> fig, ax = plt.subplots()
        >>> ScoresPlot(train_scores, label="Train").render(ax)
        >>> ScoresPlot(test_scores, label="Test").render(ax)
        >>> ax.set_xlabel("PC1")
        >>> ax.set_ylabel("PC2")
        >>> ax.legend()
        >>> plt.show()
        """
        fig, ax = super().render(
            ax=ax,
            xlabel=xlabel,
            ylabel=ylabel,
            xlim=xlim,
            ylim=ylim,
            **kwargs,
        )

        # Add colorbar for continuous data
        self._add_colorbar_if_needed(ax)

        # Add legend only if there are labeled artists
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()

        return fig, ax


    def _render_plot(self, ax: Axes, **kwargs: Any) -> None:
        """Internal method to render the scores plot on given axes."""
        comp1, comp2 = self.components
        alpha = kwargs.pop("alpha", 0.7)
        s = kwargs.pop("s", 50)

        x = self.scores[:, comp1]
        y = self.scores[:, comp2]

        scatter_with_colormap(
            ax,
            x,
            y,
            color_by=self.color_by,
            is_categorical=self.is_categorical,
            colormap=self.colormap,
            color=self.color,
            label=self.label,
            alpha=alpha,
            s=s,
            **kwargs,
        )

        # Add confidence ellipse if requested
        if self.confidence_level is not None:
            # Default to black if no color specified
            edgecolor = self.color if self.color is not None else "black"
            add_confidence_ellipse(
                ax,
                x,
                y,
                confidence=self.confidence_level,
                edgecolor=edgecolor,
                linewidth=1,
                linestyle="--",
                alpha=0.8,
            )

        # Add point annotations if provided
        if self.annotations is not None:
            annotate_points(
                ax,
                x,
                y,
                self.annotations,
                fontsize=8,
                xytext=(5, 5),
                textcoords="offset points",
            )