Source code for pyco2stats.visualize_mpl

import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from .sinclair import Sinclair
from .gaussian_mixtures import GMM 

"""
The Visualize classes provide tools for creating graphical representations of data analyzed with pyco2stats.
Visualize_Mpl relies on the Matplotlib library.

"""


[docs]
class Visualize_Mpl:
    """
    Class of plotting helpers: Sinclair-style probability plots for raw data and
    Gaussian mixture models (GMMs), Q-Q plots, and GMM PDF plots.
    """


[docs]
    @staticmethod
    def pp_raw_data(raw_data, ax=None, **scatter_kwargs):
        """
        Scatter raw data on a Sinclair-style probability plot.

        The input is passed to ``Sinclair.get_raw_data``; the first array it
        returns is used for the x-coordinates and the second for the
        y-coordinates of the scatter points.

        Parameters
        ----------
        raw_data : array-like
            Raw data values to display.
        ax : matplotlib.axes.Axes, optional
            Axes to draw on. A new figure and Axes are created when None.
        **scatter_kwargs : dict
            Extra keyword arguments forwarded unchanged to ``ax.scatter()``.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes that received the scatter points.
        """
        # Transform first, so that a failure here does not leave behind
        # an empty figure.
        x_sigma, y_data = Sinclair.get_raw_data(raw_data)

        if ax is None:
            ax = plt.subplots()[1]

        ax.scatter(x_sigma, y_data, **scatter_kwargs)
        return ax



[docs]
    @staticmethod
    def pp_combined_population(means, stds, weights, x_range=(-3.5, 3.5), ax=None, **line_kwargs):
        """
        Draw the cumulative curve of a Gaussian mixture on a probability plot.

        A 600-point grid is built from ``x_range`` widened by 1.5 on each
        side. ``Sinclair.calculate_combined_population`` is evaluated on that
        grid and its result is converted with ``Sinclair.cumulative_to_sigma``.
        The converted values are plotted on the x-axis against the grid on
        the y-axis.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the Gaussian components.
        weights : array-like
            Weights of the Gaussian components.
        x_range : tuple, optional
            ``(low, high)`` pair used both to build the evaluation grid and
            as the final x-axis limits.
        ax : matplotlib.axes.Axes, optional
            Axes to draw on. A new figure and Axes are created when None.
        **line_kwargs : dict
            Extra keyword arguments forwarded unchanged to ``ax.plot()``.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes that received the curve.
        """
        # The grid reaches 1.5 beyond each end of x_range so the tails of the
        # curve are computed outside the visible window.
        # NOTE(review): the grid is passed to the Sinclair helper as the
        # evaluation points but is derived from the sigma-axis window --
        # confirm this is intended for data that are not standardized.
        lower, upper = x_range[0] - 1.5, x_range[1] + 1.5
        grid = np.linspace(lower, upper, 600)

        mixture_cdf = Sinclair.calculate_combined_population(grid, means, stds, weights)
        sigma_positions = Sinclair.cumulative_to_sigma(mixture_cdf)

        if ax is None:
            ax = plt.subplots()[1]

        # The whole curve is drawn; set_xlim then clips the view to x_range.
        ax.plot(sigma_positions, grid, **line_kwargs)
        ax.set_xlim(x_range)
        return ax




[docs]
    @staticmethod
    def pp_single_populations(means, stds, z_range=(-3.5, 3.5), ax=None, **line_kwargs):
        """
        Plot individual Gaussian distributions on a probability plot.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the Gaussian components.
        z_range : tuple, optional
            Range of z-values to use for plotting.
        ax : matplotlib.axes.Axes, optional
            Axes to plot on. Creates new one if None.
        **line_kwargs : dict
            Additional arguments passed to ax.plot().

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object with the plots.
        """

        means = np.atleast_1d(means)
        stds  = np.atleast_1d(stds)

        for mean, std in zip(means, stds):
            Visualize_Mpl.pp_one_population(mean, std, z_range=(-3.5, 3.5), ax=ax, **line_kwargs)

        return ax  




[docs]
    def pp_one_population(mean, std, z_range=(-3.5, 3.5), ax=None, **line_kwargs):
        """
        Plot a single Gaussian distribution on a probability plot.

        Parameters
        ----------
        mean : float
            Mean of the Gaussian distribution.
        std : float
            Standard deviation of the Gaussian distribution.
        z_range : tuple, optional
            Range of z-values to use for plotting.
        ax : matplotlib.axes.Axes, optional
            Axes to plot on. Creates new one if None.
        **line_kwargs : dict
            Additional arguments passed to ax.plot().

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object with the plot.
        """
        z_vals = np.linspace(z_range[0], z_range[1], 600)

        if ax is None:
            fig, ax = plt.subplots()

        x_vals = mean + z_vals * std
        ax.plot(z_vals, x_vals, **line_kwargs)

        return ax   




[docs]
    @staticmethod
    def pp_add_sigma_grid(ax=None, sigma_ticks=np.arange(-3, 4, 1)):
        """
        Place major x-ticks at the given sigma positions and enable the grid.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes to modify. A new figure and Axes are created when None.
        sigma_ticks : array-like
            Positions (z-scores) of the major x-axis ticks, and therefore of
            the vertical grid lines.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes with the updated ticks, grid and x-limits.
        """
        if ax is None:
            ax = plt.subplots()[1]

        # Pin the major ticks to exactly the requested positions.
        tick_locator = ticker.FixedLocator(sigma_ticks)
        ax.xaxis.set_major_locator(tick_locator)

        # Thin dashed grid on both major and minor ticks.
        grid_style = {'which': 'both', 'linestyle': '--', 'linewidth': 0.5}
        ax.grid(True, **grid_style)

        # The x-limits are always reset to this fixed window, whatever ticks
        # were requested.
        ax.set_xlim(-3.5, 3.5)
        return ax




[docs]
    @staticmethod
    def pp_add_percentiles(ax=None, percentiles='standard', linestyle='-.', linewidth=1, color='green', label_size=10, **plot_kwargs):
        """
        Add percentile reference lines and labels to the top axis.

        Parameters
        ----------
        ax : matplotlib.axes.Axes, optional
            Axes to annotate. Creates new one if None.
        percentiles : str or list, optional
            Which percentiles to use: 'standard', 'full', or custom list.
        linestyle : str
            Line style for vertical lines.
        linewidth : float
            Width of the percentile lines.
        color : str
            Color of percentile lines.
        label_size : int
            Font size of percentile labels.
        **plot_kwargs : dict
            Additional keyword arguments for ax.axvline().

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object with added percentile lines and labels.
        """
        if ax is None:
            fig, ax = plt.subplots()

        if percentiles == 'standard':
            perc_values = [1, 5, 10, 25, 50, 75, 95, 90, 99]
        elif percentiles == 'full':
            perc_values = [0.5, 1, 2, 4, 6, 8, 10, 15, 20, 25, 30, 35, 40, 50,
                           60, 65, 70, 75, 80, 85, 90, 92, 94, 96, 98, 99, 99.5]
        else:
            perc_values = percentiles

        sigma_ticks = norm.ppf(np.array(perc_values) / 100.0)
        ax_secondary = ax.secondary_xaxis('top')
        ax_secondary.set_xticks(sigma_ticks)
        #ax_secondary.set_xticklabels([f"{p:g}%" for p in perc_values], fontsize=label_size, rotation=90)
        ax_secondary.set_xticklabels([])

        for i, (perc, sigma) in enumerate(zip(perc_values, sigma_ticks)):
            ax.axvline(x=sigma, linestyle=linestyle, linewidth=linewidth, color=color, **plot_kwargs)
            y_offset = 1.01 + (i % 2) * 0.04 if percentiles == 'full' else 1.01
            ax.text(sigma, y_offset, f"{perc}", ha='center', va='bottom',
                    transform=ax.get_xaxis_transform(), fontsize=label_size, color='black')

        return ax




[docs]
    @staticmethod
    def qq_plot(raw_data, model_data, ax = None, line_kwargs=None, marker_kwargs=None):
        
        """
        Create a Q-Q plot comparing raw data to model-simulated data.

        Parameters
        ----------
        raw_data : array-like
            Observed dataset.
        model_data : array-like
            Simulated or reference dataset.
        ax : matplotlib.axes.Axes
            Axes object on which to draw the plot.
        line_kwargs : dict, optional
            Keyword arguments for the reference line.
        marker_kwargs : dict, optional
            Keyword arguments for the scatter points.

        Returns
        -------
        None
        """
        
        # Sort both observed data and reference population
        observed_data_sorted = np.sort(raw_data)
        reference_population_sorted = np.sort(model_data)

        # Number of data points
        n = len(observed_data_sorted)

        # Calculate the empirical percentiles for the observed data
        percentiles = np.linspace(0, 100, n)

        # Match the reference percentiles to the same empirical percentiles
        reference_percentiles = np.percentile(reference_population_sorted, percentiles)

        if ax is None:
            fig, ax = plt.subplots()


        # Plot the observed data percentiles vs. reference population percentiles
        ax.plot(observed_data_sorted, reference_percentiles,  **marker_kwargs, linestyle='', label='Observed Data vs. Reference Population')

        # Plot the 45‑degree reference line
        # — remove the 'r--' fmt string, rely exclusively on line_kwargs
        # — default to color='r', linestyle='--' if user didn't pass any
        lk = line_kwargs or {}
        # ensure we don’t accidentally pass the fmt‑style redundant args
        ax.plot(
            [observed_data_sorted[0], observed_data_sorted[-1]],
            [observed_data_sorted[0], observed_data_sorted[-1]],
            **lk,
            label='45° Line'
        )



[docs]
    @staticmethod
    def plot_gmm_pdf(x, meds, stds, weights, ax = None, data=None,
                 pdf_plot_kwargs=None, component_plot_kwargs=None, hist_plot_kwargs=None):
        """
        Plot the Gaussian Mixture Model PDF and its components.

        The weights are normalized to sum to one, the components are ordered
        by increasing mean (so "Component 1" has the smallest mean), and
        ``x`` is sorted before any line is drawn.

        Parameters
        ----------
        x : array-like
            x values at which the curves are evaluated.
        meds : list or array
            Means of the Gaussian components.
        stds : list or array
            Standard deviations of the Gaussian components.
        weights : list or array
            Weights of the Gaussian components. Their sum must be positive.
        ax : matplotlib.axes.Axes, optional
            Axes object where to plot. Creates new one if None.
        data : list or array, optional
            Raw data to plot as a histogram.
        pdf_plot_kwargs : dict, optional
            Keyword arguments for the main GMM PDF plot.
        component_plot_kwargs : dict, optional
            Keyword arguments for the individual component plots.
        hist_plot_kwargs : dict, optional
            Keyword arguments for the histogram plot. ``bins`` defaults to 20
            and ``density`` to True.

        Returns
        -------
        ax : matplotlib.axes.Axes
            The Axes object with the plot.

        Raises
        ------
        ValueError
            If the weights do not sum to a positive value, or if ``meds``,
            ``stds`` and ``weights`` do not have the same length.
        """
        if pdf_plot_kwargs is None:
            pdf_plot_kwargs = {}
        if component_plot_kwargs is None:
            component_plot_kwargs = {}
        if hist_plot_kwargs is None:
            hist_plot_kwargs = {}

        # Convert to arrays so that plain lists can be index-sorted below.
        meds = np.atleast_1d(np.asarray(meds, dtype=float))
        stds = np.atleast_1d(np.asarray(stds, dtype=float))
        weights = np.atleast_1d(np.asarray(weights, dtype=float))

        if not (meds.shape == stds.shape == weights.shape):
            raise ValueError("meds, stds and weights must have the same length.")

        weight_sum = np.sum(weights)
        if weight_sum <= 0:
            raise ValueError("The sum of weights must be greater than zero.")

        # Normalize so the mixture integrates to one.
        weights = weights / weight_sum

        # Sort the Gaussian components by increasing mean.
        component_order = np.argsort(meds)
        meds = meds[component_order]
        stds = stds[component_order]
        weights = weights[component_order]

        # Sort x before drawing: connected lines over unsorted abscissae
        # would zig-zag.
        x_plot = np.sort(np.asarray(x, dtype=float), axis=None)

        if ax is None:
            _, ax = plt.subplots()

        # Defaults go first in each merged dict, so a caller-supplied key
        # overrides them instead of raising a duplicate-keyword error.
        pdf = GMM.gaussian_mixture_pdf(x_plot, meds, stds, weights)
        ax.plot(x_plot, pdf, **{'label': 'Gaussian Mixture PDF', **pdf_plot_kwargs})

        # Each component is scaled by its normalized weight.
        for i, (med, std, weight) in enumerate(zip(meds, stds, weights)):
            ax.plot(x_plot, weight * norm.pdf(x_plot, med, std),
                    **{'label': f'Component {i + 1}', **component_plot_kwargs})

        # Plot the histogram of the raw data if provided.
        if data is not None:
            ax.hist(data, **{'bins': 20, 'density': True, **hist_plot_kwargs})

        ax.legend()
        return ax