Source code for pyco2stats.visualize_plotly


import numpy as np
import plotly.graph_objects as go
from scipy.stats import norm
from .sinclair import Sinclair
from .gaussian_mixtures import GMM

"""
The Visualize classes are meant to provide tools to create graphical representations related to pyco2stats analyzed data.
Visualize_Plotly relies on the Plotly library.

"""

class Visualize_Plotly:
    """
    Plotly-based Sinclair-style probability plots for raw data and GMMs.

    Every method is a static method. Most build Plotly traces, optionally add
    them to a figure passed by the caller, and return them;
    ``pp_add_percentiles`` decorates the figure in place and
    ``plot_gmm_pdf`` only returns traces.

    Wherever a method spreads a user kwargs dict into a trace constructor,
    the built-in ``mode`` / ``name`` values act as defaults that the user
    dict may override.
    """

    @staticmethod
    def pp_raw_data(raw_data, fig=None, marker_kwargs=None):
        """
        Plot raw data on log-normal probability paper.

        Parameters
        ----------
        raw_data : array-like
            The raw data values to plot.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to. If None, the trace is
            returned without adding to any figure.
        marker_kwargs : dict, optional
            Marker style options, either as top-level keys ('size', 'color',
            'opacity', 'symbol') or nested under 'marker'. Top-level keys win
            over nested ones. Any other key is forwarded to ``go.Scatter``.
            The dict is copied, never mutated.

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            The Scatter trace representing the raw data.
        """
        # Sigma quantiles on x, matching sorted data values on y.
        sigma_vals, sorted_data = Sinclair.raw_data_to_sigma(raw_data)

        # Shallow copies keep the caller's dicts untouched by the pops below.
        scatter_args = {} if marker_kwargs is None else marker_kwargs.copy()
        marker = scatter_args.pop('marker', {}).copy()

        # Fold the flat style keys into the nested marker dict.
        for key in ('size', 'color', 'opacity', 'symbol'):
            if key in scatter_args:
                marker[key] = scatter_args.pop(key)
        if marker:
            scatter_args['marker'] = marker

        trace = go.Scatter(
            x=sigma_vals,
            y=sorted_data,
            **{'mode': 'markers', 'name': 'Raw Data', **scatter_args},
        )

        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def qq_plot_sigma(raw_data, fig=None, scatter_kwargs=None, line_kwargs=None):
        """
        Generate a QQ-plot with sigma quantiles versus sorted data.

        This implementation was originally also named ``qq_plot`` and was
        silently replaced by the two-sample ``qq_plot`` defined later in the
        class. It is kept reachable under this distinct name.

        Parameters
        ----------
        raw_data : array-like
            The raw data values to plot.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add traces to. If None, traces are returned
            only.
        scatter_kwargs : dict, optional
            Extra keyword arguments for the scatter trace.
        line_kwargs : dict, optional
            Line style dict for the reference line (passed as ``line=``).

        Returns
        -------
        scatter : plotly.graph_objects.Scatter
            Scatter trace for the QQ plot data.
        line : plotly.graph_objects.Scatter
            Line trace of the first-degree least-squares fit.
        """
        sigma_vals, sorted_data = Sinclair.raw_data_to_sigma(raw_data)

        scatter = go.Scatter(
            x=sigma_vals,
            y=sorted_data,
            **{'mode': 'markers', 'name': 'QQ Plot', **(scatter_kwargs or {})},
        )

        # Straight-line fit, drawn between the extreme sigma values only.
        slope, intercept = np.polyfit(sigma_vals, sorted_data, 1)
        x_line = np.array([np.min(sigma_vals), np.max(sigma_vals)])
        y_line = intercept + slope * x_line

        line = go.Scatter(
            x=x_line,
            y=y_line,
            mode='lines',
            name='Fit Line',
            line=line_kwargs or {},
        )

        if fig is not None:
            fig.add_trace(scatter)
            fig.add_trace(line)
        return scatter, line

    @staticmethod
    def pp_one_population(mean, std, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot a single Gaussian population line on probability paper.

        Parameters
        ----------
        mean : float
            Mean of the Gaussian.
        std : float
            Standard deviation of the Gaussian.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to.
        z_range : tuple, optional
            (low, high) z-score range over which to compute the line.
        line_kwargs : dict, optional
            Line style dict (passed as ``line=``).

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            Line trace of the Gaussian population.
        """
        # On probability paper a Gaussian is the straight line mean + z * std.
        z_vals = np.linspace(z_range[0], z_range[1], 600)
        x_vals = mean + z_vals * std

        trace = go.Scatter(
            x=z_vals,
            y=x_vals,
            mode='lines',
            name=f'Pop μ={mean:.2f}, σ={std:.2f}',
            line=line_kwargs or {},
        )

        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def pp_single_populations(means, stds, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot each Gaussian component as a separate line.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components (a scalar is accepted).
        stds : array-like
            Standard deviations of the components (a scalar is accepted).
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the traces to.
        z_range : tuple, optional
            Z-score range over which to plot.
        line_kwargs : dict, optional
            Line style dict shared by every component line.

        Returns
        -------
        traces : list of plotly.graph_objects.Scatter
            List of traces, one per (mean, std) pair. Pairing stops at the
            shorter of the two inputs.
        """
        return [
            Visualize_Plotly.pp_one_population(
                mean, std, fig=fig, z_range=z_range, line_kwargs=line_kwargs
            )
            for mean, std in zip(np.atleast_1d(means), np.atleast_1d(stds))
        ]

    @staticmethod
    def pp_combined_population(means, stds, weights, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot the combined Gaussian mixture as a line on probability paper.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the components.
        weights : array-like
            Mixture weights of the components.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to.
        z_range : tuple, optional
            (low, high) bounds of the 600-point grid passed to
            ``Sinclair.combine_gaussians``. Despite the name, this grid is
            used as the trace's y coordinates (data values); the x
            coordinates are the sigma values derived from it.
        line_kwargs : dict, optional
            Line style dict (passed as ``line=``).

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            Trace representing the combined population.
        """
        x = np.linspace(z_range[0], z_range[1], 600)

        # NOTE(review): combine_gaussians presumably returns cumulative
        # probabilities of the mixture at x - confirm against Sinclair.
        cdf = Sinclair.combine_gaussians(
            x, np.array(means), np.array(stds), np.array(weights)
        )
        sigma_vals = Sinclair.cumulative_to_sigma(cdf)

        trace = go.Scatter(
            x=sigma_vals,
            y=x,
            mode='lines',
            name='Combined Population',
            line=line_kwargs or {},
        )

        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def pp_add_percentiles(
        fig,
        percentiles="full",
        line_kwargs: dict = None,
        label_kwargs: dict = None,
        y_min: float = None,
        y_max: float = None
    ):
        """
        Add vertical percentile lines and labels to a Plotly figure.

        Parameters
        ----------
        fig : plotly.graph_objects.Figure
            The figure to which the percentiles are added.
        percentiles : str or list of float
            Either 'full' for the default levels
            (1, 5, 10, 25, 50, 75, 90, 95, 99) or a custom list of
            percentages.
        line_kwargs : dict, optional
            Styling for vertical percentile lines. Replaces the default
            style entirely when given.
        label_kwargs : dict, optional
            Styling for percentile text annotations. Replaces the default
            style entirely when given.
        y_min : float, optional
            Minimum y-coordinate for the vertical lines.
        y_max : float, optional
            Maximum y-coordinate for the vertical lines.

        Returns
        -------
        None

        Notes
        -----
        A bound left as None is taken from ``fig.layout.yaxis.range`` when
        that holds two values, otherwise from the fallback span (0, 1). A
        bound supplied by the caller is always respected, even when the
        other one is inferred.
        """
        if isinstance(percentiles, str) and percentiles == "full":
            levels = [1, 5, 10, 25, 50, 75, 90, 95, 99]
        else:
            levels = list(percentiles)

        # Infer only the missing bound(s); never overwrite a supplied one.
        if y_min is None or y_max is None:
            axis_range = fig.layout.yaxis.range
            if axis_range is not None and len(axis_range) == 2:
                low, high = axis_range
            else:
                low, high = 0, 1
            if y_min is None:
                y_min = low
            if y_max is None:
                y_max = high

        line_style = line_kwargs or dict(color="#c8c8c8", dash="dash", width=0.5)
        label_style = label_kwargs or dict(
            font=dict(size=12, color="#555555"), showarrow=False, yshift=8
        )

        for p in levels:
            # x position on the sigma axis for this cumulative percentage.
            z = norm.ppf(p / 100.0)
            fig.add_shape(
                type="line",
                x0=z, x1=z,
                y0=y_min, y1=y_max,
                line=line_style,
                layer="below",
            )
            fig.add_annotation(
                x=z,
                y=y_max,
                text=f"{p}%",
                **{'xanchor': 'center', **label_style},
            )

    @staticmethod
    def plot_gmm_pdf(x_values, meds, stds, weights, data=None,
                     pdf_plot_kwargs=None, component_plot_kwargs=None,
                     hist_plot_kwargs=None):
        """
        Generate Plotly traces for a Gaussian mixture PDF:

        1) Histogram of raw data (probability density)
        2) Individual component PDFs
        3) Total mixture PDF

        Parameters
        ----------
        x_values : np.ndarray
            Points at which to evaluate the PDFs.
        meds : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the components.
        weights : array-like
            Mixture weights for each component.
        data : array-like, optional
            Raw data to include as a histogram.
        pdf_plot_kwargs : dict, optional
            Extra keyword arguments for the total PDF trace.
        component_plot_kwargs : dict, optional
            Extra keyword arguments shared by the component traces.
        hist_plot_kwargs : dict, optional
            Extra keyword arguments for the histogram. The special key
            'bins' (default 20) is either a bin count or the name of a
            numpy binning strategy such as 'auto'; a strategy name is
            resolved with ``numpy.histogram_bin_edges``. The dict is copied,
            never mutated.

        Returns
        -------
        hist_trace : plotly.graph_objects.Histogram or None
            Histogram trace if data is provided.
        comp_traces : list of plotly.graph_objects.Scatter
            List of individual component PDF traces.
        pdf_trace : plotly.graph_objects.Scatter
            Trace for the full GMM PDF.
        """
        pdf_plot_kwargs = pdf_plot_kwargs or {}
        component_plot_kwargs = component_plot_kwargs or {}
        hist_plot_kwargs = hist_plot_kwargs or {}

        # 1) histogram trace
        hist_trace = None
        if data is not None:
            hist_extra = hist_plot_kwargs.copy()
            bins = hist_extra.pop('bins', 20)
            hist_args = {'histnorm': 'probability density'}
            if isinstance(bins, str):
                # Let numpy choose the edges, then describe them to Plotly.
                edges = np.histogram_bin_edges(data, bins=bins)
                hist_args['xbins'] = dict(
                    start=edges[0], end=edges[-1], size=edges[1] - edges[0]
                )
            else:
                hist_args['nbinsx'] = bins
            # Remaining user options override the defaults above.
            hist_args.update(hist_extra)
            hist_trace = go.Histogram(x=data, **hist_args)

        # 2) individual component traces (each weighted by its mixture weight)
        component_values = [
            w * norm.pdf(x_values, mu, sigma)
            for mu, sigma, w in zip(meds, stds, weights)
        ]
        comp_traces = [
            go.Scatter(
                x=x_values,
                y=y_comp,
                **{'mode': 'lines', 'name': f'Component {idx}',
                   **component_plot_kwargs},
            )
            for idx, y_comp in enumerate(component_values, start=1)
        ]

        # 3) total mixture PDF
        try:
            pdf_vals = GMM.gaussian_mixture_pdf(x_values, meds, stds, weights)
        except Exception:
            # Deliberate best-effort fallback: if the GMM helper is missing
            # or fails, sum the weighted components computed above.
            pdf_vals = np.zeros_like(x_values, dtype=float)
            for y_comp in component_values:
                pdf_vals += y_comp

        pdf_trace = go.Scatter(
            x=x_values,
            y=pdf_vals,
            **{'mode': 'lines', 'name': 'Gaussian Mixture PDF',
               **pdf_plot_kwargs},
        )

        return hist_trace, comp_traces, pdf_trace

    @staticmethod
    def qq_plot(raw_data, model_data, fig=None, marker_kwargs=None, line_kwargs=None):
        """
        Draw a Q–Q plot comparing two samples.

        Both samples are reduced to the same ``len(raw_data)`` evenly spaced
        quantile levels between 0 and 1, so they may differ in size.

        Parameters
        ----------
        raw_data : array-like
            Observed dataset.
        model_data : array-like
            Simulated or modeled dataset.
        fig : plotly.graph_objects.Figure, optional
            Figure to which the traces will be added.
        marker_kwargs : dict, optional
            Extra keyword arguments for the Q–Q points trace.
        line_kwargs : dict, optional
            Extra keyword arguments for the y = x reference line trace.

        Returns
        -------
        pts : plotly.graph_objects.Scatter
            Q–Q scatter trace.
        line : plotly.graph_objects.Scatter
            Identity line trace (y = x).

        Raises
        ------
        ValueError
            If either dataset is empty.
        """
        # Fail before touching the figure rather than half-way through.
        if len(raw_data) == 0 or np.size(model_data) == 0:
            raise ValueError("raw_data and model_data must not be empty")

        # 1) matching quantiles of both samples
        probs = np.linspace(0, 1, len(raw_data))
        q_raw = np.quantile(raw_data, probs)
        q_model = np.quantile(model_data, probs)

        # 2) the Q–Q scatter
        pts = go.Scatter(
            x=q_raw,
            y=q_model,
            **{'mode': 'markers', 'name': 'Q–Q points', **(marker_kwargs or {})},
        )

        # 3) identity line spanning the joint range of both quantile sets
        mn = min(q_raw.min(), q_model.min())
        mx = max(q_raw.max(), q_model.max())
        line = go.Scatter(
            x=[mn, mx],
            y=[mn, mx],
            **{'mode': 'lines', 'name': 'y = x', **(line_kwargs or {})},
        )

        if fig is not None:
            fig.add_trace(pts)
            fig.add_trace(line)
        return pts, line