import numpy as np
import plotly.graph_objects as go
from scipy.stats import norm
from .sinclair import Sinclair
from .gaussian_mixtures import GMM
"""
The Visualize classes are meant to provide tools to create graphical representations related to pyco2stats analyzed data.
Visualize_Mpl relies on the Plotly library.
"""
class Visualize_Plotly:
    """
    Plotly-based Sinclair-style probability plots for raw data and GMMs.

    Every method is a static method. The trace-building methods return the
    Plotly trace(s) they create and, when a figure is supplied, also add
    them to that figure.
    """

    @staticmethod
    def pp_raw_data(raw_data, fig=None, marker_kwargs=None):
        """
        Plot raw data as markers on a probability plot.

        The x/y coordinates are the two arrays returned by
        ``Sinclair.raw_data_to_sigma(raw_data)`` (sigma quantiles and the
        sorted data, in that order).

        Parameters
        ----------
        raw_data : array-like
            The raw data values to plot.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to. If None, the trace is only
            returned.
        marker_kwargs : dict, optional
            Extra keyword arguments for ``go.Scatter``. The keys ``size``,
            ``color``, ``opacity`` and ``symbol`` may be given at top level
            or nested under ``'marker'``; top-level values take precedence.
            ``mode`` and ``name`` may be supplied to override the defaults.
            The dict passed in is never modified.

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            The Scatter trace representing the raw data.
        """
        sigma_vals, sorted_data = Sinclair.raw_data_to_sigma(raw_data)

        # Work on copies so the caller's dictionaries are left untouched.
        trace_kwargs = {} if marker_kwargs is None else dict(marker_kwargs)
        marker = trace_kwargs.pop('marker', {}).copy()

        # Hoist the common top-level marker attributes into the marker dict.
        for key in ('size', 'color', 'opacity', 'symbol'):
            if key in trace_kwargs:
                marker[key] = trace_kwargs.pop(key)
        if marker:
            trace_kwargs['marker'] = marker

        # Defaults go first so a user-supplied 'mode'/'name' overrides them
        # instead of raising a duplicate-keyword TypeError.
        trace_kwargs = {'mode': 'markers', 'name': 'Raw Data', **trace_kwargs}
        trace = go.Scatter(x=sigma_vals, y=sorted_data, **trace_kwargs)

        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def qq_plot_sigma(raw_data, fig=None, scatter_kwargs=None, line_kwargs=None):
        """
        Generate a QQ-plot of sigma quantiles versus sorted data.

        This method was originally also named ``qq_plot`` and was silently
        overridden by the two-sample ``qq_plot`` defined later in the class,
        which made it unreachable. It is exposed under its own name so that
        ``qq_plot`` keeps resolving to the two-sample version.

        Parameters
        ----------
        raw_data : array-like
            The raw data values to plot.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add traces to. If None, traces are returned
            only.
        scatter_kwargs : dict, optional
            Extra keyword arguments for the scatter trace (may override
            ``mode`` and ``name``).
        line_kwargs : dict, optional
            Passed as the ``line`` property of the reference-line trace.

        Returns
        -------
        scatter : plotly.graph_objects.Scatter
            Scatter trace for the QQ plot data.
        line : plotly.graph_objects.Scatter
            Line trace of the first-degree least-squares fit
            (``np.polyfit``) through the points.
        """
        sigma_vals, sorted_data = Sinclair.raw_data_to_sigma(raw_data)

        scatter_args = {'mode': 'markers', 'name': 'QQ Plot',
                        **(scatter_kwargs or {})}
        scatter = go.Scatter(x=sigma_vals, y=sorted_data, **scatter_args)

        # Straight-line fit, drawn only between the extreme sigma values.
        slope, intercept = np.polyfit(sigma_vals, sorted_data, 1)
        x_line = np.array([sigma_vals.min(), sigma_vals.max()])
        y_line = intercept + slope * x_line
        line = go.Scatter(
            x=x_line,
            y=y_line,
            mode='lines',
            name='Fit Line',
            line=line_kwargs or {},
        )

        if fig is not None:
            fig.add_trace(scatter)
            fig.add_trace(line)
        return scatter, line

    @staticmethod
    def pp_one_population(mean, std, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot a single Gaussian population line on probability paper.

        The line is ``mean + z * std`` evaluated at 600 evenly spaced
        z-scores across ``z_range``; z is on the x axis.

        Parameters
        ----------
        mean : float
            Mean of the Gaussian.
        std : float
            Standard deviation of the Gaussian.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to.
        z_range : tuple, optional
            (min, max) z-score range over which to compute the line.
        line_kwargs : dict, optional
            Passed as the ``line`` property of the trace.

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            Line trace of the Gaussian population.
        """
        z_vals = np.linspace(z_range[0], z_range[1], 600)
        trace = go.Scatter(
            x=z_vals,
            y=mean + z_vals * std,
            mode='lines',
            name=f'Pop μ={mean:.2f}, σ={std:.2f}',
            line=line_kwargs or {},
        )
        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def pp_single_populations(means, stds, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot each Gaussian component as a separate line.

        Delegates to :meth:`pp_one_population` once per (mean, std) pair.
        Pairs are formed with ``zip``, so surplus entries in the longer of
        the two inputs are ignored.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components (a scalar is accepted).
        stds : array-like
            Standard deviations of the components (a scalar is accepted).
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the traces to.
        z_range : tuple, optional
            (min, max) z-score range over which to plot.
        line_kwargs : dict, optional
            Line styling options shared by all components.

        Returns
        -------
        traces : list of plotly.graph_objects.Scatter
            List of traces, one per component.
        """
        means = np.atleast_1d(means)
        stds = np.atleast_1d(stds)
        return [
            Visualize_Plotly.pp_one_population(mean, std, fig, z_range, line_kwargs)
            for mean, std in zip(means, stds)
        ]

    @staticmethod
    def pp_combined_population(means, stds, weights, fig=None, z_range=(-3.5, 3.5), line_kwargs=None):
        """
        Plot the combined Gaussian mixture as a line on probability paper.

        Parameters
        ----------
        means : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the components.
        weights : array-like
            Mixture weights of the components.
        fig : plotly.graph_objects.Figure, optional
            Existing figure to add the trace to.
        z_range : tuple, optional
            (min, max) of the 600-point evaluation grid.
        line_kwargs : dict, optional
            Passed as the ``line`` property of the trace.

        Returns
        -------
        trace : plotly.graph_objects.Scatter
            Trace representing the combined population.
        """
        # NOTE(review): the grid built from z_range is handed to
        # Sinclair.combine_gaussians as the evaluation points and is also
        # used as the trace's y values, i.e. it is treated as data values
        # rather than z-scores. Confirm this is intended for data that do
        # not lie within the default (-3.5, 3.5) span.
        x = np.linspace(z_range[0], z_range[1], 600)
        cdf = Sinclair.combine_gaussians(
            x, np.array(means), np.array(stds), np.array(weights)
        )
        sigma_vals = Sinclair.cumulative_to_sigma(cdf)

        trace = go.Scatter(
            x=sigma_vals,
            y=x,
            mode='lines',
            name='Combined Population',
            line=line_kwargs or {},
        )
        if fig is not None:
            fig.add_trace(trace)
        return trace

    @staticmethod
    def pp_add_percentiles(
        fig,
        percentiles="full",
        line_kwargs: dict = None,
        label_kwargs: dict = None,
        y_min: float = None,
        y_max: float = None
    ):
        """
        Add vertical percentile lines and labels to a Plotly figure.

        Each percentile ``p`` is drawn at x = ``norm.ppf(p / 100)`` and
        labelled ``"<p>%"`` at the top of the line.

        Parameters
        ----------
        fig : plotly.graph_objects.Figure
            The figure to which the percentiles are added.
        percentiles : str or list of float
            ``'full'`` for the default set (1, 5, 10, 25, 50, 75, 90, 95,
            99) or a custom iterable of percentiles.
        line_kwargs : dict, optional
            Styling for the vertical lines. If empty or None, a thin light
            grey dashed line is used.
        label_kwargs : dict, optional
            Keyword arguments for the annotations. If empty or None, a
            small grey label without arrow is used. ``xanchor`` defaults to
            ``"center"`` and may be overridden here.
        y_min : float, optional
            Lower end of the vertical lines.
        y_max : float, optional
            Upper end of the vertical lines.

        Returns
        -------
        None

        Notes
        -----
        A missing ``y_min`` / ``y_max`` is taken from
        ``fig.layout.yaxis.range`` when that holds two values, otherwise
        from (0, 1). A bound supplied by the caller is always honoured,
        even when the other one is omitted.
        """
        if percentiles == "full":
            levels = [1, 5, 10, 25, 50, 75, 90, 95, 99]
        else:
            levels = list(percentiles)

        # Fill in only the bounds the caller did not provide.
        if y_min is None or y_max is None:
            axis_range = fig.layout.yaxis.range or ()
            lo, hi = axis_range if len(axis_range) == 2 else (0, 1)
            if y_min is None:
                y_min = lo
            if y_max is None:
                y_max = hi

        line_style = line_kwargs or dict(color="#c8c8c8", dash="dash", width=0.5)
        label_style = label_kwargs or dict(
            font=dict(size=12, color="#555555"),
            showarrow=False,
            yshift=8,
        )
        # Default first, so an 'xanchor' in label_kwargs overrides it rather
        # than triggering a duplicate-keyword TypeError.
        annotation_args = {'xanchor': 'center', **label_style}

        for p in levels:
            z = norm.ppf(p / 100.0)
            fig.add_shape(
                type="line",
                x0=z, x1=z,
                y0=y_min, y1=y_max,
                line=line_style,
                layer="below"
            )
            fig.add_annotation(x=z, y=y_max, text=f"{p}%", **annotation_args)

    @staticmethod
    def plot_gmm_pdf(x_values, meds, stds, weights,
                     data=None,
                     pdf_plot_kwargs=None,
                     component_plot_kwargs=None,
                     hist_plot_kwargs=None):
        """
        Generate Plotly traces for a Gaussian mixture PDF.

        Three kinds of traces are produced:

        1) Histogram of raw data (probability density), if data is given
        2) Individual weighted component PDFs
        3) Total mixture PDF

        Nothing is added to a figure; the traces are only returned.

        Parameters
        ----------
        x_values : np.ndarray
            Points at which to evaluate the PDFs.
        meds : array-like
            Means of the Gaussian components.
        stds : array-like
            Standard deviations of the components.
        weights : array-like
            Mixture weights for each component.
        data : array-like, optional
            Raw data to include as a histogram.
        pdf_plot_kwargs : dict, optional
            Extra ``go.Scatter`` arguments for the total PDF line.
        component_plot_kwargs : dict, optional
            Extra ``go.Scatter`` arguments shared by the component lines.
        hist_plot_kwargs : dict, optional
            Extra ``go.Histogram`` arguments. The special key ``'bins'``
            (default 20) is either a bin count or ``'auto'``, in which case
            the bin edges come from ``np.histogram_bin_edges``.

        Returns
        -------
        hist_trace : plotly.graph_objects.Histogram or None
            Histogram trace if data is provided.
        comp_traces : list of plotly.graph_objects.Scatter
            List of individual component PDF traces.
        pdf_trace : plotly.graph_objects.Scatter
            Trace for the full GMM PDF.
        """
        pdf_plot_kwargs = pdf_plot_kwargs or {}
        component_plot_kwargs = component_plot_kwargs or {}
        hist_plot_kwargs = hist_plot_kwargs or {}

        # 1) histogram trace
        hist_trace = None
        if data is not None:
            # Copy so 'bins' is not popped from the caller's dict.
            user_hist = dict(hist_plot_kwargs)
            bins = user_hist.pop('bins', 20)
            if bins == 'auto':
                edges = np.histogram_bin_edges(data, bins='auto')
                hist_args = {
                    'xbins': dict(start=edges[0], end=edges[-1],
                                  size=edges[1] - edges[0]),
                    'histnorm': 'probability density',
                }
            else:
                hist_args = {
                    'nbinsx': bins,
                    'histnorm': 'probability density',
                }
            # Remaining user options win over the defaults above.
            hist_args.update(user_hist)
            hist_trace = go.Histogram(x=data, **hist_args)

        # 2) individual component traces; the curves are kept so the
        #    fallback mixture sum below does not have to recompute them.
        comp_curves = [
            w * norm.pdf(x_values, mu, sigma)
            for mu, sigma, w in zip(meds, stds, weights)
        ]
        comp_traces = [
            go.Scatter(
                x=x_values,
                y=curve,
                **{'mode': 'lines', 'name': f'Component {idx}',
                   **component_plot_kwargs}
            )
            for idx, curve in enumerate(comp_curves, start=1)
        ]

        # 3) total mixture PDF
        try:
            pdf_vals = GMM.gaussian_mixture_pdf(x_values, meds, stds, weights)
        except Exception:
            # Deliberate best-effort fallback: if the GMM helper is missing
            # or fails, sum the weighted component curves by hand.
            # NOTE(review): narrow this once the failure modes of
            # GMM.gaussian_mixture_pdf are known.
            pdf_vals = np.zeros_like(x_values, dtype=float)
            for curve in comp_curves:
                pdf_vals += curve

        pdf_trace = go.Scatter(
            x=x_values,
            y=pdf_vals,
            **{'mode': 'lines', 'name': 'Gaussian Mixture PDF',
               **pdf_plot_kwargs}
        )
        return hist_trace, comp_traces, pdf_trace

    @staticmethod
    def qq_plot(raw_data, model_data, fig=None,
                marker_kwargs=None, line_kwargs=None):
        """
        Draw a Q–Q plot comparing two samples.

        Both samples are evaluated with ``np.quantile`` at
        ``len(raw_data)`` evenly spaced probabilities from 0 to 1; the
        quantiles of ``raw_data`` go on x, those of ``model_data`` on y.

        Parameters
        ----------
        raw_data : array-like
            Observed dataset.
        model_data : array-like
            Simulated or modeled dataset.
        fig : plotly.graph_objects.Figure, optional
            Figure to which the traces will be added.
        marker_kwargs : dict, optional
            Extra ``go.Scatter`` arguments for the Q–Q points (may override
            ``mode`` and ``name``).
        line_kwargs : dict, optional
            Extra ``go.Scatter`` arguments for the y = x reference line
            (may override ``mode`` and ``name``).

        Returns
        -------
        pts : plotly.graph_objects.Scatter
            Q–Q scatter trace.
        line : plotly.graph_objects.Scatter
            Identity line trace (y = x) spanning the combined range of both
            quantile sets.
        """
        # 1) matching quantiles of both samples
        probs = np.linspace(0, 1, len(raw_data))
        q_raw = np.quantile(raw_data, probs)
        q_model = np.quantile(model_data, probs)

        # 2) the Q–Q scatter
        pts = go.Scatter(
            x=q_raw,
            y=q_model,
            **{'mode': 'markers', 'name': 'Q–Q points',
               **(marker_kwargs or {})}
        )

        # 3) identity line across the overall min/max of both quantile sets
        lo = min(q_raw.min(), q_model.min())
        hi = max(q_raw.max(), q_model.max())
        line = go.Scatter(
            x=[lo, hi],
            y=[lo, hi],
            **{'mode': 'lines', 'name': 'y = x', **(line_kwargs or {})}
        )

        if fig is not None:
            fig.add_trace(pts)
            fig.add_trace(line)
        return pts, line