Source code for pyco2stats.sinclair

import numpy as np
from scipy.stats import norm

"""
The Sinclair method [Sinclair, 1974] is a reliable graphical procedure aimed to partition datasets
of polymodal values into two or more log-normal sub-populations. The method is based on the evidence that, on a cumulative probability plot, a dataset composed of
multiple superimposed log-normal populations plots as a series of joined straight-line segments with
different slopes. Each straight-line segment represents a distinct statistical population, while changes in the slope of the curve (inflection points) indicate threshold values between different populations.

Sinclair, A. J. (1974). “Selection of threshold values in geochemical data using probability graphs”. Journal of Geochemical
Exploration 3(2), pages 129–149. doi: 10.1016/0375-6742(74)90030-2

"""

[docs] class Sinclair: """ Implements transformations between cumulative probabilities and sigma-values (standard normal quantiles) for probability plots. """
[docs] @staticmethod def cumulative_to_sigma(p: np.ndarray) -> np.ndarray: """ Converts cumulative probabilities to sigma-values (z-scores). Parameters ---------- p : array Array of cumulative probabilities in the range [0, 1]. Returns ------- sigma_values : array Array of sigma-values corresponding to the input probabilities. """ p_clipped = np.clip(p, 1e-10, 1 - 1e-10) return norm.ppf(p_clipped)
[docs] @staticmethod def sigma_to_cumulative(sigma: np.ndarray) -> np.ndarray: """ Converts sigma-values (z-scores) to cumulative probabilities. Parameters ---------- sigma : array Array of sigma-values. Returns ------- cumulative_probs : array Array of cumulative probabilities corresponding to the input sigma-values. """ return norm.cdf(sigma)
[docs] @staticmethod def raw_data_to_sigma(raw_data: np.ndarray) -> tuple[np.ndarray, np.ndarray]: """ Converts raw data into sorted values and their corresponding sigma-values. Parameters ---------- raw_data : array Array of raw data values. Returns ------- sigma_values : array Sigma-values corresponding to the empirical cumulative probabilities. sorted_data : array Raw data sorted in ascending order. """ sorted_data = np.sort(raw_data) p = np.linspace(0, 1, len(sorted_data), endpoint=False) + 0.5 / len(sorted_data) sigma_values = Sinclair.cumulative_to_sigma(p) return sigma_values, sorted_data
[docs] @staticmethod def combine_gaussians(x: np.ndarray, means: np.ndarray, stds: np.ndarray, weights: np.ndarray) -> np.ndarray: """ Computes the cumulative distribution of a weighted mixture of Gaussian distributions. Parameters ---------- x : array Points at which to evaluate the combined cumulative distribution. means : array Means of the individual Gaussian components. stds : array Standard deviations of the Gaussian components. weights : array Weights for each Gaussian component. Returns ------- y_comb : array The combined cumulative distribution evaluated at the points x. """ y_comb = np.zeros_like(x) for mu, sigma, w in zip(means, stds, weights): y_comb += w * norm.cdf(x, loc=mu, scale=sigma) return y_comb