Estimating Mean and Confidence Interval

Estimation of mean and confidence interval of a lognormal distribution using the pyco2stats.Stats module. The Stats module includes a variety of statistical methodologies to analyze \(CO_{2}\) flux data and geochemical samplings in environmental and volcanic systems. Stats comprises robust statistics (i.e. biweight estimators, sigma-clipping, data-trimming, winsorizing procedures) and specific tools to estimate the central tendency and confidence intervals of log-normally distributed data.

[1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import lognorm
import pyco2stats as PyCO2

# 1) Fix random state
rng = np.random.RandomState(32)

# 2) Parameters
mu = 1
sigma = 0.5
n1 = 30
n2 = 250
# 3) Draw sample
sample1 = rng.lognormal(mean=mu, sigma=sigma, size=n1)
sample2 = rng.lognormal(mean=mu, sigma=sigma, size=n2)

# 4) Compute PDF at each sample point
#    Note: scipy’s lognorm takes `s = sigma` and `scale = exp(mu)`
x = np.linspace(-0.5, 15, 500)
pdf = lognorm.pdf(x, s=sigma, scale=np.exp(mu))

result1 = PyCO2.Stats.lognormal_estimator(
                    sample1,
                    method='umvue',
                    ci=True,
                    ci_type='two-sided',
                    ci_method='land',
                    conf_level=0.95
                )

result2 = PyCO2.Stats.lognormal_estimator(
                    sample2,
                    method='qmle',
                    ci=True,
                    ci_type='two-sided',
                    ci_method='cox',
                    conf_level=0.95
                )


import matplotlib.pyplot as plt
# Global style
plt.style.use('default')
plt.rcParams.update({
    'font.size': 18,
    'font.family'   : 'Times New Roman',
    'axes.labelsize': 18,
    'legend.fontsize': 16,
    'xtick.direction':  'in',
    'ytick.direction':  'in',
    'xtick.major.size': 8,
    'ytick.major.size': 8,
})

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14,6), sharey=True)

bins = np.arange(0, 12.5, 0.25)

def format_ax(ax, sample, result, title, n, ci_method):
    # Histogram
    ax.hist(sample, bins=bins, density=True,
            color='#dddddd', edgecolor='darkgray', linewidth=1, alpha=0.9)
    # PDF line
    ax.plot(x, pdf, label='Theoretical PDF',
            lw=2, color='#1f77b4')
    # Mean estimate
    ax.axvline(result['mean_estimate'], label='Mean estimate',
               linestyle='--', lw=2, color='black', alpha=0.5)
    # CI band
    ax.axvspan(result['LCL'], result['UCL'], label='95% CI (' + ci_method +')',
               color='#1f77b4', alpha=0.2)
    # Labels & limits
    ax.set_xlabel('Value')
    ax.set_ylabel('Probability Density')
    ax.set_xlim(0, 12)
    ax.set_ylim(0, 0.6)
    ax.grid(True, linestyle=':', linewidth=0.5)
    ax.legend(loc='upper right', title=title + ' - n=' + str(n))

format_ax(ax1, sample1, result1, 'UMVUE estimator', n1, 'Land\'s method')
format_ax(ax2, sample2, result2, 'QMLE estimator', n2, 'Cox\'s method')

fig.tight_layout()
plt.savefig("Stats.png", dpi=300)
../_images/notebooks_Stats_1_0.png