Files
Clutter_chuva/etc/tools/statistics.py
neutonsevero 9a3f5959cd feat(distributions): add lognakagami, loggamma, and kl_statistic
Implement two new scipy-compatible distributions: Log-Nakagami
(lognakagami) and Log-Gamma (loggamma_dist), with complete
logpdf/cdf/ppf/stats/entropy/rvs methods derived from the
change-of-variable Y = ln(X).

Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable
compatible with the Fitter class. Extend k_gen with _stats (improving speed), _cdf, and
a fit guard, and switch kv → kve to improve numerical stability at large arguments.

Add unit tests for all three additions covering normalization,
monotonicity, ppf inversion, moment formulas, and Fitter integration.
2026-04-26 23:17:22 -03:00

62 lines
1.7 KiB
Python

import numpy as np
from scipy.stats import gaussian_kde
def aic_statistic(dist, data, axis):
    """
    Akaike Information Criterion goodness-of-fit statistic.

    AIC = 2k - 2 ln(L), where k is the number of fitted shape parameters
    (taken from ``dist.args``) and L is the likelihood of *data* under the
    frozen distribution *dist*.

    Lower AIC indicates a better fit; since goodness_of_fit() treats
    larger statistic values as worse fit, AIC can be returned as-is.
    """
    n_params = len(dist.args)
    # Log-likelihood of the sample, reduced along the requested axis.
    total_log_lik = np.sum(dist.logpdf(data), axis=axis)
    return 2 * n_params - 2 * total_log_lik
def bic_statistic(dist, data, axis):
    """
    Bayesian Information Criterion goodness-of-fit statistic.

    BIC = ln(n) * k - 2 ln(L), where n is the sample size along the
    reduced axis, k the number of fitted shape parameters (from
    ``dist.args``) and L the likelihood of *data* under *dist*.

    Lower BIC indicates a better fit; since goodness_of_fit() treats
    larger statistic values as worse fit, BIC can be returned as-is.
    """
    # Sample size: whole array when axis is None, otherwise the
    # length of the reduced axis.
    if axis is None:
        n_obs = data.size
    else:
        n_obs = data.shape[axis]
    n_params = len(dist.args)
    total_log_lik = np.sum(dist.logpdf(data), axis=axis)
    return np.log(n_obs) * n_params - 2 * total_log_lik
def kl_statistic(dist, data, axis):
    """
    KL divergence-based goodness-of-fit statistic.

    KL(P || Q) = sum P(x) log(P(x) / Q(x))

    P is a Gaussian-KDE estimate of the sample density evaluated at the
    sample points and Q is the candidate distribution's pdf at those same
    points; both are renormalized to sum to 1 along *axis* so they act as
    discrete probability masses.

    Lower KL divergence indicates better fit, but since goodness_of_fit()
    treats larger statistic values as worse fit, KL works directly.

    Parameters
    ----------
    dist : frozen scipy distribution (anything exposing ``.pdf``)
    data : array_like
        Sample points. NOTE(review): ``gaussian_kde`` interprets a 2-D
        array as a multivariate dataset, so batched axes are effectively
        limited to 1-D samples — confirm against Fitter's calling code.
    axis : int or None
        Axis along which the divergence is summed.

    Returns
    -------
    float or ndarray
        Non-negative (up to regularization error) KL divergence estimate.
    """
    data = np.asarray(data)
    # Estimate the sample's density with a Gaussian KDE, evaluated at
    # the sample points themselves.
    kde = gaussian_kde(data)
    data_pdf = kde(data)
    # Candidate distribution's density at the same points.
    dist_pdf = dist.pdf(data)
    # Regularize BEFORE normalizing: adding epsilon afterwards would skew
    # the masses so they no longer sum to 1, and a dist_pdf that is zero
    # everywhere would otherwise produce 0/0 = NaN during normalization.
    epsilon = 1e-10
    data_pdf = data_pdf + epsilon
    dist_pdf = dist_pdf + epsilon
    # Normalize along the same axis the divergence is summed over, so the
    # statistic is consistent for batched input (keepdims lets the
    # normalizer broadcast back over the original shape).
    data_pdf = data_pdf / np.sum(data_pdf, axis=axis, keepdims=True)
    dist_pdf = dist_pdf / np.sum(dist_pdf, axis=axis, keepdims=True)
    kl_divergence = np.sum(data_pdf * np.log(data_pdf / dist_pdf), axis=axis)
    return kl_divergence