Implement two new scipy-compatible distributions: Log-Nakagami (lognakagami) and Log-Gamma (loggamma_dist), with complete logpdf/cdf/ppf/stats/entropy/rvs methods derived from the change-of-variable Y = ln(X). Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable compatible with the Fitter class. Extend k_gen with _stats (improving speed), _cdf, and a fit guard, and switch kv → kve to improve numerical stability at large arguments. Add unit tests for all three additions covering normalization, monotonicity, ppf inversion, moment formulas, and Fitter integration.
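As an illustration of the kv → kve change, here is a minimal sketch (not code from this PR): scipy.special.kve(v, x) returns kv(v, x) * exp(x), so a log-density term log kv(v, x) can be evaluated as log kve(v, x) - x, which stays finite at large x where kv itself underflows to zero.

import numpy as np
from scipy.special import kv, kve

v, x = 1.5, 800.0
naive = np.log(kv(v, x))        # kv underflows to 0.0, so this is -inf
stable = np.log(kve(v, x)) - x  # kve(v, x) = kv(v, x) * exp(x); finite (about -803)
print(naive, stable)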
62 lines · 1.7 KiB · Python
import numpy as np
from scipy.stats import gaussian_kde


def aic_statistic(dist, data, axis):
    """
    AIC-based goodness-of-fit statistic.

    AIC = 2k - 2ln(L)

    Lower AIC indicates better fit, but since goodness_of_fit()
    treats larger statistic values as worse fit, AIC works directly.
    """
    k = len(dist.args)
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    aic = 2 * k - 2 * log_likelihood
    return aic


def bic_statistic(dist, data, axis):
    """
    BIC-based goodness-of-fit statistic.

    BIC = ln(n)k - 2ln(L)

    Lower BIC indicates better fit, but since goodness_of_fit()
    treats larger statistic values as worse fit, BIC works directly.
    """
    n = data.size if axis is None else data.shape[axis]
    k = len(dist.args)
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    bic = np.log(n) * k - 2 * log_likelihood
    return bic


def kl_statistic(dist, data, axis):
    """
    KL divergence-based goodness-of-fit statistic.

    KL(P || Q) = ∑ P(x) log(P(x) / Q(x))

    Lower KL divergence indicates better fit, but since goodness_of_fit()
    treats larger statistic values as worse fit, KL works directly.
    """
    # Estimate the PDF of the data using KDE
    kde = gaussian_kde(data)
    data_pdf = kde(data)

    # Get the PDF of the distribution at the data points
    dist_pdf = dist.pdf(data)

    # Normalize the PDFs so they sum to 1
    data_pdf /= np.sum(data_pdf)
    dist_pdf /= np.sum(dist_pdf)

    # Avoid division by zero and log of zero by adding a small constant
    epsilon = 1e-10
    kl_divergence = np.sum(data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon)), axis=axis)

    return kl_divergence
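For reference, a minimal usage sketch (not part of the file above), assuming a frozen scipy distribution and one-dimensional data, which is what the (dist, data, axis) signature expects; the same call shape applies to aic_statistic and bic_statistic.

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
sample = rng.normal(loc=2.0, scale=1.5, size=500)

# A well-matched frozen distribution should yield a smaller KL statistic
# than a poorly matched one.
print(kl_statistic(norm(loc=2.0, scale=1.5), sample, axis=None))
print(kl_statistic(norm(loc=0.0, scale=1.0), sample, axis=None))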