feat(distributions): add lognakagami, loggamma, and kl_statistic
Implement two new scipy-compatible distributions: Log-Nakagami (lognakagami) and Log-Gamma (loggamma_dist), with complete logpdf/cdf/ppf/stats/entropy/rvs methods derived from the change of variable Y = ln(X).

Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable compatible with the Fitter class.

Extend k_gen with _stats (improving speed), _cdf, and a fit guard, and switch kv → kve to improve numerical stability at large arguments.

Add unit tests for all three additions, covering normalization, monotonicity, ppf inversion, moment formulas, and Fitter integration.
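For reference, both new distributions follow the same change-of-variable pattern; below is a minimal sketch of the lognakagami case under that assumption (class and instance names here are hypothetical and the committed implementation is not shown in the hunks below):

    import numpy as np
    from scipy import stats
    from scipy.stats import rv_continuous

    class lognakagami_gen(rv_continuous):
        # If Y = ln(X) ~ Nakagami(nu), then X = exp(Y) has support x > 1 and,
        # by the change of variable, pdf_X(x) = pdf_Y(ln x) / x.
        def _logpdf(self, x, nu):
            return stats.nakagami.logpdf(np.log(x), nu) - np.log(x)

        def _pdf(self, x, nu):
            return np.exp(self._logpdf(x, nu))

        def _cdf(self, x, nu):
            # cdf_X(x) = P(X <= x) = P(Y <= ln x) = cdf_Y(ln x)
            return stats.nakagami.cdf(np.log(x), nu)

        def _ppf(self, q, nu):
            # Inverting the cdf: ppf_X(q) = exp(ppf_Y(q))
            return np.exp(stats.nakagami.ppf(q, nu))

    lognakagami = lognakagami_gen(a=1.0, name="lognakagami", shapes="nu")

    # The k_gen change relies on scipy.special.kve(v, x) == exp(x) * kv(v, x),
    # so log(kv(v, x)) can be evaluated stably as log(kve(v, x)) - x.

Because ln is monotone, the cdf/ppf pair is exact and needs no numerical inversion.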
@@ -1,4 +1,5 @@
import numpy as np
from scipy.stats import gaussian_kde


def aic_statistic(dist, data, axis):
@@ -32,4 +33,30 @@ def bic_statistic(dist, data, axis):
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    bic = np.log(n) * k - 2 * log_likelihood

    return bic


def kl_statistic(dist, data, axis):
    """
    KL divergence-based goodness-of-fit statistic.

    KL(P || Q) = ∑ P(x) log(P(x) / Q(x))

    Lower KL divergence indicates better fit, and since goodness_of_fit()
    treats larger statistic values as worse fit, the KL value can be used
    directly as the statistic.
    """
    # Estimate the PDF of the data using KDE
    kde = gaussian_kde(data)
    data_pdf = kde(data)

    # Get the PDF of the distribution at the data points
    dist_pdf = dist.pdf(data)

    # Normalize the PDFs so they sum to 1
    data_pdf /= np.sum(data_pdf)
    dist_pdf /= np.sum(dist_pdf)

    # Avoid division by zero and log of zero by adding a small constant
    epsilon = 1e-10
    kl_divergence = np.sum(
        data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon)), axis=axis
    )

    return kl_divergence
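Illustrative call pattern for the new statistic, run in the same namespace as the module above (the gamma distribution and sample size are arbitrary choices for the sketch):

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(0)
    data = stats.gamma(a=2.0).rvs(size=500, random_state=rng)

    # Freeze a candidate distribution at its fitted parameters, then score it.
    fitted = stats.gamma(*stats.gamma.fit(data))
    score = kl_statistic(fitted, data, axis=None)  # lower = closer fit

Because a lower KL divergence means a closer match, the value can be compared across candidate distributions without any sign flip.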