import numpy as np
from scipy.stats import gaussian_kde


def aic_statistic(dist, data, axis):
    """
    AIC-based goodness-of-fit statistic.

    AIC = 2k - 2 ln(L)

    Lower AIC indicates better fit, and since goodness_of_fit() treats larger
    statistic values as worse fit, AIC can be used directly.
    """
    # Number of fitted parameters (the frozen distribution's positional args)
    k = len(dist.args)
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    aic = 2 * k - 2 * log_likelihood
    return aic


def bic_statistic(dist, data, axis):
    """
    BIC-based goodness-of-fit statistic.

    BIC = ln(n) * k - 2 ln(L)

    Lower BIC indicates better fit, and since goodness_of_fit() treats larger
    statistic values as worse fit, BIC can be used directly.
    """
    # Sample size along the reduction axis
    n = data.size if axis is None else data.shape[axis]
    # Number of fitted parameters (the frozen distribution's positional args)
    k = len(dist.args)
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    bic = np.log(n) * k - 2 * log_likelihood
    return bic


def kl_statistic(dist, data, axis):
    """
    KL divergence-based goodness-of-fit statistic.

    KL(P || Q) = ∑ P(x) log(P(x) / Q(x))

    Lower KL divergence indicates better fit, and since goodness_of_fit()
    treats larger statistic values as worse fit, KL can be used directly.
    """
    # Estimate the PDF of the data with a Gaussian KDE (assumes 1-D data)
    kde = gaussian_kde(data)
    data_pdf = kde(data)

    # Evaluate the candidate distribution's PDF at the same data points
    dist_pdf = dist.pdf(data)

    # Normalize both so they sum to 1 over the sample points
    data_pdf /= np.sum(data_pdf)
    dist_pdf /= np.sum(dist_pdf)

    # Avoid division by zero and log of zero by adding a small constant
    epsilon = 1e-10
    kl_divergence = np.sum(
        data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon)),
        axis=axis,
    )
    return kl_divergence
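

# -- Example usage -----------------------------------------------------------
# A minimal sketch of how these callables plug into scipy.stats.goodness_of_fit,
# assuming a SciPy version recent enough to accept a callable `statistic` with
# the signature statistic(dist, data, axis). The normal sample and the choice of
# scipy.stats.norm below are illustrative assumptions, not part of the code
# above; bic_statistic can be swapped in the same way, while kl_statistic relies
# on gaussian_kde and assumes 1-D input.
if __name__ == "__main__":
    from scipy import stats

    rng = np.random.default_rng(0)
    data = rng.normal(loc=1.0, scale=2.0, size=500)

    result = stats.goodness_of_fit(
        stats.norm,
        data,
        statistic=aic_statistic,  # or bic_statistic
        n_mc_samples=200,         # kept small so the example runs quickly
        random_state=rng,
    )
    print(f"AIC statistic: {result.statistic:.3f}, p-value: {result.pvalue:.3f}")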