feat(distributions): add lognakagami, loggamma, and kl_statistic

Implement two new scipy-compatible distributions: Log-Nakagami
(lognakagami) and Log-Gamma (loggamma_dist), with complete
logpdf/cdf/ppf/stats/entropy/rvs methods derived from the
change-of-variable Y = ln(X).

Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable
compatible with the Fitter class. Extend k_gen with an analytic _stats
(faster than the generic numeric moments), a quad-based _cdf, and a fit
guard that pins loc=0 and scale=1, and switch kv → kve to improve
numerical stability at large arguments.

Add unit tests for all three additions covering normalization,
monotonicity, ppf inversion, moment formulas, and Fitter integration.
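
For reference, the kv → kve switch relies on scipy's scaled Bessel
function, kve(v, z) = exp(z) * kv(v, z); the log-density then carries an
explicit -z term instead of taking the log of an underflowing kv. A
minimal sketch of the difference:

    import numpy as np
    from scipy.special import kv, kve

    v, z = 1.5, 800.0
    np.log(kv(v, z))        # -inf: kv underflows to 0.0 at large z
    np.log(kve(v, z)) - z   # ≈ -803.1, finite and accurate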
commit 9a3f5959cd (parent d07590e73d)
2026-04-26 23:17:22 -03:00
5 changed files with 600 additions and 7 deletions

.gitignore

@@ -35,3 +35,4 @@ data/*
etc/tools/read_raw_data.py
scripts/separate_data.py


@@ -1,12 +1,13 @@
import numpy as np
import pytest
import scipy.special as sc
import matplotlib.pyplot as plt
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from tools.distributions import k_dist, lognakagami, loggamma_dist
X = np.linspace(0.01, 10.0, 500)
@@ -167,6 +168,178 @@ class TestKDistPlots:
# ── Entry-point: run plots interactively ─────────────────────────────────────
Y = np.linspace(-5.0, 5.0, 500)
# ── lognakagami unit tests ────────────────────────────────────────────────────
class TestLogNakagami:
def test_logpdf_is_finite_on_real_line(self):
"""logpdf must be finite for all real y — tests positivity without float64 underflow."""
log_vals = lognakagami.logpdf(Y, m=2.0, Omega=1.0)
assert np.all(np.isfinite(log_vals))
def test_pdf_integrates_to_one(self):
"""Numerical integral of PDF over the real line should be ≈ 1."""
y_fine = np.linspace(-30, 10, 200_000)
integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=1.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_pdf_integrates_to_one_nonunit_omega(self):
"""Normalisation must hold for Omega != 1."""
y_fine = np.linspace(-30, 15, 200_000)
integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=4.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_logpdf_equals_log_pdf(self):
"""logpdf must equal log(pdf) at points where pdf does not underflow."""
y_bulk = np.linspace(-4.0, 2.0, 50)
log_via_pdf = np.log(lognakagami.pdf(y_bulk, m=2.0, Omega=1.0))
log_direct = lognakagami.logpdf(y_bulk, m=2.0, Omega=1.0)
np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
def test_cdf_is_monotone_increasing(self):
"""CDF must be strictly non-decreasing."""
cdf_vals = lognakagami.cdf(Y, m=2.0, Omega=1.0)
assert np.all(np.diff(cdf_vals) >= 0)
def test_ppf_inverts_cdf(self):
"""ppf(cdf(y)) must recover y."""
y_test = np.array([-2.0, 0.0, 0.5])
cdf_vals = lognakagami.cdf(y_test, m=2.0, Omega=1.0)
np.testing.assert_allclose(lognakagami.ppf(cdf_vals, m=2.0, Omega=1.0), y_test, atol=1e-8)
def test_argcheck_rejects_m_below_half(self):
"""m < 0.5 must not produce a valid (positive-finite) PDF value."""
val = lognakagami.pdf(0.0, m=0.3, Omega=1.0)
assert not (np.isfinite(val) and val > 0)
def test_argcheck_rejects_non_positive_omega(self):
"""Omega <= 0 must not produce a valid (positive-finite) PDF value."""
val = lognakagami.pdf(0.0, m=2.0, Omega=-1.0)
assert not (np.isfinite(val) and val > 0)
def test_stats_mean(self):
"""Analytical mean must equal 0.5 * (digamma(m) - log(m) + log(Omega))."""
m, Omega = 3.0, 2.0
expected_mean = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
dist_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
assert pytest.approx(dist_mean, rel=1e-10) == expected_mean
def test_stats_mean_omega_shifts_by_half_log_omega(self):
"""Changing Omega shifts the mean by 0.5*log(Omega) and leaves variance unchanged."""
m = 2.0
mean1 = float(lognakagami.stats(m=m, Omega=1.0, moments="m"))
mean4 = float(lognakagami.stats(m=m, Omega=4.0, moments="m"))
assert pytest.approx(mean4 - mean1, rel=1e-10) == 0.5 * np.log(4.0)
def test_stats_variance_independent_of_omega(self):
"""Variance must equal 0.25 * polygamma(1, m) and not depend on Omega."""
m = 3.0
expected_var = 0.25 * sc.polygamma(1, m)
for Omega in [0.5, 1.0, 4.0]:
_, dist_var, *_ = lognakagami.stats(m=m, Omega=Omega, moments="mv")
assert pytest.approx(float(dist_var), rel=1e-10) == expected_var
def test_rvs_samples_are_finite(self):
"""Random samples must be finite real numbers."""
rng = np.random.default_rng(42)
samples = lognakagami.rvs(m=2.0, Omega=1.0, size=200, random_state=rng)
assert samples.shape == (200,)
assert np.all(np.isfinite(samples))
def test_rvs_sample_mean_near_expected(self):
"""Sample mean of many RVS should be close to the distribution mean."""
m, Omega = 2.0, 3.0
rng = np.random.default_rng(0)
samples = lognakagami.rvs(m=m, Omega=Omega, size=50_000, random_state=rng)
expected_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean
# ── loggamma_dist unit tests ──────────────────────────────────────────────────
class TestLogGamma:
def test_pdf_is_positive_on_real_line(self):
"""PDF must be strictly positive for all real y and a > 0."""
vals = loggamma_dist.pdf(Y, a=2.0)
assert np.all(vals > 0)
def test_pdf_integrates_to_one(self):
"""Numerical integral of PDF over the real line should be ≈ 1."""
y_fine = np.linspace(-30, 10, 200_000)
integral = np.trapezoid(loggamma_dist.pdf(y_fine, a=2.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_logpdf_equals_log_pdf(self):
"""logpdf must equal log(pdf) for numerical consistency."""
log_via_pdf = np.log(loggamma_dist.pdf(Y, a=2.0))
log_direct = loggamma_dist.logpdf(Y, a=2.0)
np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
def test_cdf_is_monotone_increasing(self):
"""CDF must be strictly non-decreasing."""
cdf_vals = loggamma_dist.cdf(Y, a=2.0)
assert np.all(np.diff(cdf_vals) >= 0)
def test_cdf_and_sf_sum_to_one(self):
"""CDF + SF must equal 1 at every point."""
cdf_vals = loggamma_dist.cdf(Y, a=2.0)
sf_vals = loggamma_dist.sf(Y, a=2.0)
np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12)
def test_ppf_inverts_cdf(self):
"""ppf(cdf(y)) must recover y."""
y_test = np.array([-2.0, 0.0, 1.0])
cdf_vals = loggamma_dist.cdf(y_test, a=2.0)
np.testing.assert_allclose(loggamma_dist.ppf(cdf_vals, a=2.0), y_test, atol=1e-8)
def test_argcheck_rejects_non_positive_a(self):
"""a <= 0 must not produce a valid (positive-finite) PDF value."""
val = loggamma_dist.pdf(0.0, a=-1.0)
assert not (np.isfinite(val) and val > 0)
def test_stats_mean_equals_digamma(self):
"""Analytical mean must equal digamma(a)."""
a = 3.0
expected_mean = sc.digamma(a)
dist_mean = float(loggamma_dist.stats(a=a, moments="m"))
assert pytest.approx(dist_mean, rel=1e-10) == expected_mean
def test_stats_variance_equals_trigamma(self):
"""Analytical variance must equal polygamma(1, a)."""
a = 3.0
expected_var = sc.polygamma(1, a)
_, dist_var, *_ = loggamma_dist.stats(a=a, moments="mv")
assert pytest.approx(float(dist_var), rel=1e-10) == expected_var
def test_log_transform_relation_to_gamma(self):
"""loggamma_dist.pdf(y) must equal gamma.pdf(exp(y)) * exp(y) (change-of-variable)."""
from scipy.stats import gamma as scipy_gamma
y_test = np.linspace(-3.0, 3.0, 20)
direct = loggamma_dist.pdf(y_test, a=2.0)
via_gamma = scipy_gamma.pdf(np.exp(y_test), a=2.0) * np.exp(y_test)
np.testing.assert_allclose(direct, via_gamma, rtol=1e-6)
def test_rvs_samples_are_finite(self):
"""Random samples must be finite real numbers."""
rng = np.random.default_rng(42)
samples = loggamma_dist.rvs(a=2.0, size=200, random_state=rng)
assert samples.shape == (200,)
assert np.all(np.isfinite(samples))
def test_rvs_sample_mean_near_expected(self):
"""Sample mean of many RVS should be close to the distribution mean."""
a = 2.0
rng = np.random.default_rng(0)
samples = loggamma_dist.rvs(a=a, size=50_000, random_state=rng)
expected_mean = float(loggamma_dist.stats(a=a, moments="m"))
assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean
if __name__ == "__main__":
plot_k_dist_varying_alpha()
plot_k_dist_varying_mu()


@@ -6,7 +6,7 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from tools.statistics import aic_statistic, bic_statistic, kl_statistic
from fitting.fitter import Fitter
@@ -208,3 +208,129 @@ class TestBicStatisticInFitter:
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
assert f["expon"].test_result is not None
# ── kl_statistic unit tests ───────────────────────────────────────────────────
class TestKlStatistic:
def _fitted_dist(self, dist, data, **kwargs):
"""Return a frozen distribution fitted to data."""
params = dist.fit(data, **kwargs)
return dist(*params)
def test_returns_float(self):
"""kl_statistic must return a numeric scalar."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert isinstance(float(result), float)
def test_returns_real_value(self):
"""kl_statistic returns a real number (KDE finite-sum approximation can be negative)."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert np.isreal(result)
def test_result_is_finite(self):
"""kl_statistic must return a finite value for valid input."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
assert np.isfinite(kl_statistic(frozen, GAMMA_DATA, axis=None))
def test_works_with_axis_zero(self):
"""kl_statistic must return a finite value when axis=0."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=0)
assert np.isfinite(result)
def test_axis_zero_same_as_axis_none_for_1d(self):
"""For 1-D data, axis=0 and axis=None must return the same value."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result_none = kl_statistic(frozen, GAMMA_DATA, axis=None)
result_axis0 = kl_statistic(frozen, GAMMA_DATA, axis=0)
assert pytest.approx(result_none) == result_axis0
def test_better_fit_has_lower_kl(self):
"""Gamma fitted to gamma data should have lower KL than normal fitted to gamma data."""
gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
kl_gamma = kl_statistic(gamma_frozen, GAMMA_DATA, axis=None)
kl_norm = kl_statistic(norm_frozen, GAMMA_DATA, axis=None)
assert kl_gamma < kl_norm
def test_matches_manual_formula(self):
"""kl_statistic result must match the KDE-based KL formula computed manually."""
from scipy.stats import gaussian_kde
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
kde = gaussian_kde(GAMMA_DATA)
data_pdf = kde(GAMMA_DATA)
dist_pdf = frozen.pdf(GAMMA_DATA)
# kl_statistic normalizes both PDFs over the sample before the log-ratio
data_pdf = data_pdf / np.sum(data_pdf)
dist_pdf = dist_pdf / np.sum(dist_pdf)
epsilon = 1e-10
expected = np.sum(
data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon))
)
assert pytest.approx(kl_statistic(frozen, GAMMA_DATA, axis=None)) == expected
def test_no_nan_when_dist_pdf_near_zero(self):
"""epsilon guard must prevent NaN when dist PDF is effectively zero over data."""
# expon with large loc has near-zero PDF over positive-skewed gamma data
far_dist = expon(loc=100.0, scale=1.0)
result = kl_statistic(far_dist, GAMMA_DATA, axis=None)
assert not np.isnan(result)
def test_result_is_consistent_across_calls(self):
"""Two calls with identical inputs must return the same value."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
r1 = kl_statistic(frozen, GAMMA_DATA, axis=None)
r2 = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert r1 == r2
# ── Integration: kl_statistic as callable in Fitter ──────────────────────────
class TestKlStatisticInFitter:
def test_fitter_accepts_kl_callable(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
def test_fitter_kl_statistic_is_finite(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert np.isfinite(f["gamma"].gof_statistic)
def test_fitter_kl_pvalue_in_range(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
pval = f["gamma"].pvalue
assert 0.0 <= pval <= 1.0
def test_fitter_kl_vs_ad_different_statistic_values(self):
"""KL and AD statistics should differ numerically."""
f_kl = Fitter(
[gamma], statistic_method=kl_statistic, gamma_params={"floc": 0}
)
f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
f_kl.fit(GAMMA_DATA)
f_ad.fit(GAMMA_DATA)
f_kl.validate(n_mc_samples=99)
f_ad.validate(n_mc_samples=99)
assert f_kl["gamma"].gof_statistic != pytest.approx(
f_ad["gamma"].gof_statistic
)
def test_fitter_kl_multiple_distributions(self):
f = Fitter(
[gamma, expon],
statistic_method=kl_statistic,
gamma_params={"floc": 0},
expon_params={"floc": 0},
)
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
assert f["expon"].test_result is not None


@@ -1,5 +1,7 @@
from scipy.stats import rv_continuous
from scipy.special import kve, gammaln
from scipy.integrate import quad
import scipy.special as sc
from scipy.stats._distn_infrastructure import _ShapeInfo
import numpy as np
@@ -36,16 +38,280 @@ class k_gen(rv_continuous):
return np.exp(self._logpdf(x, mu, alpha, beta))
def _logpdf(self, x, mu, alpha, beta):
half_sum = (alpha + beta) / 2.0
log_ab_over_mu = np.log(alpha) + np.log(beta) - np.log(mu)
z = 2.0 * np.sqrt(alpha * beta * x / mu)
return (
np.log(2.0)
- gammaln(alpha)
- gammaln(beta)
+ half_sum * log_ab_over_mu
+ (half_sum - 1.0) * np.log(x)
# kve(v, z) = exp(z) * kv(v, z): the scaled Bessel stays finite where
# kv underflows at large z; the -z below undoes the scaling in log space
+ np.log(kve(alpha - beta, z))
- z
)
def _stats(self, mu, alpha, beta, moments="mv"):
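# Raw moments: E[X^n] = mu^n * poch(alpha, n) * poch(beta, n) / (alpha * beta)^n,
# so n = 1 gives E[X] = mu and the mean needs no extra computation.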
mean = mu if any(c in moments for c in "mvsk") else None
variance = None
if "v" in moments or "s" in moments or "k" in moments:
second_raw = (mu ** 2) * sc.poch(alpha, 2) * sc.poch(beta, 2) / ((alpha * beta) ** 2)
variance = second_raw - mean ** 2
skewness = None
if "s" in moments or "k" in moments:
third_raw = (mu ** 3) * sc.poch(alpha, 3) * sc.poch(beta, 3) / ((alpha * beta) ** 3)
third_central = third_raw - 3.0 * mean * second_raw + 2.0 * mean ** 3
skewness = third_central / np.power(variance, 1.5)
kurtosis = None
if "k" in moments:
fourth_raw = (mu ** 4) * sc.poch(alpha, 4) * sc.poch(beta, 4) / ((alpha * beta) ** 4)
fourth_central = fourth_raw - 4.0 * mean * third_raw + 6.0 * mean ** 2 * second_raw - 3.0 * mean ** 4
kurtosis = fourth_central / (variance ** 2) - 3.0
return mean, variance, skewness, kurtosis
def _cdf(self, x, mu, alpha, beta):
# scipy broadcasts params to match x's shape before calling _cdf,
# so mu/alpha/beta may be arrays. Pass all four to np.vectorize so
# each argument arrives as a scalar inside _scalar.
# Substitution u = sqrt(x) regularises the integrand near u=0:
# f(u²)*2u ~ u^(alpha+beta-1), smooth for alpha,beta > 0.
def _scalar(xi, mui, ai, bi):
val, _ = quad(
lambda u: float(self._pdf(float(u * u), float(mui), float(ai), float(bi))) * 2.0 * u,
0.0, float(np.sqrt(xi)),
limit=200, epsabs=1.49e-10, epsrel=1.49e-8,
)
return val
return np.vectorize(_scalar)(x, mu, alpha, beta)
def fit(self, data, *args, **kwds):
if ("loc" in kwds and kwds["loc"] != 0.0) or ("floc" in kwds and kwds["floc"] != 0.0):
raise ValueError("k_distribution uses a fixed loc=0; use mu to control the mean/scale.")
if ("scale" in kwds and kwds["scale"] != 1.0) or ("fscale" in kwds and kwds["fscale"] != 1.0):
raise ValueError("k_distribution uses a fixed scale=1; use mu to control the mean/scale.")
kwds.pop("loc", None)
kwds.pop("scale", None)
return super().fit(data, *args, floc=0.0, fscale=1.0, **kwds)
k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta")
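# Note: because fit() pins floc=0.0 and fscale=1.0, k_dist.fit(data) estimates
# only the shape parameters and returns (mu, alpha, beta, 0.0, 1.0).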
class lognakagami_gen(rv_continuous):
r"""A Log-Nakagami continuous random variable.
If X follows a Nakagami distribution with shape parameter m >= 0.5
and spread Omega > 0, then Y = ln(X) follows the Log-Nakagami distribution.
%(before_notes)s
Notes
-----
The probability density function for `lognakagami` is:
f(y; m, Omega) = 2*m^m / (Gamma(m)*Omega^m)
* exp(2*m*y) * exp(-(m/Omega)*exp(2*y))
for y in (-inf, +inf). Derived via the change of variable Y = ln X,
whose Jacobian e^y combines with x^{2m-1} to give the e^{2my} term.
The mean and variance are:
E[Y] = 0.5 * (psi(m) - ln(m) + ln(Omega))
Var[Y] = 0.25 * psi_1(m)
where psi and psi_1 are the digamma and trigamma functions. The spread
Omega shifts the mean by 0.5*ln(Omega) but leaves all higher moments
unchanged; entropy is also Omega-independent.
`lognakagami` takes ``m`` and ``Omega`` as shape parameters.
See Also
--------
nakagami : The Nakagami distribution (before the log transform).
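Examples
--------
A quick sketch of the change-of-variable identity against scipy's
``nakagami`` (for ``Omega = 1``, where the Nakagami scale is 1):

>>> import numpy as np
>>> from scipy.stats import nakagami
>>> y = 0.3
>>> np.allclose(lognakagami.pdf(y, m=2.0, Omega=1.0),
...             nakagami.pdf(np.exp(y), 2.0) * np.exp(y))
True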
"""
def _shape_info(self):
return [
_ShapeInfo("m", False, (0, np.inf), (False, False)),
_ShapeInfo("Omega", False, (0, np.inf), (False, False)),
]
def _argcheck(self, m, Omega):
return (m >= 0.5) & (Omega > 0)
# ---------- PDF / log-PDF ----------
def _pdf(self, y, m, Omega):
return np.exp(self._logpdf(y, m, Omega))
def _logpdf(self, y, m, Omega):
# ln f = ln2 + m*ln(m) - ln Γ(m) - m*ln(Ω) + 2m*y - (m/Ω)*exp(2y)
return (np.log(2.0)
+ sc.xlogy(m, m)
- sc.gammaln(m)
- m * np.log(Omega)
+ 2.0 * m * y
- (m / Omega) * np.exp(2.0 * y))
# ---------- CDF / SF / log-CDF / log-SF ----------
def _cdf(self, y, m, Omega):
# P(Y <= y) = P(X <= e^y) = gammainc(m, (m/Omega) * e^{2y})
return sc.gammainc(m, (m / Omega) * np.exp(2.0 * y))
def _sf(self, y, m, Omega):
return sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y))
def _logcdf(self, y, m, Omega):
return np.log(sc.gammainc(m, (m / Omega) * np.exp(2.0 * y)))
def _logsf(self, y, m, Omega):
return np.log(sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y)))
# ---------- PPF / ISF ----------
def _ppf(self, q, m, Omega):
# q = gammainc(m, (m/Omega)*e^{2y})
# => (m/Omega)*e^{2y} = gammaincinv(m, q)
# => y = 0.5 * ln(Omega * gammaincinv(m, q) / m)
return 0.5 * np.log(Omega * sc.gammaincinv(m, q) / m)
def _isf(self, q, m, Omega):
return 0.5 * np.log(Omega * sc.gammainccinv(m, q) / m)
# ---------- Moments & statistics ----------
def _stats(self, m, Omega):
# E[Y] = 0.5*(psi(m) - ln(m) + ln(Omega))
mu = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
# Var[Y] = 0.25 * psi_1(m) (Omega cancels — pure shift)
mu2 = 0.25 * sc.polygamma(1, m)
g1 = sc.polygamma(2, m) / sc.polygamma(1, m) ** 1.5
g2 = sc.polygamma(3, m) / sc.polygamma(1, m) ** 2
return mu, mu2, g1, g2
def _entropy(self, m, Omega):
# H = -E[ln f(Y)] = -ln2 + ln Γ(m) - m*psi(m) + m
# Omega cancels exactly (translation invariance of differential entropy)
return -np.log(2.0) + sc.gammaln(m) - m * sc.digamma(m) + m
# ---------- Random variate generation ----------
def _rvs(self, m, Omega, size=None, random_state=None):
# X ~ Nakagami(m, Omega) => X^2 ~ Gamma(m, Omega/m)
g = random_state.gamma(m, Omega / m, size=size)
return 0.5 * np.log(g)
lognakagami = lognakagami_gen(name="lognakagami", shapes="m, Omega")
class loggamma_gen(rv_continuous):
r"""A Log-Gamma continuous random variable.
If X follows a Gamma distribution with shape a
and unit scale (beta = 1), then Y = ln X follows
the Log-Gamma distribution.
%(before_notes)s
Notes
-----
The probability density function for `loggamma_dist` is:
f(y, a) = 1 / Gamma(a) * exp(a*y - exp(y))
for y in (-inf, +inf), a > 0.
This is the standardised form (scale beta = 1).
`loggamma_dist` takes ``a`` as a shape parameter.
The non-unit scale beta is recovered through scipy's
``loc`` parameter: loc = ln(beta), since
ln(beta * X) = ln(beta) + ln(X).
See Also
--------
gamma : The Gamma distribution (before the log transform).
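Examples
--------
A sketch of the change-of-variable identity against scipy's ``gamma``,
including the loc = ln(beta) correspondence described above:

>>> import numpy as np
>>> from scipy.stats import gamma
>>> y, beta = 0.4, 3.0
>>> np.allclose(loggamma_dist.pdf(y, a=2.0),
...             gamma.pdf(np.exp(y), a=2.0) * np.exp(y))
True
>>> np.allclose(loggamma_dist.pdf(y, a=2.0, loc=np.log(beta)),
...             gamma.pdf(np.exp(y), a=2.0, scale=beta) * np.exp(y))
True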
"""
def _shape_info(self):
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
def _argcheck(self, a):
return a > 0
# ---------- PDF / log-PDF ----------
def _pdf(self, y, a):
return np.exp(self._logpdf(y, a))
def _logpdf(self, y, a):
# ln f = a*y - e^y - ln Γ(a)
return a * y - np.exp(y) - sc.gammaln(a)
# ---------- CDF / SF / log-CDF / log-SF ----------
def _cdf(self, y, a):
# P(Y <= y) = P(X <= e^y) = gammainc(a, e^y)
return sc.gammainc(a, np.exp(y))
def _sf(self, y, a):
return sc.gammaincc(a, np.exp(y))
def _logcdf(self, y, a):
return np.log(sc.gammainc(a, np.exp(y)))
def _logsf(self, y, a):
return np.log(sc.gammaincc(a, np.exp(y)))
# ---------- PPF / ISF ----------
def _ppf(self, q, a):
# Invert CDF: q = gammainc(a, e^y)
# => e^y = gammaincinv(a, q)
# => y = ln(gammaincinv(a, q))
return np.log(sc.gammaincinv(a, q))
def _isf(self, q, a):
return np.log(sc.gammainccinv(a, q))
# ---------- Moments & statistics ----------
def _stats(self, a):
# Y = ln(X), X ~ Gamma(a, 1)
# E[Y] = psi(a) (digamma)
# Var[Y] = psi_1(a) (trigamma)
# skew = psi_2(a) / psi_1(a)^{3/2}
# excess kurtosis = psi_3(a) / psi_1(a)^2
mu = sc.digamma(a)
mu2 = sc.polygamma(1, a)
g1 = sc.polygamma(2, a) / mu2 ** 1.5
g2 = sc.polygamma(3, a) / mu2 ** 2
return mu, mu2, g1, g2
def _entropy(self, a):
# H(Y) = -E[ln f(Y)] = -E[a*Y - e^Y - ln Γ(a)]
# = ln Γ(a) - a*psi(a) + E[e^Y]
# E[e^Y] = E[X] = a (for Gamma(a, 1))
return sc.gammaln(a) - a * sc.digamma(a) + a
# ---------- Random variate generation ----------
def _rvs(self, a, size=None, random_state=None):
# Generate Gamma(a, 1) variates, then take log.
# For small a, use Gamma(a) ~ Gamma(a+1) * U^(1/a)
# so ln Gamma(a) ~ ln Gamma(a+1) + ln(U)/a
# This avoids precision loss when a << 1.
return (np.log(random_state.gamma(a + 1, size=size))
+ np.log(random_state.uniform(size=size)) / a)
loggamma_dist = loggamma_gen(name="loggamma_dist", shapes="a")


@@ -1,4 +1,5 @@
import numpy as np
from scipy.stats import gaussian_kde
def aic_statistic(dist, data, axis):
@@ -32,4 +33,30 @@ def bic_statistic(dist, data, axis):
log_likelihood = np.sum(dist.logpdf(data), axis=axis)
bic = np.log(n) * k - 2 * log_likelihood
return bic
def kl_statistic(dist, data, axis):
"""
KL divergence-based goodness-of-fit statistic.
KL(P || Q) = ∑ P(x) log(P(x) / Q(x))
Lower KL divergence indicates a better fit; since goodness_of_fit()
treats larger statistic values as a worse fit, the KL value can be used
directly as the test statistic.
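Example (illustrative, mirroring the unit tests):

>>> from scipy.stats import gamma
>>> data = gamma(a=2.0).rvs(size=500, random_state=0)
>>> frozen = gamma(*gamma.fit(data, floc=0))
>>> stat = kl_statistic(frozen, data, axis=None)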
"""
# Estimate the PDF of the data using KDE
kde = gaussian_kde(data)
data_pdf = kde(data)
# Get the PDF of the distribution at the data points
dist_pdf = dist.pdf(data)
# Normalize both PDFs over the sample points so each sums to 1
data_pdf /= np.sum(data_pdf)
dist_pdf /= np.sum(dist_pdf)
# Avoid division by zero and log of zero by adding a small constant
epsilon = 1e-10
kl_divergence = np.sum(data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon)), axis=axis)
return kl_divergence