feat(distributions): add lognakagami, loggamma, and kl_statistic
Implement two new scipy-compatible distributions: Log-Nakagami (lognakagami) and Log-Gamma (loggamma_dist), with complete logpdf/cdf/ppf/stats/entropy/rvs methods derived from the change-of-variable Y = ln(X). Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable compatible with the Fitter class. Extend k_gen with _stats (improving speed), _cdf, and a fit guard, and switch kv → kve to improve numerical stability at large arguments. Add unit tests for all three additions covering normalization, monotonicity, ppf inversion, moment formulas, and Fitter integration.
This commit is contained in:
@@ -1,12 +1,13 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.special as sc
|
||||
import matplotlib.pyplot as plt
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Prepend the parent of this file's directory to the import path so the project imports below resolve.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from tools.distributions import k_dist
|
||||
from tools.distributions import k_dist, lognakagami, loggamma_dist
|
||||
|
||||
|
||||
# 500 evenly spaced points on [0.01, 10.0]; presumably the evaluation grid for the k_dist tests outside this view — TODO confirm.
X = np.linspace(0.01, 10.0, 500)
|
||||
@@ -167,6 +168,178 @@ class TestKDistPlots:
|
||||
|
||||
# ── Shared evaluation grid for the log-domain distribution tests ─────────────
|
||||
|
||||
# 500 evenly spaced points on [-5.0, 5.0], evaluated by the lognakagami and loggamma_dist tests below.
Y = np.linspace(-5.0, 5.0, 500)
|
||||
|
||||
|
||||
# ── lognakagami unit tests ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestLogNakagami:
    """Unit tests for the ``lognakagami`` distribution (parameters ``m`` and ``Omega``).

    Covers finiteness of the log-density, normalisation, logpdf/pdf
    consistency, CDF monotonicity, ppf inversion, parameter validation,
    the closed-form mean and variance, and random variate generation.
    """

    @staticmethod
    def _pdf_mass(lower, upper, **shape_params):
        """Return the trapezoid-rule integral of the PDF over ``[lower, upper]``.

        The integral is evaluated on 200 000 evenly spaced points.
        """
        # scipy.integrate.trapezoid is used instead of np.trapezoid because the
        # latter only exists in NumPy >= 2.0.
        from scipy.integrate import trapezoid

        grid = np.linspace(lower, upper, 200_000)
        return trapezoid(lognakagami.pdf(grid, **shape_params), grid)

    def test_logpdf_is_finite_on_real_line(self):
        """logpdf must be finite for all real y — tests positivity without float64 underflow."""
        log_vals = lognakagami.logpdf(Y, m=2.0, Omega=1.0)
        assert np.all(np.isfinite(log_vals))

    def test_pdf_integrates_to_one(self):
        """Numerical integral of PDF over the real line should be ≈ 1."""
        integral = self._pdf_mass(-30, 10, m=2.0, Omega=1.0)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_pdf_integrates_to_one_nonunit_omega(self):
        """Normalisation must hold for Omega != 1."""
        integral = self._pdf_mass(-30, 15, m=2.0, Omega=4.0)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_logpdf_equals_log_pdf(self):
        """logpdf must equal log(pdf) at points where pdf does not underflow."""
        y_bulk = np.linspace(-4.0, 2.0, 50)
        log_via_pdf = np.log(lognakagami.pdf(y_bulk, m=2.0, Omega=1.0))
        log_direct = lognakagami.logpdf(y_bulk, m=2.0, Omega=1.0)
        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)

    def test_cdf_is_monotone_increasing(self):
        """CDF must be non-decreasing."""
        cdf_vals = lognakagami.cdf(Y, m=2.0, Omega=1.0)
        assert np.all(np.diff(cdf_vals) >= 0)

    def test_ppf_inverts_cdf(self):
        """ppf(cdf(y)) must recover y."""
        y_test = np.array([-2.0, 0.0, 0.5])
        cdf_vals = lognakagami.cdf(y_test, m=2.0, Omega=1.0)
        np.testing.assert_allclose(
            lognakagami.ppf(cdf_vals, m=2.0, Omega=1.0), y_test, atol=1e-8
        )

    def test_argcheck_rejects_m_below_half(self):
        """m < 0.5 must not produce a valid (positive-finite) PDF value."""
        val = lognakagami.pdf(0.0, m=0.3, Omega=1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_argcheck_rejects_non_positive_omega(self):
        """Omega <= 0 must not produce a valid (positive-finite) PDF value."""
        val = lognakagami.pdf(0.0, m=2.0, Omega=-1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_stats_mean(self):
        """Analytical mean must equal 0.5 * (digamma(m) - log(m) + log(Omega))."""
        m, Omega = 3.0, 2.0
        expected_mean = float(0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega)))
        dist_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
        assert dist_mean == pytest.approx(expected_mean, rel=1e-10)

    def test_stats_mean_omega_shifts_by_half_log_omega(self):
        """Changing Omega shifts the mean by 0.5*log(Omega) and leaves variance unchanged."""
        m = 2.0
        mean1, var1 = lognakagami.stats(m=m, Omega=1.0, moments="mv")
        mean4, var4 = lognakagami.stats(m=m, Omega=4.0, moments="mv")
        assert float(mean4) - float(mean1) == pytest.approx(
            0.5 * np.log(4.0), rel=1e-10
        )
        # The docstring promises an unchanged variance, so assert it explicitly.
        assert float(var4) == pytest.approx(float(var1), rel=1e-10)

    def test_stats_variance_independent_of_omega(self):
        """Variance must equal 0.25 * polygamma(1, m) and not depend on Omega."""
        m = 3.0
        expected_var = float(0.25 * sc.polygamma(1, m))
        for Omega in [0.5, 1.0, 4.0]:
            _, dist_var = lognakagami.stats(m=m, Omega=Omega, moments="mv")
            assert float(dist_var) == pytest.approx(expected_var, rel=1e-10)

    def test_rvs_samples_are_finite(self):
        """Random samples must be finite real numbers."""
        rng = np.random.default_rng(42)
        samples = lognakagami.rvs(m=2.0, Omega=1.0, size=200, random_state=rng)
        assert samples.shape == (200,)
        assert np.all(np.isfinite(samples))

    def test_rvs_sample_mean_near_expected(self):
        """Sample mean of many RVS should be close to the distribution mean."""
        m, Omega = 2.0, 3.0
        rng = np.random.default_rng(0)
        samples = lognakagami.rvs(m=m, Omega=Omega, size=50_000, random_state=rng)
        expected_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
        # Tolerance is taken relative to the analytical mean (the reference value).
        assert samples.mean() == pytest.approx(expected_mean, rel=5e-2)
|
||||
|
||||
|
||||
# ── loggamma_dist unit tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestLogGamma:
    """Unit tests for the ``loggamma_dist`` distribution (shape parameter ``a``).

    Covers positivity, normalisation, logpdf/pdf consistency, CDF/SF
    behaviour, ppf inversion, parameter validation, the closed-form mean and
    variance, the change-of-variable relation to ``scipy.stats.gamma``, and
    random variate generation.
    """

    def test_pdf_is_positive_on_real_line(self):
        """PDF must be strictly positive for all real y and a > 0."""
        vals = loggamma_dist.pdf(Y, a=2.0)
        assert np.all(vals > 0)

    def test_pdf_integrates_to_one(self):
        """Numerical integral of PDF over the real line should be ≈ 1."""
        # scipy.integrate.trapezoid is used instead of np.trapezoid because the
        # latter only exists in NumPy >= 2.0.
        from scipy.integrate import trapezoid

        y_fine = np.linspace(-30, 10, 200_000)
        integral = trapezoid(loggamma_dist.pdf(y_fine, a=2.0), y_fine)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_logpdf_equals_log_pdf(self):
        """logpdf must equal log(pdf) for numerical consistency."""
        log_via_pdf = np.log(loggamma_dist.pdf(Y, a=2.0))
        log_direct = loggamma_dist.logpdf(Y, a=2.0)
        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)

    def test_cdf_is_monotone_increasing(self):
        """CDF must be non-decreasing."""
        cdf_vals = loggamma_dist.cdf(Y, a=2.0)
        assert np.all(np.diff(cdf_vals) >= 0)

    def test_cdf_and_sf_sum_to_one(self):
        """CDF + SF must equal 1 at every point."""
        cdf_vals = loggamma_dist.cdf(Y, a=2.0)
        sf_vals = loggamma_dist.sf(Y, a=2.0)
        np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12)

    def test_ppf_inverts_cdf(self):
        """ppf(cdf(y)) must recover y."""
        y_test = np.array([-2.0, 0.0, 1.0])
        cdf_vals = loggamma_dist.cdf(y_test, a=2.0)
        np.testing.assert_allclose(
            loggamma_dist.ppf(cdf_vals, a=2.0), y_test, atol=1e-8
        )

    def test_argcheck_rejects_non_positive_a(self):
        """a <= 0 must not produce a valid (positive-finite) PDF value."""
        val = loggamma_dist.pdf(0.0, a=-1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_stats_mean_equals_digamma(self):
        """Analytical mean must equal digamma(a)."""
        a = 3.0
        expected_mean = float(sc.digamma(a))
        dist_mean = float(loggamma_dist.stats(a=a, moments="m"))
        assert dist_mean == pytest.approx(expected_mean, rel=1e-10)

    def test_stats_variance_equals_trigamma(self):
        """Analytical variance must equal polygamma(1, a)."""
        a = 3.0
        expected_var = float(sc.polygamma(1, a))
        _, dist_var = loggamma_dist.stats(a=a, moments="mv")
        assert float(dist_var) == pytest.approx(expected_var, rel=1e-10)

    def test_log_transform_relation_to_gamma(self):
        """loggamma_dist.pdf(y) must equal gamma.pdf(exp(y)) * exp(y) (change-of-variable)."""
        from scipy.stats import gamma as scipy_gamma

        y_test = np.linspace(-3.0, 3.0, 20)
        direct = loggamma_dist.pdf(y_test, a=2.0)
        via_gamma = scipy_gamma.pdf(np.exp(y_test), a=2.0) * np.exp(y_test)
        np.testing.assert_allclose(direct, via_gamma, rtol=1e-6)

    def test_rvs_samples_are_finite(self):
        """Random samples must be finite real numbers."""
        rng = np.random.default_rng(42)
        samples = loggamma_dist.rvs(a=2.0, size=200, random_state=rng)
        assert samples.shape == (200,)
        assert np.all(np.isfinite(samples))

    def test_rvs_sample_mean_near_expected(self):
        """Sample mean of many RVS should be close to the distribution mean."""
        a = 2.0
        rng = np.random.default_rng(0)
        samples = loggamma_dist.rvs(a=a, size=50_000, random_state=rng)
        expected_mean = float(loggamma_dist.stats(a=a, moments="m"))
        # Tolerance is taken relative to the analytical mean (the reference value).
        assert samples.mean() == pytest.approx(expected_mean, rel=5e-2)
|
||||
|
||||
|
||||
# ── Entry-point: run plots interactively ─────────────────────────────────────
# Executed only when this file is run as a script, not when it is imported.
# NOTE(review): the two plot helpers are not defined in the visible part of the file — presumably declared earlier in this module; confirm.
if __name__ == "__main__":
|
||||
plot_k_dist_varying_alpha()
|
||||
plot_k_dist_varying_mu()
|
||||
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
|
||||
# Prepend the parent of this file's directory to the import path so the project imports below resolve.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from tools.statistics import aic_statistic, bic_statistic
|
||||
from tools.statistics import aic_statistic, bic_statistic, kl_statistic
|
||||
from fitting.fitter import Fitter
|
||||
|
||||
|
||||
@@ -208,3 +208,129 @@ class TestBicStatisticInFitter:
|
||||
f.validate(n_mc_samples=99)
|
||||
assert f["gamma"].test_result is not None
|
||||
assert f["expon"].test_result is not None
|
||||
|
||||
|
||||
# ── kl_statistic unit tests ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestKlStatistic:
    """Unit tests for ``kl_statistic`` called directly on frozen distributions.

    ``gamma``, ``norm``, ``expon`` and ``GAMMA_DATA`` are module-level names
    defined outside this class.
    """

    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data."""
        params = dist.fit(data, **kwargs)
        return dist(*params)

    def _gamma_fit(self):
        """Return a gamma distribution fitted to ``GAMMA_DATA`` with ``floc=0``."""
        return self._fitted_dist(gamma, GAMMA_DATA, floc=0)

    def test_returns_float(self):
        """kl_statistic must return a numeric scalar."""
        result = kl_statistic(self._gamma_fit(), GAMMA_DATA, axis=None)
        # ``isinstance(float(x), float)`` is always true, so inspect the
        # returned object itself: it must be 0-dimensional and floating-point.
        assert np.ndim(result) == 0
        assert np.issubdtype(np.asarray(result).dtype, np.floating)

    def test_returns_real_value(self):
        """kl_statistic returns a real number (KDE finite-sum approximation can be negative)."""
        result = kl_statistic(self._gamma_fit(), GAMMA_DATA, axis=None)
        assert np.isreal(result)

    def test_result_is_finite(self):
        """kl_statistic must return a finite value for valid input."""
        assert np.isfinite(kl_statistic(self._gamma_fit(), GAMMA_DATA, axis=None))

    def test_works_with_axis_zero(self):
        """kl_statistic must return a finite value when axis=0."""
        result = kl_statistic(self._gamma_fit(), GAMMA_DATA, axis=0)
        assert np.isfinite(result)

    def test_axis_zero_same_as_axis_none_for_1d(self):
        """For 1-D data, axis=0 and axis=None must return the same value."""
        frozen = self._gamma_fit()
        result_none = kl_statistic(frozen, GAMMA_DATA, axis=None)
        result_axis0 = kl_statistic(frozen, GAMMA_DATA, axis=0)
        assert result_axis0 == pytest.approx(result_none)

    def test_better_fit_has_lower_kl(self):
        """Gamma fitted to gamma data should have lower KL than normal fitted to gamma data."""
        gamma_frozen = self._gamma_fit()
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        kl_gamma = kl_statistic(gamma_frozen, GAMMA_DATA, axis=None)
        kl_norm = kl_statistic(norm_frozen, GAMMA_DATA, axis=None)
        assert kl_gamma < kl_norm

    def test_matches_manual_formula(self):
        """kl_statistic result must match the KDE-based KL formula computed manually."""
        from scipy.stats import gaussian_kde

        frozen = self._gamma_fit()
        data_pdf = gaussian_kde(GAMMA_DATA)(GAMMA_DATA)
        dist_pdf = frozen.pdf(GAMMA_DATA)
        # Same epsilon guard as the formula under test, so log() never sees 0.
        epsilon = 1e-10
        expected = np.sum(
            data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon))
        )
        assert kl_statistic(frozen, GAMMA_DATA, axis=None) == pytest.approx(expected)

    def test_no_nan_when_dist_pdf_near_zero(self):
        """epsilon guard must prevent NaN when dist PDF is effectively zero over data."""
        # expon with large loc has near-zero PDF over positive-skewed gamma data
        far_dist = expon(loc=100.0, scale=1.0)
        result = kl_statistic(far_dist, GAMMA_DATA, axis=None)
        assert not np.isnan(result)

    def test_result_is_consistent_across_calls(self):
        """Two calls with identical inputs must return the same value."""
        frozen = self._gamma_fit()
        r1 = kl_statistic(frozen, GAMMA_DATA, axis=None)
        r2 = kl_statistic(frozen, GAMMA_DATA, axis=None)
        assert r1 == r2
|
||||
|
||||
|
||||
# ── Integration: kl_statistic as callable in Fitter ──────────────────────────
|
||||
|
||||
|
||||
class TestKlStatisticInFitter:
    """Integration tests that pass ``kl_statistic`` to ``Fitter`` as ``statistic_method``."""

    @staticmethod
    def _gamma_fitter(method):
        """Build a gamma-only ``Fitter`` with ``floc=0`` and the given statistic."""
        return Fitter([gamma], statistic_method=method, gamma_params={"floc": 0})

    @staticmethod
    def _fit_and_validate(fitter):
        """Fit ``fitter`` to ``GAMMA_DATA``, validate with 99 MC samples, and return it."""
        fitter.fit(GAMMA_DATA)
        fitter.validate(n_mc_samples=99)
        return fitter

    def test_fitter_accepts_kl_callable(self):
        """A fitter driven by kl_statistic must produce a test result."""
        fitter = self._fit_and_validate(self._gamma_fitter(kl_statistic))
        assert fitter["gamma"].test_result is not None

    def test_fitter_kl_statistic_is_finite(self):
        """The goodness-of-fit statistic stored for gamma must be finite."""
        fitter = self._fit_and_validate(self._gamma_fitter(kl_statistic))
        assert np.isfinite(fitter["gamma"].gof_statistic)

    def test_fitter_kl_pvalue_in_range(self):
        """The p-value stored for gamma must lie in [0, 1]."""
        fitter = self._fit_and_validate(self._gamma_fitter(kl_statistic))
        pval = fitter["gamma"].pvalue
        assert 0.0 <= pval <= 1.0

    def test_fitter_kl_vs_ad_different_statistic_values(self):
        """KL and AD statistics should differ numerically."""
        f_kl = self._gamma_fitter(kl_statistic)
        f_ad = self._gamma_fitter("ad")
        fitters = (f_kl, f_ad)
        # Fit both before validating either, keeping the original call order.
        for fitter in fitters:
            fitter.fit(GAMMA_DATA)
        for fitter in fitters:
            fitter.validate(n_mc_samples=99)
        ad_value = f_ad["gamma"].gof_statistic
        assert f_kl["gamma"].gof_statistic != pytest.approx(ad_value)

    def test_fitter_kl_multiple_distributions(self):
        """Every distribution handed to the fitter must receive a test result."""
        fitter = self._fit_and_validate(
            Fitter(
                [gamma, expon],
                statistic_method=kl_statistic,
                gamma_params={"floc": 0},
                expon_params={"floc": 0},
            )
        )
        assert fitter["gamma"].test_result is not None
        assert fitter["expon"].test_result is not None
|
||||
|
||||
Reference in New Issue
Block a user