Implement two new scipy-compatible distributions: Log-Nakagami (lognakagami) and Log-Gamma (loggamma_dist), with complete logpdf/cdf/ppf/stats/entropy/rvs methods derived from the change-of-variable Y = ln(X). Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable compatible with the Fitter class. Extend k_gen with _stats (improving speed), _cdf, and a fit guard, and switch kv → kve to improve numerical stability at large arguments. Add unit tests for all three additions covering normalization, monotonicity, ppf inversion, moment formulas, and Fitter integration.
348 lines
14 KiB
Python
348 lines
14 KiB
Python
import numpy as np
|
|
import pytest
|
|
import scipy.special as sc
|
|
import matplotlib.pyplot as plt
|
|
import sys
|
|
import os
|
|
|
|
# Put the parent of this file's directory at the front of the import path
# (presumably so the `tools` package imported below resolves — confirm layout).
_TESTS_DIR = os.path.dirname(__file__)
sys.path.insert(0, os.path.join(_TESTS_DIR, ".."))
|
|
|
|
from tools.distributions import k_dist, lognakagami, loggamma_dist
|
|
|
|
|
|
# Shared grid of 500 strictly positive abscissae on [0.01, 10].
X = np.linspace(start=0.01, stop=10.0, num=500)
|
|
|
|
|
|
# ── k_dist unit tests ────────────────────────────────────────────────────────
|
|
|
|
|
|
class TestKDistPdf:
    """Checks on ``k_dist.pdf`` / ``k_dist.logpdf`` with shapes ``mu``, ``alpha``, ``beta``.

    Scalar comparisons are written as ``measured == pytest.approx(expected, ...)``
    so that relative tolerances scale with the expected value and failure
    messages report the expectation correctly.
    """

    def test_pdf_is_positive_for_valid_input(self):
        """PDF must be strictly positive for x > 0 and valid parameters."""
        vals = k_dist.pdf(X, mu=1.0, alpha=2.0, beta=2.0)
        assert np.all(vals > 0)

    def test_pdf_integrates_to_one(self):
        """Numerical integral of PDF over a wide domain should be ≈ 1."""
        x_fine = np.linspace(1e-4, 200.0, 100_000)
        integral = np.trapezoid(k_dist.pdf(x_fine, mu=1.0, alpha=2.0, beta=2.0), x_fine)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_mean_equals_mu(self):
        """Numerical mean of distribution should match the mu parameter."""
        x_grid = np.linspace(1e-4, 500.0, 200_000)
        for mu in [0.5, 1.0, 3.0]:
            # First moment by trapezoidal quadrature of x * pdf(x).
            mean_num = np.trapezoid(
                x_grid * k_dist.pdf(x_grid, mu=mu, alpha=2.0, beta=3.0), x_grid
            )
            assert mean_num == pytest.approx(mu, rel=1e-2)

    def test_logpdf_equals_log_pdf(self):
        """logpdf must equal log(pdf) for numerical consistency."""
        x_test = np.linspace(0.1, 5.0, 20)
        log_via_pdf = np.log(k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0))
        log_direct = k_dist.logpdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)

    def test_argcheck_rejects_non_positive_mu(self):
        """mu <= 0 must not produce a valid (positive-finite) PDF value."""
        val = k_dist.pdf(1.0, mu=-1.0, alpha=2.0, beta=2.0)
        assert not (np.isfinite(val) and val > 0)

    def test_argcheck_rejects_non_positive_alpha(self):
        """alpha <= 0 must not produce a valid (positive-finite) PDF value."""
        val = k_dist.pdf(1.0, mu=1.0, alpha=-1.0, beta=2.0)
        assert not (np.isfinite(val) and val > 0)

    def test_argcheck_rejects_non_positive_beta(self):
        """beta <= 0 must not produce a valid (positive-finite) PDF value."""
        val = k_dist.pdf(1.0, mu=1.0, alpha=2.0, beta=-1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_larger_alpha_shifts_mass_right(self):
        """Changing alpha (with mu and beta fixed) must leave the numerical mean at mu.

        Despite the test name, no rightward shift is asserted here: only the
        first moment is checked, for a small and a large alpha.
        """
        x_grid = np.linspace(1e-4, 50.0, 20_000)
        mean_low = np.trapezoid(
            x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=0.5, beta=2.0), x_grid
        )
        mean_high = np.trapezoid(
            x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=4.0, beta=2.0), x_grid
        )
        # Both should be close to mu=2.0; variance changes but mean is fixed
        assert mean_low == pytest.approx(2.0, rel=5e-2)
        assert mean_high == pytest.approx(2.0, rel=5e-2)

    def test_symmetry_in_alpha_beta(self):
        """PDF is symmetric in alpha and beta: swapping them gives the same PDF."""
        x_test = np.linspace(0.1, 5.0, 20)
        pdf_ab = k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
        pdf_ba = k_dist.pdf(x_test, mu=1.0, alpha=3.0, beta=2.0)
        np.testing.assert_allclose(pdf_ab, pdf_ba, rtol=1e-6)
|
|
|
|
|
|
# ── Parametric curve plots ───────────────────────────────────────────────────
|
|
|
|
|
|
def _plot_k_dist_family(varied, values, fixed, x_max, save_path=None):
    """Draw one ``k_dist`` PDF curve per value of a single swept parameter.

    Parameters
    ----------
    varied : str
        Keyword name of the swept ``k_dist.pdf`` parameter; also used in the
        legend labels and the title.
    values : sequence of float
        Values taken by ``varied``; one curve is drawn per value.
    fixed : dict
        Remaining ``k_dist.pdf`` keyword arguments, held constant. Their
        insertion order is the order in which they appear in the title.
    x_max : float
        Upper end of the x grid (1000 points starting at 1e-4).
    save_path : str or None
        When truthy, the figure is written there at 150 dpi.

    Returns
    -------
    The matplotlib figure; the caller is responsible for closing it.
    """
    x = np.linspace(1e-4, x_max, 1000)
    held = ", ".join(f"{name}={value}" for name, value in fixed.items())

    fig, ax = plt.subplots(figsize=(8, 5))
    for value in values:
        ax.plot(x, k_dist.pdf(x, **{varied: value}, **fixed), label=f"{varied}={value}")

    ax.set_xlabel("x")
    ax.set_ylabel("PDF")
    ax.set_title(f"Generalized K distribution — varying {varied} ({held} fixed)")
    ax.legend()
    ax.set_ylim(bottom=0)
    fig.tight_layout()

    if save_path:
        fig.savefig(save_path, dpi=150)
    return fig


def plot_k_dist_varying_alpha(save_path=None):
    """Plot generalized K-distribution PDF curves for several values of alpha.

    mu and beta are held at 2.0. Returns the figure; saves it to ``save_path``
    when one is given.
    """
    return _plot_k_dist_family(
        "alpha", [0.5, 1.0, 2.0, 4.0, 8.0], {"mu": 2.0, "beta": 2.0}, 15.0, save_path
    )


def plot_k_dist_varying_mu(save_path=None):
    """Plot generalized K-distribution PDF curves for several values of mu.

    alpha and beta are held at 2.0. Returns the figure; saves it to
    ``save_path`` when one is given.
    """
    return _plot_k_dist_family(
        "mu", [0.5, 1.0, 2.0, 4.0, 8.0], {"alpha": 2.0, "beta": 2.0}, 30.0, save_path
    )


def plot_k_dist_varying_beta(save_path=None):
    """Plot generalized K-distribution PDF curves for several values of beta.

    mu and alpha are held at 2.0. Returns the figure; saves it to ``save_path``
    when one is given.
    """
    return _plot_k_dist_family(
        "beta", [0.5, 1.0, 2.0, 4.0, 8.0], {"mu": 2.0, "alpha": 2.0}, 15.0, save_path
    )
|
|
|
|
|
|
# ── Test: plots are generated without errors ─────────────────────────────────
|
|
|
|
|
|
class TestKDistPlots:
    """Smoke tests: each parameter-sweep plot runs and writes its output file.

    Every figure is closed in a ``finally`` clause so that a failing assertion
    does not leave it open for the rest of the test session.
    """

    def test_plot_varying_alpha_runs_without_error(self, tmp_path):
        """Curve plot varying alpha must complete and save a file."""
        out = tmp_path / "k_dist_alpha.png"
        fig = plot_k_dist_varying_alpha(save_path=str(out))
        try:
            assert out.exists()
        finally:
            plt.close(fig)

    def test_plot_varying_mu_runs_without_error(self, tmp_path):
        """Curve plot varying mu must complete and save a file."""
        out = tmp_path / "k_dist_mu.png"
        fig = plot_k_dist_varying_mu(save_path=str(out))
        try:
            assert out.exists()
        finally:
            plt.close(fig)

    def test_plot_varying_beta_runs_without_error(self, tmp_path):
        """Curve plot varying beta must complete and save a file."""
        out = tmp_path / "k_dist_beta.png"
        fig = plot_k_dist_varying_beta(save_path=str(out))
        try:
            assert out.exists()
        finally:
            plt.close(fig)
|
|
|
|
|
|
# ── Shared evaluation grid for the log-domain distribution tests ─────────────
|
|
|
|
# Shared grid of 500 real-valued abscissae on [-5, 5].
Y = np.linspace(start=-5.0, stop=5.0, num=500)
|
|
|
|
|
|
# ── lognakagami unit tests ────────────────────────────────────────────────────
|
|
|
|
|
|
class TestLogNakagami:
    """Checks on the ``lognakagami`` distribution with shapes ``m`` and ``Omega``.

    Scalar comparisons are written as ``measured == pytest.approx(expected, ...)``
    so that relative tolerances scale with the expected value and failure
    messages report the expectation correctly.
    """

    def test_logpdf_is_finite_on_real_line(self):
        """logpdf must be finite for all real y — tests positivity without float64 underflow."""
        log_vals = lognakagami.logpdf(Y, m=2.0, Omega=1.0)
        assert np.all(np.isfinite(log_vals))

    def test_pdf_integrates_to_one(self):
        """Numerical integral of PDF over the real line should be ≈ 1."""
        y_fine = np.linspace(-30, 10, 200_000)
        integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=1.0), y_fine)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_pdf_integrates_to_one_nonunit_omega(self):
        """Normalisation must hold for Omega != 1."""
        y_fine = np.linspace(-30, 15, 200_000)
        integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=4.0), y_fine)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_logpdf_equals_log_pdf(self):
        """logpdf must equal log(pdf) at points where pdf does not underflow."""
        y_bulk = np.linspace(-4.0, 2.0, 50)
        log_via_pdf = np.log(lognakagami.pdf(y_bulk, m=2.0, Omega=1.0))
        log_direct = lognakagami.logpdf(y_bulk, m=2.0, Omega=1.0)
        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)

    def test_cdf_is_monotone_increasing(self):
        """CDF must be non-decreasing on the shared grid."""
        cdf_vals = lognakagami.cdf(Y, m=2.0, Omega=1.0)
        assert np.all(np.diff(cdf_vals) >= 0)

    def test_ppf_inverts_cdf(self):
        """ppf(cdf(y)) must recover y."""
        y_test = np.array([-2.0, 0.0, 0.5])
        cdf_vals = lognakagami.cdf(y_test, m=2.0, Omega=1.0)
        np.testing.assert_allclose(
            lognakagami.ppf(cdf_vals, m=2.0, Omega=1.0), y_test, atol=1e-8
        )

    def test_argcheck_rejects_m_below_half(self):
        """m < 0.5 must not produce a valid (positive-finite) PDF value."""
        val = lognakagami.pdf(0.0, m=0.3, Omega=1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_argcheck_rejects_non_positive_omega(self):
        """Omega <= 0 must not produce a valid (positive-finite) PDF value."""
        val = lognakagami.pdf(0.0, m=2.0, Omega=-1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_stats_mean(self):
        """Analytical mean must equal 0.5 * (digamma(m) - log(m) + log(Omega))."""
        m, Omega = 3.0, 2.0
        expected_mean = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
        dist_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
        assert dist_mean == pytest.approx(expected_mean, rel=1e-10)

    def test_stats_mean_omega_shifts_by_half_log_omega(self):
        """Changing Omega from 1 to 4 must shift the mean by 0.5*log(4).

        Only the mean is checked here; the variance is covered by
        ``test_stats_variance_independent_of_omega``.
        """
        m = 2.0
        mean1 = float(lognakagami.stats(m=m, Omega=1.0, moments="m"))
        mean4 = float(lognakagami.stats(m=m, Omega=4.0, moments="m"))
        assert mean4 - mean1 == pytest.approx(0.5 * np.log(4.0), rel=1e-10)

    def test_stats_variance_independent_of_omega(self):
        """Variance must equal 0.25 * polygamma(1, m) and not depend on Omega."""
        m = 3.0
        # Convert to a plain float so approx() compares scalar against scalar.
        expected_var = float(0.25 * sc.polygamma(1, m))
        for Omega in [0.5, 1.0, 4.0]:
            _, dist_var, *_ = lognakagami.stats(m=m, Omega=Omega, moments="mv")
            assert float(dist_var) == pytest.approx(expected_var, rel=1e-10)

    def test_rvs_samples_are_finite(self):
        """Random samples must be finite real numbers."""
        rng = np.random.default_rng(42)
        samples = lognakagami.rvs(m=2.0, Omega=1.0, size=200, random_state=rng)
        assert samples.shape == (200,)
        assert np.all(np.isfinite(samples))

    def test_rvs_sample_mean_near_expected(self):
        """Sample mean of many RVS should be close to the distribution mean."""
        m, Omega = 2.0, 3.0
        rng = np.random.default_rng(0)
        samples = lognakagami.rvs(m=m, Omega=Omega, size=50_000, random_state=rng)
        expected_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
        assert samples.mean() == pytest.approx(expected_mean, rel=5e-2)
|
|
|
|
|
|
# ── loggamma_dist unit tests ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestLogGamma:
    """Checks on the ``loggamma_dist`` distribution with shape ``a``.

    Scalar comparisons are written as ``measured == pytest.approx(expected, ...)``
    so that relative tolerances scale with the expected value and failure
    messages report the expectation correctly.
    """

    def test_pdf_is_positive_on_real_line(self):
        """PDF must be strictly positive for all real y and a > 0."""
        vals = loggamma_dist.pdf(Y, a=2.0)
        assert np.all(vals > 0)

    def test_pdf_integrates_to_one(self):
        """Numerical integral of PDF over the real line should be ≈ 1."""
        y_fine = np.linspace(-30, 10, 200_000)
        integral = np.trapezoid(loggamma_dist.pdf(y_fine, a=2.0), y_fine)
        assert integral == pytest.approx(1.0, abs=1e-3)

    def test_logpdf_equals_log_pdf(self):
        """logpdf must equal log(pdf) for numerical consistency."""
        log_via_pdf = np.log(loggamma_dist.pdf(Y, a=2.0))
        log_direct = loggamma_dist.logpdf(Y, a=2.0)
        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)

    def test_cdf_is_monotone_increasing(self):
        """CDF must be non-decreasing on the shared grid."""
        cdf_vals = loggamma_dist.cdf(Y, a=2.0)
        assert np.all(np.diff(cdf_vals) >= 0)

    def test_cdf_and_sf_sum_to_one(self):
        """CDF + SF must equal 1 at every point."""
        cdf_vals = loggamma_dist.cdf(Y, a=2.0)
        sf_vals = loggamma_dist.sf(Y, a=2.0)
        np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12)

    def test_ppf_inverts_cdf(self):
        """ppf(cdf(y)) must recover y."""
        y_test = np.array([-2.0, 0.0, 1.0])
        cdf_vals = loggamma_dist.cdf(y_test, a=2.0)
        np.testing.assert_allclose(loggamma_dist.ppf(cdf_vals, a=2.0), y_test, atol=1e-8)

    def test_argcheck_rejects_non_positive_a(self):
        """a <= 0 must not produce a valid (positive-finite) PDF value."""
        val = loggamma_dist.pdf(0.0, a=-1.0)
        assert not (np.isfinite(val) and val > 0)

    def test_stats_mean_equals_digamma(self):
        """Analytical mean must equal digamma(a)."""
        a = 3.0
        expected_mean = sc.digamma(a)
        dist_mean = float(loggamma_dist.stats(a=a, moments="m"))
        assert dist_mean == pytest.approx(expected_mean, rel=1e-10)

    def test_stats_variance_equals_trigamma(self):
        """Analytical variance must equal polygamma(1, a)."""
        a = 3.0
        # Convert to a plain float so approx() compares scalar against scalar.
        expected_var = float(sc.polygamma(1, a))
        _, dist_var, *_ = loggamma_dist.stats(a=a, moments="mv")
        assert float(dist_var) == pytest.approx(expected_var, rel=1e-10)

    def test_log_transform_relation_to_gamma(self):
        """loggamma_dist.pdf(y) must equal gamma.pdf(exp(y)) * exp(y) (change-of-variable)."""
        from scipy.stats import gamma as scipy_gamma

        y_test = np.linspace(-3.0, 3.0, 20)
        direct = loggamma_dist.pdf(y_test, a=2.0)
        # Density of Y = ln(X): f_Y(y) = f_X(e^y) * e^y.
        via_gamma = scipy_gamma.pdf(np.exp(y_test), a=2.0) * np.exp(y_test)
        np.testing.assert_allclose(direct, via_gamma, rtol=1e-6)

    def test_rvs_samples_are_finite(self):
        """Random samples must be finite real numbers."""
        rng = np.random.default_rng(42)
        samples = loggamma_dist.rvs(a=2.0, size=200, random_state=rng)
        assert samples.shape == (200,)
        assert np.all(np.isfinite(samples))

    def test_rvs_sample_mean_near_expected(self):
        """Sample mean of many RVS should be close to the distribution mean."""
        a = 2.0
        rng = np.random.default_rng(0)
        samples = loggamma_dist.rvs(a=a, size=50_000, random_state=rng)
        expected_mean = float(loggamma_dist.stats(a=a, moments="m"))
        assert samples.mean() == pytest.approx(expected_mean, rel=5e-2)
|
|
|
|
|
|
if __name__ == "__main__":
    # When run as a script, build the three parameter-sweep figures in the
    # same order as before (alpha, mu, beta) and then display them.
    for build_figure in (
        plot_k_dist_varying_alpha,
        plot_k_dist_varying_mu,
        plot_k_dist_varying_beta,
    ):
        build_figure()
    plt.show()
|