ADD:
ruff for code formatting BIC statistic AND BIC test implemented test_distributions.py for test new created dists with pytest REFACTOR: k_gen pdf changed from 2 params to generalized
This commit is contained in:
174
etc/tests/test_distributions.py
Normal file
174
etc/tests/test_distributions.py
Normal file
@@ -0,0 +1,174 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
import matplotlib.pyplot as plt
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from tools.distributions import k_dist
|
||||
|
||||
|
||||
X = np.linspace(0.01, 10.0, 500)
|
||||
|
||||
|
||||
# ── k_dist unit tests ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestKDistPdf:
|
||||
def test_pdf_is_positive_for_valid_input(self):
|
||||
"""PDF must be strictly positive for x > 0 and valid parameters."""
|
||||
vals = k_dist.pdf(X, mu=1.0, alpha=2.0, beta=2.0)
|
||||
assert np.all(vals > 0)
|
||||
|
||||
def test_pdf_integrates_to_one(self):
|
||||
"""Numerical integral of PDF over a wide domain should be ≈ 1."""
|
||||
x_fine = np.linspace(1e-4, 200.0, 100_000)
|
||||
integral = np.trapezoid(k_dist.pdf(x_fine, mu=1.0, alpha=2.0, beta=2.0), x_fine)
|
||||
assert pytest.approx(integral, abs=1e-3) == 1.0
|
||||
|
||||
def test_mean_equals_mu(self):
|
||||
"""Numerical mean of distribution should match the mu parameter."""
|
||||
x_grid = np.linspace(1e-4, 500.0, 200_000)
|
||||
for mu in [0.5, 1.0, 3.0]:
|
||||
mean_num = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=mu, alpha=2.0, beta=3.0), x_grid)
|
||||
assert pytest.approx(mean_num, rel=1e-2) == mu
|
||||
|
||||
def test_logpdf_equals_log_pdf(self):
|
||||
"""logpdf must equal log(pdf) for numerical consistency."""
|
||||
x_test = np.linspace(0.1, 5.0, 20)
|
||||
log_via_pdf = np.log(k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0))
|
||||
log_direct = k_dist.logpdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
|
||||
np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
|
||||
|
||||
def test_argcheck_rejects_non_positive_mu(self):
|
||||
"""mu <= 0 must not produce a valid (positive-finite) PDF value."""
|
||||
val = k_dist.pdf(1.0, mu=-1.0, alpha=2.0, beta=2.0)
|
||||
assert not (np.isfinite(val) and val > 0)
|
||||
|
||||
def test_argcheck_rejects_non_positive_alpha(self):
|
||||
"""alpha <= 0 must not produce a valid (positive-finite) PDF value."""
|
||||
val = k_dist.pdf(1.0, mu=1.0, alpha=-1.0, beta=2.0)
|
||||
assert not (np.isfinite(val) and val > 0)
|
||||
|
||||
def test_argcheck_rejects_non_positive_beta(self):
|
||||
"""beta <= 0 must not produce a valid (positive-finite) PDF value."""
|
||||
val = k_dist.pdf(1.0, mu=1.0, alpha=2.0, beta=-1.0)
|
||||
assert not (np.isfinite(val) and val > 0)
|
||||
|
||||
def test_larger_alpha_shifts_mass_right(self):
|
||||
"""Increasing alpha (with mu and beta fixed) shifts probability mass to the right."""
|
||||
x_grid = np.linspace(1e-4, 50.0, 20_000)
|
||||
mean_low = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=0.5, beta=2.0), x_grid)
|
||||
mean_high = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=4.0, beta=2.0), x_grid)
|
||||
# Both should be close to mu=2.0; variance changes but mean is fixed
|
||||
assert pytest.approx(mean_low, rel=5e-2) == 2.0
|
||||
assert pytest.approx(mean_high, rel=5e-2) == 2.0
|
||||
|
||||
def test_symmetry_in_alpha_beta(self):
|
||||
"""PDF is symmetric in alpha and beta: swapping them gives the same PDF."""
|
||||
x_test = np.linspace(0.1, 5.0, 20)
|
||||
pdf_ab = k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
|
||||
pdf_ba = k_dist.pdf(x_test, mu=1.0, alpha=3.0, beta=2.0)
|
||||
np.testing.assert_allclose(pdf_ab, pdf_ba, rtol=1e-6)
|
||||
|
||||
|
||||
# ── Parametric curve plots ───────────────────────────────────────────────────
|
||||
|
||||
|
||||
def plot_k_dist_varying_alpha(save_path=None):
|
||||
"""Plot generalized K-distribution PDF curves for several values of alpha."""
|
||||
alpha_values = [0.5, 1.0, 2.0, 4.0, 8.0]
|
||||
x = np.linspace(1e-4, 15.0, 1000)
|
||||
|
||||
fig, ax = plt.subplots(figsize=(8, 5))
|
||||
for alpha in alpha_values:
|
||||
ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=alpha, beta=2.0), label=f"alpha={alpha}")
|
||||
|
||||
ax.set_xlabel("x")
|
||||
ax.set_ylabel("PDF")
|
||||
ax.set_title("Generalized K distribution — varying alpha (mu=2.0, beta=2.0 fixed)")
|
||||
ax.legend()
|
||||
ax.set_ylim(bottom=0)
|
||||
fig.tight_layout()
|
||||
|
||||
if save_path:
|
||||
fig.savefig(save_path, dpi=150)
|
||||
return fig
|
||||
|
||||
|
||||
def plot_k_dist_varying_mu(save_path=None):
|
||||
"""Plot generalized K-distribution PDF curves for several values of mu."""
|
||||
mu_values = [0.5, 1.0, 2.0, 4.0, 8.0]
|
||||
x = np.linspace(1e-4, 30.0, 1000)
|
||||
|
||||
fig, ax = plt.subplots(figsize=(8, 5))
|
||||
for mu in mu_values:
|
||||
ax.plot(x, k_dist.pdf(x, mu=mu, alpha=2.0, beta=2.0), label=f"mu={mu}")
|
||||
|
||||
ax.set_xlabel("x")
|
||||
ax.set_ylabel("PDF")
|
||||
ax.set_title("Generalized K distribution — varying mu (alpha=2.0, beta=2.0 fixed)")
|
||||
ax.legend()
|
||||
ax.set_ylim(bottom=0)
|
||||
fig.tight_layout()
|
||||
|
||||
if save_path:
|
||||
fig.savefig(save_path, dpi=150)
|
||||
return fig
|
||||
|
||||
|
||||
def plot_k_dist_varying_beta(save_path=None):
|
||||
"""Plot generalized K-distribution PDF curves for several values of beta."""
|
||||
beta_values = [0.5, 1.0, 2.0, 4.0, 8.0]
|
||||
x = np.linspace(1e-4, 15.0, 1000)
|
||||
|
||||
fig, ax = plt.subplots(figsize=(8, 5))
|
||||
for beta in beta_values:
|
||||
ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=2.0, beta=beta), label=f"beta={beta}")
|
||||
|
||||
ax.set_xlabel("x")
|
||||
ax.set_ylabel("PDF")
|
||||
ax.set_title("Generalized K distribution — varying beta (mu=2.0, alpha=2.0 fixed)")
|
||||
ax.legend()
|
||||
ax.set_ylim(bottom=0)
|
||||
fig.tight_layout()
|
||||
|
||||
if save_path:
|
||||
fig.savefig(save_path, dpi=150)
|
||||
return fig
|
||||
|
||||
|
||||
# ── Test: plots are generated without errors ─────────────────────────────────
|
||||
|
||||
|
||||
class TestKDistPlots:
|
||||
def test_plot_varying_alpha_runs_without_error(self, tmp_path):
|
||||
"""Curve plot varying alpha must complete and save a file."""
|
||||
out = tmp_path / "k_dist_alpha.png"
|
||||
fig = plot_k_dist_varying_alpha(save_path=str(out))
|
||||
assert out.exists()
|
||||
plt.close(fig)
|
||||
|
||||
def test_plot_varying_mu_runs_without_error(self, tmp_path):
|
||||
"""Curve plot varying mu must complete and save a file."""
|
||||
out = tmp_path / "k_dist_mu.png"
|
||||
fig = plot_k_dist_varying_mu(save_path=str(out))
|
||||
assert out.exists()
|
||||
plt.close(fig)
|
||||
|
||||
def test_plot_varying_beta_runs_without_error(self, tmp_path):
|
||||
"""Curve plot varying beta must complete and save a file."""
|
||||
out = tmp_path / "k_dist_beta.png"
|
||||
fig = plot_k_dist_varying_beta(save_path=str(out))
|
||||
assert out.exists()
|
||||
plt.close(fig)
|
||||
|
||||
|
||||
# ── Entry-point: run plots interactively ─────────────────────────────────────
|
||||
|
||||
if __name__ == "__main__":
|
||||
plot_k_dist_varying_alpha()
|
||||
plot_k_dist_varying_mu()
|
||||
plot_k_dist_varying_beta()
|
||||
plt.show()
|
||||
@@ -6,7 +6,7 @@ import os
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from tools.statistics import aic_statistic
|
||||
from tools.statistics import aic_statistic, bic_statistic
|
||||
from fitting.fitter import Fitter
|
||||
|
||||
|
||||
@@ -110,3 +110,101 @@ class TestAicStatisticInFitter:
|
||||
f.validate(n_mc_samples=99)
|
||||
assert f["gamma"].test_result is not None
|
||||
assert f["expon"].test_result is not None
|
||||
|
||||
|
||||
# ── bic_statistic unit tests ──────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBicStatistic:
|
||||
def _fitted_dist(self, dist, data, **kwargs):
|
||||
"""Return a frozen distribution fitted to data."""
|
||||
params = dist.fit(data, **kwargs)
|
||||
return dist(*params)
|
||||
|
||||
def test_returns_float(self):
|
||||
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
result = bic_statistic(frozen, GAMMA_DATA, axis=0)
|
||||
assert isinstance(float(result), float)
|
||||
|
||||
def test_formula_correct(self):
|
||||
"""BIC = ln(n)*k - 2*log_likelihood."""
|
||||
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
n = len(GAMMA_DATA)
|
||||
k = len(frozen.args)
|
||||
log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
|
||||
expected = np.log(n) * k - 2 * log_likelihood
|
||||
assert pytest.approx(bic_statistic(frozen, GAMMA_DATA, axis=0)) == expected
|
||||
|
||||
def test_penalises_more_parameters(self):
|
||||
"""gamma (3 params) should have higher BIC penalty term than expon (2 params)."""
|
||||
gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)
|
||||
n = len(GAMMA_DATA)
|
||||
assert np.log(n) * len(gamma_frozen.args) > np.log(n) * len(expon_frozen.args)
|
||||
|
||||
def test_better_fit_has_lower_bic(self):
|
||||
"""Gamma fitted to gamma data should have lower BIC than normal fitted to gamma data."""
|
||||
gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
|
||||
bic_gamma = bic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
|
||||
bic_norm = bic_statistic(norm_frozen, GAMMA_DATA, axis=0)
|
||||
assert bic_gamma < bic_norm
|
||||
|
||||
def test_works_with_axis_none(self):
|
||||
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
result = bic_statistic(frozen, GAMMA_DATA, axis=None)
|
||||
assert np.isfinite(result)
|
||||
|
||||
def test_result_is_finite(self):
|
||||
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
|
||||
assert np.isfinite(bic_statistic(frozen, GAMMA_DATA, axis=0))
|
||||
|
||||
|
||||
# ── Integration: bic_statistic as callable in Fitter ─────────────────────────
|
||||
|
||||
|
||||
class TestBicStatisticInFitter:
|
||||
def test_fitter_accepts_bic_callable(self):
|
||||
f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
|
||||
f.fit(GAMMA_DATA)
|
||||
f.validate(n_mc_samples=99)
|
||||
assert f["gamma"].test_result is not None
|
||||
|
||||
def test_fitter_bic_statistic_is_finite(self):
|
||||
f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
|
||||
f.fit(GAMMA_DATA)
|
||||
f.validate(n_mc_samples=99)
|
||||
assert np.isfinite(f["gamma"].gof_statistic)
|
||||
|
||||
def test_fitter_bic_pvalue_in_range(self):
|
||||
f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
|
||||
f.fit(GAMMA_DATA)
|
||||
f.validate(n_mc_samples=99)
|
||||
pval = f["gamma"].pvalue
|
||||
assert 0.0 <= pval <= 1.0
|
||||
|
||||
def test_fitter_bic_vs_ad_different_statistic_values(self):
|
||||
"""BIC and AD statistics should differ numerically."""
|
||||
f_bic = Fitter(
|
||||
[gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}
|
||||
)
|
||||
f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
|
||||
f_bic.fit(GAMMA_DATA)
|
||||
f_ad.fit(GAMMA_DATA)
|
||||
f_bic.validate(n_mc_samples=99)
|
||||
f_ad.validate(n_mc_samples=99)
|
||||
assert f_bic["gamma"].gof_statistic != pytest.approx(
|
||||
f_ad["gamma"].gof_statistic
|
||||
)
|
||||
|
||||
def test_fitter_bic_multiple_distributions(self):
|
||||
f = Fitter(
|
||||
[gamma, expon],
|
||||
statistic_method=bic_statistic,
|
||||
gamma_params={"floc": 0},
|
||||
expon_params={"floc": 0},
|
||||
)
|
||||
f.fit(GAMMA_DATA)
|
||||
f.validate(n_mc_samples=99)
|
||||
assert f["gamma"].test_result is not None
|
||||
assert f["expon"].test_result is not None
|
||||
|
||||
@@ -1,43 +1,51 @@
|
||||
from scipy.stats import rv_continuous
|
||||
from scipy.special import gamma, kv, gammaln
|
||||
from scipy.special import kv, gammaln
|
||||
from scipy.stats._distn_infrastructure import _ShapeInfo
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
class k_gen(rv_continuous):
|
||||
"""K distribution for radar clutter modeling.
|
||||
"""Generalized K distribution for radar clutter modeling.
|
||||
|
||||
The probability density function is::
|
||||
|
||||
f(x; nu, b) = 2/Gamma(nu) * b^((nu+1)/2) * x^((nu-1)/2) * K_{nu-1}(2*sqrt(b*x))
|
||||
f(x; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta))
|
||||
* (alpha*beta/mu)^((alpha+beta)/2)
|
||||
* x^((alpha+beta)/2 - 1)
|
||||
* K_{alpha-beta}(2*sqrt(alpha*beta*x/mu))
|
||||
|
||||
for x >= 0, where K_{nu-1} is the modified Bessel function of the second
|
||||
kind of order nu-1, nu > 0 is the shape parameter and b > 0 is the rate
|
||||
parameter.
|
||||
for x > 0, where K_{alpha-beta} is the modified Bessel function of the
|
||||
second kind of order alpha-beta, mu > 0 is the mean, and alpha > 0,
|
||||
beta > 0 are shape parameters.
|
||||
|
||||
The standard (2-parameter) K distribution is the special case beta=1,
|
||||
with nu=alpha and b=alpha/mu.
|
||||
"""
|
||||
|
||||
def _shape_info(self):
|
||||
return [_ShapeInfo("nu", domain=(0, np.inf), inclusive=(False, True)),
|
||||
_ShapeInfo("b", domain=(0, np.inf), inclusive=(False, True))]
|
||||
return [
|
||||
_ShapeInfo("mu", domain=(0, np.inf), inclusive=(False, True)),
|
||||
_ShapeInfo("alpha", domain=(0, np.inf), inclusive=(True, True)),
|
||||
_ShapeInfo("beta", domain=(0, np.inf), inclusive=(True, True)),
|
||||
]
|
||||
|
||||
def _argcheck(self, mu, alpha, beta):
|
||||
return (mu > 0) & (alpha > 0) & (beta > 0)
|
||||
|
||||
def _argcheck(self, nu, b):
|
||||
return (nu > 0) & (b > 0)
|
||||
def _pdf(self, x, mu, alpha, beta):
|
||||
return np.exp(self._logpdf(x, mu, alpha, beta))
|
||||
|
||||
def _pdf(self, x, nu, b):
|
||||
# k.pdf(x, nu, b) = 2/Gamma(nu) * b^((nu+1)/2) * x^((nu-1)/2) * K_{nu-1}(2*sqrt(b*x))
|
||||
return np.exp(self._logpdf(x, nu, b))
|
||||
|
||||
def _logpdf(self, x, nu, b):
|
||||
def _logpdf(self, x, mu, alpha, beta):
|
||||
z = 2.0 * np.sqrt(alpha * beta * x / mu)
|
||||
return (
|
||||
np.log(2.0)
|
||||
- gammaln(nu)
|
||||
+ (nu + 1) / 2.0 * np.log(b)
|
||||
+ (nu - 1) / 2.0 * np.log(x)
|
||||
+ np.log(kv(nu - 1, 2.0 * np.sqrt(b * x)))
|
||||
- gammaln(alpha)
|
||||
- gammaln(beta)
|
||||
+ (alpha + beta) / 2.0 * np.log(alpha * beta / mu)
|
||||
+ ((alpha + beta) / 2.0 - 1.0) * np.log(x)
|
||||
+ np.log(kv(alpha - beta, z))
|
||||
)
|
||||
|
||||
|
||||
k_dist = k_gen(a=0.0, name="k_distribution", shapes="nu, b")
|
||||
k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta")
|
||||
|
||||
|
||||
@@ -16,3 +16,20 @@ def aic_statistic(dist, data, axis):
|
||||
aic = 2 * k - 2 * log_likelihood
|
||||
|
||||
return aic
|
||||
|
||||
def bic_statistic(dist, data, axis):
|
||||
"""
|
||||
BIC-based goodness-of-fit statistic.
|
||||
|
||||
BIC = ln(n)k - 2ln(L)s
|
||||
|
||||
Lower BIC indicates better fit, but since goodness_of_fit()
|
||||
treats larger statistic values as worse fit, BIC works directly.
|
||||
"""
|
||||
|
||||
n = data.size if axis is None else data.shape[axis]
|
||||
k = len(dist.args)
|
||||
log_likelihood = np.sum(dist.logpdf(data), axis=axis)
|
||||
bic = np.log(n) * k - 2 * log_likelihood
|
||||
|
||||
return bic
|
||||
@@ -27,3 +27,14 @@ build-backend = "uv_build"
|
||||
|
||||
[tool.uv]
|
||||
package = false
|
||||
|
||||
[dependency-groups]
|
||||
dev = [
|
||||
"ruff>=0.15.9",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 120
|
||||
|
||||
[tool.ruff.format]
|
||||
skip-magic-trailing-comma = true
|
||||
8
uv.lock
generated
8
uv.lock
generated
@@ -100,6 +100,11 @@ dependencies = [
|
||||
{ name = "statsmodels" },
|
||||
]
|
||||
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "ruff" },
|
||||
]
|
||||
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "ipykernel" },
|
||||
@@ -117,6 +122,9 @@ requires-dist = [
|
||||
{ name = "statsmodels" },
|
||||
]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [{ name = "ruff", specifier = ">=0.15.9" }]
|
||||
|
||||
[[package]]
|
||||
name = "colorama"
|
||||
version = "0.4.6"
|
||||
|
||||
Reference in New Issue
Block a user