diff --git a/etc/tests/test_distributions.py b/etc/tests/test_distributions.py new file mode 100644 index 0000000..41faed2 --- /dev/null +++ b/etc/tests/test_distributions.py @@ -0,0 +1,174 @@ +import numpy as np +import pytest +import matplotlib.pyplot as plt +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +from tools.distributions import k_dist + + +X = np.linspace(0.01, 10.0, 500) + + +# ── k_dist unit tests ──────────────────────────────────────────────────────── + + +class TestKDistPdf: + def test_pdf_is_positive_for_valid_input(self): + """PDF must be strictly positive for x > 0 and valid parameters.""" + vals = k_dist.pdf(X, mu=1.0, alpha=2.0, beta=2.0) + assert np.all(vals > 0) + + def test_pdf_integrates_to_one(self): + """Numerical integral of PDF over a wide domain should be ≈ 1.""" + x_fine = np.linspace(1e-4, 200.0, 100_000) + integral = np.trapezoid(k_dist.pdf(x_fine, mu=1.0, alpha=2.0, beta=2.0), x_fine) + assert pytest.approx(integral, abs=1e-3) == 1.0 + + def test_mean_equals_mu(self): + """Numerical mean of distribution should match the mu parameter.""" + x_grid = np.linspace(1e-4, 500.0, 200_000) + for mu in [0.5, 1.0, 3.0]: + mean_num = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=mu, alpha=2.0, beta=3.0), x_grid) + assert pytest.approx(mean_num, rel=1e-2) == mu + + def test_logpdf_equals_log_pdf(self): + """logpdf must equal log(pdf) for numerical consistency.""" + x_test = np.linspace(0.1, 5.0, 20) + log_via_pdf = np.log(k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0)) + log_direct = k_dist.logpdf(x_test, mu=1.0, alpha=2.0, beta=3.0) + np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6) + + def test_argcheck_rejects_non_positive_mu(self): + """mu <= 0 must not produce a valid (positive-finite) PDF value.""" + val = k_dist.pdf(1.0, mu=-1.0, alpha=2.0, beta=2.0) + assert not (np.isfinite(val) and val > 0) + + def test_argcheck_rejects_non_positive_alpha(self): + """alpha <= 0 must not produce a valid (positive-finite) PDF value.""" + val = k_dist.pdf(1.0, mu=1.0, alpha=-1.0, beta=2.0) + assert not (np.isfinite(val) and val > 0) + + def test_argcheck_rejects_non_positive_beta(self): + """beta <= 0 must not produce a valid (positive-finite) PDF value.""" + val = k_dist.pdf(1.0, mu=1.0, alpha=2.0, beta=-1.0) + assert not (np.isfinite(val) and val > 0) + + def test_larger_alpha_shifts_mass_right(self): + """Increasing alpha (with mu and beta fixed) shifts probability mass to the right.""" + x_grid = np.linspace(1e-4, 50.0, 20_000) + mean_low = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=0.5, beta=2.0), x_grid) + mean_high = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=4.0, beta=2.0), x_grid) + # Both should be close to mu=2.0; variance changes but mean is fixed + assert pytest.approx(mean_low, rel=5e-2) == 2.0 + assert pytest.approx(mean_high, rel=5e-2) == 2.0 + + def test_symmetry_in_alpha_beta(self): + """PDF is symmetric in alpha and beta: swapping them gives the same PDF.""" + x_test = np.linspace(0.1, 5.0, 20) + pdf_ab = k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0) + pdf_ba = k_dist.pdf(x_test, mu=1.0, alpha=3.0, beta=2.0) + np.testing.assert_allclose(pdf_ab, pdf_ba, rtol=1e-6) + + +# ── Parametric curve plots ─────────────────────────────────────────────────── + + +def plot_k_dist_varying_alpha(save_path=None): + """Plot generalized K-distribution PDF curves for several values of alpha.""" + alpha_values = [0.5, 1.0, 2.0, 4.0, 8.0] + x = np.linspace(1e-4, 15.0, 1000) + + fig, ax = plt.subplots(figsize=(8, 5)) + for alpha in alpha_values: + ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=alpha, beta=2.0), label=f"alpha={alpha}") + + ax.set_xlabel("x") + ax.set_ylabel("PDF") + ax.set_title("Generalized K distribution — varying alpha (mu=2.0, beta=2.0 fixed)") + ax.legend() + ax.set_ylim(bottom=0) + fig.tight_layout() + + if save_path: + fig.savefig(save_path, dpi=150) + return fig + + +def plot_k_dist_varying_mu(save_path=None): + """Plot generalized K-distribution PDF curves for several values of mu.""" + mu_values = [0.5, 1.0, 2.0, 4.0, 8.0] + x = np.linspace(1e-4, 30.0, 1000) + + fig, ax = plt.subplots(figsize=(8, 5)) + for mu in mu_values: + ax.plot(x, k_dist.pdf(x, mu=mu, alpha=2.0, beta=2.0), label=f"mu={mu}") + + ax.set_xlabel("x") + ax.set_ylabel("PDF") + ax.set_title("Generalized K distribution — varying mu (alpha=2.0, beta=2.0 fixed)") + ax.legend() + ax.set_ylim(bottom=0) + fig.tight_layout() + + if save_path: + fig.savefig(save_path, dpi=150) + return fig + + +def plot_k_dist_varying_beta(save_path=None): + """Plot generalized K-distribution PDF curves for several values of beta.""" + beta_values = [0.5, 1.0, 2.0, 4.0, 8.0] + x = np.linspace(1e-4, 15.0, 1000) + + fig, ax = plt.subplots(figsize=(8, 5)) + for beta in beta_values: + ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=2.0, beta=beta), label=f"beta={beta}") + + ax.set_xlabel("x") + ax.set_ylabel("PDF") + ax.set_title("Generalized K distribution — varying beta (mu=2.0, alpha=2.0 fixed)") + ax.legend() + ax.set_ylim(bottom=0) + fig.tight_layout() + + if save_path: + fig.savefig(save_path, dpi=150) + return fig + + +# ── Test: plots are generated without errors ───────────────────────────────── + + +class TestKDistPlots: + def test_plot_varying_alpha_runs_without_error(self, tmp_path): + """Curve plot varying alpha must complete and save a file.""" + out = tmp_path / "k_dist_alpha.png" + fig = plot_k_dist_varying_alpha(save_path=str(out)) + assert out.exists() + plt.close(fig) + + def test_plot_varying_mu_runs_without_error(self, tmp_path): + """Curve plot varying mu must complete and save a file.""" + out = tmp_path / "k_dist_mu.png" + fig = plot_k_dist_varying_mu(save_path=str(out)) + assert out.exists() + plt.close(fig) + + def test_plot_varying_beta_runs_without_error(self, tmp_path): + """Curve plot varying beta must complete and save a file.""" + out = tmp_path / "k_dist_beta.png" + fig = plot_k_dist_varying_beta(save_path=str(out)) + assert out.exists() + plt.close(fig) + + +# ── Entry-point: run plots interactively ───────────────────────────────────── + +if __name__ == "__main__": + plot_k_dist_varying_alpha() + plot_k_dist_varying_mu() + plot_k_dist_varying_beta() + plt.show() diff --git a/etc/tests/test_statistics.py b/etc/tests/test_statistics.py index 7be7c54..96af920 100644 --- a/etc/tests/test_statistics.py +++ b/etc/tests/test_statistics.py @@ -6,7 +6,7 @@ import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from tools.statistics import aic_statistic +from tools.statistics import aic_statistic, bic_statistic from fitting.fitter import Fitter @@ -110,3 +110,101 @@ class TestAicStatisticInFitter: f.validate(n_mc_samples=99) assert f["gamma"].test_result is not None assert f["expon"].test_result is not None + + +# ── bic_statistic unit tests ────────────────────────────────────────────────── + + +class TestBicStatistic: + def _fitted_dist(self, dist, data, **kwargs): + """Return a frozen distribution fitted to data.""" + params = dist.fit(data, **kwargs) + return dist(*params) + + def test_returns_float(self): + frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + result = bic_statistic(frozen, GAMMA_DATA, axis=0) + assert isinstance(float(result), float) + + def test_formula_correct(self): + """BIC = ln(n)*k - 2*log_likelihood.""" + frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + n = len(GAMMA_DATA) + k = len(frozen.args) + log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0) + expected = np.log(n) * k - 2 * log_likelihood + assert pytest.approx(bic_statistic(frozen, GAMMA_DATA, axis=0)) == expected + + def test_penalises_more_parameters(self): + """gamma (3 params) should have higher BIC penalty term than expon (2 params).""" + gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0) + n = len(GAMMA_DATA) + assert np.log(n) * len(gamma_frozen.args) > np.log(n) * len(expon_frozen.args) + + def test_better_fit_has_lower_bic(self): + """Gamma fitted to gamma data should have lower BIC than normal fitted to gamma data.""" + gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + norm_frozen = self._fitted_dist(norm, GAMMA_DATA) + bic_gamma = bic_statistic(gamma_frozen, GAMMA_DATA, axis=0) + bic_norm = bic_statistic(norm_frozen, GAMMA_DATA, axis=0) + assert bic_gamma < bic_norm + + def test_works_with_axis_none(self): + frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + result = bic_statistic(frozen, GAMMA_DATA, axis=None) + assert np.isfinite(result) + + def test_result_is_finite(self): + frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0) + assert np.isfinite(bic_statistic(frozen, GAMMA_DATA, axis=0)) + + +# ── Integration: bic_statistic as callable in Fitter ───────────────────────── + + +class TestBicStatisticInFitter: + def test_fitter_accepts_bic_callable(self): + f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}) + f.fit(GAMMA_DATA) + f.validate(n_mc_samples=99) + assert f["gamma"].test_result is not None + + def test_fitter_bic_statistic_is_finite(self): + f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}) + f.fit(GAMMA_DATA) + f.validate(n_mc_samples=99) + assert np.isfinite(f["gamma"].gof_statistic) + + def test_fitter_bic_pvalue_in_range(self): + f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}) + f.fit(GAMMA_DATA) + f.validate(n_mc_samples=99) + pval = f["gamma"].pvalue + assert 0.0 <= pval <= 1.0 + + def test_fitter_bic_vs_ad_different_statistic_values(self): + """BIC and AD statistics should differ numerically.""" + f_bic = Fitter( + [gamma], statistic_method=bic_statistic, gamma_params={"floc": 0} + ) + f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0}) + f_bic.fit(GAMMA_DATA) + f_ad.fit(GAMMA_DATA) + f_bic.validate(n_mc_samples=99) + f_ad.validate(n_mc_samples=99) + assert f_bic["gamma"].gof_statistic != pytest.approx( + f_ad["gamma"].gof_statistic + ) + + def test_fitter_bic_multiple_distributions(self): + f = Fitter( + [gamma, expon], + statistic_method=bic_statistic, + gamma_params={"floc": 0}, + expon_params={"floc": 0}, + ) + f.fit(GAMMA_DATA) + f.validate(n_mc_samples=99) + assert f["gamma"].test_result is not None + assert f["expon"].test_result is not None diff --git a/etc/tools/distributions.py b/etc/tools/distributions.py index 0de918b..701b4b2 100644 --- a/etc/tools/distributions.py +++ b/etc/tools/distributions.py @@ -1,43 +1,51 @@ from scipy.stats import rv_continuous -from scipy.special import gamma, kv, gammaln +from scipy.special import kv, gammaln from scipy.stats._distn_infrastructure import _ShapeInfo import numpy as np -import pandas as pd class k_gen(rv_continuous): - """K distribution for radar clutter modeling. + """Generalized K distribution for radar clutter modeling. The probability density function is:: - f(x; nu, b) = 2/Gamma(nu) * b^((nu+1)/2) * x^((nu-1)/2) * K_{nu-1}(2*sqrt(b*x)) + f(x; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta)) + * (alpha*beta/mu)^((alpha+beta)/2) + * x^((alpha+beta)/2 - 1) + * K_{alpha-beta}(2*sqrt(alpha*beta*x/mu)) - for x >= 0, where K_{nu-1} is the modified Bessel function of the second - kind of order nu-1, nu > 0 is the shape parameter and b > 0 is the rate - parameter. + for x > 0, where K_{alpha-beta} is the modified Bessel function of the + second kind of order alpha-beta, mu > 0 is the mean, and alpha > 0, + beta > 0 are shape parameters. + + The standard (2-parameter) K distribution is the special case beta=1, + with nu=alpha and b=alpha/mu. """ def _shape_info(self): - return [_ShapeInfo("nu", domain=(0, np.inf), inclusive=(False, True)), - _ShapeInfo("b", domain=(0, np.inf), inclusive=(False, True))] + return [ + _ShapeInfo("mu", domain=(0, np.inf), inclusive=(False, True)), + _ShapeInfo("alpha", domain=(0, np.inf), inclusive=(True, True)), + _ShapeInfo("beta", domain=(0, np.inf), inclusive=(True, True)), + ] + def _argcheck(self, mu, alpha, beta): + return (mu > 0) & (alpha > 0) & (beta > 0) - def _argcheck(self, nu, b): - return (nu > 0) & (b > 0) + def _pdf(self, x, mu, alpha, beta): + return np.exp(self._logpdf(x, mu, alpha, beta)) - def _pdf(self, x, nu, b): - # k.pdf(x, nu, b) = 2/Gamma(nu) * b^((nu+1)/2) * x^((nu-1)/2) * K_{nu-1}(2*sqrt(b*x)) - return np.exp(self._logpdf(x, nu, b)) - - def _logpdf(self, x, nu, b): + def _logpdf(self, x, mu, alpha, beta): + z = 2.0 * np.sqrt(alpha * beta * x / mu) return ( np.log(2.0) - - gammaln(nu) - + (nu + 1) / 2.0 * np.log(b) - + (nu - 1) / 2.0 * np.log(x) - + np.log(kv(nu - 1, 2.0 * np.sqrt(b * x))) + - gammaln(alpha) + - gammaln(beta) + + (alpha + beta) / 2.0 * np.log(alpha * beta / mu) + + ((alpha + beta) / 2.0 - 1.0) * np.log(x) + + np.log(kv(alpha - beta, z)) ) -k_dist = k_gen(a=0.0, name="k_distribution", shapes="nu, b") +k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta") diff --git a/etc/tools/statistics.py b/etc/tools/statistics.py index 0c53175..543626b 100644 --- a/etc/tools/statistics.py +++ b/etc/tools/statistics.py @@ -16,3 +16,20 @@ def aic_statistic(dist, data, axis): aic = 2 * k - 2 * log_likelihood return aic + +def bic_statistic(dist, data, axis): + """ + BIC-based goodness-of-fit statistic. + + BIC = ln(n)k - 2ln(L)s + + Lower BIC indicates better fit, but since goodness_of_fit() + treats larger statistic values as worse fit, BIC works directly. + """ + + n = data.size if axis is None else data.shape[axis] + k = len(dist.args) + log_likelihood = np.sum(dist.logpdf(data), axis=axis) + bic = np.log(n) * k - 2 * log_likelihood + + return bic \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a39271a..c67a050 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,3 +27,14 @@ build-backend = "uv_build" [tool.uv] package = false + +[dependency-groups] +dev = [ + "ruff>=0.15.9", +] + +[tool.ruff] +line-length = 120 + +[tool.ruff.format] +skip-magic-trailing-comma = true \ No newline at end of file diff --git a/uv.lock b/uv.lock index 73784c4..88a0c56 100644 --- a/uv.lock +++ b/uv.lock @@ -100,6 +100,11 @@ dependencies = [ { name = "statsmodels" }, ] +[package.dev-dependencies] +dev = [ + { name = "ruff" }, +] + [package.metadata] requires-dist = [ { name = "ipykernel" }, @@ -117,6 +122,9 @@ requires-dist = [ { name = "statsmodels" }, ] +[package.metadata.requires-dev] +dev = [{ name = "ruff", specifier = ">=0.15.9" }] + [[package]] name = "colorama" version = "0.4.6"