ADD:

ruff for code formatting BIC statistic AND BIC test implemented test_distributions.py for test new created dists with pytest REFACTOR: k_gen pdf changed from 2 params to generalized
2026-04-16 11:52:44 -03:00
parent 9aa97fc3d4
commit d07590e73d
6 changed files with 338 additions and 22 deletions
--- a/etc/tests/test_distributions.py
+++ b/etc/tests/test_distributions.py
@@ -0,0 +1,174 @@
+import numpy as np
+import pytest
+import matplotlib.pyplot as plt
+import sys
+import os
+
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
+
+from tools.distributions import k_dist
+
+
+X = np.linspace(0.01, 10.0, 500)
+
+
+# ── k_dist unit tests ────────────────────────────────────────────────────────
+
+
+class TestKDistPdf:
+    def test_pdf_is_positive_for_valid_input(self):
+        """PDF must be strictly positive for x > 0 and valid parameters."""
+        vals = k_dist.pdf(X, mu=1.0, alpha=2.0, beta=2.0)
+        assert np.all(vals > 0)
+
+    def test_pdf_integrates_to_one(self):
+        """Numerical integral of PDF over a wide domain should be ≈ 1."""
+        x_fine = np.linspace(1e-4, 200.0, 100_000)
+        integral = np.trapezoid(k_dist.pdf(x_fine, mu=1.0, alpha=2.0, beta=2.0), x_fine)
+        assert pytest.approx(integral, abs=1e-3) == 1.0
+
+    def test_mean_equals_mu(self):
+        """Numerical mean of distribution should match the mu parameter."""
+        x_grid = np.linspace(1e-4, 500.0, 200_000)
+        for mu in [0.5, 1.0, 3.0]:
+            mean_num = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=mu, alpha=2.0, beta=3.0), x_grid)
+            assert pytest.approx(mean_num, rel=1e-2) == mu
+
+    def test_logpdf_equals_log_pdf(self):
+        """logpdf must equal log(pdf) for numerical consistency."""
+        x_test = np.linspace(0.1, 5.0, 20)
+        log_via_pdf = np.log(k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0))
+        log_direct = k_dist.logpdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
+        np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
+
+    def test_argcheck_rejects_non_positive_mu(self):
+        """mu <= 0 must not produce a valid (positive-finite) PDF value."""
+        val = k_dist.pdf(1.0, mu=-1.0, alpha=2.0, beta=2.0)
+        assert not (np.isfinite(val) and val > 0)
+
+    def test_argcheck_rejects_non_positive_alpha(self):
+        """alpha <= 0 must not produce a valid (positive-finite) PDF value."""
+        val = k_dist.pdf(1.0, mu=1.0, alpha=-1.0, beta=2.0)
+        assert not (np.isfinite(val) and val > 0)
+
+    def test_argcheck_rejects_non_positive_beta(self):
+        """beta <= 0 must not produce a valid (positive-finite) PDF value."""
+        val = k_dist.pdf(1.0, mu=1.0, alpha=2.0, beta=-1.0)
+        assert not (np.isfinite(val) and val > 0)
+
+    def test_larger_alpha_shifts_mass_right(self):
+        """Increasing alpha (with mu and beta fixed) shifts probability mass to the right."""
+        x_grid = np.linspace(1e-4, 50.0, 20_000)
+        mean_low = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=0.5, beta=2.0), x_grid)
+        mean_high = np.trapezoid(x_grid * k_dist.pdf(x_grid, mu=2.0, alpha=4.0, beta=2.0), x_grid)
+        # Both should be close to mu=2.0; variance changes but mean is fixed
+        assert pytest.approx(mean_low, rel=5e-2) == 2.0
+        assert pytest.approx(mean_high, rel=5e-2) == 2.0
+
+    def test_symmetry_in_alpha_beta(self):
+        """PDF is symmetric in alpha and beta: swapping them gives the same PDF."""
+        x_test = np.linspace(0.1, 5.0, 20)
+        pdf_ab = k_dist.pdf(x_test, mu=1.0, alpha=2.0, beta=3.0)
+        pdf_ba = k_dist.pdf(x_test, mu=1.0, alpha=3.0, beta=2.0)
+        np.testing.assert_allclose(pdf_ab, pdf_ba, rtol=1e-6)
+
+
+# ── Parametric curve plots ───────────────────────────────────────────────────
+
+
+def plot_k_dist_varying_alpha(save_path=None):
+    """Plot generalized K-distribution PDF curves for several values of alpha."""
+    alpha_values = [0.5, 1.0, 2.0, 4.0, 8.0]
+    x = np.linspace(1e-4, 15.0, 1000)
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    for alpha in alpha_values:
+        ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=alpha, beta=2.0), label=f"alpha={alpha}")
+
+    ax.set_xlabel("x")
+    ax.set_ylabel("PDF")
+    ax.set_title("Generalized K distribution — varying alpha  (mu=2.0, beta=2.0 fixed)")
+    ax.legend()
+    ax.set_ylim(bottom=0)
+    fig.tight_layout()
+
+    if save_path:
+        fig.savefig(save_path, dpi=150)
+    return fig
+
+
+def plot_k_dist_varying_mu(save_path=None):
+    """Plot generalized K-distribution PDF curves for several values of mu."""
+    mu_values = [0.5, 1.0, 2.0, 4.0, 8.0]
+    x = np.linspace(1e-4, 30.0, 1000)
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    for mu in mu_values:
+        ax.plot(x, k_dist.pdf(x, mu=mu, alpha=2.0, beta=2.0), label=f"mu={mu}")
+
+    ax.set_xlabel("x")
+    ax.set_ylabel("PDF")
+    ax.set_title("Generalized K distribution — varying mu  (alpha=2.0, beta=2.0 fixed)")
+    ax.legend()
+    ax.set_ylim(bottom=0)
+    fig.tight_layout()
+
+    if save_path:
+        fig.savefig(save_path, dpi=150)
+    return fig
+
+
+def plot_k_dist_varying_beta(save_path=None):
+    """Plot generalized K-distribution PDF curves for several values of beta."""
+    beta_values = [0.5, 1.0, 2.0, 4.0, 8.0]
+    x = np.linspace(1e-4, 15.0, 1000)
+
+    fig, ax = plt.subplots(figsize=(8, 5))
+    for beta in beta_values:
+        ax.plot(x, k_dist.pdf(x, mu=2.0, alpha=2.0, beta=beta), label=f"beta={beta}")
+
+    ax.set_xlabel("x")
+    ax.set_ylabel("PDF")
+    ax.set_title("Generalized K distribution — varying beta  (mu=2.0, alpha=2.0 fixed)")
+    ax.legend()
+    ax.set_ylim(bottom=0)
+    fig.tight_layout()
+
+    if save_path:
+        fig.savefig(save_path, dpi=150)
+    return fig
+
+
+# ── Test: plots are generated without errors ─────────────────────────────────
+
+
+class TestKDistPlots:
+    def test_plot_varying_alpha_runs_without_error(self, tmp_path):
+        """Curve plot varying alpha must complete and save a file."""
+        out = tmp_path / "k_dist_alpha.png"
+        fig = plot_k_dist_varying_alpha(save_path=str(out))
+        assert out.exists()
+        plt.close(fig)
+
+    def test_plot_varying_mu_runs_without_error(self, tmp_path):
+        """Curve plot varying mu must complete and save a file."""
+        out = tmp_path / "k_dist_mu.png"
+        fig = plot_k_dist_varying_mu(save_path=str(out))
+        assert out.exists()
+        plt.close(fig)
+
+    def test_plot_varying_beta_runs_without_error(self, tmp_path):
+        """Curve plot varying beta must complete and save a file."""
+        out = tmp_path / "k_dist_beta.png"
+        fig = plot_k_dist_varying_beta(save_path=str(out))
+        assert out.exists()
+        plt.close(fig)
+
+
+# ── Entry-point: run plots interactively ─────────────────────────────────────
+
+if __name__ == "__main__":
+    plot_k_dist_varying_alpha()
+    plot_k_dist_varying_mu()
+    plot_k_dist_varying_beta()
+    plt.show()
--- a/etc/tests/test_statistics.py
+++ b/etc/tests/test_statistics.py
@@ -6,7 +6,7 @@ import os

 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

-from tools.statistics import aic_statistic
+from tools.statistics import aic_statistic, bic_statistic
 from fitting.fitter import Fitter


@@ -110,3 +110,101 @@ class TestAicStatisticInFitter:
        f.validate(n_mc_samples=99)
        assert f["gamma"].test_result is not None
        assert f["expon"].test_result is not None
+
+
+# ── bic_statistic unit tests ──────────────────────────────────────────────────
+
+
+class TestBicStatistic:
+    def _fitted_dist(self, dist, data, **kwargs):
+        """Return a frozen distribution fitted to data."""
+        params = dist.fit(data, **kwargs)
+        return dist(*params)
+
+    def test_returns_float(self):
+        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        result = bic_statistic(frozen, GAMMA_DATA, axis=0)
+        assert isinstance(float(result), float)
+
+    def test_formula_correct(self):
+        """BIC = ln(n)*k - 2*log_likelihood."""
+        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        n = len(GAMMA_DATA)
+        k = len(frozen.args)
+        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
+        expected = np.log(n) * k - 2 * log_likelihood
+        assert pytest.approx(bic_statistic(frozen, GAMMA_DATA, axis=0)) == expected
+
+    def test_penalises_more_parameters(self):
+        """gamma (3 params) should have higher BIC penalty term than expon (2 params)."""
+        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)
+        n = len(GAMMA_DATA)
+        assert np.log(n) * len(gamma_frozen.args) > np.log(n) * len(expon_frozen.args)
+
+    def test_better_fit_has_lower_bic(self):
+        """Gamma fitted to gamma data should have lower BIC than normal fitted to gamma data."""
+        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
+        bic_gamma = bic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
+        bic_norm = bic_statistic(norm_frozen, GAMMA_DATA, axis=0)
+        assert bic_gamma < bic_norm
+
+    def test_works_with_axis_none(self):
+        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        result = bic_statistic(frozen, GAMMA_DATA, axis=None)
+        assert np.isfinite(result)
+
+    def test_result_is_finite(self):
+        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
+        assert np.isfinite(bic_statistic(frozen, GAMMA_DATA, axis=0))
+
+
+# ── Integration: bic_statistic as callable in Fitter ─────────────────────────
+
+
+class TestBicStatisticInFitter:
+    def test_fitter_accepts_bic_callable(self):
+        f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
+        f.fit(GAMMA_DATA)
+        f.validate(n_mc_samples=99)
+        assert f["gamma"].test_result is not None
+
+    def test_fitter_bic_statistic_is_finite(self):
+        f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
+        f.fit(GAMMA_DATA)
+        f.validate(n_mc_samples=99)
+        assert np.isfinite(f["gamma"].gof_statistic)
+
+    def test_fitter_bic_pvalue_in_range(self):
+        f = Fitter([gamma], statistic_method=bic_statistic, gamma_params={"floc": 0})
+        f.fit(GAMMA_DATA)
+        f.validate(n_mc_samples=99)
+        pval = f["gamma"].pvalue
+        assert 0.0 <= pval <= 1.0
+
+    def test_fitter_bic_vs_ad_different_statistic_values(self):
+        """BIC and AD statistics should differ numerically."""
+        f_bic = Fitter(
+            [gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}
+        )
+        f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
+        f_bic.fit(GAMMA_DATA)
+        f_ad.fit(GAMMA_DATA)
+        f_bic.validate(n_mc_samples=99)
+        f_ad.validate(n_mc_samples=99)
+        assert f_bic["gamma"].gof_statistic != pytest.approx(
+            f_ad["gamma"].gof_statistic
+        )
+
+    def test_fitter_bic_multiple_distributions(self):
+        f = Fitter(
+            [gamma, expon],
+            statistic_method=bic_statistic,
+            gamma_params={"floc": 0},
+            expon_params={"floc": 0},
+        )
+        f.fit(GAMMA_DATA)
+        f.validate(n_mc_samples=99)
+        assert f["gamma"].test_result is not None
+        assert f["expon"].test_result is not None