feat(distributions): add lognakagami, loggamma, and kl_statistic

Implement two new scipy-compatible distributions: Log-Nakagami
(lognakagami) and Log-Gamma (loggamma_dist), with complete
logpdf/cdf/ppf/stats/entropy/rvs methods derived from the
change-of-variable Y = ln(X).

Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable
compatible with the Fitter class. Extend k_gen with an analytic _stats
(faster than the generic numeric moments), a quad-based _cdf, and a fit
guard that pins loc=0 and scale=1, and switch kv → kve to improve
numerical stability at large arguments.

Add unit tests for all three additions covering normalization,
monotonicity, ppf inversion, moment formulas, and Fitter integration.
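
For reference, the kv → kve switch relies on scipy's scaled Bessel
function, kve(v, z) = exp(z) * kv(v, z); the log-density then carries an
explicit -z term instead of taking the log of an underflowing kv. A
minimal sketch of the difference:

    import numpy as np
    from scipy.special import kv, kve

    v, z = 1.5, 800.0
    np.log(kv(v, z))        # -inf: kv underflows to 0.0 at large z
    np.log(kve(v, z)) - z   # ≈ -803.1, finite and accurate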
commit 9a3f5959cd (parent d07590e73d)
2026-04-26 23:17:22 -03:00
5 changed files with 600 additions and 7 deletions

.gitignore

@@ -35,3 +35,4 @@ data/*
etc/tools/read_raw_data.py
scripts/separate_data.py


@@ -1,12 +1,13 @@
import numpy as np
import pytest
import scipy.special as sc
import matplotlib.pyplot as plt
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from tools.distributions import k_dist, lognakagami, loggamma_dist
X = np.linspace(0.01, 10.0, 500)
@@ -167,6 +168,178 @@ class TestKDistPlots:
# ── Entry-point: run plots interactively ─────────────────────────────────────
Y = np.linspace(-5.0, 5.0, 500)
# ── lognakagami unit tests ────────────────────────────────────────────────────
class TestLogNakagami:
def test_logpdf_is_finite_on_real_line(self):
"""logpdf must be finite for all real y — tests positivity without float64 underflow."""
log_vals = lognakagami.logpdf(Y, m=2.0, Omega=1.0)
assert np.all(np.isfinite(log_vals))
def test_pdf_integrates_to_one(self):
"""Numerical integral of PDF over the real line should be ≈ 1."""
y_fine = np.linspace(-30, 10, 200_000)
integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=1.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_pdf_integrates_to_one_nonunit_omega(self):
"""Normalisation must hold for Omega != 1."""
y_fine = np.linspace(-30, 15, 200_000)
integral = np.trapezoid(lognakagami.pdf(y_fine, m=2.0, Omega=4.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_logpdf_equals_log_pdf(self):
"""logpdf must equal log(pdf) at points where pdf does not underflow."""
y_bulk = np.linspace(-4.0, 2.0, 50)
log_via_pdf = np.log(lognakagami.pdf(y_bulk, m=2.0, Omega=1.0))
log_direct = lognakagami.logpdf(y_bulk, m=2.0, Omega=1.0)
np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
def test_cdf_is_monotone_increasing(self):
"""CDF must be strictly non-decreasing."""
cdf_vals = lognakagami.cdf(Y, m=2.0, Omega=1.0)
assert np.all(np.diff(cdf_vals) >= 0)
def test_ppf_inverts_cdf(self):
"""ppf(cdf(y)) must recover y."""
y_test = np.array([-2.0, 0.0, 0.5])
cdf_vals = lognakagami.cdf(y_test, m=2.0, Omega=1.0)
np.testing.assert_allclose(lognakagami.ppf(cdf_vals, m=2.0, Omega=1.0), y_test, atol=1e-8)
def test_argcheck_rejects_m_below_half(self):
"""m < 0.5 must not produce a valid (positive-finite) PDF value."""
val = lognakagami.pdf(0.0, m=0.3, Omega=1.0)
assert not (np.isfinite(val) and val > 0)
def test_argcheck_rejects_non_positive_omega(self):
"""Omega <= 0 must not produce a valid (positive-finite) PDF value."""
val = lognakagami.pdf(0.0, m=2.0, Omega=-1.0)
assert not (np.isfinite(val) and val > 0)
def test_stats_mean(self):
"""Analytical mean must equal 0.5 * (digamma(m) - log(m) + log(Omega))."""
m, Omega = 3.0, 2.0
expected_mean = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
dist_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
assert pytest.approx(dist_mean, rel=1e-10) == expected_mean
def test_stats_mean_omega_shifts_by_half_log_omega(self):
"""Changing Omega shifts the mean by 0.5*log(Omega) and leaves variance unchanged."""
m = 2.0
mean1 = float(lognakagami.stats(m=m, Omega=1.0, moments="m"))
mean4 = float(lognakagami.stats(m=m, Omega=4.0, moments="m"))
assert pytest.approx(mean4 - mean1, rel=1e-10) == 0.5 * np.log(4.0)
def test_stats_variance_independent_of_omega(self):
"""Variance must equal 0.25 * polygamma(1, m) and not depend on Omega."""
m = 3.0
expected_var = 0.25 * sc.polygamma(1, m)
for Omega in [0.5, 1.0, 4.0]:
_, dist_var, *_ = lognakagami.stats(m=m, Omega=Omega, moments="mv")
assert pytest.approx(float(dist_var), rel=1e-10) == expected_var
def test_rvs_samples_are_finite(self):
"""Random samples must be finite real numbers."""
rng = np.random.default_rng(42)
samples = lognakagami.rvs(m=2.0, Omega=1.0, size=200, random_state=rng)
assert samples.shape == (200,)
assert np.all(np.isfinite(samples))
def test_rvs_sample_mean_near_expected(self):
"""Sample mean of many RVS should be close to the distribution mean."""
m, Omega = 2.0, 3.0
rng = np.random.default_rng(0)
samples = lognakagami.rvs(m=m, Omega=Omega, size=50_000, random_state=rng)
expected_mean = float(lognakagami.stats(m=m, Omega=Omega, moments="m"))
assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean
# ── loggamma_dist unit tests ──────────────────────────────────────────────────
class TestLogGamma:
def test_pdf_is_positive_on_real_line(self):
"""PDF must be strictly positive for all real y and a > 0."""
vals = loggamma_dist.pdf(Y, a=2.0)
assert np.all(vals > 0)
def test_pdf_integrates_to_one(self):
"""Numerical integral of PDF over the real line should be ≈ 1."""
y_fine = np.linspace(-30, 10, 200_000)
integral = np.trapezoid(loggamma_dist.pdf(y_fine, a=2.0), y_fine)
assert pytest.approx(integral, abs=1e-3) == 1.0
def test_logpdf_equals_log_pdf(self):
"""logpdf must equal log(pdf) for numerical consistency."""
log_via_pdf = np.log(loggamma_dist.pdf(Y, a=2.0))
log_direct = loggamma_dist.logpdf(Y, a=2.0)
np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6)
def test_cdf_is_monotone_increasing(self):
"""CDF must be strictly non-decreasing."""
cdf_vals = loggamma_dist.cdf(Y, a=2.0)
assert np.all(np.diff(cdf_vals) >= 0)
def test_cdf_and_sf_sum_to_one(self):
"""CDF + SF must equal 1 at every point."""
cdf_vals = loggamma_dist.cdf(Y, a=2.0)
sf_vals = loggamma_dist.sf(Y, a=2.0)
np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12)
def test_ppf_inverts_cdf(self):
"""ppf(cdf(y)) must recover y."""
y_test = np.array([-2.0, 0.0, 1.0])
cdf_vals = loggamma_dist.cdf(y_test, a=2.0)
np.testing.assert_allclose(loggamma_dist.ppf(cdf_vals, a=2.0), y_test, atol=1e-8)
def test_argcheck_rejects_non_positive_a(self):
"""a <= 0 must not produce a valid (positive-finite) PDF value."""
val = loggamma_dist.pdf(0.0, a=-1.0)
assert not (np.isfinite(val) and val > 0)
def test_stats_mean_equals_digamma(self):
"""Analytical mean must equal digamma(a)."""
a = 3.0
expected_mean = sc.digamma(a)
dist_mean = float(loggamma_dist.stats(a=a, moments="m"))
assert pytest.approx(dist_mean, rel=1e-10) == expected_mean
def test_stats_variance_equals_trigamma(self):
"""Analytical variance must equal polygamma(1, a)."""
a = 3.0
expected_var = sc.polygamma(1, a)
_, dist_var, *_ = loggamma_dist.stats(a=a, moments="mv")
assert pytest.approx(float(dist_var), rel=1e-10) == expected_var
def test_log_transform_relation_to_gamma(self):
"""loggamma_dist.pdf(y) must equal gamma.pdf(exp(y)) * exp(y) (change-of-variable)."""
from scipy.stats import gamma as scipy_gamma
y_test = np.linspace(-3.0, 3.0, 20)
direct = loggamma_dist.pdf(y_test, a=2.0)
via_gamma = scipy_gamma.pdf(np.exp(y_test), a=2.0) * np.exp(y_test)
np.testing.assert_allclose(direct, via_gamma, rtol=1e-6)
def test_rvs_samples_are_finite(self):
"""Random samples must be finite real numbers."""
rng = np.random.default_rng(42)
samples = loggamma_dist.rvs(a=2.0, size=200, random_state=rng)
assert samples.shape == (200,)
assert np.all(np.isfinite(samples))
def test_rvs_sample_mean_near_expected(self):
"""Sample mean of many RVS should be close to the distribution mean."""
a = 2.0
rng = np.random.default_rng(0)
samples = loggamma_dist.rvs(a=a, size=50_000, random_state=rng)
expected_mean = float(loggamma_dist.stats(a=a, moments="m"))
assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean
if __name__ == "__main__":
plot_k_dist_varying_alpha()
plot_k_dist_varying_mu()


@@ -6,7 +6,7 @@ import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from tools.statistics import aic_statistic, bic_statistic, kl_statistic
from fitting.fitter import Fitter
@@ -208,3 +208,129 @@ class TestBicStatisticInFitter:
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
assert f["expon"].test_result is not None
# ── kl_statistic unit tests ───────────────────────────────────────────────────
class TestKlStatistic:
def _fitted_dist(self, dist, data, **kwargs):
"""Return a frozen distribution fitted to data."""
params = dist.fit(data, **kwargs)
return dist(*params)
def test_returns_float(self):
"""kl_statistic must return a numeric scalar."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert isinstance(float(result), float)
def test_returns_real_value(self):
"""kl_statistic returns a real number (KDE finite-sum approximation can be negative)."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert np.isreal(result)
def test_result_is_finite(self):
"""kl_statistic must return a finite value for valid input."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
assert np.isfinite(kl_statistic(frozen, GAMMA_DATA, axis=None))
def test_works_with_axis_zero(self):
"""kl_statistic must return a finite value when axis=0."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result = kl_statistic(frozen, GAMMA_DATA, axis=0)
assert np.isfinite(result)
def test_axis_zero_same_as_axis_none_for_1d(self):
"""For 1-D data, axis=0 and axis=None must return the same value."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
result_none = kl_statistic(frozen, GAMMA_DATA, axis=None)
result_axis0 = kl_statistic(frozen, GAMMA_DATA, axis=0)
assert pytest.approx(result_none) == result_axis0
def test_better_fit_has_lower_kl(self):
"""Gamma fitted to gamma data should have lower KL than normal fitted to gamma data."""
gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
kl_gamma = kl_statistic(gamma_frozen, GAMMA_DATA, axis=None)
kl_norm = kl_statistic(norm_frozen, GAMMA_DATA, axis=None)
assert kl_gamma < kl_norm
def test_matches_manual_formula(self):
"""kl_statistic result must match the KDE-based KL formula computed manually."""
from scipy.stats import gaussian_kde
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
kde = gaussian_kde(GAMMA_DATA)
data_pdf = kde(GAMMA_DATA)
dist_pdf = frozen.pdf(GAMMA_DATA)
# kl_statistic normalizes both PDFs over the sample before the log-ratio
data_pdf = data_pdf / np.sum(data_pdf)
dist_pdf = dist_pdf / np.sum(dist_pdf)
epsilon = 1e-10
expected = np.sum(
data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon))
)
assert pytest.approx(kl_statistic(frozen, GAMMA_DATA, axis=None)) == expected
def test_no_nan_when_dist_pdf_near_zero(self):
"""epsilon guard must prevent NaN when dist PDF is effectively zero over data."""
# expon with large loc has near-zero PDF over positive-skewed gamma data
far_dist = expon(loc=100.0, scale=1.0)
result = kl_statistic(far_dist, GAMMA_DATA, axis=None)
assert not np.isnan(result)
def test_result_is_consistent_across_calls(self):
"""Two calls with identical inputs must return the same value."""
frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
r1 = kl_statistic(frozen, GAMMA_DATA, axis=None)
r2 = kl_statistic(frozen, GAMMA_DATA, axis=None)
assert r1 == r2
# ── Integration: kl_statistic as callable in Fitter ──────────────────────────
class TestKlStatisticInFitter:
def test_fitter_accepts_kl_callable(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
def test_fitter_kl_statistic_is_finite(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert np.isfinite(f["gamma"].gof_statistic)
def test_fitter_kl_pvalue_in_range(self):
f = Fitter([gamma], statistic_method=kl_statistic, gamma_params={"floc": 0})
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
pval = f["gamma"].pvalue
assert 0.0 <= pval <= 1.0
def test_fitter_kl_vs_ad_different_statistic_values(self):
"""KL and AD statistics should differ numerically."""
f_kl = Fitter(
[gamma], statistic_method=kl_statistic, gamma_params={"floc": 0}
)
f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
f_kl.fit(GAMMA_DATA)
f_ad.fit(GAMMA_DATA)
f_kl.validate(n_mc_samples=99)
f_ad.validate(n_mc_samples=99)
assert f_kl["gamma"].gof_statistic != pytest.approx(
f_ad["gamma"].gof_statistic
)
def test_fitter_kl_multiple_distributions(self):
f = Fitter(
[gamma, expon],
statistic_method=kl_statistic,
gamma_params={"floc": 0},
expon_params={"floc": 0},
)
f.fit(GAMMA_DATA)
f.validate(n_mc_samples=99)
assert f["gamma"].test_result is not None
assert f["expon"].test_result is not None


@@ -1,5 +1,7 @@
from scipy.stats import rv_continuous
from scipy.special import kve, gammaln
from scipy.integrate import quad
import scipy.special as sc
from scipy.stats._distn_infrastructure import _ShapeInfo
import numpy as np
@@ -36,16 +38,280 @@ class k_gen(rv_continuous):
return np.exp(self._logpdf(x, mu, alpha, beta))
def _logpdf(self, x, mu, alpha, beta):
half_sum = (alpha + beta) / 2.0
log_ab_over_mu = np.log(alpha) + np.log(beta) - np.log(mu)
z = 2.0 * np.sqrt(alpha * beta * x / mu)
return (
np.log(2.0)
- gammaln(alpha)
- gammaln(beta)
+ half_sum * log_ab_over_mu
+ (half_sum - 1.0) * np.log(x)
# kve(v, z) = exp(z) * kv(v, z): the scaled Bessel stays finite where
# kv underflows at large z; the -z below undoes the scaling in log space
+ np.log(kve(alpha - beta, z))
- z
)
def _stats(self, mu, alpha, beta, moments="mv"):
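# Raw moments: E[X^n] = mu^n * poch(alpha, n) * poch(beta, n) / (alpha * beta)^n,
# so n = 1 gives E[X] = mu and the mean needs no extra computation.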
mean = mu if any(c in moments for c in "mvsk") else None
variance = None
if "v" in moments or "s" in moments or "k" in moments:
second_raw = (mu ** 2) * sc.poch(alpha, 2) * sc.poch(beta, 2) / ((alpha * beta) ** 2)
variance = second_raw - mean ** 2
skewness = None
if "s" in moments or "k" in moments:
third_raw = (mu ** 3) * sc.poch(alpha, 3) * sc.poch(beta, 3) / ((alpha * beta) ** 3)
third_central = third_raw - 3.0 * mean * second_raw + 2.0 * mean ** 3
skewness = third_central / np.power(variance, 1.5)
kurtosis = None
if "k" in moments:
fourth_raw = (mu ** 4) * sc.poch(alpha, 4) * sc.poch(beta, 4) / ((alpha * beta) ** 4)
fourth_central = fourth_raw - 4.0 * mean * third_raw + 6.0 * mean ** 2 * second_raw - 3.0 * mean ** 4
kurtosis = fourth_central / (variance ** 2) - 3.0
return mean, variance, skewness, kurtosis
def _cdf(self, x, mu, alpha, beta):
# scipy broadcasts params to match x's shape before calling _cdf,
# so mu/alpha/beta may be arrays. Pass all four to np.vectorize so
# each argument arrives as a scalar inside _scalar.
# Substitution u = sqrt(x) regularises the integrand near u=0:
# f(u²)*2u ~ u^(alpha+beta-1), smooth for alpha,beta > 0.
def _scalar(xi, mui, ai, bi):
val, _ = quad(
lambda u: float(self._pdf(float(u * u), float(mui), float(ai), float(bi))) * 2.0 * u,
0.0, float(np.sqrt(xi)),
limit=200, epsabs=1.49e-10, epsrel=1.49e-8,
)
return val
return np.vectorize(_scalar)(x, mu, alpha, beta)
def fit(self, data, *args, **kwds):
if ("loc" in kwds and kwds["loc"] != 0.0) or ("floc" in kwds and kwds["floc"] != 0.0):
raise ValueError("k_distribution uses a fixed loc=0; use mu to control the mean/scale.")
if ("scale" in kwds and kwds["scale"] != 1.0) or ("fscale" in kwds and kwds["fscale"] != 1.0):
raise ValueError("k_distribution uses a fixed scale=1; use mu to control the mean/scale.")
kwds.pop("loc", None)
kwds.pop("scale", None)
return super().fit(data, *args, floc=0.0, fscale=1.0, **kwds)
k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta")
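# Note: because fit() pins floc=0.0 and fscale=1.0, k_dist.fit(data) estimates
# only the shape parameters and returns (mu, alpha, beta, 0.0, 1.0).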
class lognakagami_gen(rv_continuous):
r"""A Log-Nakagami continuous random variable.
If X follows a Nakagami distribution with shape parameter m >= 0.5
and spread Omega > 0, then Y = ln(X) follows the Log-Nakagami distribution.
%(before_notes)s
Notes
-----
The probability density function for `lognakagami` is:
f(y; m, Omega) = 2*m^m / (Gamma(m)*Omega^m)
* exp(2*m*y) * exp(-(m/Omega)*exp(2*y))
for y in (-inf, +inf). Derived via the change of variable Y = ln X,
whose Jacobian e^y combines with x^{2m-1} to give the e^{2my} term.
The mean and variance are:
E[Y] = 0.5 * (psi(m) - ln(m) + ln(Omega))
Var[Y] = 0.25 * psi_1(m)
where psi and psi_1 are the digamma and trigamma functions. The spread
Omega shifts the mean by 0.5*ln(Omega) but leaves all higher moments
unchanged; entropy is also Omega-independent.
`lognakagami` takes ``m`` and ``Omega`` as shape parameters.
See Also
--------
nakagami : The Nakagami distribution (before the log transform).
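Examples
--------
A quick sketch of the change-of-variable identity against scipy's
``nakagami`` (for ``Omega = 1``, where the Nakagami scale is 1):

>>> import numpy as np
>>> from scipy.stats import nakagami
>>> y = 0.3
>>> np.allclose(lognakagami.pdf(y, m=2.0, Omega=1.0),
...             nakagami.pdf(np.exp(y), 2.0) * np.exp(y))
True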
"""
def _shape_info(self):
return [
_ShapeInfo("m", False, (0, np.inf), (False, False)),
_ShapeInfo("Omega", False, (0, np.inf), (False, False)),
]
def _argcheck(self, m, Omega):
return (m >= 0.5) & (Omega > 0)
# ---------- PDF / log-PDF ----------
def _pdf(self, y, m, Omega):
return np.exp(self._logpdf(y, m, Omega))
def _logpdf(self, y, m, Omega):
# ln f = ln2 + m*ln(m) - ln Γ(m) - m*ln(Ω) + 2m*y - (m/Ω)*exp(2y)
return (np.log(2.0)
+ sc.xlogy(m, m)
- sc.gammaln(m)
- m * np.log(Omega)
+ 2.0 * m * y
- (m / Omega) * np.exp(2.0 * y))
# ---------- CDF / SF / log-CDF / log-SF ----------
def _cdf(self, y, m, Omega):
# P(Y <= y) = P(X <= e^y) = gammainc(m, (m/Omega) * e^{2y})
return sc.gammainc(m, (m / Omega) * np.exp(2.0 * y))
def _sf(self, y, m, Omega):
return sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y))
def _logcdf(self, y, m, Omega):
return np.log(sc.gammainc(m, (m / Omega) * np.exp(2.0 * y)))
def _logsf(self, y, m, Omega):
return np.log(sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y)))
# ---------- PPF / ISF ----------
def _ppf(self, q, m, Omega):
# q = gammainc(m, (m/Omega)*e^{2y})
# => (m/Omega)*e^{2y} = gammaincinv(m, q)
# => y = 0.5 * ln(Omega * gammaincinv(m, q) / m)
return 0.5 * np.log(Omega * sc.gammaincinv(m, q) / m)
def _isf(self, q, m, Omega):
return 0.5 * np.log(Omega * sc.gammainccinv(m, q) / m)
# ---------- Moments & statistics ----------
def _stats(self, m, Omega):
# E[Y] = 0.5*(psi(m) - ln(m) + ln(Omega))
mu = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
# Var[Y] = 0.25 * psi_1(m) (Omega cancels — pure shift)
mu2 = 0.25 * sc.polygamma(1, m)
g1 = sc.polygamma(2, m) / sc.polygamma(1, m) ** 1.5
g2 = sc.polygamma(3, m) / sc.polygamma(1, m) ** 2
return mu, mu2, g1, g2
def _entropy(self, m, Omega):
# H = -E[ln f(Y)] = -ln2 + ln Γ(m) - m*psi(m) + m
# Omega cancels exactly (translation invariance of differential entropy)
return -np.log(2.0) + sc.gammaln(m) - m * sc.digamma(m) + m
# ---------- Random variate generation ----------
def _rvs(self, m, Omega, size=None, random_state=None):
# X ~ Nakagami(m, Omega) => X^2 ~ Gamma(m, Omega/m)
g = random_state.gamma(m, Omega / m, size=size)
return 0.5 * np.log(g)
lognakagami = lognakagami_gen(name="lognakagami", shapes="m, Omega")
class loggamma_gen(rv_continuous):
r"""A Log-Gamma continuous random variable.
If X follows a Gamma distribution with shape a
and unit scale (beta = 1), then Y = ln X follows
the Log-Gamma distribution.
%(before_notes)s
Notes
-----
The probability density function for `loggamma_dist` is:
f(y, a) = 1 / Gamma(a) * exp(a*y - exp(y))
for y in (-inf, +inf), a > 0.
This is the standardised form (scale beta = 1).
`loggamma_dist` takes ``a`` as a shape parameter.
The non-unit scale beta is recovered through scipy's
``loc`` parameter: loc = ln(beta), since
ln(beta * X) = ln(beta) + ln(X).
See Also
--------
gamma : The Gamma distribution (before the log transform).
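Examples
--------
A sketch of the change-of-variable identity against scipy's ``gamma``,
including the loc = ln(beta) correspondence described above:

>>> import numpy as np
>>> from scipy.stats import gamma
>>> y, beta = 0.4, 3.0
>>> np.allclose(loggamma_dist.pdf(y, a=2.0),
...             gamma.pdf(np.exp(y), a=2.0) * np.exp(y))
True
>>> np.allclose(loggamma_dist.pdf(y, a=2.0, loc=np.log(beta)),
...             gamma.pdf(np.exp(y), a=2.0, scale=beta) * np.exp(y))
True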
"""
def _shape_info(self):
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
def _argcheck(self, a):
return a > 0
# ---------- PDF / log-PDF ----------
def _pdf(self, y, a):
return np.exp(self._logpdf(y, a))
def _logpdf(self, y, a):
# ln f = a*y - e^y - ln Γ(a)
return a * y - np.exp(y) - sc.gammaln(a)
# ---------- CDF / SF / log-CDF / log-SF ----------
def _cdf(self, y, a):
# P(Y <= y) = P(X <= e^y) = gammainc(a, e^y)
return sc.gammainc(a, np.exp(y))
def _sf(self, y, a):
return sc.gammaincc(a, np.exp(y))
def _logcdf(self, y, a):
return np.log(sc.gammainc(a, np.exp(y)))
def _logsf(self, y, a):
return np.log(sc.gammaincc(a, np.exp(y)))
# ---------- PPF / ISF ----------
def _ppf(self, q, a):
# Invert CDF: q = gammainc(a, e^y)
# => e^y = gammaincinv(a, q)
# => y = ln(gammaincinv(a, q))
return np.log(sc.gammaincinv(a, q))
def _isf(self, q, a):
return np.log(sc.gammainccinv(a, q))
# ---------- Moments & statistics ----------
def _stats(self, a):
# Y = ln(X), X ~ Gamma(a, 1)
# E[Y] = psi(a) (digamma)
# Var[Y] = psi_1(a) (trigamma)
# skew = psi_2(a) / psi_1(a)^{3/2}
# excess kurtosis = psi_3(a) / psi_1(a)^2
mu = sc.digamma(a)
mu2 = sc.polygamma(1, a)
g1 = sc.polygamma(2, a) / mu2 ** 1.5
g2 = sc.polygamma(3, a) / mu2 ** 2
return mu, mu2, g1, g2
def _entropy(self, a):
# H(Y) = -E[ln f(Y)] = -E[a*Y - e^Y - ln Γ(a)]
# = ln Γ(a) - a*psi(a) + E[e^Y]
# E[e^Y] = E[X] = a (for Gamma(a, 1))
return sc.gammaln(a) - a * sc.digamma(a) + a
# ---------- Random variate generation ----------
def _rvs(self, a, size=None, random_state=None):
# Generate Gamma(a, 1) variates, then take log.
# For small a, use Gamma(a) ~ Gamma(a+1) * U^(1/a)
# so ln Gamma(a) ~ ln Gamma(a+1) + ln(U)/a
# This avoids precision loss when a << 1.
return (np.log(random_state.gamma(a + 1, size=size))
+ np.log(random_state.uniform(size=size)) / a)
loggamma_dist = loggamma_gen(name="loggamma_dist", shapes="a")


@@ -1,4 +1,5 @@
import numpy as np
from scipy.stats import gaussian_kde
def aic_statistic(dist, data, axis):
@@ -32,4 +33,30 @@ def bic_statistic(dist, data, axis):
log_likelihood = np.sum(dist.logpdf(data), axis=axis)
bic = np.log(n) * k - 2 * log_likelihood
return bic
def kl_statistic(dist, data, axis):
"""
KL divergence-based goodness-of-fit statistic.
KL(P || Q) = ∑ P(x) log(P(x) / Q(x))
Lower KL divergence indicates a better fit; since goodness_of_fit()
treats larger statistic values as a worse fit, the KL value can be used
directly as the test statistic.
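Example (illustrative, mirroring the unit tests):

>>> from scipy.stats import gamma
>>> data = gamma(a=2.0).rvs(size=500, random_state=0)
>>> frozen = gamma(*gamma.fit(data, floc=0))
>>> stat = kl_statistic(frozen, data, axis=None)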
"""
# Estimate the PDF of the data using KDE
kde = gaussian_kde(data)
data_pdf = kde(data)
# Get the PDF of the distribution at the data points
dist_pdf = dist.pdf(data)
# Normalize both PDFs over the sample points so each sums to 1
data_pdf /= np.sum(data_pdf)
dist_pdf /= np.sum(dist_pdf)
# Avoid division by zero and log of zero by adding a small constant
epsilon = 1e-10
kl_divergence = np.sum(data_pdf * np.log((data_pdf + epsilon) / (dist_pdf + epsilon)), axis=axis)
return kl_divergence