# ruff for code formatting; BIC statistic and BIC test implemented;
# test_distributions.py added for testing newly created dists with pytest.
# REFACTOR: k_gen pdf changed from 2 params to generalized.
# 211 lines, 8.4 KiB, Python
import numpy as np
|
|
import pytest
|
|
from scipy.stats import gamma, expon, norm
|
|
import sys
|
|
import os
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
|
|
|
from tools.statistics import aic_statistic, bic_statistic
|
|
from fitting.fitter import Fitter
|
|
|
|
|
|
# Seeded generator: the sample below is identical on every import.
RNG = np.random.default_rng(seed=42)

# 200 gamma draws (shape 2.0, scale 1.5) used as the sample by the tests below.
GAMMA_DATA = RNG.gamma(2.0, 1.5, size=200)
|
|
|
|
|
|
# ── aic_statistic unit tests ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestAicStatistic:
    """Unit tests for ``aic_statistic`` evaluated on frozen scipy distributions."""

    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data.

        ``kwargs`` are forwarded unchanged to ``dist.fit`` (the tests pass
        ``floc=0`` for gamma and expon).
        """
        params = dist.fit(data, **kwargs)
        return dist(*params)

    def test_returns_float(self):
        """A 1-D sample reduced along axis 0 yields a real-valued scalar."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=0)
        # ``float(result)`` is a float by construction, so checking it proves
        # nothing; inspect the rank and dtype of the returned value instead.
        assert np.ndim(result) == 0
        assert np.issubdtype(np.asarray(result).dtype, np.floating)

    def test_formula_correct(self):
        """AIC = 2k - 2*log_likelihood."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        k = len(frozen.args)
        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
        expected = 2 * k - 2 * log_likelihood
        assert aic_statistic(frozen, GAMMA_DATA, axis=0) == pytest.approx(expected)

    def test_penalises_more_parameters(self):
        """gamma (3 params) should carry a larger AIC penalty term than expon (2 params).

        The penalty is recovered from the statistic itself as
        ``AIC + 2*log_likelihood`` so the check exercises ``aic_statistic``
        rather than only counting scipy arguments.
        """
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)

        def penalty(frozen):
            # Adding back 2*logL cancels the likelihood part of the AIC.
            log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
            return aic_statistic(frozen, GAMMA_DATA, axis=0) + 2 * log_likelihood

        assert len(gamma_frozen.args) > len(expon_frozen.args)
        assert penalty(gamma_frozen) > penalty(expon_frozen)

    def test_better_fit_has_lower_aic(self):
        """Gamma fitted to gamma data should have lower AIC than normal fitted to gamma data."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        aic_gamma = aic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
        aic_norm = aic_statistic(norm_frozen, GAMMA_DATA, axis=0)
        assert aic_gamma < aic_norm

    def test_works_with_axis_none(self):
        """``axis=None`` is accepted and gives a finite value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=None)
        assert np.isfinite(result)

    def test_result_is_finite(self):
        """The statistic along axis 0 is finite for the fitted gamma."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        assert np.isfinite(aic_statistic(frozen, GAMMA_DATA, axis=0))
|
|
|
|
|
|
# ── Integration: aic_statistic as callable in Fitter ─────────────────────────
|
|
|
|
|
|
class TestAicStatisticInFitter:
    """Integration tests: ``aic_statistic`` handed to ``Fitter`` as a callable."""

    def _validated_gamma_fitter(self):
        """Build a gamma-only Fitter on ``aic_statistic``, then fit and validate it.

        Uses ``gamma_params={"floc": 0}`` and ``n_mc_samples=99``.
        """
        fitter = Fitter(
            [gamma],
            statistic_method=aic_statistic,
            gamma_params={"floc": 0},
        )
        fitter.fit(GAMMA_DATA)
        fitter.validate(n_mc_samples=99)
        return fitter

    def test_fitter_accepts_aic_callable(self):
        """Validation with the AIC callable produces a test result for gamma."""
        fitter = self._validated_gamma_fitter()
        assert fitter["gamma"].test_result is not None

    def test_fitter_aic_statistic_is_finite(self):
        """The goodness-of-fit statistic stored for gamma is finite."""
        fitter = self._validated_gamma_fitter()
        assert np.isfinite(fitter["gamma"].gof_statistic)

    def test_fitter_aic_pvalue_in_range(self):
        """The p-value stored for gamma lies in [0, 1]."""
        fitter = self._validated_gamma_fitter()
        p_value = fitter["gamma"].pvalue
        assert 0.0 <= p_value <= 1.0

    def test_fitter_aic_vs_ad_different_statistic_values(self):
        """AIC and AD statistics should differ numerically."""
        shared_kwargs = {"gamma_params": {"floc": 0}}
        aic_fitter = Fitter([gamma], statistic_method=aic_statistic, **shared_kwargs)
        ad_fitter = Fitter([gamma], statistic_method="ad", **shared_kwargs)

        # Same call order as before: fit both fitters, then validate both.
        aic_fitter.fit(GAMMA_DATA)
        ad_fitter.fit(GAMMA_DATA)
        aic_fitter.validate(n_mc_samples=99)
        ad_fitter.validate(n_mc_samples=99)

        ad_value = ad_fitter["gamma"].gof_statistic
        assert aic_fitter["gamma"].gof_statistic != pytest.approx(ad_value)

    def test_fitter_aic_multiple_distributions(self):
        """Both gamma and expon receive a test result when fitted together."""
        candidates = [gamma, expon]
        fitter = Fitter(
            candidates,
            statistic_method=aic_statistic,
            gamma_params={"floc": 0},
            expon_params={"floc": 0},
        )
        fitter.fit(GAMMA_DATA)
        fitter.validate(n_mc_samples=99)
        for dist_name in ("gamma", "expon"):
            assert fitter[dist_name].test_result is not None
|
|
|
|
|
|
# ── bic_statistic unit tests ──────────────────────────────────────────────────
|
|
|
|
|
|
class TestBicStatistic:
    """Unit tests for ``bic_statistic`` evaluated on frozen scipy distributions."""

    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data.

        ``kwargs`` are forwarded unchanged to ``dist.fit`` (the tests pass
        ``floc=0`` for gamma and expon).
        """
        params = dist.fit(data, **kwargs)
        return dist(*params)

    def test_returns_float(self):
        """A 1-D sample reduced along axis 0 yields a real-valued scalar."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = bic_statistic(frozen, GAMMA_DATA, axis=0)
        # ``float(result)`` is a float by construction, so checking it proves
        # nothing; inspect the rank and dtype of the returned value instead.
        assert np.ndim(result) == 0
        assert np.issubdtype(np.asarray(result).dtype, np.floating)

    def test_formula_correct(self):
        """BIC = ln(n)*k - 2*log_likelihood."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        n = len(GAMMA_DATA)
        k = len(frozen.args)
        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
        expected = np.log(n) * k - 2 * log_likelihood
        assert bic_statistic(frozen, GAMMA_DATA, axis=0) == pytest.approx(expected)

    def test_penalises_more_parameters(self):
        """gamma (3 params) should carry a larger BIC penalty term than expon (2 params).

        The penalty is recovered from the statistic itself as
        ``BIC + 2*log_likelihood`` so the check exercises ``bic_statistic``
        rather than only counting scipy arguments.
        """
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)

        def penalty(frozen):
            # Adding back 2*logL cancels the likelihood part of the BIC.
            log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
            return bic_statistic(frozen, GAMMA_DATA, axis=0) + 2 * log_likelihood

        assert len(gamma_frozen.args) > len(expon_frozen.args)
        assert penalty(gamma_frozen) > penalty(expon_frozen)

    def test_better_fit_has_lower_bic(self):
        """Gamma fitted to gamma data should have lower BIC than normal fitted to gamma data."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        bic_gamma = bic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
        bic_norm = bic_statistic(norm_frozen, GAMMA_DATA, axis=0)
        assert bic_gamma < bic_norm

    def test_works_with_axis_none(self):
        """``axis=None`` is accepted and gives a finite value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = bic_statistic(frozen, GAMMA_DATA, axis=None)
        assert np.isfinite(result)

    def test_result_is_finite(self):
        """The statistic along axis 0 is finite for the fitted gamma."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        assert np.isfinite(bic_statistic(frozen, GAMMA_DATA, axis=0))
|
|
|
|
|
|
# ── Integration: bic_statistic as callable in Fitter ─────────────────────────
|
|
|
|
|
|
class TestBicStatisticInFitter:
    """Integration tests: ``bic_statistic`` handed to ``Fitter`` as a callable."""

    def _validated_gamma_fitter(self):
        """Build a gamma-only Fitter on ``bic_statistic``, then fit and validate it.

        Uses ``gamma_params={"floc": 0}`` and ``n_mc_samples=99``.
        """
        fitter = Fitter(
            [gamma],
            statistic_method=bic_statistic,
            gamma_params={"floc": 0},
        )
        fitter.fit(GAMMA_DATA)
        fitter.validate(n_mc_samples=99)
        return fitter

    def test_fitter_accepts_bic_callable(self):
        """Validation with the BIC callable produces a test result for gamma."""
        fitter = self._validated_gamma_fitter()
        assert fitter["gamma"].test_result is not None

    def test_fitter_bic_statistic_is_finite(self):
        """The goodness-of-fit statistic stored for gamma is finite."""
        fitter = self._validated_gamma_fitter()
        assert np.isfinite(fitter["gamma"].gof_statistic)

    def test_fitter_bic_pvalue_in_range(self):
        """The p-value stored for gamma lies in [0, 1]."""
        fitter = self._validated_gamma_fitter()
        p_value = fitter["gamma"].pvalue
        assert 0.0 <= p_value <= 1.0

    def test_fitter_bic_vs_ad_different_statistic_values(self):
        """BIC and AD statistics should differ numerically."""
        shared_kwargs = {"gamma_params": {"floc": 0}}
        bic_fitter = Fitter([gamma], statistic_method=bic_statistic, **shared_kwargs)
        ad_fitter = Fitter([gamma], statistic_method="ad", **shared_kwargs)

        # Same call order as before: fit both fitters, then validate both.
        bic_fitter.fit(GAMMA_DATA)
        ad_fitter.fit(GAMMA_DATA)
        bic_fitter.validate(n_mc_samples=99)
        ad_fitter.validate(n_mc_samples=99)

        ad_value = ad_fitter["gamma"].gof_statistic
        assert bic_fitter["gamma"].gof_statistic != pytest.approx(ad_value)

    def test_fitter_bic_multiple_distributions(self):
        """Both gamma and expon receive a test result when fitted together."""
        candidates = [gamma, expon]
        fitter = Fitter(
            candidates,
            statistic_method=bic_statistic,
            gamma_params={"floc": 0},
            expon_params={"floc": 0},
        )
        fitter.fit(GAMMA_DATA)
        fitter.validate(n_mc_samples=99)
        for dist_name in ("gamma", "expon"):
            assert fitter[dist_name].test_result is not None
|