"""Tests for ``aic_statistic`` and ``bic_statistic``.

Covers the two information criteria both as stand-alone callables (unit
tests against the textbook formulas) and as ``statistic_method`` callables
handed to ``Fitter`` (integration tests).
"""

import os
import sys

import numpy as np
import pytest
from scipy.stats import expon, gamma, norm

# The project packages live one directory above this file; the path entry has
# to be in place before they are imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from tools.statistics import aic_statistic, bic_statistic  # noqa: E402
from fitting.fitter import Fitter  # noqa: E402

RNG = np.random.default_rng(42)
GAMMA_DATA = RNG.gamma(shape=2.0, scale=1.5, size=200)

# Number of Monte Carlo samples used by every ``Fitter.validate`` call below.
N_MC_SAMPLES = 99


def _fit_frozen(dist, data, **kwargs):
    """Fit ``dist`` to ``data`` and return the resulting frozen distribution."""
    return dist(*dist.fit(data, **kwargs))


def _is_real_scalar(value):
    """Return True when ``value`` is a 0-d floating-point number."""
    return np.ndim(value) == 0 and np.issubdtype(
        np.asarray(value).dtype, np.floating
    )


def _penalty_term(statistic, frozen, data):
    """Recover the complexity penalty from an information criterion.

    Both criteria have the form ``penalty - 2 * log_likelihood``, so adding
    ``2 * log_likelihood`` back leaves only the penalty the statistic applied.
    """
    log_likelihood = np.sum(frozen.logpdf(data), axis=0)
    return statistic(frozen, data, axis=0) + 2 * log_likelihood


def _validated_fitter(distributions, statistic, **dist_params):
    """Build a ``Fitter``, fit it to ``GAMMA_DATA`` and run validation."""
    fitter = Fitter(distributions, statistic_method=statistic, **dist_params)
    fitter.fit(GAMMA_DATA)
    fitter.validate(n_mc_samples=N_MC_SAMPLES)
    return fitter


# ── aic_statistic unit tests ──────────────────────────────────────────────────


class TestAicStatistic:
    """Unit tests calling ``aic_statistic`` directly on frozen distributions."""

    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data."""
        return _fit_frozen(dist, data, **kwargs)

    def test_returns_float(self):
        """The statistic of 1-D data is a single floating-point value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=0)
        # ``isinstance(float(x), float)`` can never fail, so inspect the
        # returned object itself instead of a converted copy.
        assert _is_real_scalar(result)

    def test_formula_correct(self):
        """AIC = 2k - 2*log_likelihood."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        k = len(frozen.args)
        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
        expected = 2 * k - 2 * log_likelihood
        assert aic_statistic(frozen, GAMMA_DATA, axis=0) == pytest.approx(expected)

    def test_penalises_more_parameters(self):
        """gamma (3 params) must carry a larger AIC penalty than expon (2 params)."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)
        # Precondition: gamma really does have more parameters than expon.
        assert len(gamma_frozen.args) > len(expon_frozen.args)
        # The penalty actually applied by aic_statistic must reflect that.
        gamma_penalty = _penalty_term(aic_statistic, gamma_frozen, GAMMA_DATA)
        expon_penalty = _penalty_term(aic_statistic, expon_frozen, GAMMA_DATA)
        assert gamma_penalty > expon_penalty

    def test_better_fit_has_lower_aic(self):
        """Gamma fitted to gamma data should beat a normal fitted to the same data."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        aic_gamma = aic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
        aic_norm = aic_statistic(norm_frozen, GAMMA_DATA, axis=0)
        assert aic_gamma < aic_norm

    def test_works_with_axis_none(self):
        """``axis=None`` is accepted and yields a finite value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=None)
        assert np.isfinite(result)

    def test_result_is_finite(self):
        """The statistic of a successfully fitted distribution is finite."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        assert np.isfinite(aic_statistic(frozen, GAMMA_DATA, axis=0))


# ── Integration: aic_statistic as callable in Fitter ─────────────────────────


class TestAicStatisticInFitter:
    """Integration tests passing ``aic_statistic`` as ``statistic_method``."""

    def test_fitter_accepts_aic_callable(self):
        f = _validated_fitter([gamma], aic_statistic, gamma_params={"floc": 0})
        assert f["gamma"].test_result is not None

    def test_fitter_aic_statistic_is_finite(self):
        f = _validated_fitter([gamma], aic_statistic, gamma_params={"floc": 0})
        assert np.isfinite(f["gamma"].gof_statistic)

    def test_fitter_aic_pvalue_in_range(self):
        f = _validated_fitter([gamma], aic_statistic, gamma_params={"floc": 0})
        pval = f["gamma"].pvalue
        assert 0.0 <= pval <= 1.0

    def test_fitter_aic_vs_ad_different_statistic_values(self):
        """AIC and AD statistics should differ numerically."""
        f_aic = Fitter(
            [gamma], statistic_method=aic_statistic, gamma_params={"floc": 0}
        )
        f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
        # Both fitters are fitted before either is validated.
        f_aic.fit(GAMMA_DATA)
        f_ad.fit(GAMMA_DATA)
        f_aic.validate(n_mc_samples=N_MC_SAMPLES)
        f_ad.validate(n_mc_samples=N_MC_SAMPLES)
        assert f_aic["gamma"].gof_statistic != pytest.approx(
            f_ad["gamma"].gof_statistic
        )

    def test_fitter_aic_multiple_distributions(self):
        f = _validated_fitter(
            [gamma, expon],
            aic_statistic,
            gamma_params={"floc": 0},
            expon_params={"floc": 0},
        )
        assert f["gamma"].test_result is not None
        assert f["expon"].test_result is not None


# ── bic_statistic unit tests ──────────────────────────────────────────────────


class TestBicStatistic:
    """Unit tests calling ``bic_statistic`` directly on frozen distributions."""

    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data."""
        return _fit_frozen(dist, data, **kwargs)

    def test_returns_float(self):
        """The statistic of 1-D data is a single floating-point value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = bic_statistic(frozen, GAMMA_DATA, axis=0)
        # ``isinstance(float(x), float)`` can never fail, so inspect the
        # returned object itself instead of a converted copy.
        assert _is_real_scalar(result)

    def test_formula_correct(self):
        """BIC = ln(n)*k - 2*log_likelihood."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        n = len(GAMMA_DATA)
        k = len(frozen.args)
        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
        expected = np.log(n) * k - 2 * log_likelihood
        assert bic_statistic(frozen, GAMMA_DATA, axis=0) == pytest.approx(expected)

    def test_penalises_more_parameters(self):
        """gamma (3 params) must carry a larger BIC penalty than expon (2 params)."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)
        n = len(GAMMA_DATA)
        # Precondition: the theoretical penalties are ordered as expected.
        assert np.log(n) * len(gamma_frozen.args) > np.log(n) * len(
            expon_frozen.args
        )
        # The penalty actually applied by bic_statistic must reflect that.
        gamma_penalty = _penalty_term(bic_statistic, gamma_frozen, GAMMA_DATA)
        expon_penalty = _penalty_term(bic_statistic, expon_frozen, GAMMA_DATA)
        assert gamma_penalty > expon_penalty

    def test_better_fit_has_lower_bic(self):
        """Gamma fitted to gamma data should beat a normal fitted to the same data."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        bic_gamma = bic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
        bic_norm = bic_statistic(norm_frozen, GAMMA_DATA, axis=0)
        assert bic_gamma < bic_norm

    def test_works_with_axis_none(self):
        """``axis=None`` is accepted and yields a finite value."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = bic_statistic(frozen, GAMMA_DATA, axis=None)
        assert np.isfinite(result)

    def test_result_is_finite(self):
        """The statistic of a successfully fitted distribution is finite."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        assert np.isfinite(bic_statistic(frozen, GAMMA_DATA, axis=0))


# ── Integration: bic_statistic as callable in Fitter ─────────────────────────


class TestBicStatisticInFitter:
    """Integration tests passing ``bic_statistic`` as ``statistic_method``."""

    def test_fitter_accepts_bic_callable(self):
        f = _validated_fitter([gamma], bic_statistic, gamma_params={"floc": 0})
        assert f["gamma"].test_result is not None

    def test_fitter_bic_statistic_is_finite(self):
        f = _validated_fitter([gamma], bic_statistic, gamma_params={"floc": 0})
        assert np.isfinite(f["gamma"].gof_statistic)

    def test_fitter_bic_pvalue_in_range(self):
        f = _validated_fitter([gamma], bic_statistic, gamma_params={"floc": 0})
        pval = f["gamma"].pvalue
        assert 0.0 <= pval <= 1.0

    def test_fitter_bic_vs_ad_different_statistic_values(self):
        """BIC and AD statistics should differ numerically."""
        f_bic = Fitter(
            [gamma], statistic_method=bic_statistic, gamma_params={"floc": 0}
        )
        f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
        # Both fitters are fitted before either is validated.
        f_bic.fit(GAMMA_DATA)
        f_ad.fit(GAMMA_DATA)
        f_bic.validate(n_mc_samples=N_MC_SAMPLES)
        f_ad.validate(n_mc_samples=N_MC_SAMPLES)
        assert f_bic["gamma"].gof_statistic != pytest.approx(
            f_ad["gamma"].gof_statistic
        )

    def test_fitter_bic_multiple_distributions(self):
        f = _validated_fitter(
            [gamma, expon],
            bic_statistic,
            gamma_params={"floc": 0},
            expon_params={"floc": 0},
        )
        assert f["gamma"].test_result is not None
        assert f["expon"].test_result is not None