ADD: AIC statistic added
.python-version (new file, 1 line)
@@ -0,0 +1 @@
3.14

@@ -25,8 +25,8 @@ class DistributionSummary:
        Keyword arguments that were passed to fit() (fixed params, etc.).
    fit_result_params : tuple
        The actual fitted parameters returned by fit() (empty tuple until fit() is called).
-    statistic_method : str
-        GoF statistic identifier used in validate() (e.g. 'ad', 'ks').
+    statistic_method : object
+        GoF statistic identifier used in validate() (e.g. 'ad', 'ks' or a custom callable).
    test_result : object
        Result object from scipy.stats.goodness_of_fit; None until
        validate() is called. Exposes .statistic and .pvalue.
@@ -45,7 +45,7 @@ class DistributionSummary:
    args_fit_params: tuple = field(default_factory=tuple)
    kwds_fit_params: dict = field(default_factory=dict)
    fit_result_params: tuple = field(default_factory=tuple)
-    statistic_method: str = "ad"
+    statistic_method: object = "ad"
    test_result: object = None

    # ── convenience properties ────────────────────────────────────────────────
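
For context (not part of the diff): a minimal sketch of how the widened statistic_method field is exercised. The Fitter constructor keywords, the fit()/validate() calls, and indexing by distribution name are taken from the tests added in this commit; they are not re-verified against the Fitter implementation here.

import numpy as np
from scipy.stats import gamma

from fitting.fitter import Fitter
from tools.statistics import aic_statistic

data = np.random.default_rng(0).gamma(shape=2.0, scale=1.5, size=200)

# String identifiers keep working as before ...
f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})

# ... and the field can now also hold a callable with the
# (dist, data, axis) signature, such as the new AIC statistic.
f_aic = Fitter([gamma], statistic_method=aic_statistic, gamma_params={"floc": 0})
f_aic.fit(data)
f_aic.validate(n_mc_samples=99)

# test_result comes from scipy.stats.goodness_of_fit and exposes .statistic and .pvalue
print(f_aic["gamma"].test_result.statistic, f_aic["gamma"].test_result.pvalue)
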
@@ -232,11 +232,13 @@ class TestFitterPlotQQ:

    def test_qq_hazen_returns_figure(self):
        import plotly.graph_objects as go

        fig = self.f.plot_qq_plots(method="hazen")
        assert isinstance(fig, go.Figure)

    def test_qq_filliben_returns_figure(self):
        import plotly.graph_objects as go

        fig = self.f.plot_qq_plots(method="filliben")
        assert isinstance(fig, go.Figure)

@@ -257,11 +259,13 @@ class TestFitterHistogram:

    def test_histogram_returns_figure(self):
        import plotly.graph_objects as go

        fig = self.f.histogram_with_fits()
        assert isinstance(fig, go.Figure)

    def test_histogram_seaborn_returns_figure(self):
        import matplotlib.pyplot as plt

        fig = self.f.histogram_with_fits_seaborn()
        assert isinstance(fig, plt.Figure)
        plt.close("all")

etc/tests/test_statistics.py (new file, 112 lines)
@@ -0,0 +1,112 @@
import numpy as np
import pytest
from scipy.stats import gamma, expon, norm
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))

from tools.statistics import aic_statistic
from fitting.fitter import Fitter


RNG = np.random.default_rng(42)
GAMMA_DATA = RNG.gamma(shape=2.0, scale=1.5, size=200)


# ── aic_statistic unit tests ──────────────────────────────────────────────────


class TestAicStatistic:
    def _fitted_dist(self, dist, data, **kwargs):
        """Return a frozen distribution fitted to data."""
        params = dist.fit(data, **kwargs)
        return dist(*params)

    def test_returns_float(self):
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=0)
        assert isinstance(float(result), float)

    def test_formula_correct(self):
        """AIC = 2k - 2*log_likelihood."""
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        k = len(frozen.args)
        log_likelihood = np.sum(frozen.logpdf(GAMMA_DATA), axis=0)
        expected = 2 * k - 2 * log_likelihood
        assert pytest.approx(aic_statistic(frozen, GAMMA_DATA, axis=0)) == expected

    def test_penalises_more_parameters(self):
        """The AIC penalty term 2k is larger for gamma (3 params) than for expon (2 params)."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        expon_frozen = self._fitted_dist(expon, GAMMA_DATA, floc=0)
        # penalty term alone: 2*k; gamma has more params so its penalty is larger
        assert 2 * len(gamma_frozen.args) > 2 * len(expon_frozen.args)

    def test_better_fit_has_lower_aic(self):
        """Gamma fitted to gamma data should have lower AIC than normal fitted to gamma data."""
        gamma_frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        norm_frozen = self._fitted_dist(norm, GAMMA_DATA)
        aic_gamma = aic_statistic(gamma_frozen, GAMMA_DATA, axis=0)
        aic_norm = aic_statistic(norm_frozen, GAMMA_DATA, axis=0)
        assert aic_gamma < aic_norm

    def test_works_with_axis_none(self):
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        result = aic_statistic(frozen, GAMMA_DATA, axis=None)
        assert np.isfinite(result)

    def test_result_is_finite(self):
        frozen = self._fitted_dist(gamma, GAMMA_DATA, floc=0)
        assert np.isfinite(aic_statistic(frozen, GAMMA_DATA, axis=0))


# ── Integration: aic_statistic as callable in Fitter ─────────────────────────


class TestAicStatisticInFitter:
    def test_fitter_accepts_aic_callable(self):
        f = Fitter([gamma], statistic_method=aic_statistic, gamma_params={"floc": 0})
        f.fit(GAMMA_DATA)
        f.validate(n_mc_samples=99)
        assert f["gamma"].test_result is not None

    def test_fitter_aic_statistic_is_finite(self):
        f = Fitter([gamma], statistic_method=aic_statistic, gamma_params={"floc": 0})
        f.fit(GAMMA_DATA)
        f.validate(n_mc_samples=99)
        assert np.isfinite(f["gamma"].gof_statistic)

    def test_fitter_aic_pvalue_in_range(self):
        f = Fitter([gamma], statistic_method=aic_statistic, gamma_params={"floc": 0})
        f.fit(GAMMA_DATA)
        f.validate(n_mc_samples=99)
        pval = f["gamma"].pvalue
        assert 0.0 <= pval <= 1.0

    def test_fitter_aic_vs_ad_different_statistic_values(self):
        """AIC and AD statistics should differ numerically."""
        f_aic = Fitter(
            [gamma], statistic_method=aic_statistic, gamma_params={"floc": 0}
        )
        f_ad = Fitter([gamma], statistic_method="ad", gamma_params={"floc": 0})
        f_aic.fit(GAMMA_DATA)
        f_ad.fit(GAMMA_DATA)
        f_aic.validate(n_mc_samples=99)
        f_ad.validate(n_mc_samples=99)
        assert f_aic["gamma"].gof_statistic != pytest.approx(
            f_ad["gamma"].gof_statistic
        )

    def test_fitter_aic_multiple_distributions(self):
        f = Fitter(
            [gamma, expon],
            statistic_method=aic_statistic,
            gamma_params={"floc": 0},
            expon_params={"floc": 0},
        )
        f.fit(GAMMA_DATA)
        f.validate(n_mc_samples=99)
        assert f["gamma"].test_result is not None
        assert f["expon"].test_result is not None

etc/tools/statistics.py (new file, 18 lines)
@@ -0,0 +1,18 @@
import numpy as np


def aic_statistic(dist, data, axis):
    """
    AIC-based goodness-of-fit statistic.

    AIC = 2k - 2ln(L)

    Lower AIC indicates a better fit, and since goodness_of_fit()
    treats larger statistic values as a worse fit, AIC can be used directly.
    """

    k = len(dist.args)
    log_likelihood = np.sum(dist.logpdf(data), axis=axis)
    aic = 2 * k - 2 * log_likelihood

    return aic
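
For context (not part of the diff): aic_statistic follows the (dist, data, axis) interface that recent SciPy versions accept for a callable statistic in scipy.stats.goodness_of_fit, so it can also be used without Fitter. A sketch under that assumption, with parameter choices mirroring the tests above:

import numpy as np
from scipy import stats

from tools.statistics import aic_statistic

data = np.random.default_rng(0).gamma(shape=2.0, scale=1.5, size=200)

# Monte Carlo GoF test with AIC as the statistic; loc is treated as known (0),
# mirroring gamma_params={"floc": 0} in the Fitter tests.
res = stats.goodness_of_fit(
    stats.gamma,
    data,
    known_params={"loc": 0},
    statistic=aic_statistic,
    n_mc_samples=99,
)
print(res.statistic, res.pvalue)
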
pyproject.toml
@@ -3,7 +3,7 @@ name = "clutter_chuva"
version = "0.1.0"
description = "Tools for fitting distributions to rain clutter data"
readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.14"
dependencies = [
    "ipykernel",
    "kaleido",