Implement two new scipy-compatible distributions : Log-Nakagami (lognakagami) and Log-Gamma (loggamma_dist), with complete logpdf/cdf/ppf/stats/entropy/rvs methods derived from the change-of-variable Y = ln(X). Add kl_statistic, a KDE-based KL-divergence goodness-of-fit callable compatible with the Fitter class. Extend k_gen with _stats (improving speed), _cdf, and a fit guard, and switch kv → kve to improve numerical stability at large arguments. Add unit tests for all three additions covering normalization, monotonicity, ppf inversion, moment formulas, and Fitter integration.
317 lines
11 KiB
Python
317 lines
11 KiB
Python
from scipy.stats import rv_continuous
|
|
from scipy.special import kve, gammaln
|
|
from scipy.integrate import quad
|
|
import scipy.special as sc
|
|
from scipy.stats._distn_infrastructure import _ShapeInfo
|
|
import numpy as np
|
|
|
|
|
|
class k_gen(rv_continuous):
|
|
"""Generalized K distribution for radar clutter modeling.
|
|
|
|
The probability density function is::
|
|
|
|
f(x; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta))
|
|
* (alpha*beta/mu)^((alpha+beta)/2)
|
|
* x^((alpha+beta)/2 - 1)
|
|
* K_{alpha-beta}(2*sqrt(alpha*beta*x/mu))
|
|
|
|
for x > 0, where K_{alpha-beta} is the modified Bessel function of the
|
|
second kind of order alpha-beta, mu > 0 is the mean, and alpha > 0,
|
|
beta > 0 are shape parameters.
|
|
|
|
The standard (2-parameter) K distribution is the special case beta=1,
|
|
with nu=alpha and b=alpha/mu.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("mu", domain=(0, np.inf), inclusive=(False, True)),
|
|
_ShapeInfo("alpha", domain=(0, np.inf), inclusive=(True, True)),
|
|
_ShapeInfo("beta", domain=(0, np.inf), inclusive=(True, True)),
|
|
]
|
|
|
|
def _argcheck(self, mu, alpha, beta):
|
|
return (mu > 0) & (alpha > 0) & (beta > 0)
|
|
|
|
def _pdf(self, x, mu, alpha, beta):
|
|
return np.exp(self._logpdf(x, mu, alpha, beta))
|
|
|
|
def _logpdf(self, x, mu, alpha, beta):
|
|
half_sum = (alpha + beta) / 2.0
|
|
log_ab_over_mu = np.log(alpha) + np.log(beta) - np.log(mu)
|
|
z = 2.0 * np.sqrt(alpha * beta * x / mu)
|
|
return (
|
|
np.log(2.0)
|
|
- gammaln(alpha)
|
|
- gammaln(beta)
|
|
+ half_sum * log_ab_over_mu
|
|
+ (half_sum - 1.0) * np.log(x)
|
|
+ np.log(kve(alpha - beta, z))
|
|
- z
|
|
)
|
|
|
|
def _stats(self, mu, alpha, beta, moments="mv"):
|
|
mean = mu if "m" in moments or "v" in moments or "s" in moments or "k" in moments else None
|
|
|
|
variance = None
|
|
if "v" in moments or "s" in moments or "k" in moments:
|
|
second_raw = (mu ** 2) * sc.poch(alpha, 2) * sc.poch(beta, 2) / ((alpha * beta) ** 2)
|
|
variance = second_raw - mean ** 2
|
|
|
|
skewness = None
|
|
if "s" in moments or "k" in moments:
|
|
third_raw = (mu ** 3) * sc.poch(alpha, 3) * sc.poch(beta, 3) / ((alpha * beta) ** 3)
|
|
third_central = third_raw - 3.0 * mean * second_raw + 2.0 * mean ** 3
|
|
skewness = third_central / np.power(variance, 1.5)
|
|
|
|
kurtosis = None
|
|
if "k" in moments:
|
|
fourth_raw = (mu ** 4) * sc.poch(alpha, 4) * sc.poch(beta, 4) / ((alpha * beta) ** 4)
|
|
fourth_central = fourth_raw - 4.0 * mean * third_raw + 6.0 * mean ** 2 * second_raw - 3.0 * mean ** 4
|
|
kurtosis = fourth_central / (variance ** 2) - 3.0
|
|
|
|
return mean, variance, skewness, kurtosis
|
|
|
|
def _cdf(self, x, mu, alpha, beta):
|
|
# scipy broadcasts params to match x's shape before calling _cdf,
|
|
# so mu/alpha/beta may be arrays. Pass all four to np.vectorize so
|
|
# each argument arrives as a scalar inside _scalar.
|
|
# Substitution u = sqrt(x) regularises the integrand near u=0:
|
|
# f(u²)*2u ~ u^(alpha+beta-1), smooth for alpha,beta > 0.
|
|
def _scalar(xi, mui, ai, bi):
|
|
val, _ = quad(
|
|
lambda u: float(self._pdf(float(u * u), float(mui), float(ai), float(bi))) * 2.0 * u,
|
|
0.0, float(np.sqrt(xi)),
|
|
limit=200, epsabs=1.49e-10, epsrel=1.49e-8,
|
|
)
|
|
return val
|
|
return np.vectorize(_scalar)(x, mu, alpha, beta)
|
|
|
|
def fit(self, data, *args, **kwds):
|
|
if ("loc" in kwds and kwds["loc"] != 0.0) or ("floc" in kwds and kwds["floc"] != 0.0):
|
|
raise ValueError("k_distribution uses a fixed loc=0; use mu to control the mean/scale.")
|
|
if ("scale" in kwds and kwds["scale"] != 1.0) or ("fscale" in kwds and kwds["fscale"] != 1.0):
|
|
raise ValueError("k_distribution uses a fixed scale=1; use mu to control the mean/scale.")
|
|
|
|
kwds.pop("loc", None)
|
|
kwds.pop("scale", None)
|
|
return super().fit(data, *args, floc=0.0, fscale=1.0, **kwds)
|
|
|
|
|
|
k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta")
|
|
|
|
|
|
class lognakagami_gen(rv_continuous):
|
|
r"""A Log-Nakagami continuous random variable.
|
|
|
|
If X follows a Nakagami distribution with shape parameter m >= 0.5
|
|
and spread Omega > 0, then Y = ln(X) follows the Log-Nakagami distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `lognakagami` is:
|
|
|
|
f(y; m, Omega) = 2*m^m / (Gamma(m)*Omega^m)
|
|
* exp(2*m*y) * exp(-(m/Omega)*exp(2*y))
|
|
|
|
for y in (-inf, +inf). Derived via the change of variable Y = ln X,
|
|
whose Jacobian e^y combines with x^{2m-1} to give the e^{2my} term.
|
|
|
|
The mean and variance are:
|
|
|
|
E[Y] = 0.5 * (psi(m) - ln(m) + ln(Omega))
|
|
Var[Y] = 0.25 * psi_1(m)
|
|
|
|
where psi and psi_1 are the digamma and trigamma functions. The spread
|
|
Omega shifts the mean by 0.5*ln(Omega) but leaves all higher moments
|
|
unchanged; entropy is also Omega-independent.
|
|
|
|
`lognakagami` takes ``m`` and ``Omega`` as shape parameters.
|
|
|
|
See Also
|
|
--------
|
|
nakagami : The Nakagami distribution (before the log transform).
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("m", False, (0, np.inf), (False, False)),
|
|
_ShapeInfo("Omega", False, (0, np.inf), (False, False)),
|
|
]
|
|
|
|
def _argcheck(self, m, Omega):
|
|
return (m >= 0.5) & (Omega > 0)
|
|
|
|
# ---------- PDF / log-PDF ----------
|
|
|
|
def _pdf(self, y, m, Omega):
|
|
return np.exp(self._logpdf(y, m, Omega))
|
|
|
|
def _logpdf(self, y, m, Omega):
|
|
# ln f = ln2 + m*ln(m) - ln Γ(m) - m*ln(Ω) + 2m*y - (m/Ω)*exp(2y)
|
|
return (np.log(2.0)
|
|
+ sc.xlogy(m, m)
|
|
- sc.gammaln(m)
|
|
- m * np.log(Omega)
|
|
+ 2.0 * m * y
|
|
- (m / Omega) * np.exp(2.0 * y))
|
|
|
|
# ---------- CDF / SF / log-CDF / log-SF ----------
|
|
|
|
def _cdf(self, y, m, Omega):
|
|
# P(Y <= y) = P(X <= e^y) = gammainc(m, (m/Omega) * e^{2y})
|
|
return sc.gammainc(m, (m / Omega) * np.exp(2.0 * y))
|
|
|
|
def _sf(self, y, m, Omega):
|
|
return sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y))
|
|
|
|
def _logcdf(self, y, m, Omega):
|
|
return np.log(sc.gammainc(m, (m / Omega) * np.exp(2.0 * y)))
|
|
|
|
def _logsf(self, y, m, Omega):
|
|
return np.log(sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y)))
|
|
|
|
# ---------- PPF / ISF ----------
|
|
|
|
def _ppf(self, q, m, Omega):
|
|
# q = gammainc(m, (m/Omega)*e^{2y})
|
|
# => (m/Omega)*e^{2y} = gammaincinv(m, q)
|
|
# => y = 0.5 * ln(Omega * gammaincinv(m, q) / m)
|
|
return 0.5 * np.log(Omega * sc.gammaincinv(m, q) / m)
|
|
|
|
def _isf(self, q, m, Omega):
|
|
return 0.5 * np.log(Omega * sc.gammainccinv(m, q) / m)
|
|
|
|
# ---------- Moments & statistics ----------
|
|
|
|
def _stats(self, m, Omega):
|
|
# E[Y] = 0.5*(psi(m) - ln(m) + ln(Omega))
|
|
mu = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
|
|
# Var[Y] = 0.25 * psi_1(m) (Omega cancels — pure shift)
|
|
mu2 = 0.25 * sc.polygamma(1, m)
|
|
g1 = sc.polygamma(2, m) / sc.polygamma(1, m) ** 1.5
|
|
g2 = sc.polygamma(3, m) / sc.polygamma(1, m) ** 2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, m, Omega):
|
|
# H = -E[ln f(Y)] = -ln2 + ln Γ(m) - m*psi(m) + m
|
|
# Omega cancels exactly (translation invariance of differential entropy)
|
|
return -np.log(2.0) + sc.gammaln(m) - m * sc.digamma(m) + m
|
|
|
|
# ---------- Random variate generation ----------
|
|
|
|
def _rvs(self, m, Omega, size=None, random_state=None):
|
|
# X ~ Nakagami(m, Omega) => X^2 ~ Gamma(m, Omega/m)
|
|
g = random_state.gamma(m, Omega / m, size=size)
|
|
return 0.5 * np.log(g)
|
|
|
|
|
|
lognakagami = lognakagami_gen(name='lognakagami', shapes="m, Omega")
|
|
|
|
class loggamma_gen(rv_continuous):
|
|
r"""A Log-Gamma continuous random variable.
|
|
|
|
If X follows a Gamma distribution with shape a
|
|
and unit scale (beta = 1), then Y = ln X follows
|
|
the Log-Gamma distribution.
|
|
|
|
%(before_notes)s
|
|
|
|
Notes
|
|
-----
|
|
The probability density function for `loggamma_dist` is:
|
|
|
|
f(y, a) = 1 / Gamma(a) * exp(a*y - exp(y))
|
|
|
|
for y in (-inf, +inf), a > 0.
|
|
|
|
This is the standardised form (scale beta = 1).
|
|
|
|
`loggamma_dist` takes ``a`` as a shape parameter.
|
|
|
|
The non-unit scale beta is recovered through scipy's
|
|
``loc`` parameter: loc = ln(beta), since
|
|
ln(beta * X) = ln(beta) + ln(X).
|
|
|
|
See Also
|
|
--------
|
|
gamma : The Gamma distribution (before the log transform).
|
|
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _argcheck(self, a):
|
|
return a > 0
|
|
|
|
# ---------- PDF / log-PDF ----------
|
|
|
|
def _pdf(self, y, a):
|
|
return np.exp(self._logpdf(y, a))
|
|
|
|
def _logpdf(self, y, a):
|
|
# ln f = a*y - e^y - ln Γ(a)
|
|
return a * y - np.exp(y) - sc.gammaln(a)
|
|
|
|
# ---------- CDF / SF / log-CDF / log-SF ----------
|
|
|
|
def _cdf(self, y, a):
|
|
# P(Y <= y) = P(X <= e^y) = gammainc(a, e^y)
|
|
return sc.gammainc(a, np.exp(y))
|
|
|
|
def _sf(self, y, a):
|
|
return sc.gammaincc(a, np.exp(y))
|
|
|
|
def _logcdf(self, y, a):
|
|
return np.log(sc.gammainc(a, np.exp(y)))
|
|
|
|
def _logsf(self, y, a):
|
|
return np.log(sc.gammaincc(a, np.exp(y)))
|
|
|
|
# ---------- PPF / ISF ----------
|
|
|
|
def _ppf(self, q, a):
|
|
# Invert CDF: q = gammainc(a, e^y)
|
|
# => e^y = gammaincinv(a, q)
|
|
# => y = ln(gammaincinv(a, q))
|
|
return np.log(sc.gammaincinv(a, q))
|
|
|
|
def _isf(self, q, a):
|
|
return np.log(sc.gammainccinv(a, q))
|
|
|
|
# ---------- Moments & statistics ----------
|
|
|
|
def _stats(self, a):
|
|
# Y = ln(X), X ~ Gamma(a, 1)
|
|
# E[Y] = psi(a) (digamma)
|
|
# Var[Y] = psi_1(a) (trigamma)
|
|
# skew = psi_2(a) / psi_1(a)^{3/2}
|
|
# excess kurtosis = psi_3(a) / psi_1(a)^2
|
|
mu = sc.digamma(a)
|
|
mu2 = sc.polygamma(1, a)
|
|
g1 = sc.polygamma(2, a) / mu2 ** 1.5
|
|
g2 = sc.polygamma(3, a) / mu2 ** 2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, a):
|
|
# H(Y) = -E[ln f(Y)] = -E[a*Y - e^Y - ln Γ(a)]
|
|
# = ln Γ(a) - a*psi(a) + E[e^Y]
|
|
# E[e^Y] = E[X] = a (for Gamma(a, 1))
|
|
return sc.gammaln(a) - a * sc.digamma(a) + a
|
|
|
|
# ---------- Random variate generation ----------
|
|
|
|
def _rvs(self, a, size=None, random_state=None):
|
|
# Generate Gamma(a, 1) variates, then take log.
|
|
# For small a, use Gamma(a) ~ Gamma(a+1) * U^(1/a)
|
|
# so ln Gamma(a) ~ ln Gamma(a+1) + ln(U)/a
|
|
# This avoids precision loss when a << 1.
|
|
return (np.log(random_state.gamma(a + 1, size=size))
|
|
+ np.log(random_state.uniform(size=size)) / a)
|
|
|
|
|
|
loggamma_dist = loggamma_gen(name='loggamma_dist') |