Introduce logk_gen (Y = ln X where X ~ K) with analytically derived mean/variance via the CGF, numerically stable logpdf using an asymptotic Bessel expansion for large arguments, CDF delegation to k_dist, and a compound-gamma rvs sampler. Add _rvs to k_dist via the same compound-gamma algorithm and extend TestKDistPdf with stats and rvs coverage. Add a full TestLogK suite covering pdf normalization, change-of-variable identity, CDF consistency, analytical moment checks, and rvs moment checks. Module-level docstring added to distributions.py
710 lines
26 KiB
Python
710 lines
26 KiB
Python
"""
|
|
Custom continuous probability distributions for radar clutter modelling.
|
|
|
|
All classes inherit from ``scipy.stats.rv_continuous``. After instantiation
|
|
each distribution object exposes the full scipy public API summarised below.
|
|
Parameters ``loc`` and ``scale`` shift and rescale the support; shape
|
|
parameters (e.g. *mu*, *alpha*, *beta*) are passed as keyword arguments.
|
|
|
|
Public methods (from rv_continuous)
|
|
------------------------------------
|
|
rvs(*args, loc=0, scale=1, size=1, random_state=None)
|
|
Draw random variates.
|
|
|
|
pdf(x, *args, loc=0, scale=1)
|
|
Probability density function evaluated at *x*.
|
|
|
|
logpdf(x, *args, loc=0, scale=1)
|
|
Natural logarithm of the PDF; numerically preferred when values are small.
|
|
|
|
cdf(x, *args, loc=0, scale=1)
|
|
Cumulative distribution function: P(X <= x).
|
|
|
|
logcdf(x, *args, loc=0, scale=1)
|
|
Log of the CDF; avoids underflow in the far left tail.
|
|
|
|
sf(x, *args, loc=0, scale=1)
|
|
Survival function: 1 - CDF, i.e. P(X > x).
|
|
|
|
logsf(x, *args, loc=0, scale=1)
|
|
Log of the survival function; avoids underflow in the far right tail.
|
|
|
|
ppf(q, *args, loc=0, scale=1)
|
|
Percent-point function (quantile): inverse of the CDF.
|
|
|
|
isf(q, *args, loc=0, scale=1)
|
|
Inverse survival function: inverse of sf.
|
|
|
|
moment(order, *args, loc=0, scale=1)
|
|
Non-central raw moment of the specified integer order.
|
|
|
|
stats(*args, loc=0, scale=1, moments='mv')
|
|
Summary statistics selected by *moments* string: 'm' mean, 'v' variance,
|
|
's' skewness, 'k' excess kurtosis.
|
|
|
|
entropy(*args, loc=0, scale=1)
|
|
Differential (Shannon) entropy of the distribution.
|
|
|
|
expect(func, args, loc=0, scale=1, lb=None, ub=None, conditional=False)
|
|
Expected value of *func(x)* computed by numerical integration.
|
|
|
|
median(*args, loc=0, scale=1)
|
|
Median of the distribution.
|
|
|
|
mean(*args, loc=0, scale=1)
|
|
Mean of the distribution.
|
|
|
|
std(*args, loc=0, scale=1)
|
|
Standard deviation of the distribution.
|
|
|
|
var(*args, loc=0, scale=1)
|
|
Variance of the distribution.
|
|
|
|
interval(confidence, *args, loc=0, scale=1)
|
|
Confidence interval with equal probability mass on each side of the median.
|
|
|
|
__call__(*args, loc=0, scale=1)
|
|
Freeze the distribution — returns a frozen instance with fixed parameters,
|
|
so methods can be called without repeating shape arguments.
|
|
|
|
fit(data, *args, **kwds)
|
|
Maximum-likelihood estimates of shape, loc, and scale from *data*.
|
|
|
|
fit_loc_scale(data, *args)
|
|
Quick loc and scale estimates via method of moments (mean and variance).
|
|
|
|
nnlf(theta, x)
|
|
Negative log-likelihood for parameter vector *theta* and observations *x*.
|
|
|
|
support(*args, loc=0, scale=1)
|
|
Lower and upper endpoints of the distribution's support.
|
|
|
|
Overrideable private methods
|
|
-----------------------------
|
|
Subclasses customise behaviour by implementing the private counterparts
|
|
listed below. When a private method is *not* overridden scipy falls back to
|
|
a default implementation — often a slower or less precise one. Override to
|
|
gain speed or numerical accuracy.
|
|
|
|
_argcheck(*args)
|
|
Validate shape parameters; return a boolean array.
|
|
Default: always ``True``. Should be overridden to reject invalid values.
|
|
|
|
_get_support(*args)
|
|
Return ``(lower, upper)`` endpoints of the support as a function of the
|
|
shape parameters. Default: returns the ``(a, b)`` constants passed at
|
|
construction time.
|
|
|
|
_pdf(x, *args)
|
|
Core of the density. No default — must be implemented (or ``_logpdf``).
|
|
|
|
_logpdf(x, *args)
|
|
Log-density. Default: ``log(_pdf(x))``, which loses precision when
|
|
``_pdf`` underflows to zero. Override whenever a stable closed form
|
|
exists.
|
|
|
|
_cdf(x, *args)
|
|
Core of the cumulative distribution function. Default: numerical
|
|
integration of ``_pdf`` from the lower support boundary — slow.
|
|
|
|
_sf(x, *args)
|
|
Survival function P(X > x). Default: ``1 - _cdf(x)``, which loses
|
|
significant digits when ``_cdf(x)`` is close to 1 (far right tail).
|
|
Override with a direct formula whenever one exists.
|
|
|
|
_logcdf(x, *args)
|
|
Log of the CDF. Default: ``log(_cdf(x))``.
|
|
|
|
_logsf(x, *args)
|
|
Log of the survival function. Default: ``log(_sf(x))`` = ``log(1 -
|
|
_cdf(x))``, which is catastrophically inaccurate in the far right tail.
|
|
Override to avoid cancellation, e.g. via ``log1p(-_cdf(x))`` or a
|
|
complementary special function.
|
|
|
|
_ppf(q, *args)
|
|
Percent-point (quantile) function. Default: numerical inversion of
|
|
``_cdf`` — slow. Override with a closed-form inverse when available.
|
|
|
|
_isf(q, *args)
|
|
Inverse survival function. Default: ``_ppf(1 - q)``, which loses
|
|
precision for small *q* (extreme quantiles). Override when the
|
|
complementary special function provides a direct inverse.
|
|
|
|
_stats(*args, moments='mv')
|
|
Return ``(mean, variance, skewness, excess_kurtosis)``; use ``None`` for
|
|
moments you do not compute. Default: numerical integration — override
|
|
with analytical expressions for speed and accuracy.
|
|
|
|
_munp(n, *args)
|
|
Non-central raw moment of integer order *n*. Default: numerical
|
|
integration. Implement when closed-form moments are available and
|
|
``_stats`` is not sufficient.
|
|
|
|
_entropy(*args)
|
|
Differential entropy. Default: numerical integration of ``-f log f``.
|
|
Override with a closed-form expression when one exists.
|
|
|
|
_rvs(*args, size=None, random_state=None)
|
|
Random variate sampler. Default: CDF-inversion via ``_ppf`` — slow for
|
|
distributions without a closed-form quantile function. Override with
|
|
a direct simulation algorithm (e.g. a compound-distribution sampler).
|
|
|
|
Numerical accuracy summary
|
|
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
Priority order for overriding to improve tail accuracy:
|
|
|
|
1. ``_logsf`` / ``_logcdf`` — first line of defence against underflow.
|
|
2. ``_sf`` — avoids ``1 - cdf`` cancellation.
|
|
3. ``_isf`` — avoids ``ppf(1 - q)`` cancellation.
|
|
|
|
"""
|
|
|
|
from scipy.stats import rv_continuous, ncx2
|
|
from scipy.special import kve, gammaln
|
|
from scipy.integrate import quad
|
|
import scipy.special as sc
|
|
from scipy.stats._distn_infrastructure import _ShapeInfo
|
|
import numpy as np
|
|
|
|
|
|
class k_gen(rv_continuous):
|
|
"""Generalized K distribution for radar clutter modeling.
|
|
|
|
The probability density function is:
|
|
|
|
f(x; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta))
|
|
* (alpha*beta/mu)^((alpha+beta)/2)
|
|
* x^((alpha+beta)/2 - 1)
|
|
* K_{alpha-beta}(2*sqrt(alpha*beta*x/mu))
|
|
|
|
for x > 0, where K_{alpha-beta} is the modified Bessel function of the
|
|
second kind of order alpha-beta, mu > 0 is the mean, and alpha > 0,
|
|
beta > 0 are shape parameters.
|
|
|
|
The standard (2-parameter) K distribution is the special case beta=1,
|
|
with nu=alpha and b=alpha/mu.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("mu", domain=(0, np.inf), inclusive=(False, True)),
|
|
_ShapeInfo("alpha", domain=(0, np.inf), inclusive=(True, True)),
|
|
_ShapeInfo("beta", domain=(0, np.inf), inclusive=(True, True)),
|
|
]
|
|
|
|
def _argcheck(self, mu, alpha, beta):
|
|
return (mu > 0) & (alpha > 0) & (beta > 0)
|
|
|
|
def _pdf(self, x, mu, alpha, beta):
|
|
return np.exp(self._logpdf(x, mu, alpha, beta))
|
|
|
|
def _logpdf(self, x, mu, alpha, beta):
|
|
half_sum = (alpha + beta) / 2.0
|
|
log_ab_over_mu = np.log(alpha) + np.log(beta) - np.log(mu)
|
|
z = 2.0 * np.sqrt(alpha * beta * x / mu)
|
|
return (
|
|
np.log(2.0)
|
|
- gammaln(alpha)
|
|
- gammaln(beta)
|
|
+ half_sum * log_ab_over_mu
|
|
+ (half_sum - 1.0) * np.log(x)
|
|
+ np.log(kve(alpha - beta, z))
|
|
- z
|
|
)
|
|
|
|
def _stats(self, mu, alpha, beta, moments="mv"):
|
|
mean = mu if "m" in moments or "v" in moments or "s" in moments or "k" in moments else None
|
|
|
|
variance = None
|
|
if "v" in moments or "s" in moments or "k" in moments:
|
|
second_raw = (mu ** 2) * sc.poch(alpha, 2) * sc.poch(beta, 2) / ((alpha * beta) ** 2)
|
|
variance = second_raw - mean ** 2
|
|
|
|
skewness = None
|
|
if "s" in moments or "k" in moments:
|
|
third_raw = (mu ** 3) * sc.poch(alpha, 3) * sc.poch(beta, 3) / ((alpha * beta) ** 3)
|
|
third_central = third_raw - 3.0 * mean * second_raw + 2.0 * mean ** 3
|
|
skewness = third_central / np.power(variance, 1.5)
|
|
|
|
kurtosis = None
|
|
if "k" in moments:
|
|
fourth_raw = (mu ** 4) * sc.poch(alpha, 4) * sc.poch(beta, 4) / ((alpha * beta) ** 4)
|
|
fourth_central = fourth_raw - 4.0 * mean * third_raw + 6.0 * mean ** 2 * second_raw - 3.0 * mean ** 4
|
|
kurtosis = fourth_central / (variance ** 2) - 3.0
|
|
|
|
return mean, variance, skewness, kurtosis
|
|
|
|
def _cdf(self, x, mu, alpha, beta):
|
|
# scipy broadcasts params to match x's shape before calling _cdf,
|
|
# so mu/alpha/beta may be arrays. Pass all four to np.vectorize so
|
|
# each argument arrives as a scalar inside _scalar.
|
|
# Substitution u = sqrt(x) regularises the integrand near u=0:
|
|
# f(u²)*2u ~ u^(alpha+beta-1), smooth for alpha,beta > 0.
|
|
def _scalar(xi, mui, ai, bi):
|
|
val, _ = quad(
|
|
lambda u: float(self._pdf(float(u * u), float(mui), float(ai), float(bi))) * 2.0 * u,
|
|
0.0, float(np.sqrt(xi)),
|
|
limit=200, epsabs=1.49e-10, epsrel=1.49e-8,
|
|
)
|
|
return val
|
|
return np.vectorize(_scalar)(x, mu, alpha, beta)
|
|
|
|
def _rvs(self, mu, alpha, beta, size=None, random_state=None):
|
|
# Compound gamma: tau ~ Gamma(beta, mu/beta), X|tau ~ Gamma(alpha, tau/alpha)
|
|
tau = random_state.gamma(beta, mu / beta, size=size)
|
|
return random_state.gamma(alpha, tau / alpha)
|
|
|
|
def fit(self, data, *args, **kwds):
|
|
if ("loc" in kwds and kwds["loc"] != 0.0) or ("floc" in kwds and kwds["floc"] != 0.0):
|
|
raise ValueError("k_distribution uses a fixed loc=0; use mu to control the mean/scale.")
|
|
if ("scale" in kwds and kwds["scale"] != 1.0) or ("fscale" in kwds and kwds["fscale"] != 1.0):
|
|
raise ValueError("k_distribution uses a fixed scale=1; use mu to control the mean/scale.")
|
|
|
|
kwds.pop("loc", None)
|
|
kwds.pop("scale", None)
|
|
return super().fit(data, *args, floc=0.0, fscale=1.0, **kwds)
|
|
|
|
|
|
k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta")
|
|
|
|
|
|
class logk_gen(rv_continuous):
|
|
"""Log-K continuous random variable.
|
|
|
|
Y = ln(X) where X ~ K(mu, alpha, beta). The PDF is:
|
|
|
|
f(y; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta))
|
|
* (alpha*beta/mu)^((alpha+beta)/2)
|
|
* exp((alpha+beta)/2 * y)
|
|
* K_{alpha-beta}(2*sqrt(alpha*beta*exp(y)/mu))
|
|
|
|
for y in (-inf, +inf), mu > 0, alpha > 0, beta > 0.
|
|
|
|
The cumulants of Y follow from the CGF
|
|
K(t) = t*ln(mu/(alpha*beta)) + lnGamma(alpha+t) - lnGamma(alpha)
|
|
+ lnGamma(beta+t) - lnGamma(beta),
|
|
giving:
|
|
E[Y] = ln(mu) - ln(alpha) - ln(beta) + psi(alpha) + psi(beta)
|
|
Var[Y] = psi_1(alpha) + psi_1(beta)
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("mu", domain=(0, np.inf), inclusive=(False, True)),
|
|
_ShapeInfo("alpha", domain=(0, np.inf), inclusive=(True, True)),
|
|
_ShapeInfo("beta", domain=(0, np.inf), inclusive=(True, True)),
|
|
]
|
|
|
|
def _argcheck(self, mu, alpha, beta):
|
|
return (mu > 0) & (alpha > 0) & (beta > 0)
|
|
|
|
@staticmethod
|
|
def _log_kve(v, z):
|
|
"""log(kve(v, z)) = log(Kv(z)) + z, stable for all z > 0.
|
|
|
|
For large z, kve(v, z) ≈ sqrt(π/(2z)) underflows to 0 in float64,
|
|
making log(kve) return -inf/-nan. The asymptotic expansion:
|
|
|
|
log(kve(v,z)) ≈ 0.5*log(π/(2z)) + log1p((4v²-1)/(8z) +
|
|
(4v²-1)(4v²-9)/(128z²))
|
|
|
|
is used whenever the direct evaluation would underflow.
|
|
"""
|
|
z = np.asarray(z, dtype=float)
|
|
v = np.asarray(v, dtype=float)
|
|
kve_val = kve(v, z)
|
|
# Asymptotic (2-term Hankel expansion) — accurate to O(z^{-3})
|
|
mu_v = 4.0 * v ** 2
|
|
log_asymp = (
|
|
0.5 * (np.log(np.pi) - np.log(2.0) - np.log(np.where(z > 0, z, 1.0)))
|
|
+ np.log1p((mu_v - 1.0) / (8.0 * np.where(z > 0, z, 1.0))
|
|
+ (mu_v - 1.0) * (mu_v - 9.0) / (128.0 * np.where(z > 0, z**2, 1.0)))
|
|
)
|
|
return np.where(kve_val > 0, np.log(np.where(kve_val > 0, kve_val, 1.0)), log_asymp)
|
|
|
|
def _pdf(self, y, mu, alpha, beta):
|
|
return np.exp(self._logpdf(y, mu, alpha, beta))
|
|
|
|
def _logpdf(self, y, mu, alpha, beta):
|
|
half_sum = (alpha + beta) / 2.0
|
|
log_ab_over_mu = np.log(alpha) + np.log(beta) - np.log(mu)
|
|
z = 2.0 * np.sqrt(alpha * beta * np.exp(y) / mu)
|
|
return (
|
|
np.log(2.0)
|
|
- gammaln(alpha)
|
|
- gammaln(beta)
|
|
+ half_sum * log_ab_over_mu
|
|
+ half_sum * y
|
|
+ self._log_kve(alpha - beta, z)
|
|
- z
|
|
)
|
|
|
|
def _stats(self, mu, alpha, beta):
|
|
mean = np.log(mu) - np.log(alpha) - np.log(beta) + sc.digamma(alpha) + sc.digamma(beta)
|
|
var = sc.polygamma(1, alpha) + sc.polygamma(1, beta)
|
|
g1 = (sc.polygamma(2, alpha) + sc.polygamma(2, beta)) / var**1.5
|
|
g2 = (sc.polygamma(3, alpha) + sc.polygamma(3, beta)) / var**2
|
|
return mean, var, g1, g2
|
|
|
|
def _cdf(self, y, mu, alpha, beta):
|
|
return k_dist.cdf(np.exp(y), mu, alpha, beta)
|
|
|
|
def _rvs(self, mu, alpha, beta, size=None, random_state=None):
|
|
tau = random_state.gamma(beta, mu / beta, size=size)
|
|
sample = random_state.gamma(alpha, tau / alpha)
|
|
return np.log(np.clip(sample, np.finfo(float).tiny, None))
|
|
|
|
def fit(self, data, *args, **kwds):
|
|
if ("loc" in kwds and kwds["loc"] != 0.0) or ("floc" in kwds and kwds["floc"] != 0.0):
|
|
raise ValueError("logk uses a fixed loc=0.")
|
|
if ("scale" in kwds and kwds["scale"] != 1.0) or ("fscale" in kwds and kwds["fscale"] != 1.0):
|
|
raise ValueError("logk uses a fixed scale=1.")
|
|
kwds.pop("loc", None)
|
|
kwds.pop("scale", None)
|
|
# Supply data-driven initial guesses when none are provided so the
|
|
# optimizer starts close to the data instead of the default (1,1,1).
|
|
# E[Y] = ln(mu) + ln(alpha) + ln(beta) - digamma(alpha) - digamma(beta)
|
|
# => mu0 = exp(mean(data) + ln(a0) + ln(b0) - psi(a0) - psi(b0))
|
|
if not args:
|
|
alpha0, beta0 = 1.0, 1.0
|
|
mu0 = float(np.exp(
|
|
np.mean(data)
|
|
+ np.log(alpha0) + np.log(beta0)
|
|
- sc.digamma(alpha0) - sc.digamma(beta0)
|
|
))
|
|
args = (mu0, alpha0, beta0)
|
|
return super().fit(data, *args, floc=0.0, fscale=1.0, **kwds)
|
|
|
|
|
|
logk = logk_gen(name="logk", shapes="mu, alpha, beta")
|
|
|
|
|
|
class lognakagami_gen(rv_continuous):
|
|
"""Log-Nakagami continuous random variable.
|
|
|
|
The probability density function is:
|
|
|
|
f(y; m, Omega) = 2*m^m / (Gamma(m)*Omega^m)
|
|
* exp(2*m*y) * exp(-(m/Omega)*exp(2*y))
|
|
|
|
for y in (-inf, +inf), m >= 0.5, Omega > 0. Derived via the change of
|
|
variable Y = ln X on a Nakagami(m, Omega) variate; the Jacobian e^y
|
|
combines with x^(2m-1) to give the exp(2*m*y) term.
|
|
|
|
The mean and variance are:
|
|
|
|
E[Y] = 0.5 * (psi(m) - ln(m) + ln(Omega))
|
|
Var[Y] = 0.25 * psi_1(m)
|
|
|
|
where psi and psi_1 are the digamma and trigamma functions. The spread
|
|
Omega shifts the mean by 0.5*ln(Omega) but leaves all higher moments
|
|
unchanged; entropy is also Omega-independent.
|
|
|
|
`lognakagami` takes ``m`` and ``Omega`` as shape parameters.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("m", False, (0, np.inf), (False, False)),
|
|
_ShapeInfo("Omega", False, (0, np.inf), (False, False)),
|
|
]
|
|
|
|
def _argcheck(self, m, Omega):
|
|
return (m >= 0.5) & (Omega > 0)
|
|
|
|
|
|
def _pdf(self, y, m, Omega):
|
|
return np.exp(self._logpdf(y, m, Omega))
|
|
|
|
def _logpdf(self, y, m, Omega):
|
|
# ln f = ln2 + m*ln(m) - ln Γ(m) - m*ln(Ω) + 2m*y - (m/Ω)*exp(2y)
|
|
return (np.log(2.0)
|
|
+ sc.xlogy(m, m)
|
|
- sc.gammaln(m)
|
|
- m * np.log(Omega)
|
|
+ 2.0 * m * y
|
|
- (m / Omega) * np.exp(2.0 * y))
|
|
|
|
|
|
def _cdf(self, y, m, Omega):
|
|
# P(Y <= y) = P(X <= e^y) = gammainc(m, (m/Omega) * e^{2y})
|
|
return sc.gammainc(m, (m / Omega) * np.exp(2.0 * y))
|
|
|
|
def _sf(self, y, m, Omega):
|
|
return sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y))
|
|
|
|
def _logcdf(self, y, m, Omega):
|
|
return np.log(sc.gammainc(m, (m / Omega) * np.exp(2.0 * y)))
|
|
|
|
def _logsf(self, y, m, Omega):
|
|
return np.log(sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y)))
|
|
|
|
|
|
def _ppf(self, q, m, Omega):
|
|
# q = gammainc(m, (m/Omega)*e^{2y})
|
|
# => (m/Omega)*e^{2y} = gammaincinv(m, q)
|
|
# => y = 0.5 * ln(Omega * gammaincinv(m, q) / m)
|
|
return 0.5 * np.log(Omega * sc.gammaincinv(m, q) / m)
|
|
|
|
def _isf(self, q, m, Omega):
|
|
return 0.5 * np.log(Omega * sc.gammainccinv(m, q) / m)
|
|
|
|
def _stats(self, m, Omega):
|
|
# E[Y] = 0.5*(psi(m) - ln(m) + ln(Omega))
|
|
mu = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega))
|
|
# Var[Y] = 0.25 * psi_1(m) (Omega cancels — pure shift)
|
|
mu2 = 0.25 * sc.polygamma(1, m)
|
|
g1 = sc.polygamma(2, m) / sc.polygamma(1, m) ** 1.5
|
|
g2 = sc.polygamma(3, m) / sc.polygamma(1, m) ** 2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, m, Omega):
|
|
# H = -E[ln f(Y)] = -ln2 + ln Γ(m) - m*psi(m) + m
|
|
# Omega cancels exactly (translation invariance of differential entropy)
|
|
return -np.log(2.0) + sc.gammaln(m) - m * sc.digamma(m) + m
|
|
|
|
def _rvs(self, m, Omega, size=None, random_state=None):
|
|
# X ~ Nakagami(m, Omega) => X^2 ~ Gamma(m, Omega/m)
|
|
g = random_state.gamma(m, Omega / m, size=size)
|
|
return 0.5 * np.log(g)
|
|
|
|
|
|
lognakagami = lognakagami_gen(name='lognakagami', shapes="m, Omega")
|
|
|
|
class loggamma_gen(rv_continuous):
|
|
"""Log-Gamma continuous random variable.
|
|
|
|
The probability density function is:
|
|
|
|
f(y; a) = 1 / Gamma(a) * exp(a*y - exp(y))
|
|
|
|
for y in (-inf, +inf), a > 0. Derived via the change of variable
|
|
Y = ln X on a Gamma(a, 1) variate; the Jacobian e^y combines with
|
|
x^(a-1) to give the exp(a*y) term.
|
|
|
|
The mean and variance are:
|
|
|
|
E[Y] = psi(a)
|
|
Var[Y] = psi_1(a)
|
|
|
|
where psi and psi_1 are the digamma and trigamma functions.
|
|
|
|
The non-unit scale beta is recovered through scipy's ``loc`` parameter:
|
|
loc = ln(beta), since ln(beta * X) = ln(beta) + ln(X).
|
|
|
|
`loggamma_dist` takes ``a`` as a shape parameter.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("a", False, (0, np.inf), (False, False))]
|
|
|
|
def _argcheck(self, a):
|
|
return a > 0
|
|
|
|
def _pdf(self, y, a):
|
|
return np.exp(self._logpdf(y, a))
|
|
|
|
def _logpdf(self, y, a):
|
|
# ln f = a*y - e^y - ln Γ(a)
|
|
return a * y - np.exp(y) - sc.gammaln(a)
|
|
|
|
def _cdf(self, y, a):
|
|
# P(Y <= y) = P(X <= e^y) = gammainc(a, e^y)
|
|
return sc.gammainc(a, np.exp(y))
|
|
|
|
def _sf(self, y, a):
|
|
return sc.gammaincc(a, np.exp(y))
|
|
|
|
def _logcdf(self, y, a):
|
|
return np.log(sc.gammainc(a, np.exp(y)))
|
|
|
|
def _logsf(self, y, a):
|
|
return np.log(sc.gammaincc(a, np.exp(y)))
|
|
|
|
def _ppf(self, q, a):
|
|
# Invert CDF: q = gammainc(a, e^y)
|
|
# => e^y = gammaincinv(a, q)
|
|
# => y = ln(gammaincinv(a, q))
|
|
return np.log(sc.gammaincinv(a, q))
|
|
|
|
def _isf(self, q, a):
|
|
return np.log(sc.gammainccinv(a, q))
|
|
|
|
def _stats(self, a):
|
|
# Y = ln(X), X ~ Gamma(a, 1)
|
|
# E[Y] = psi(a) (digamma)
|
|
# Var[Y] = psi_1(a) (trigamma)
|
|
# skew = psi_2(a) / psi_1(a)^{3/2}
|
|
# excess kurtosis = psi_3(a) / psi_1(a)^2
|
|
mu = sc.digamma(a)
|
|
mu2 = sc.polygamma(1, a)
|
|
g1 = sc.polygamma(2, a) / mu2 ** 1.5
|
|
g2 = sc.polygamma(3, a) / mu2 ** 2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, a):
|
|
# H(Y) = -E[ln f(Y)] = -E[a*Y - e^Y - ln Γ(a)]
|
|
# = ln Γ(a) - a*psi(a) + E[e^Y]
|
|
# E[e^Y] = E[X] = a (for Gamma(a, 1))
|
|
return sc.gammaln(a) - a * sc.digamma(a) + a
|
|
|
|
def _rvs(self, a, size=None, random_state=None):
|
|
# Generate Gamma(a, 1) variates, then take log.
|
|
# For small a, use Gamma(a) ~ Gamma(a+1) * U^(1/a)
|
|
# so ln Gamma(a) ~ ln Gamma(a+1) + ln(U)/a
|
|
# This avoids precision loss when a << 1.
|
|
return (np.log(random_state.gamma(a + 1, size=size))
|
|
+ np.log(random_state.uniform(size=size)) / a)
|
|
|
|
|
|
loggamma_dist = loggamma_gen(name='loggamma_dist')
|
|
|
|
|
|
class lograyleigh_gen(rv_continuous):
|
|
"""Log-Rayleigh continuous random variable.
|
|
|
|
The probability density function is:
|
|
|
|
f(y; sigma) = (e^{2y} / sigma^2) * exp(-e^{2y} / (2*sigma^2))
|
|
|
|
for y in (-inf, +inf), sigma > 0. Derived via the change of variable
|
|
Y = ln X on a Rayleigh(sigma) variate; the Jacobian e^y combines with
|
|
x to give the e^{2y} term.
|
|
|
|
The mean and variance are:
|
|
|
|
E[Y] = 0.5 * (ln(2*sigma^2) + psi(1))
|
|
Var[Y] = pi^2 / 24
|
|
|
|
where psi is the digamma function. The scale sigma shifts the mean by
|
|
ln(sigma) but leaves all higher central moments unchanged; entropy is
|
|
also sigma-independent.
|
|
|
|
`lograyleigh` takes ``sigma`` as a shape parameter.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [_ShapeInfo("sigma", False, (0, np.inf), (False, False))]
|
|
|
|
def _argcheck(self, sigma):
|
|
return sigma > 0
|
|
|
|
def _pdf(self, y, sigma):
|
|
return np.exp(self._logpdf(y, sigma))
|
|
|
|
def _logpdf(self, y, sigma):
|
|
# ln f = 2y - 2*ln(sigma) - e^{2y} / (2*sigma^2)
|
|
return 2.0 * y - 2.0 * np.log(sigma) - np.exp(2.0 * y) / (2.0 * sigma**2)
|
|
|
|
def _cdf(self, y, sigma):
|
|
return 1.0 - np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2))
|
|
|
|
def _sf(self, y, sigma):
|
|
return np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2))
|
|
|
|
def _logcdf(self, y, sigma):
|
|
return np.log1p(-np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2)))
|
|
|
|
def _logsf(self, y, sigma):
|
|
return -np.exp(2.0 * y) / (2.0 * sigma**2)
|
|
|
|
def _ppf(self, q, sigma):
|
|
# q = 1 - exp(-e^{2y}/(2*sigma^2))
|
|
# => e^{2y} = -2*sigma^2 * ln(1-q)
|
|
# => y = 0.5 * ln(-2*sigma^2 * ln(1-q))
|
|
return 0.5 * np.log(-2.0 * sigma**2 * np.log1p(-q))
|
|
|
|
def _isf(self, q, sigma):
|
|
# q = exp(-e^{2y}/(2*sigma^2))
|
|
# => y = 0.5 * ln(-2*sigma^2 * ln(q))
|
|
return 0.5 * np.log(-2.0 * sigma**2 * np.log(q))
|
|
|
|
def _stats(self, sigma):
|
|
# Y = 0.5*ln(2) + 0.5*ln(W) + ln(sigma), W ~ Exp(1) = Gamma(1, 1)
|
|
# E[Y] = 0.5*(ln(2*sigma^2) + psi(1))
|
|
# Var[Y] = 0.25*psi_1(1) = pi^2/24
|
|
# Standardised skewness/kurtosis are sigma-independent (pure shift)
|
|
mu = 0.5 * (np.log(2.0 * sigma**2) + sc.digamma(1))
|
|
mu2 = 0.25 * sc.polygamma(1, 1)
|
|
g1 = sc.polygamma(2, 1) / sc.polygamma(1, 1)**1.5
|
|
g2 = sc.polygamma(3, 1) / sc.polygamma(1, 1)**2
|
|
return mu, mu2, g1, g2
|
|
|
|
def _entropy(self, sigma):
|
|
# H(Y) = 1 - ln(2) - psi(1) (sigma-independent; derived via H(X) - E[ln X])
|
|
return 1.0 - np.log(2.0) - sc.digamma(1)
|
|
|
|
def _rvs(self, sigma, size=None, random_state=None):
|
|
# X ~ Rayleigh(sigma) = sigma*sqrt(-2*ln(U)), U ~ Uniform(0,1)
|
|
u = random_state.uniform(size=size)
|
|
return np.log(sigma) + 0.5 * np.log(-2.0 * np.log(u))
|
|
|
|
|
|
lograyleigh = lograyleigh_gen(name='lograyleigh', shapes="sigma")
|
|
|
|
|
|
class logrice_gen(rv_continuous):
|
|
"""Log-Rice continuous random variable.
|
|
|
|
The probability density function is:
|
|
|
|
f(y; nu, sigma) = (e^{2y} / sigma^2)
|
|
* exp(-(e^{2y} + nu^2) / (2*sigma^2))
|
|
* I_0(e^y * nu / sigma^2)
|
|
|
|
for y in (-inf, +inf), nu >= 0, sigma > 0. Derived via the change of
|
|
variable Y = ln X on a Rice(nu, sigma) variate; the Jacobian e^y
|
|
combines with x to give the e^{2y} factor. I_0 is the modified Bessel
|
|
function of the first kind of order zero.
|
|
|
|
The CDF involves the Marcum Q-function of order 1:
|
|
|
|
F(y; nu, sigma) = 1 - Q_1(nu/sigma, e^y/sigma)
|
|
|
|
When nu = 0, the distribution reduces to Log-Rayleigh.
|
|
|
|
`logrice` takes ``nu`` and ``sigma`` as shape parameters.
|
|
"""
|
|
|
|
def _shape_info(self):
|
|
return [
|
|
_ShapeInfo("nu", False, (0, np.inf), (True, False)),
|
|
_ShapeInfo("sigma", False, (0, np.inf), (False, False)),
|
|
]
|
|
|
|
def _argcheck(self, nu, sigma):
|
|
return (nu >= 0) & (sigma > 0)
|
|
|
|
def _pdf(self, y, nu, sigma):
|
|
return np.exp(self._logpdf(y, nu, sigma))
|
|
|
|
def _logpdf(self, y, nu, sigma):
|
|
# ln f = 2y - 2*ln(sigma) - (e^{2y} + nu^2)/(2*sigma^2) + ln(I_0(z))
|
|
# Use ive(0, z)*exp(z) = I_0(z) for stability: ln(I_0(z)) = ln(ive(0,z)) + z
|
|
z = np.exp(y) * nu / sigma**2
|
|
return (2.0 * y
|
|
- 2.0 * np.log(sigma)
|
|
- (np.exp(2.0 * y) + nu**2) / (2.0 * sigma**2)
|
|
+ np.log(sc.ive(0, z)) + z)
|
|
|
|
def _cdf(self, y, nu, sigma):
|
|
# P(Y <= y) = 1 - Q_1(nu/sigma, e^y/sigma) = ncx2.cdf(e^{2y}/sigma^2, 2, nu^2/sigma^2)
|
|
return ncx2.cdf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2)
|
|
|
|
def _sf(self, y, nu, sigma):
|
|
return ncx2.sf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2)
|
|
|
|
def _logcdf(self, y, nu, sigma):
|
|
return ncx2.logcdf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2)
|
|
|
|
def _logsf(self, y, nu, sigma):
|
|
return ncx2.logsf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2)
|
|
|
|
def _rvs(self, nu, sigma, size=None, random_state=None):
|
|
# X ~ Rice(nu, sigma): norm of 2D Gaussian with mean (nu, 0) and std sigma
|
|
z1 = random_state.normal(nu, sigma, size=size)
|
|
z2 = random_state.normal(0.0, sigma, size=size)
|
|
return np.log(np.hypot(z1, z2))
|
|
|
|
|
|
logrice = logrice_gen(name='logrice', shapes="nu, sigma") |