diff --git a/etc/tests/test_distributions.py b/etc/tests/test_distributions.py index 9740051..884e8f2 100644 --- a/etc/tests/test_distributions.py +++ b/etc/tests/test_distributions.py @@ -7,7 +7,7 @@ import os sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) -from tools.distributions import k_dist, lognakagami, loggamma_dist +from tools.distributions import k_dist, lognakagami, loggamma_dist, lograyleigh, logrice X = np.linspace(0.01, 10.0, 500) @@ -340,6 +340,180 @@ class TestLogGamma: assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean +# ── lograyleigh unit tests ──────────────────────────────────────────────────── + + +class TestLogRayleigh: + def test_logpdf_is_finite_on_real_line(self): + """logpdf must be finite for all real y — tests positivity without float64 underflow.""" + log_vals = lograyleigh.logpdf(Y, sigma=2.0) + assert np.all(np.isfinite(log_vals)) + + def test_pdf_integrates_to_one(self): + """Numerical integral of PDF over the real line should be ≈ 1.""" + y_fine = np.linspace(-20, 10, 200_000) + integral = np.trapezoid(lograyleigh.pdf(y_fine, sigma=2.0), y_fine) + assert pytest.approx(integral, abs=1e-3) == 1.0 + + def test_logpdf_equals_log_pdf(self): + """logpdf must equal log(pdf) at points where pdf does not underflow.""" + y_bulk = np.linspace(-5.0, 2.0, 50) + log_via_pdf = np.log(lograyleigh.pdf(y_bulk, sigma=2.0)) + log_direct = lograyleigh.logpdf(y_bulk, sigma=2.0) + np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6) + + def test_cdf_is_monotone_increasing(self): + """CDF must be strictly non-decreasing.""" + cdf_vals = lograyleigh.cdf(Y, sigma=2.0) + assert np.all(np.diff(cdf_vals) >= 0) + + def test_cdf_and_sf_sum_to_one(self): + """CDF + SF must equal 1 at every point.""" + cdf_vals = lograyleigh.cdf(Y, sigma=2.0) + sf_vals = lograyleigh.sf(Y, sigma=2.0) + np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12) + + def test_ppf_inverts_cdf(self): + """ppf(cdf(y)) must recover y.""" + y_test = np.array([-2.0, 0.0, 1.0]) + cdf_vals = lograyleigh.cdf(y_test, sigma=2.0) + np.testing.assert_allclose(lograyleigh.ppf(cdf_vals, sigma=2.0), y_test, atol=1e-8) + + def test_isf_inverts_sf(self): + """isf(sf(y)) must recover y.""" + y_test = np.array([-1.0, 0.5, 1.5]) + sf_vals = lograyleigh.sf(y_test, sigma=2.0) + np.testing.assert_allclose(lograyleigh.isf(sf_vals, sigma=2.0), y_test, atol=1e-8) + + def test_argcheck_rejects_non_positive_sigma(self): + """sigma <= 0 must not produce a valid (positive-finite) PDF value.""" + val = lograyleigh.pdf(0.0, sigma=-1.0) + assert not (np.isfinite(val) and val > 0) + + def test_stats_mean(self): + """Analytical mean must equal 0.5 * (log(2*sigma^2) + digamma(1)).""" + sigma = 2.0 + expected_mean = 0.5 * (np.log(2.0 * sigma**2) + sc.digamma(1)) + dist_mean = float(lograyleigh.stats(sigma=sigma, moments="m")) + assert pytest.approx(dist_mean, rel=1e-10) == expected_mean + + def test_stats_mean_shifts_with_sigma(self): + """Changing sigma shifts the mean by log(sigma2/sigma1) and leaves variance unchanged.""" + mean1 = float(lograyleigh.stats(sigma=1.0, moments="m")) + mean4 = float(lograyleigh.stats(sigma=4.0, moments="m")) + assert pytest.approx(mean4 - mean1, rel=1e-10) == np.log(4.0) + + def test_stats_variance_equals_pi_squared_over_24(self): + """Variance must equal pi^2/24 and be independent of sigma.""" + expected_var = np.pi**2 / 24.0 + for sigma in [0.5, 1.0, 3.0]: + _, dist_var, *_ = lograyleigh.stats(sigma=sigma, moments="mv") + assert pytest.approx(float(dist_var), rel=1e-10) == expected_var + + def test_log_transform_relation_to_rayleigh(self): + """lograyleigh.pdf(y) must equal rayleigh.pdf(exp(y)) * exp(y) (change-of-variable).""" + from scipy.stats import rayleigh + + y_test = np.linspace(-3.0, 3.0, 20) + sigma = 2.0 + direct = lograyleigh.pdf(y_test, sigma=sigma) + via_rayleigh = rayleigh.pdf(np.exp(y_test), scale=sigma) * np.exp(y_test) + np.testing.assert_allclose(direct, via_rayleigh, rtol=1e-6) + + def test_rvs_samples_are_finite(self): + """Random samples must be finite real numbers.""" + rng = np.random.default_rng(42) + samples = lograyleigh.rvs(sigma=2.0, size=200, random_state=rng) + assert samples.shape == (200,) + assert np.all(np.isfinite(samples)) + + def test_rvs_sample_mean_near_expected(self): + """Sample mean of many RVS should be close to the distribution mean.""" + sigma = 2.0 + rng = np.random.default_rng(0) + samples = lograyleigh.rvs(sigma=sigma, size=50_000, random_state=rng) + expected_mean = float(lograyleigh.stats(sigma=sigma, moments="m")) + assert pytest.approx(samples.mean(), rel=5e-2) == expected_mean + + +# ── logrice unit tests ──────────────────────────────────────────────────────── + + +class TestLogRice: + def test_logpdf_is_finite_on_real_line(self): + """logpdf must be finite for all real y — tests positivity without float64 underflow.""" + log_vals = logrice.logpdf(Y, nu=1.0, sigma=2.0) + assert np.all(np.isfinite(log_vals)) + + def test_pdf_integrates_to_one(self): + """Numerical integral of PDF over the real line should be ≈ 1.""" + y_fine = np.linspace(-20, 10, 200_000) + integral = np.trapezoid(logrice.pdf(y_fine, nu=1.0, sigma=2.0), y_fine) + assert pytest.approx(integral, abs=1e-3) == 1.0 + + def test_logpdf_equals_log_pdf(self): + """logpdf must equal log(pdf) at points where pdf does not underflow.""" + y_bulk = np.linspace(-5.0, 2.0, 50) + log_via_pdf = np.log(logrice.pdf(y_bulk, nu=1.0, sigma=2.0)) + log_direct = logrice.logpdf(y_bulk, nu=1.0, sigma=2.0) + np.testing.assert_allclose(log_direct, log_via_pdf, rtol=1e-6) + + def test_cdf_is_monotone_increasing(self): + """CDF must be strictly non-decreasing.""" + cdf_vals = logrice.cdf(Y, nu=1.0, sigma=2.0) + assert np.all(np.diff(cdf_vals) >= 0) + + def test_cdf_and_sf_sum_to_one(self): + """CDF + SF must equal 1 at every point.""" + cdf_vals = logrice.cdf(Y, nu=1.0, sigma=2.0) + sf_vals = logrice.sf(Y, nu=1.0, sigma=2.0) + np.testing.assert_allclose(cdf_vals + sf_vals, 1.0, atol=1e-12) + + def test_argcheck_rejects_negative_nu(self): + """nu < 0 must not produce a valid (positive-finite) PDF value.""" + val = logrice.pdf(0.0, nu=-1.0, sigma=2.0) + assert not (np.isfinite(val) and val > 0) + + def test_argcheck_rejects_non_positive_sigma(self): + """sigma <= 0 must not produce a valid (positive-finite) PDF value.""" + val = logrice.pdf(0.0, nu=1.0, sigma=-1.0) + assert not (np.isfinite(val) and val > 0) + + def test_nu_zero_matches_lograyleigh(self): + """logrice with nu=0 must match lograyleigh exactly.""" + sigma = 2.0 + pdf_rice = logrice.pdf(Y, nu=0.0, sigma=sigma) + pdf_rayleigh = lograyleigh.pdf(Y, sigma=sigma) + np.testing.assert_allclose(pdf_rice, pdf_rayleigh, rtol=1e-6) + + def test_log_transform_relation_to_rice(self): + """logrice.pdf(y) must equal rice.pdf(exp(y)) * exp(y) (change-of-variable).""" + from scipy.stats import rice + + y_test = np.linspace(-2.0, 3.0, 20) + nu, sigma = 1.0, 2.0 + direct = logrice.pdf(y_test, nu=nu, sigma=sigma) + via_rice = rice.pdf(np.exp(y_test), b=nu / sigma, scale=sigma) * np.exp(y_test) + np.testing.assert_allclose(direct, via_rice, rtol=1e-6) + + def test_rvs_samples_are_finite(self): + """Random samples must be finite real numbers.""" + rng = np.random.default_rng(42) + samples = logrice.rvs(nu=1.0, sigma=2.0, size=200, random_state=rng) + assert samples.shape == (200,) + assert np.all(np.isfinite(samples)) + + def test_rvs_sample_mean_near_numerical_mean(self): + """Sample mean of many RVS should be close to the numerically integrated mean.""" + nu, sigma = 1.0, 2.0 + rng = np.random.default_rng(0) + samples = logrice.rvs(nu=nu, sigma=sigma, size=50_000, random_state=rng) + y_fine = np.linspace(-20, 10, 200_000) + pdf_vals = logrice.pdf(y_fine, nu=nu, sigma=sigma) + numerical_mean = np.trapezoid(y_fine * pdf_vals, y_fine) + assert pytest.approx(samples.mean(), rel=5e-2) == numerical_mean + + if __name__ == "__main__": plot_k_dist_varying_alpha() plot_k_dist_varying_mu() diff --git a/etc/tools/distributions.py b/etc/tools/distributions.py index 3f7cc58..8c6c488 100644 --- a/etc/tools/distributions.py +++ b/etc/tools/distributions.py @@ -1,4 +1,4 @@ -from scipy.stats import rv_continuous +from scipy.stats import rv_continuous, ncx2 from scipy.special import kve, gammaln from scipy.integrate import quad import scipy.special as sc @@ -9,7 +9,7 @@ import numpy as np class k_gen(rv_continuous): """Generalized K distribution for radar clutter modeling. - The probability density function is:: + The probability density function is: f(x; mu, alpha, beta) = 2 / (Gamma(alpha) * Gamma(beta)) * (alpha*beta/mu)^((alpha+beta)/2) @@ -103,22 +103,16 @@ k_dist = k_gen(a=0.0, name="k_distribution", shapes="mu, alpha, beta") class lognakagami_gen(rv_continuous): - r"""A Log-Nakagami continuous random variable. + """Log-Nakagami continuous random variable. - If X follows a Nakagami distribution with shape parameter m >= 0.5 - and spread Omega > 0, then Y = ln(X) follows the Log-Nakagami distribution. - - %(before_notes)s - - Notes - ----- - The probability density function for `lognakagami` is: + The probability density function is: f(y; m, Omega) = 2*m^m / (Gamma(m)*Omega^m) * exp(2*m*y) * exp(-(m/Omega)*exp(2*y)) - for y in (-inf, +inf). Derived via the change of variable Y = ln X, - whose Jacobian e^y combines with x^{2m-1} to give the e^{2my} term. + for y in (-inf, +inf), m >= 0.5, Omega > 0. Derived via the change of + variable Y = ln X on a Nakagami(m, Omega) variate; the Jacobian e^y + combines with x^(2m-1) to give the exp(2*m*y) term. The mean and variance are: @@ -130,10 +124,6 @@ class lognakagami_gen(rv_continuous): unchanged; entropy is also Omega-independent. `lognakagami` takes ``m`` and ``Omega`` as shape parameters. - - See Also - -------- - nakagami : The Nakagami distribution (before the log transform). """ def _shape_info(self): @@ -145,7 +135,6 @@ class lognakagami_gen(rv_continuous): def _argcheck(self, m, Omega): return (m >= 0.5) & (Omega > 0) - # ---------- PDF / log-PDF ---------- def _pdf(self, y, m, Omega): return np.exp(self._logpdf(y, m, Omega)) @@ -159,7 +148,6 @@ class lognakagami_gen(rv_continuous): + 2.0 * m * y - (m / Omega) * np.exp(2.0 * y)) - # ---------- CDF / SF / log-CDF / log-SF ---------- def _cdf(self, y, m, Omega): # P(Y <= y) = P(X <= e^y) = gammainc(m, (m/Omega) * e^{2y}) @@ -174,7 +162,6 @@ class lognakagami_gen(rv_continuous): def _logsf(self, y, m, Omega): return np.log(sc.gammaincc(m, (m / Omega) * np.exp(2.0 * y))) - # ---------- PPF / ISF ---------- def _ppf(self, q, m, Omega): # q = gammainc(m, (m/Omega)*e^{2y}) @@ -185,8 +172,6 @@ class lognakagami_gen(rv_continuous): def _isf(self, q, m, Omega): return 0.5 * np.log(Omega * sc.gammainccinv(m, q) / m) - # ---------- Moments & statistics ---------- - def _stats(self, m, Omega): # E[Y] = 0.5*(psi(m) - ln(m) + ln(Omega)) mu = 0.5 * (sc.digamma(m) - np.log(m) + np.log(Omega)) @@ -200,9 +185,7 @@ class lognakagami_gen(rv_continuous): # H = -E[ln f(Y)] = -ln2 + ln Γ(m) - m*psi(m) + m # Omega cancels exactly (translation invariance of differential entropy) return -np.log(2.0) + sc.gammaln(m) - m * sc.digamma(m) + m - - # ---------- Random variate generation ---------- - + def _rvs(self, m, Omega, size=None, random_state=None): # X ~ Nakagami(m, Omega) => X^2 ~ Gamma(m, Omega/m) g = random_state.gamma(m, Omega / m, size=size) @@ -212,34 +195,27 @@ class lognakagami_gen(rv_continuous): lognakagami = lognakagami_gen(name='lognakagami', shapes="m, Omega") class loggamma_gen(rv_continuous): - r"""A Log-Gamma continuous random variable. + """Log-Gamma continuous random variable. - If X follows a Gamma distribution with shape a - and unit scale (beta = 1), then Y = ln X follows - the Log-Gamma distribution. + The probability density function is: - %(before_notes)s + f(y; a) = 1 / Gamma(a) * exp(a*y - exp(y)) - Notes - ----- - The probability density function for `loggamma_dist` is: + for y in (-inf, +inf), a > 0. Derived via the change of variable + Y = ln X on a Gamma(a, 1) variate; the Jacobian e^y combines with + x^(a-1) to give the exp(a*y) term. - f(y, a) = 1 / Gamma(a) * exp(a*y - exp(y)) + The mean and variance are: - for y in (-inf, +inf), a > 0. + E[Y] = psi(a) + Var[Y] = psi_1(a) - This is the standardised form (scale beta = 1). + where psi and psi_1 are the digamma and trigamma functions. + + The non-unit scale beta is recovered through scipy's ``loc`` parameter: + loc = ln(beta), since ln(beta * X) = ln(beta) + ln(X). `loggamma_dist` takes ``a`` as a shape parameter. - - The non-unit scale beta is recovered through scipy's - ``loc`` parameter: loc = ln(beta), since - ln(beta * X) = ln(beta) + ln(X). - - See Also - -------- - gamma : The Gamma distribution (before the log transform). - """ def _shape_info(self): @@ -248,8 +224,6 @@ class loggamma_gen(rv_continuous): def _argcheck(self, a): return a > 0 - # ---------- PDF / log-PDF ---------- - def _pdf(self, y, a): return np.exp(self._logpdf(y, a)) @@ -257,8 +231,6 @@ class loggamma_gen(rv_continuous): # ln f = a*y - e^y - ln Γ(a) return a * y - np.exp(y) - sc.gammaln(a) - # ---------- CDF / SF / log-CDF / log-SF ---------- - def _cdf(self, y, a): # P(Y <= y) = P(X <= e^y) = gammainc(a, e^y) return sc.gammainc(a, np.exp(y)) @@ -272,8 +244,6 @@ class loggamma_gen(rv_continuous): def _logsf(self, y, a): return np.log(sc.gammaincc(a, np.exp(y))) - # ---------- PPF / ISF ---------- - def _ppf(self, q, a): # Invert CDF: q = gammainc(a, e^y) # => e^y = gammaincinv(a, q) @@ -283,8 +253,6 @@ class loggamma_gen(rv_continuous): def _isf(self, q, a): return np.log(sc.gammainccinv(a, q)) - # ---------- Moments & statistics ---------- - def _stats(self, a): # Y = ln(X), X ~ Gamma(a, 1) # E[Y] = psi(a) (digamma) @@ -303,8 +271,6 @@ class loggamma_gen(rv_continuous): # E[e^Y] = E[X] = a (for Gamma(a, 1)) return sc.gammaln(a) - a * sc.digamma(a) + a - # ---------- Random variate generation ---------- - def _rvs(self, a, size=None, random_state=None): # Generate Gamma(a, 1) variates, then take log. # For small a, use Gamma(a) ~ Gamma(a+1) * U^(1/a) @@ -314,4 +280,154 @@ class loggamma_gen(rv_continuous): + np.log(random_state.uniform(size=size)) / a) -loggamma_dist = loggamma_gen(name='loggamma_dist') \ No newline at end of file +loggamma_dist = loggamma_gen(name='loggamma_dist') + + +class lograyleigh_gen(rv_continuous): + """Log-Rayleigh continuous random variable. + + The probability density function is: + + f(y; sigma) = (e^{2y} / sigma^2) * exp(-e^{2y} / (2*sigma^2)) + + for y in (-inf, +inf), sigma > 0. Derived via the change of variable + Y = ln X on a Rayleigh(sigma) variate; the Jacobian e^y combines with + x to give the e^{2y} term. + + The mean and variance are: + + E[Y] = 0.5 * (ln(2*sigma^2) + psi(1)) + Var[Y] = pi^2 / 24 + + where psi is the digamma function. The scale sigma shifts the mean by + ln(sigma) but leaves all higher central moments unchanged; entropy is + also sigma-independent. + + `lograyleigh` takes ``sigma`` as a shape parameter. + """ + + def _shape_info(self): + return [_ShapeInfo("sigma", False, (0, np.inf), (False, False))] + + def _argcheck(self, sigma): + return sigma > 0 + + def _pdf(self, y, sigma): + return np.exp(self._logpdf(y, sigma)) + + def _logpdf(self, y, sigma): + # ln f = 2y - 2*ln(sigma) - e^{2y} / (2*sigma^2) + return 2.0 * y - 2.0 * np.log(sigma) - np.exp(2.0 * y) / (2.0 * sigma**2) + + def _cdf(self, y, sigma): + return 1.0 - np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2)) + + def _sf(self, y, sigma): + return np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2)) + + def _logcdf(self, y, sigma): + return np.log1p(-np.exp(-np.exp(2.0 * y) / (2.0 * sigma**2))) + + def _logsf(self, y, sigma): + return -np.exp(2.0 * y) / (2.0 * sigma**2) + + def _ppf(self, q, sigma): + # q = 1 - exp(-e^{2y}/(2*sigma^2)) + # => e^{2y} = -2*sigma^2 * ln(1-q) + # => y = 0.5 * ln(-2*sigma^2 * ln(1-q)) + return 0.5 * np.log(-2.0 * sigma**2 * np.log1p(-q)) + + def _isf(self, q, sigma): + # q = exp(-e^{2y}/(2*sigma^2)) + # => y = 0.5 * ln(-2*sigma^2 * ln(q)) + return 0.5 * np.log(-2.0 * sigma**2 * np.log(q)) + + def _stats(self, sigma): + # Y = 0.5*ln(2) + 0.5*ln(W) + ln(sigma), W ~ Exp(1) = Gamma(1, 1) + # E[Y] = 0.5*(ln(2*sigma^2) + psi(1)) + # Var[Y] = 0.25*psi_1(1) = pi^2/24 + # Standardised skewness/kurtosis are sigma-independent (pure shift) + mu = 0.5 * (np.log(2.0 * sigma**2) + sc.digamma(1)) + mu2 = 0.25 * sc.polygamma(1, 1) + g1 = sc.polygamma(2, 1) / sc.polygamma(1, 1)**1.5 + g2 = sc.polygamma(3, 1) / sc.polygamma(1, 1)**2 + return mu, mu2, g1, g2 + + def _entropy(self, sigma): + # H(Y) = 1 - ln(2) - psi(1) (sigma-independent; derived via H(X) - E[ln X]) + return 1.0 - np.log(2.0) - sc.digamma(1) + + def _rvs(self, sigma, size=None, random_state=None): + # X ~ Rayleigh(sigma) = sigma*sqrt(-2*ln(U)), U ~ Uniform(0,1) + u = random_state.uniform(size=size) + return np.log(sigma) + 0.5 * np.log(-2.0 * np.log(u)) + + +lograyleigh = lograyleigh_gen(name='lograyleigh', shapes="sigma") + + +class logrice_gen(rv_continuous): + """Log-Rice continuous random variable. + + The probability density function is: + + f(y; nu, sigma) = (e^{2y} / sigma^2) + * exp(-(e^{2y} + nu^2) / (2*sigma^2)) + * I_0(e^y * nu / sigma^2) + + for y in (-inf, +inf), nu >= 0, sigma > 0. Derived via the change of + variable Y = ln X on a Rice(nu, sigma) variate; the Jacobian e^y + combines with x to give the e^{2y} factor. I_0 is the modified Bessel + function of the first kind of order zero. + + The CDF involves the Marcum Q-function of order 1: + + F(y; nu, sigma) = 1 - Q_1(nu/sigma, e^y/sigma) + + When nu = 0, the distribution reduces to Log-Rayleigh. + + `logrice` takes ``nu`` and ``sigma`` as shape parameters. + """ + + def _shape_info(self): + return [ + _ShapeInfo("nu", False, (0, np.inf), (True, False)), + _ShapeInfo("sigma", False, (0, np.inf), (False, False)), + ] + + def _argcheck(self, nu, sigma): + return (nu >= 0) & (sigma > 0) + + def _pdf(self, y, nu, sigma): + return np.exp(self._logpdf(y, nu, sigma)) + + def _logpdf(self, y, nu, sigma): + # ln f = 2y - 2*ln(sigma) - (e^{2y} + nu^2)/(2*sigma^2) + ln(I_0(z)) + # Use ive(0, z)*exp(z) = I_0(z) for stability: ln(I_0(z)) = ln(ive(0,z)) + z + z = np.exp(y) * nu / sigma**2 + return (2.0 * y + - 2.0 * np.log(sigma) + - (np.exp(2.0 * y) + nu**2) / (2.0 * sigma**2) + + np.log(sc.ive(0, z)) + z) + + def _cdf(self, y, nu, sigma): + # P(Y <= y) = 1 - Q_1(nu/sigma, e^y/sigma) = ncx2.cdf(e^{2y}/sigma^2, 2, nu^2/sigma^2) + return ncx2.cdf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2) + + def _sf(self, y, nu, sigma): + return ncx2.sf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2) + + def _logcdf(self, y, nu, sigma): + return ncx2.logcdf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2) + + def _logsf(self, y, nu, sigma): + return ncx2.logsf(np.exp(2.0 * y) / sigma**2, 2, (nu / sigma)**2) + + def _rvs(self, nu, sigma, size=None, random_state=None): + # X ~ Rice(nu, sigma): norm of 2D Gaussian with mean (nu, 0) and std sigma + z1 = random_state.normal(nu, sigma, size=size) + z2 = random_state.normal(0.0, sigma, size=size) + return np.log(np.hypot(z1, z2)) + + +logrice = logrice_gen(name='logrice', shapes="nu, sigma") \ No newline at end of file