[MAIN] Change workdir files, add docstring in functions
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -30,3 +30,6 @@ build/
|
|||||||
data/*
|
data/*
|
||||||
!data/.gitkeep
|
!data/.gitkeep
|
||||||
|
|
||||||
|
# Claude Code
|
||||||
|
.claude/
|
||||||
|
|
||||||
|
|||||||
42
README.md
42
README.md
@@ -2,16 +2,49 @@
|
|||||||
|
|
||||||
Repositório para códigos de fitting de distribuições de dados de chuva e clutter.
|
Repositório para códigos de fitting de distribuições de dados de chuva e clutter.
|
||||||
|
|
||||||
## Instalação
|
## Gerenciamento de dependências
|
||||||
|
|
||||||
|
Este projeto usa [uv](https://docs.astral.sh/uv/) para gerenciar dependências e ambientes virtuais.
|
||||||
|
|
||||||
|
### Instalação do uv
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
pip install -e .
|
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Configurar o ambiente
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Cria o ambiente virtual e instala todas as dependências
|
||||||
|
uv sync
|
||||||
|
```
|
||||||
|
|
||||||
|
### Adicionar dependências
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Adicionar um pacote ao projeto
|
||||||
|
uv add <pacote>
|
||||||
|
|
||||||
|
# Adicionar dependência de desenvolvimento
|
||||||
|
uv add --dev <pacote>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Executar scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Executar um script dentro do ambiente virtual
|
||||||
|
uv run python scripts/meu_script.py
|
||||||
|
|
||||||
|
# Abrir o Jupyter
|
||||||
|
uv run jupyter notebook
|
||||||
|
```
|
||||||
|
|
||||||
|
As dependências do projeto estão declaradas em `pyproject.toml` e o lockfile `uv.lock` garante reprodutibilidade do ambiente.
|
||||||
|
|
||||||
## Uso nos notebooks
|
## Uso nos notebooks
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from clutter_chuva import Fitter
|
from etc import Fitter
|
||||||
from scipy.stats import gamma, weibull_min, lognorm
|
from scipy.stats import gamma, weibull_min, lognorm
|
||||||
|
|
||||||
fitter = Fitter(
|
fitter = Fitter(
|
||||||
@@ -27,8 +60,9 @@ fitter.histogram_with_fits().show()
|
|||||||
## Estrutura
|
## Estrutura
|
||||||
|
|
||||||
```text
|
```text
|
||||||
clutter_chuva/ # pacote principal (importável nos notebooks)
|
etc/ # pacote principal (importável nos notebooks)
|
||||||
fitting/ # Fitter e DistributionSummary
|
fitting/ # Fitter e DistributionSummary
|
||||||
|
tools/ # funções de visualização (plots, CDF)
|
||||||
notebooks/ # notebooks Jupyter
|
notebooks/ # notebooks Jupyter
|
||||||
scripts/ # scripts .py para execução em background
|
scripts/ # scripts .py para execução em background
|
||||||
data/ # dados (não versionados)
|
data/ # dados (não versionados)
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
from .fitting import Fitter, DistributionSummary
|
|
||||||
|
|
||||||
__all__ = ["Fitter", "DistributionSummary"]
|
|
||||||
4
etc/__init__.py
Normal file
4
etc/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from .fitting import Fitter, DistributionSummary
|
||||||
|
from . import tools
|
||||||
|
|
||||||
|
__all__ = ["Fitter", "DistributionSummary", "tools"]
|
||||||
@@ -51,30 +51,70 @@ class DistributionSummary:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def pvalue(self) -> float | None:
|
def pvalue(self) -> float | None:
|
||||||
"""p-value from the goodness-of-fit test, or None if not yet run."""
|
"""p-value from the goodness-of-fit test.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float or None
|
||||||
|
The p-value produced by the GoF test, or None if validate() has
|
||||||
|
not been called yet.
|
||||||
|
"""
|
||||||
return self.test_result.pvalue if self.test_result is not None else None
|
return self.test_result.pvalue if self.test_result is not None else None
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def gof_statistic(self) -> float | None:
|
def gof_statistic(self) -> float | None:
|
||||||
"""Test statistic from the goodness-of-fit test, or None if not yet run."""
|
"""Test statistic from the goodness-of-fit test.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
float or None
|
||||||
|
The GoF statistic value, or None if validate() has not been
|
||||||
|
called yet.
|
||||||
|
"""
|
||||||
return self.test_result.statistic if self.test_result is not None else None
|
return self.test_result.statistic if self.test_result is not None else None
|
||||||
|
|
||||||
@property
def mean(self) -> float:
    """Mean of the distribution evaluated at the fitted parameters.

    Returns
    -------
    float
        Result of ``distribution_object.mean`` called with the unpacked
        ``fit_result_params``.
    """
    mean_of = self.distribution_object.mean
    fitted_params = self.fit_result_params
    return mean_of(*fitted_params)
|
||||||
|
|
||||||
@property
def std(self) -> float:
    """Standard deviation of the distribution at the fitted parameters.

    Returns
    -------
    float
        Result of ``distribution_object.std`` called with the unpacked
        ``fit_result_params``.
    """
    std_of = self.distribution_object.std
    fitted_params = self.fit_result_params
    return std_of(*fitted_params)
|
||||||
|
|
||||||
@property
def var(self) -> float:
    """Variance of the distribution evaluated at the fitted parameters.

    Returns
    -------
    float
        Result of ``distribution_object.var`` called with the unpacked
        ``fit_result_params``.
    """
    var_of = self.distribution_object.var
    fitted_params = self.fit_result_params
    return var_of(*fitted_params)
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
|
"""Return a concise string representation of the distribution summary.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str
|
||||||
|
Single-line string showing the distribution name, fitted
|
||||||
|
parameters, mean, standard deviation, GoF statistic, and p-value.
|
||||||
|
"""
|
||||||
pval_str = f"{self.pvalue:.4f}" if self.pvalue is not None else "N/A"
|
pval_str = f"{self.pvalue:.4f}" if self.pvalue is not None else "N/A"
|
||||||
stat_str = f"{self.gof_statistic:.4f}" if self.gof_statistic is not None else "N/A"
|
stat_str = f"{self.gof_statistic:.4f}" if self.gof_statistic is not None else "N/A"
|
||||||
return (
|
return (
|
||||||
@@ -85,6 +125,13 @@ class DistributionSummary:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def __str__(self) -> str:
    """Return the informal string form, identical to ``__repr__``.

    Returns
    -------
    str
        Whatever ``self.__repr__()`` returns.
    """
    text = self.__repr__()
    return text
|
||||||
|
|
||||||
|
|
||||||
@@ -132,6 +179,19 @@ class Fitter:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, dist_list: list[rv_continuous], statistic_method: str = 'ad', **kwargs):
|
def __init__(self, dist_list: list[rv_continuous], statistic_method: str = 'ad', **kwargs):
|
||||||
|
"""Initialise the Fitter and build per-distribution summary objects.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
dist_list : list[rv_continuous]
|
||||||
|
Distributions to fit.
|
||||||
|
statistic_method : str, optional
|
||||||
|
Goodness-of-fit statistic passed to ``goodness_of_fit``
|
||||||
|
(default ``'ad'`` for Anderson-Darling).
|
||||||
|
**kwargs :
|
||||||
|
Per-distribution initial guesses and fixed parameters, keyed as
|
||||||
|
``<dist.name>_args`` (tuple) or ``<dist.name>_params`` (dict).
|
||||||
|
"""
|
||||||
self._dist: dict[str, DistributionSummary] = {}
|
self._dist: dict[str, DistributionSummary] = {}
|
||||||
self.dist_list = list(dist_list)
|
self.dist_list = list(dist_list)
|
||||||
for dist in dist_list:
|
for dist in dist_list:
|
||||||
@@ -146,7 +206,25 @@ class Fitter:
|
|||||||
|
|
||||||
#── Getter and setter with flexible keys (str or rv_continuous) ────────────────────────────────
|
#── Getter and setter with flexible keys (str or rv_continuous) ────────────────────────────────
|
||||||
def _resolve_key(self, key: str | rv_continuous) -> str:
|
def _resolve_key(self, key: str | rv_continuous) -> str:
|
||||||
"""Resolve a distribution name or rv_continuous object to its string key."""
|
"""Resolve a distribution name or ``rv_continuous`` object to its string key.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
key : str or rv_continuous
|
||||||
|
Either the distribution's string name or its ``rv_continuous``
|
||||||
|
object (whose ``.name`` attribute is used).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
str
|
||||||
|
The string key used in the internal ``_dist`` dictionary.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
KeyError
|
||||||
|
If the resolved name is not found among the registered
|
||||||
|
distributions.
|
||||||
|
"""
|
||||||
name = key.name if isinstance(key, rv_continuous) else key
|
name = key.name if isinstance(key, rv_continuous) else key
|
||||||
if name not in self._dist:
|
if name not in self._dist:
|
||||||
available = ', '.join(self._dist)
|
available = ', '.join(self._dist)
|
||||||
@@ -154,13 +232,58 @@ class Fitter:
|
|||||||
return name
|
return name
|
||||||
|
|
||||||
def __contains__(self, key: str | rv_continuous) -> bool:
|
def __contains__(self, key: str | rv_continuous) -> bool:
|
||||||
|
"""Check whether a distribution is registered with this Fitter.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
key : str or rv_continuous
|
||||||
|
Distribution name or ``rv_continuous`` object to look up.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
bool
|
||||||
|
True if the distribution is registered, False otherwise.
|
||||||
|
"""
|
||||||
name = key.name if isinstance(key, rv_continuous) else key
|
name = key.name if isinstance(key, rv_continuous) else key
|
||||||
return name in self._dist
|
return name in self._dist
|
||||||
|
|
||||||
def __getitem__(self, key: str | rv_continuous) -> DistributionSummary:
    """Look up the summary stored for a distribution.

    Parameters
    ----------
    key : str or rv_continuous
        Distribution name or ``rv_continuous`` object; it is passed
        through ``_resolve_key`` to obtain the dictionary key.

    Returns
    -------
    DistributionSummary
        The entry of the internal ``_dist`` mapping under the resolved key.

    Raises
    ------
    KeyError
        If the distribution is not registered.
    """
    registry = self._dist
    resolved = self._resolve_key(key)
    return registry[resolved]
|
||||||
|
|
||||||
def __setitem__(self, key: str | rv_continuous, summary: DistributionSummary) -> None:
    """Replace the summary stored for an already-registered distribution.

    Parameters
    ----------
    key : str or rv_continuous
        Distribution name or ``rv_continuous`` object; it is passed
        through ``_resolve_key`` to obtain the dictionary key.
    summary : DistributionSummary
        The summary object to store under that key.

    Raises
    ------
    TypeError
        If ``summary`` is not a :class:`DistributionSummary` instance.
    KeyError
        If the distribution is not registered.
    """
    # The type check runs before the key is resolved, so a wrong type is
    # reported even when the key is also unknown.
    if isinstance(summary, DistributionSummary):
        self._dist[self._resolve_key(key)] = summary
        return
    raise TypeError(f"Expected DistributionSummary, got {type(summary).__name__}.")
|
||||||
@@ -168,13 +291,19 @@ class Fitter:
|
|||||||
|
|
||||||
|
|
||||||
def fit(self, data: np.ndarray) -> None:
|
def fit(self, data: np.ndarray) -> None:
|
||||||
"""
|
"""Fit every distribution to *data* via MLE.
|
||||||
Fit every distribution to *data* via MLE.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
data : array-like
|
data : array-like
|
||||||
Input data. Only the absolute value is used.
|
Input data. Only the absolute value is used; the array is
|
||||||
|
flattened before fitting.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None
|
||||||
|
Fitted parameters are stored in-place inside each
|
||||||
|
:class:`DistributionSummary` held by this Fitter.
|
||||||
"""
|
"""
|
||||||
data_flat = np.abs(data).flatten()
|
data_flat = np.abs(data).flatten()
|
||||||
self._last_data_flat = data_flat
|
self._last_data_flat = data_flat
|
||||||
@@ -186,14 +315,24 @@ class Fitter:
|
|||||||
self._dist[dist.name] = _summary
|
self._dist[dist.name] = _summary
|
||||||
|
|
||||||
def validate(self, **kwargs) -> None:
|
def validate(self, **kwargs) -> None:
|
||||||
"""
|
"""Run the goodness-of-fit test on every previously fitted distribution.
|
||||||
Run the goodness-of-fit test on every previously fitted distribution.
|
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
**kwargs :
|
**kwargs :
|
||||||
Extra keyword arguments forwarded to goodness_of_fit()
|
Extra keyword arguments forwarded to ``scipy.stats.goodness_of_fit``
|
||||||
(e.g. n_mc_samples=100).
|
(e.g. ``n_mc_samples=100``).
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None
|
||||||
|
Test results are stored in-place inside each
|
||||||
|
:class:`DistributionSummary` held by this Fitter.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
RuntimeError
|
||||||
|
If :meth:`fit` has not been called before this method.
|
||||||
"""
|
"""
|
||||||
if not hasattr(self, '_last_data_flat'):
|
if not hasattr(self, '_last_data_flat'):
|
||||||
raise RuntimeError("No data available. Call fit() first.")
|
raise RuntimeError("No data available. Call fit() first.")
|
||||||
@@ -211,14 +350,32 @@ class Fitter:
|
|||||||
self._dist[dist.name] = _summary
|
self._dist[dist.name] = _summary
|
||||||
|
|
||||||
def summary(self) -> None:
    """Print every stored distribution summary, one ``print`` call each.

    Entries are printed in the iteration order of the internal ``_dist``
    mapping.

    Returns
    -------
    None
        Output goes to stdout via ``print``.
    """
    # Only the stored summaries are printed; the keys are not needed.
    for entry in self._dist.values():
        print(entry)
|
||||||
|
|
||||||
def plot_qq_plots(self) -> None:
|
def plot_qq_plots(self) -> None:
|
||||||
"""
|
"""Generate QQ plots for each fitted distribution against the data.
|
||||||
Generate QQ plots for each fitted distribution against the data.
|
|
||||||
Requires fit() and validate() to have been called.
|
A separate interactive Plotly figure is displayed for every
|
||||||
|
distribution that has been both fitted and validated. Distributions
|
||||||
|
that have not yet been validated are skipped with a printed warning.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None
|
||||||
|
Figures are rendered inline / in a browser via ``fig.show()``.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
RuntimeError
|
||||||
|
If :meth:`fit` has not been called before this method.
|
||||||
"""
|
"""
|
||||||
if not hasattr(self, '_last_data_flat'):
|
if not hasattr(self, '_last_data_flat'):
|
||||||
raise RuntimeError("No data available. Call fit() first.")
|
raise RuntimeError("No data available. Call fit() first.")
|
||||||
@@ -247,9 +404,22 @@ class Fitter:
|
|||||||
fig.show()
|
fig.show()
|
||||||
|
|
||||||
def histogram_with_fits(self) -> go.Figure:
|
def histogram_with_fits(self) -> go.Figure:
|
||||||
"""
|
"""Return an interactive histogram with overlaid fitted PDFs (Plotly).
|
||||||
Histogram of the data with overlaid PDFs (Plotly).
|
|
||||||
Requires fit() to have been called.
|
Builds a probability-density histogram of the data and overlays a
|
||||||
|
line trace for the PDF of each fitted distribution. Hover text shows
|
||||||
|
the p-value and GoF statistic for each curve. Distributions that
|
||||||
|
have not yet been fitted are skipped with a printed warning.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
plotly.graph_objects.Figure
|
||||||
|
Interactive Plotly figure ready to display with ``fig.show()``.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
RuntimeError
|
||||||
|
If :meth:`fit` has not been called before this method.
|
||||||
"""
|
"""
|
||||||
if not hasattr(self, '_last_data_flat'):
|
if not hasattr(self, '_last_data_flat'):
|
||||||
raise RuntimeError("No data available. Call fit() first.")
|
raise RuntimeError("No data available. Call fit() first.")
|
||||||
@@ -290,9 +460,22 @@ class Fitter:
|
|||||||
return fig
|
return fig
|
||||||
|
|
||||||
def histogram_with_fits_seaborn(self) -> plt.Figure:
|
def histogram_with_fits_seaborn(self) -> plt.Figure:
|
||||||
"""
|
"""Return a static histogram with overlaid fitted PDFs (Matplotlib/Seaborn).
|
||||||
Histogram of the data with overlaid PDFs (Matplotlib/Seaborn).
|
|
||||||
Requires fit() to have been called.
|
Builds a probability-density histogram using Seaborn and overlays a
|
||||||
|
line for the PDF of each fitted distribution. The legend entry for
|
||||||
|
each distribution includes its p-value. Distributions that have not
|
||||||
|
yet been fitted are skipped with a printed warning.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
matplotlib.figure.Figure
|
||||||
|
Matplotlib figure object.
|
||||||
|
|
||||||
|
Raises
|
||||||
|
------
|
||||||
|
RuntimeError
|
||||||
|
If :meth:`fit` has not been called before this method.
|
||||||
"""
|
"""
|
||||||
if not hasattr(self, '_last_data_flat'):
|
if not hasattr(self, '_last_data_flat'):
|
||||||
raise RuntimeError("No data available. Call fit() first.")
|
raise RuntimeError("No data available. Call fit() first.")
|
||||||
3
etc/tools/__init__.py
Normal file
3
etc/tools/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .plots import stacked_plot, noise_mean, calculate_cdf, plot_cdfs
|
||||||
|
|
||||||
|
__all__ = ["stacked_plot", "noise_mean", "calculate_cdf", "plot_cdfs"]
|
||||||
103
etc/tools/plots.py
Normal file
103
etc/tools/plots.py
Normal file
@@ -0,0 +1,103 @@
|
|||||||
|
import numpy as np
|
||||||
|
import plotly.graph_objects as go
|
||||||
|
from plotly.subplots import make_subplots
|
||||||
|
|
||||||
|
|
||||||
|
def stacked_plot(data):
    """Build a two-row figure: per-row mean magnitude above a heatmap.

    Parameters
    ----------
    data : array-like
        Input array. Singleton dimensions are removed with ``np.squeeze``
        and only absolute values are plotted. The result must still have
        an axis 1, since the mean is taken along it.

    Returns
    -------
    plotly.graph_objects.Figure
        Row 1 holds a line trace of the mean absolute value along axis 1;
        row 2 holds a heatmap of the transposed absolute values with the
        colour scale and both axes hidden.
    """
    # Compute the magnitude once; both traces are derived from it.
    magnitude = np.abs(np.squeeze(data))
    row_means = np.mean(magnitude, axis=1)

    fig = make_subplots(
        rows=2,
        cols=1,
        row_heights=[0.3, 0.7],
        shared_xaxes=True,
        vertical_spacing=0.01,
    )

    mean_trace = go.Scatter(y=row_means, name='Mean Power')
    fig.add_trace(mean_trace, row=1, col=1)

    heatmap_trace = go.Heatmap(z=magnitude.T, showscale=False, name='Heat Map')
    fig.add_trace(heatmap_trace, row=2, col=1)

    fig.update_layout(title='Mean DP Power and 2D Map', autosize=True)
    # Hide the axes of the heatmap panel only.
    fig.update_xaxes(visible=False, row=2, col=1)
    fig.update_yaxes(visible=False, row=2, col=1)

    return fig
|
||||||
|
|
||||||
|
|
||||||
|
def noise_mean(data):
    """Estimate the noise floor as a trimmed mean of absolute values.

    The absolute values are flattened and sorted, then the lowest 10% and
    the highest 10% (index cut-offs ``int(0.1 * n)`` and ``int(0.9 * n)``)
    are discarded before averaging.

    Parameters
    ----------
    data : array-like
        Input values of any shape. Lists, tuples and NumPy arrays are all
        accepted; the input is converted with ``np.asarray``.

    Returns
    -------
    float
        Mean of the retained central portion of the sorted absolute values.
        If trimming would leave nothing (a single-element input), the mean
        of all values is returned instead. An empty input yields NaN.
    """
    # np.asarray lets plain sequences through; the original called
    # ``data.flatten()``, which only exists on ndarrays.
    magnitudes = np.sort(np.abs(np.asarray(data)).ravel())
    count = len(magnitudes)

    lower = int(count * 0.1)
    upper = int(count * 0.9)
    central = magnitudes[lower:upper]

    # With one sample both cut-offs are 0 and the slice is empty; fall back
    # to the untrimmed data rather than returning NaN with a warning.
    if central.size == 0:
        central = magnitudes

    return np.mean(central)
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_cdf(data):
    """Compute the empirical cumulative distribution function of the data.

    Parameters
    ----------
    data : array-like
        Input values of any shape. Lists, tuples and NumPy arrays are all
        accepted; the input is converted with ``np.asarray`` and flattened.

    Returns
    -------
    tuple[numpy.ndarray, numpy.ndarray]
        ``(sorted_data, cdf)``, where ``sorted_data`` holds the values in
        ascending order and ``cdf[i]`` equals ``(i + 1) / n`` for ``n``
        values, so the last entry is 1. Both arrays are empty for empty
        input.
    """
    # np.asarray lets plain sequences through; the original called
    # ``data.flatten()``, which only exists on ndarrays.
    sorted_data = np.sort(np.asarray(data).ravel())
    count = len(sorted_data)
    cdf = np.arange(1, count + 1) / count
    return (sorted_data, cdf)
|
||||||
|
|
||||||
|
|
||||||
|
def plot_cdfs(data_list, labels):
    """Draw the empirical CDF of several datasets in one Plotly figure.

    Parameters
    ----------
    data_list : list of array-like
        Datasets to plot; each one is passed to :func:`calculate_cdf`.
    labels : list of str
        Trace names, paired with ``data_list`` by position using ``zip``,
        so surplus entries on either side are ignored.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure with one line trace per (dataset, label) pair.
    """
    figure = go.Figure()

    for dataset, trace_name in zip(data_list, labels):
        x_values, probabilities = calculate_cdf(dataset)
        cdf_trace = go.Scatter(
            x=x_values,
            y=probabilities,
            mode='lines',
            name=trace_name,
        )
        figure.add_trace(cdf_trace)

    figure.update_layout(
        title='CDF of Data',
        xaxis_title='Value',
        yaxis_title='CDF',
        autosize=True,
    )
    return figure
|
||||||
Reference in New Issue
Block a user