[MAIN] Change workdir files, add docstring in functions

2026-03-25 16:37:56 -03:00
parent be50b41b78
commit bcd8f25a62
8 changed files with 357 additions and 30 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,6 @@ build/
 data/*
 !data/.gitkeep

+# Claude Code
+.claude/
+
--- a/README.md
+++ b/README.md
@@ -2,16 +2,49 @@

 Repositorio para códigos de fitting de distribuições de dados de chuva e clutter.

-## Instalação
+## Gerenciamento de dependências
+
+Este projeto usa [uv](https://docs.astral.sh/uv/) para gerenciar dependências e ambientes virtuais.
+
+### Instalação do uv

 ```bash
-pip install -e .
+curl -LsSf https://astral.sh/uv/install.sh | sh
 ```

+### Configurar o ambiente
+
+```bash
+# Cria o ambiente virtual e instala todas as dependências
+uv sync
+```
+
+### Adicionar dependências
+
+```bash
+# Adicionar um pacote ao projeto
+uv add <pacote>
+
+# Adicionar dependência de desenvolvimento
+uv add --dev <pacote>
+```
+
+### Executar scripts
+
+```bash
+# Executar um script dentro do ambiente virtual
+uv run python scripts/meu_script.py
+
+# Abrir o Jupyter
+uv run jupyter notebook
+```
+
+As dependências do projeto estão declaradas em `pyproject.toml` e o lockfile `uv.lock` garante reprodutibilidade do ambiente.
+
 ## Uso nos notebooks

 ```python
-from clutter_chuva import Fitter
+from etc import Fitter
 from scipy.stats import gamma, weibull_min, lognorm

 fitter = Fitter(
@@ -27,8 +60,9 @@ fitter.histogram_with_fits().show()
 ## Estrutura

 ```text
-clutter_chuva/      # pacote principal (importável nos notebooks)
+etc/                # pacote principal (importável nos notebooks)
  fitting/          # Fitter e DistributionSummary
+  tools/            # funções de visualização (plots, CDF)
 notebooks/          # notebooks Jupyter
 scripts/            # scripts .py para execução em background
 data/               # dados (não versionados)
--- a/clutter_chuva/__init__.py
+++ b/clutter_chuva/__init__.py
@@ -1,3 +0,0 @@
-from .fitting import Fitter, DistributionSummary
-
-__all__ = ["Fitter", "DistributionSummary"]
--- a/etc/__init__.py
+++ b/etc/__init__.py
@@ -0,0 +1,4 @@
+from .fitting import Fitter, DistributionSummary
+from . import tools
+
+__all__ = ["Fitter", "DistributionSummary", "tools"]
--- a/clutter_chuva/fitting/__init__.py
+++ b/clutter_chuva/fitting/__init__.py
--- a/clutter_chuva/fitting/fitter.py
+++ b/clutter_chuva/fitting/fitter.py
@@ -51,30 +51,70 @@ class DistributionSummary:

    @property
    def pvalue(self) -> float | None:
-        """p-value from the goodness-of-fit test, or None if not yet run."""
+        """p-value from the goodness-of-fit test.
+
+        Returns
+        -------
+        float or None
+            The p-value produced by the GoF test, or None if validate() has
+            not been called yet.
+        """
        return self.test_result.pvalue if self.test_result is not None else None

    @property
    def gof_statistic(self) -> float | None:
-        """Test statistic from the goodness-of-fit test, or None if not yet run."""
+        """Test statistic from the goodness-of-fit test.
+
+        Returns
+        -------
+        float or None
+            The GoF statistic value, or None if validate() has not been
+            called yet.
+        """
        return self.test_result.statistic if self.test_result is not None else None

    @property
    def mean(self) -> float:
-        """Mean of the fitted distribution."""
+        """Mean of the fitted distribution.
+
+        Returns
+        -------
+        float
+            Mean computed from the fitted distribution parameters.
+        """
        return self.distribution_object.mean(*self.fit_result_params)

    @property
    def std(self) -> float:
-        """Standard deviation of the fitted distribution."""
+        """Standard deviation of the fitted distribution.
+
+        Returns
+        -------
+        float
+            Standard deviation computed from the fitted distribution parameters.
+        """
        return self.distribution_object.std(*self.fit_result_params)

    @property
    def var(self) -> float:
-        """Variance of the fitted distribution."""
+        """Variance of the fitted distribution.
+
+        Returns
+        -------
+        float
+            Variance computed from the fitted distribution parameters.
+        """
        return self.distribution_object.var(*self.fit_result_params)

    def __repr__(self) -> str:
+        """Return a concise string representation of the distribution summary.
+
+        Returns
+        -------
+        str
+            Single-line string showing the distribution name, fitted
+            parameters, mean, standard deviation, GoF statistic, and p-value.
+        """
        pval_str = f"{self.pvalue:.4f}" if self.pvalue is not None else "N/A"
        stat_str = f"{self.gof_statistic:.4f}" if self.gof_statistic is not None else "N/A"
        return (
@@ -85,6 +125,13 @@ class DistributionSummary:
        )

    def __str__(self) -> str:
+        """Return the same string representation as ``__repr__``.
+
+        Returns
+        -------
+        str
+            Delegates to :meth:`__repr__`.
+        """
        return self.__repr__()


@@ -132,6 +179,19 @@ class Fitter:
    """

    def __init__(self, dist_list: list[rv_continuous], statistic_method: str = 'ad', **kwargs):
+        """Initialise the Fitter and build per-distribution summary objects.
+
+        Parameters
+        ----------
+        dist_list : list[rv_continuous]
+            Distributions to fit.
+        statistic_method : str, optional
+            Goodness-of-fit statistic passed to ``goodness_of_fit``
+            (default ``'ad'`` for Anderson-Darling).
+        **kwargs :
+            Per-distribution initial guesses and fixed parameters, keyed as
+            ``<dist.name>_args`` (tuple) or ``<dist.name>_params`` (dict).
+        """
        self._dist: dict[str, DistributionSummary] = {}
        self.dist_list = list(dist_list)
        for dist in dist_list:
@@ -146,7 +206,25 @@ class Fitter:

    #── Getter and setter with flexible keys (str or rv_continuous) ────────────────────────────────
    def _resolve_key(self, key: str | rv_continuous) -> str:
-        """Resolve a distribution name or rv_continuous object to its string key."""
+        """Resolve a distribution name or ``rv_continuous`` object to its string key.
+
+        Parameters
+        ----------
+        key : str or rv_continuous
+            Either the distribution's string name or its ``rv_continuous``
+            object (whose ``.name`` attribute is used).
+
+        Returns
+        -------
+        str
+            The string key used in the internal ``_dist`` dictionary.
+
+        Raises
+        ------
+        KeyError
+            If the resolved name is not found among the registered
+            distributions.
+        """
        name = key.name if isinstance(key, rv_continuous) else key
        if name not in self._dist:
            available = ', '.join(self._dist)
@@ -154,13 +232,58 @@ class Fitter:
        return name

    def __contains__(self, key: str | rv_continuous) -> bool:
+        """Check whether a distribution is registered with this Fitter.
+
+        Parameters
+        ----------
+        key : str or rv_continuous
+            Distribution name or ``rv_continuous`` object to look up.
+
+        Returns
+        -------
+        bool
+            True if the distribution is registered, False otherwise.
+        """
        name = key.name if isinstance(key, rv_continuous) else key
        return name in self._dist

    def __getitem__(self, key: str | rv_continuous) -> DistributionSummary:
+        """Retrieve the :class:`DistributionSummary` for the given distribution.
+
+        Parameters
+        ----------
+        key : str or rv_continuous
+            Distribution name or ``rv_continuous`` object to retrieve.
+
+        Returns
+        -------
+        DistributionSummary
+            The summary object associated with the requested distribution.
+
+        Raises
+        ------
+        KeyError
+            If the distribution is not registered.
+        """
        return self._dist[self._resolve_key(key)]

    def __setitem__(self, key: str | rv_continuous, summary: DistributionSummary) -> None:
+        """Override the :class:`DistributionSummary` for an existing distribution.
+
+        Parameters
+        ----------
+        key : str or rv_continuous
+            Distribution name or ``rv_continuous`` object to update.
+        summary : DistributionSummary
+            Replacement summary object.
+
+        Raises
+        ------
+        TypeError
+            If ``summary`` is not a :class:`DistributionSummary` instance.
+        KeyError
+            If the distribution is not registered.
+        """
        if not isinstance(summary, DistributionSummary):
            raise TypeError(f"Expected DistributionSummary, got {type(summary).__name__}.")
        self._dist[self._resolve_key(key)] = summary
@@ -168,13 +291,19 @@ class Fitter:


    def fit(self, data: np.ndarray) -> None:
-        """
-        Fit every distribution to *data* via MLE.
+        """Fit every distribution to *data* via MLE.

        Parameters
        ----------
        data : array-like
-            Input data. Only the absolute value is used.
+            Input data. Only the absolute value is used; the array is
+            flattened before fitting.
+
+        Returns
+        -------
+        None
+            Fitted parameters are stored in-place inside each
+            :class:`DistributionSummary` held by this Fitter.
        """
        data_flat = np.abs(data).flatten()
        self._last_data_flat = data_flat
@@ -186,14 +315,24 @@ class Fitter:
            self._dist[dist.name] = _summary

    def validate(self, **kwargs) -> None:
-        """
-        Run the goodness-of-fit test on every previously fitted distribution.
+        """Run the goodness-of-fit test on every previously fitted distribution.

        Parameters
        ----------
        **kwargs :
-            Extra keyword arguments forwarded to goodness_of_fit()
-            (e.g. n_mc_samples=100).
+            Extra keyword arguments forwarded to ``scipy.stats.goodness_of_fit``
+            (e.g. ``n_mc_samples=100``).
+
+        Returns
+        -------
+        None
+            Test results are stored in-place inside each
+            :class:`DistributionSummary` held by this Fitter.
+
+        Raises
+        ------
+        RuntimeError
+            If :meth:`fit` has not been called before this method.
        """
        if not hasattr(self, '_last_data_flat'):
            raise RuntimeError("No data available. Call fit() first.")
@@ -211,14 +350,32 @@ class Fitter:
            self._dist[dist.name] = _summary

    def summary(self) -> None:
-        """Print a summary of all fitted distributions."""
+        """Print a one-line summary for each registered distribution.
+
+        Returns
+        -------
+        None
+            Output is written to stdout via ``print``.
+        """
        for dist_name, summary in self._dist.items():
            print(summary)

    def plot_qq_plots(self) -> None:
-        """
-        Generate QQ plots for each fitted distribution against the data.
-        Requires fit() and validate() to have been called.
+        """Generate QQ plots for each fitted distribution against the data.
+
+        A separate interactive Plotly figure is displayed for every
+        distribution that has been both fitted and validated.  Distributions
+        that have not yet been validated are skipped with a printed warning.
+
+        Returns
+        -------
+        None
+            Figures are rendered inline / in a browser via ``fig.show()``.
+
+        Raises
+        ------
+        RuntimeError
+            If :meth:`fit` has not been called before this method.
        """
        if not hasattr(self, '_last_data_flat'):
            raise RuntimeError("No data available. Call fit() first.")
@@ -247,9 +404,22 @@ class Fitter:
            fig.show()

    def histogram_with_fits(self) -> go.Figure:
-        """
-        Histogram of the data with overlaid PDFs (Plotly).
-        Requires fit() to have been called.
+        """Return an interactive histogram with overlaid fitted PDFs (Plotly).
+
+        Builds a probability-density histogram of the data and overlays a
+        line trace for the PDF of each fitted distribution.  Hover text shows
+        the p-value and GoF statistic for each curve.  Distributions that
+        have not yet been fitted are skipped with a printed warning.
+
+        Returns
+        -------
+        plotly.graph_objects.Figure
+            Interactive Plotly figure ready to display with ``fig.show()``.
+
+        Raises
+        ------
+        RuntimeError
+            If :meth:`fit` has not been called before this method.
        """
        if not hasattr(self, '_last_data_flat'):
            raise RuntimeError("No data available. Call fit() first.")
@@ -290,9 +460,22 @@ class Fitter:
        return fig

    def histogram_with_fits_seaborn(self) -> plt.Figure:
-        """
-        Histogram of the data with overlaid PDFs (Matplotlib/Seaborn).
-        Requires fit() to have been called.
+        """Return a static histogram with overlaid fitted PDFs (Matplotlib/Seaborn).
+
+        Builds a probability-density histogram using Seaborn and overlays a
+        line for the PDF of each fitted distribution.  The legend entry for
+        each distribution includes its p-value.  Distributions that have not
+        yet been fitted are skipped with a printed warning.
+
+        Returns
+        -------
+        matplotlib.figure.Figure
+            Matplotlib figure object.
+
+        Raises
+        ------
+        RuntimeError
+            If :meth:`fit` has not been called before this method.
        """
        if not hasattr(self, '_last_data_flat'):
            raise RuntimeError("No data available. Call fit() first.")
--- a/etc/tools/__init__.py
+++ b/etc/tools/__init__.py
@@ -0,0 +1,3 @@
+from .plots import stacked_plot, noise_mean, calculate_cdf, plot_cdfs
+
+__all__ = ["stacked_plot", "noise_mean", "calculate_cdf", "plot_cdfs"]
--- a/etc/tools/plots.py
+++ b/etc/tools/plots.py
@@ -0,0 +1,103 @@
+import numpy as np
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
+
+def stacked_plot(data):
+    """Create a stacked plot with mean power on top and a 2D heatmap below.
+
+    Parameters
+    ----------
+    data : array-like
+        Input data array. Will be squeezed to remove singleton dimensions.
+        Rows are interpreted as samples and columns as range/frequency bins.
+
+    Returns
+    -------
+    plotly.graph_objects.Figure
+        A two-row figure: the top panel shows the mean absolute value of each
+        row (``axis=1``), and the bottom panel a heatmap of the absolute values.
+    """
+    data = np.squeeze(data)
+    mean_dp = np.mean(np.abs(data), axis=1)
+
+    fig = make_subplots(rows=2, cols=1, row_heights=[0.3, 0.7], shared_xaxes=True,
+                        vertical_spacing=0.01)
+
+    fig.add_trace(go.Scatter(y=mean_dp, name='Mean Power'), row=1, col=1)
+    fig.add_trace(go.Heatmap(z=np.abs(data).T, showscale=False, name='Heat Map'), row=2, col=1)
+
+    fig.update_layout(title='Mean DP Power and 2D Map', autosize=True)
+    fig.update_xaxes(visible=False, row=2, col=1)
+    fig.update_yaxes(visible=False, row=2, col=1)
+
+    return fig
+
+
+def noise_mean(data):
+    """Estimate the noise floor as the trimmed mean of absolute values.
+
+    Sorts the flattened absolute data and discards the bottom 10% and top 10%
+    before computing the mean, making the estimate robust to outliers and
+    strong targets.
+
+    Parameters
+    ----------
+    data : numpy.ndarray
+        Input array of any shape (must provide ``.flatten()``).
+
+    Returns
+    -------
+    float
+        Mean of the central 80% of the sorted absolute values.
+    """
+    sorted_data = np.sort(np.abs(data.flatten()))
+    cutoff_up_index = int(len(sorted_data) * 0.9)
+    cutoff_down_index = int(len(sorted_data) * 0.1)
+    trimmed_data = sorted_data[cutoff_down_index:cutoff_up_index]
+
+    return np.mean(trimmed_data)
+
+
+def calculate_cdf(data):
+    """Compute the empirical cumulative distribution function (CDF) of the data.
+
+    Parameters
+    ----------
+    data : numpy.ndarray
+        Input array of any shape. Will be flattened before processing.
+
+    Returns
+    -------
+    tuple[numpy.ndarray, numpy.ndarray]
+        A ``(sorted_data, cdf)`` tuple where ``sorted_data`` contains the
+        sorted values and ``cdf`` contains the corresponding CDF probabilities
+        in the range (0, 1].
+    """
+    sorted_data = np.sort(data.flatten())
+    cdf = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
+    return (sorted_data, cdf)
+
+
+def plot_cdfs(data_list, labels):
+    """Plot the empirical CDFs of multiple datasets on a single figure.
+
+    Parameters
+    ----------
+    data_list : list of numpy.ndarray
+        List of data arrays to plot. Each array can have any shape and will
+        be flattened internally by :func:`calculate_cdf`.
+    labels : list of str
+        Legend labels, paired in order with ``data_list``; unmatched extras are dropped.
+
+    Returns
+    -------
+    plotly.graph_objects.Figure
+        A figure with one CDF line per dataset.
+    """
+    fig = go.Figure()
+    for data, label in zip(data_list, labels):
+        sorted_data, cdf = calculate_cdf(data)
+        fig.add_trace(go.Scatter(x=sorted_data, y=cdf, mode='lines', name=label))
+    fig.update_layout(title='CDF of Data', xaxis_title='Value', yaxis_title='CDF', autosize=True)
+    return fig