feat(scripts): overhaul generate_data pipeline to use pre-processed CSVs
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@ build/
|
|||||||
# Data (large files)
|
# Data (large files)
|
||||||
data/*
|
data/*
|
||||||
!data/.gitkeep
|
!data/.gitkeep
|
||||||
|
tmp/
|
||||||
|
|
||||||
# Claude Code
|
# Claude Code
|
||||||
.claude/
|
.claude/
|
||||||
|
|||||||
@@ -4,23 +4,15 @@ import sys
|
|||||||
# Add project root to path
|
# Add project root to path
|
||||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
from etc.tools.read_raw_data import load_data, RC_PREFIX, DP_PREFIX
|
|
||||||
from etc.fitting import Fitter
|
from etc.fitting import Fitter
|
||||||
from etc.tools.statistics import aic_statistic
|
from etc.tools.distributions import logweibull,lognakagami, loggamma_dist, k_dist, lograyleigh, logrice,logk
|
||||||
|
from etc.tools.statistics import aic_statistic, bic_statistic
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import plotly.graph_objects as go
|
|
||||||
import plotly.io as pio
|
import plotly.io as pio
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import itertools
|
import itertools
|
||||||
import os
|
import os
|
||||||
import seaborn as sns
|
from scipy.stats import weibull_min, nakagami, gamma, rayleigh, rice
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from scipy.special import gamma as gamma_func
|
|
||||||
from scipy.special import kv as modified_bessel_second_kind
|
|
||||||
from scipy.special import gammaln
|
|
||||||
from scipy.stats import gamma,gumbel_r,gumbel_l,gompertz ,norm,weibull_min, lognorm, genextreme, genpareto, rayleigh, kstest, rv_continuous, goodness_of_fit
|
|
||||||
import statsmodels.api as sm
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
pio.renderers.default = "browser"
|
pio.renderers.default = "browser"
|
||||||
@@ -28,119 +20,175 @@ pio.renderers.default = "browser"
|
|||||||
# LOAD env variables in dir above
|
# LOAD env variables in dir above
|
||||||
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
|
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
|
||||||
DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "data")
|
DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "data")
|
||||||
|
DATA_FOLDER = os.path.join(DATA_PATH, "rain_statistics_04may2026")
|
||||||
|
RAIN_DATA_FOLDER = os.path.join(DATA_PATH, "processed")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
aberturas = ['12','13','14','15']
|
aberturas = ["12", "13", "14", "15"]
|
||||||
|
|
||||||
voltas = ['0','2','4','6','8','10']
|
voltas = ["0", "2", "4", "6", "8", "10"]
|
||||||
ads = ['0','3']
|
ads = ["0","1","2","3"]
|
||||||
bursts = ['0','3']
|
bursts = ["0","1","2","3"]
|
||||||
|
|
||||||
# if not exists, create the directory to save the dataframes and the figure
|
# if not exists, create the directory to save the dataframes and the figure
|
||||||
if not os.path.exists(DATA_PATH):
|
if not os.path.exists(DATA_PATH):
|
||||||
os.makedirs(DATA_PATH)
|
os.makedirs(DATA_PATH)
|
||||||
dist_list = [gumbel_r, weibull_min, lognorm, rayleigh]
|
|
||||||
|
|
||||||
|
|
||||||
dist_list_log = [gumbel_l, weibull_min, genextreme, gamma]
|
|
||||||
|
|
||||||
statistics_dataframe = pd.DataFrame(columns=[dist.name for dist in dist_list_log])
|
|
||||||
|
|
||||||
|
if not os.path.exists(DATA_FOLDER):
|
||||||
|
os.makedirs(DATA_FOLDER)
|
||||||
|
dist_list = [weibull_min,nakagami,gamma, rice, rayleigh, k_dist]
|
||||||
|
dist_list_log = [logweibull,lognakagami,loggamma_dist,logrice,lograyleigh,logk]
|
||||||
|
|
||||||
|
statistics_dataframe_aic= pd.DataFrame(columns=[dist.name for dist in dist_list_log])
|
||||||
|
statistics_dataframe_bic= pd.DataFrame(columns=[dist.name for dist in dist_list_log])
|
||||||
|
statistics_dataframe_ks= pd.DataFrame(columns=[dist.name for dist in dist_list_log])
|
||||||
|
statistics_dataframe_cvm= pd.DataFrame(columns=[dist.name for dist in dist_list_log])
|
||||||
# create dataframes for each dist in list
|
# create dataframes for each dist in list
|
||||||
# weibull_dataframe = pd.DataFrame(columns=[f'weibull_{param}' for param in ['shape', 'loc', 'scale']])
|
# weibull_dataframe = pd.DataFrame(columns=[f'weibull_{param}' for param in ['shape', 'loc', 'scale']])
|
||||||
# rayleigh_dataframe = pd.DataFrame(columns=[f'rayleigh_{param}' for param in ['loc', 'scale']])
|
lognakagami_dataframe = pd.DataFrame(columns=[f'lognakagami_{param}' for param in ['m', 'Omega', 'loc', 'scale']])
|
||||||
# normal_dataframe = pd.DataFrame(columns=[f'normal_{param}' for param in ['loc', 'scale']])
|
loggamma_dataframe = pd.DataFrame(columns=[f'loggamma_{param}' for param in ['a', 'loc', 'scale']])
|
||||||
# gumbel_l_dataframe = pd.DataFrame(columns=[f'gumbel_l_{param}' for param in ['loc','scale']])
|
logrice_dataframe = pd.DataFrame(columns=[f'logrice_{param}' for param in ['nu', 'sigma', 'loc', 'scale']])
|
||||||
# genextreme_dataframe = pd.DataFrame(columns=[f'genextreme_{param}' for param in ['shape','loc','scale']])
|
lograyleigh_dataframe = pd.DataFrame(columns=[f'lograyleigh_{param}' for param in ['sigma', 'loc', 'scale']])
|
||||||
# gamma_dataframe = pd.DataFrame(columns=[f'gamma_{param}' for param in ['shape','loc','scale']])
|
logk_dataframe = pd.DataFrame(columns=[f'logk_{param}' for param in ['mu', 'alpha', 'beta', 'loc', 'scale']])
|
||||||
# gompertz_dataframe = pd.DataFrame(columns=[f'gompertz_{param}' for param in ['shape','loc','scale']])
|
logweibull_dataframe = pd.DataFrame(columns=[f'logweibull_{param}' for param in ['k', 'lam', 'loc', 'scale']])
|
||||||
|
|
||||||
|
|
||||||
i = 1
|
i = 1
|
||||||
for abertura, volta, ad, burst in itertools.product(aberturas, voltas, ads, bursts):
|
for abertura, volta, ad, burst in itertools.product(aberturas, voltas, ads, bursts):
|
||||||
print(f"########### ITERATION {i} ###########")
|
print(f"########### ITERATION {i} ###########")
|
||||||
print(f"Processing ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
print(f"Processing ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
||||||
_data_rc = load_data(RC_PREFIX, ad_num=ad, burst_num=burst, feixe_num=0, abertura_num=abertura, volta_num=volta)
|
|
||||||
_data_rc = np.squeeze(_data_rc)
|
|
||||||
|
|
||||||
_data_dp = load_data(DP_PREFIX, ad_num=ad, burst_num=burst, feixe_num=0, abertura_num=abertura, volta_num=volta)
|
try:
|
||||||
_data_dp = np.squeeze(_data_dp)
|
df = pd.read_csv(f"{RAIN_DATA_FOLDER}/ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.csv")
|
||||||
|
except FileNotFoundError:
|
||||||
_noise_data_dp = _data_dp[5000:6000,:]
|
print(f"File not found for ad{ad} burst{burst} abertura{abertura} volta{volta}, skipping iteration.")
|
||||||
|
i += 1
|
||||||
# values of _noise_data_dp should be greater than 0, take only values greater than 0
|
continue
|
||||||
_noise_data_dp = _noise_data_dp[np.abs(_noise_data_dp) > 0]
|
|
||||||
|
|
||||||
_mean__noise_dp = np.abs(_noise_data_dp).mean()
|
|
||||||
|
|
||||||
factor_quartile_99 = 3.424
|
|
||||||
factor_quantile_pfa_1e_6 = 4.194
|
|
||||||
|
|
||||||
_data_dp_threshold = _mean__noise_dp * factor_quantile_pfa_1e_6
|
|
||||||
|
|
||||||
_dp_above_noise_idx = np.where(np.abs(_data_dp).mean(axis=1) > _data_dp_threshold)[0]
|
|
||||||
|
|
||||||
# plot dp mean and threshold to check if the threshold is correct, save the figure
|
|
||||||
# fig = go.Figure()
|
|
||||||
# fig.add_trace(go.Scatter(y=np.abs(_data_dp).mean(axis=1), name='Mean DP Power'))
|
|
||||||
# fig.add_trace(go.Scatter(y=[_data_dp_threshold]*len(_data_dp), name='Threshold', line=dict(dash='dash')))
|
|
||||||
# fig.update_layout(title=f'Mean DP Power and Threshold for ad{ad} burst{burst} abertura{abertura} volta{volta}', autosize=True)
|
|
||||||
# # plot dots on _dp_above_noise_idx
|
|
||||||
# fig.add_trace(go.Scatter(x=_dp_above_noise_idx, y=np.abs(_data_dp).mean(axis=1)[_dp_above_noise_idx], mode='markers', name='Above Threshold', marker=dict(color='red', size=5)))
|
|
||||||
# fig.show()
|
|
||||||
|
|
||||||
_dp_above_noise_idx = _dp_above_noise_idx[_dp_above_noise_idx >2250] # remove the first 1500 samples to avoid noise
|
|
||||||
|
|
||||||
_rain_data = _data_rc[_dp_above_noise_idx,:]
|
|
||||||
# get rain data above zero
|
|
||||||
_rain_data = _rain_data[np.abs(_rain_data) > 0]
|
|
||||||
|
|
||||||
|
|
||||||
# plot rain data to check if it is correct, plot the figure
|
# Re-read with converters for every column
|
||||||
|
converters = {col: complex for col in df.columns}
|
||||||
|
df = pd.read_csv(
|
||||||
|
f"{RAIN_DATA_FOLDER}/ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.csv", converters=converters
|
||||||
|
)
|
||||||
|
_rain_data = df["rain_data"].values
|
||||||
|
|
||||||
|
# check abertura value and get samples between intervals for each abertura
|
||||||
|
|
||||||
|
# #plot rain data to check if it is correct, plot the figure
|
||||||
# fig = go.Figure()
|
# fig = go.Figure()
|
||||||
# fig.add_trace(go.Scatter(y=np.abs(_data_rc).mean(axis=1), name='Rain Data'))
|
# fig.add_trace(go.Scatter(y=np.abs(_data_rc).mean(axis=1), name='Rain Data'))
|
||||||
# fig.update_layout(title=f'Rain Data for ad{ad} burst{burst} abertura{abertura} volta{volta}', autosize=True)
|
# fig.update_layout(title=f'Rain Data for ad{ad} burst{burst} abertura{abertura} volta{volta}', autosize=True)
|
||||||
# # add dots on _dp_above_noise_idx
|
# # add dots on _dp_above_noise_idx
|
||||||
# fig.add_trace(go.Scatter(x=_dp_above_noise_idx, y=np.abs(_data_rc).mean(axis=1)[_dp_above_noise_idx], mode='markers', name='Above Threshold', marker=dict(color='red', size=5)))
|
# fig.add_trace(go.Scatter(x=_dp_above_noise_idx, y=np.abs(_data_rc).mean(axis=1)[_dp_above_noise_idx], mode='markers', name='Above Threshold', marker=dict(color='red', size=5)))
|
||||||
# fig.show()
|
# fig.show()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# check if _rain_data is empty, if it is, skip this iteration
|
# check if _rain_data is empty, if it is, skip this iteration
|
||||||
if _rain_data.size == 0:
|
if _rain_data.size == 0:
|
||||||
continue
|
continue
|
||||||
### LOG DATA
|
### LOG DATA
|
||||||
|
_rain_data = np.abs(_rain_data)
|
||||||
_rain_data_log = np.log(_rain_data)
|
_rain_data_log = np.log(_rain_data)
|
||||||
_rain_data_log = _rain_data_log[np.isfinite(_rain_data_log)]
|
_rain_data_log = _rain_data_log[np.isfinite(_rain_data_log)]
|
||||||
fitter = Fitter(dist_list_log, statistic_method=aic_statistic)
|
|
||||||
|
fitter_aic = Fitter(dist_list_log, statistic_method=aic_statistic)
|
||||||
|
fitter_bic = Fitter(dist_list_log, statistic_method=bic_statistic)
|
||||||
|
fitter_ks = Fitter(dist_list_log, statistic_method="ks")
|
||||||
|
fitter_cvm = Fitter(dist_list_log, statistic_method="cvm")
|
||||||
|
|
||||||
|
# create a fitter just to take dist params for kl divergence calculation
|
||||||
print(f"Fitting distributions for ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
print(f"Fitting distributions for ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
||||||
fitter.fit(_rain_data_log)
|
fitter_aic.fit(_rain_data_log)
|
||||||
|
fitter_bic.fit(_rain_data_log)
|
||||||
|
fitter_ks.fit(_rain_data_log)
|
||||||
|
fitter_cvm.fit(_rain_data_log)
|
||||||
print(f"Validating fits for ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
print(f"Validating fits for ad{ad} burst{burst} abertura{abertura} volta{volta}")
|
||||||
fitter.validate(n_mc_samples=1,)
|
fitter_aic.validate(n_mc_samples=1)
|
||||||
|
fitter_bic.validate(n_mc_samples=1)
|
||||||
|
fitter_ks.validate(n_mc_samples=1)
|
||||||
|
fitter_cvm.validate(n_mc_samples=1)
|
||||||
|
|
||||||
|
# create hist with plots and save for each volta and abertura
|
||||||
|
|
||||||
|
if ad == '0' and burst == '0':
|
||||||
|
_fig = fitter_aic.histogram_with_fits()
|
||||||
|
_fig.write_html(f'{DATA_FOLDER}/histogram_with_fits_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.html')
|
||||||
|
|
||||||
# create a pandas dataframe to store the statistics of the fitted distributions and concatenate it with the existing dataframe
|
# create a pandas dataframe to store the statistics of the fitted distributions and concatenate it with the existing dataframe
|
||||||
dist_stats = {dist.name: fitter[dist.name].pvalue for dist in dist_list_log}
|
dist_stats_aic = {dist.name: fitter_aic[dist.name].gof_statistic for dist in dist_list_log}
|
||||||
print(f"dist stats: {dist_stats}")
|
dist_stats_bic = {dist.name: fitter_bic[dist.name].gof_statistic for dist in dist_list_log}
|
||||||
dist_stats_df = pd.DataFrame(dist_stats,index=[0])
|
dist_stats_ks = {dist.name: fitter_ks[dist.name].gof_statistic for dist in dist_list_log}
|
||||||
statistics_dataframe = pd.concat([statistics_dataframe, dist_stats_df], ignore_index=True)
|
dist_stats_cvm = {dist.name: fitter_cvm[dist.name].gof_statistic for dist in dist_list_log}
|
||||||
# check if any p-value is greater than 0.05, if it isnt, break the loop and do not save the dataframes and the figure
|
|
||||||
if not any(stat > 0.05 for stat in dist_stats.values()):
|
|
||||||
print(f"No distribution passed the GoF test for ad{ad} burst{burst} abertura{abertura} volta{volta}. Skipping saving results.")
|
dist_stats_df_aic = pd.DataFrame(dist_stats_aic, index=[0])
|
||||||
break
|
statistics_dataframe_aic = pd.concat([statistics_dataframe_aic, dist_stats_df_aic], ignore_index=True)
|
||||||
|
|
||||||
|
dist_stats_df_bic = pd.DataFrame(dist_stats_bic, index=[0])
|
||||||
|
statistics_dataframe_bic = pd.concat([statistics_dataframe_bic, dist_stats_df_bic], ignore_index=True)
|
||||||
|
|
||||||
|
dist_stats_df_ks = pd.DataFrame(dist_stats_ks, index=[0])
|
||||||
|
statistics_dataframe_ks = pd.concat([statistics_dataframe_ks, dist_stats_df_ks], ignore_index=True)
|
||||||
|
|
||||||
|
dist_stats_df_cvm = pd.DataFrame(dist_stats_cvm, index=[0])
|
||||||
|
statistics_dataframe_cvm = pd.concat([statistics_dataframe_cvm, dist_stats_df_cvm], ignore_index=True)
|
||||||
|
|
||||||
|
|
||||||
|
# Create pandas dataframe to store the statistics of the fitted distributions and concatenate it with the existing dataframe
|
||||||
|
# log weibull parameters
|
||||||
|
logweibull_params = {f'logweibull_{param}': fitter_aic._dist['logweibull'].fit_result_params[i] for i, param in enumerate(['k', 'lam', 'loc', 'scale'])}
|
||||||
|
logweibull_params_df = pd.DataFrame(logweibull_params,index=[0])
|
||||||
|
logweibull_dataframe = pd.concat([logweibull_dataframe, logweibull_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
lognakagami_params = {f'lognakagami_{param}': fitter_aic._dist['lognakagami'].fit_result_params[i] for i, param in enumerate(['m', 'Omega', 'loc', 'scale'])}
|
||||||
|
lognakagami_params_df = pd.DataFrame(lognakagami_params,index=[0])
|
||||||
|
lognakagami_dataframe = pd.concat([lognakagami_dataframe, lognakagami_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
loggamma_params = {f'loggamma_{param}': fitter_aic._dist['loggamma_dist'].fit_result_params[i] for i, param in enumerate(['a', 'loc', 'scale'])}
|
||||||
|
loggamma_params_df = pd.DataFrame(loggamma_params,index=[0])
|
||||||
|
loggamma_dataframe = pd.concat([loggamma_dataframe, loggamma_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
logrice_params = {f'logrice_{param}': fitter_aic._dist['logrice'].fit_result_params[i] for i, param in enumerate(['nu', 'sigma', 'loc', 'scale'])}
|
||||||
|
logrice_params_df = pd.DataFrame(logrice_params,index=[0])
|
||||||
|
logrice_dataframe = pd.concat([logrice_dataframe, logrice_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
lograyleigh_params = {f'lograyleigh_{param}': fitter_aic._dist['lograyleigh'].fit_result_params[i] for i, param in enumerate(['sigma', 'loc', 'scale'])}
|
||||||
|
lograyleigh_params_df = pd.DataFrame(lograyleigh_params,index=[0])
|
||||||
|
lograyleigh_dataframe = pd.concat([lograyleigh_dataframe, lograyleigh_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
logk_params = {f'logk_{param}': fitter_aic._dist['logk'].fit_result_params[i] for i, param in enumerate(['mu', 'alpha', 'beta', 'loc', 'scale'])}
|
||||||
|
logk_params_df = pd.DataFrame(logk_params,index=[0])
|
||||||
|
logk_dataframe = pd.concat([logk_dataframe, logk_params_df], ignore_index=True)
|
||||||
|
|
||||||
|
# save params for each distribution in a csv file, one for each distribution. If exists, overwrite it, if not, create it
|
||||||
|
logweibull_dataframe.to_csv(f'{DATA_FOLDER}/logweibull_params.csv', index=False)
|
||||||
|
lognakagami_dataframe.to_csv(f'{DATA_FOLDER}/lognakagami_params.csv', index=False)
|
||||||
|
loggamma_dataframe.to_csv(f'{DATA_FOLDER}/loggamma_params.csv', index=False)
|
||||||
|
logrice_dataframe.to_csv(f'{DATA_FOLDER}/logrice_params.csv', index=False)
|
||||||
|
lograyleigh_dataframe.to_csv(f'{DATA_FOLDER}/lograyleigh_params.csv', index=False)
|
||||||
|
logk_dataframe.to_csv(f'{DATA_FOLDER}/logk_params.csv', index=False)
|
||||||
|
|
||||||
|
|
||||||
# create histogram and save .html file with the figure
|
# create histogram and save .html file with the figure
|
||||||
fig_1 = fitter.histogram_with_fits()
|
# save only in ad0 burst 0 for each abertura and volta
|
||||||
fig_1.write_html(f'{DATA_PATH}/histogram_with_fits_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.html')
|
if ad == '0' and burst == '0':
|
||||||
|
fig_1 = fitter_aic.histogram_with_fits()
|
||||||
|
fig_1.write_html(f'{DATA_FOLDER}/histogram_with_fits_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.html')
|
||||||
|
fig_2 = fitter_aic.plot_qq_plots_sns(method='filliben')
|
||||||
|
# write sns image as .png file
|
||||||
|
fig_2.savefig(f'{DATA_FOLDER}/qq_plots_sns_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.png')
|
||||||
|
|
||||||
# save qqplot and save .html file with the figure
|
# save params
|
||||||
fig_2 = fitter.plot_qq_plots(method='filliben')
|
statistics_dataframe_aic.to_csv(f"{DATA_FOLDER}/distribution_fit_aic_statistics.csv", index=False)
|
||||||
fig_2.write_html(f'{DATA_PATH}/qq_plots_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.html')
|
statistics_dataframe_bic.to_csv(f"{DATA_FOLDER}/distribution_fit_bic_statistics.csv", index=False)
|
||||||
|
statistics_dataframe_ks.to_csv(f"{DATA_FOLDER}/distribution_fit_ks_statistics.csv", index=False)
|
||||||
|
statistics_dataframe_cvm.to_csv(f"{DATA_FOLDER}/distribution_fit_cvm_statistics.csv", index=False)
|
||||||
|
|
||||||
|
# # save qqplot and save .html file with the figure
|
||||||
|
# fig_2 = fitter_aic.plot_qq_plots(method='filliben')
|
||||||
|
# fig_2.write_html(f'{DATA_PATH}/qq_plots_ad{ad}_burst{burst}_abertura{abertura}_volta{volta}.html')
|
||||||
|
|
||||||
# # create pandas dataframe to store weibull parameters and concatenate it with the existing dataframe
|
# # create pandas dataframe to store weibull parameters and concatenate it with the existing dataframe
|
||||||
# weibull_params = {f'weibull_{param}': fitter._dist['weibull_min'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
# weibull_params = {f'weibull_{param}': fitter_aic._dist['weibull_min'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
||||||
# weibull_params_df = pd.DataFrame(weibull_params,index=[0])
|
# weibull_params_df = pd.DataFrame(weibull_params,index=[0])
|
||||||
# weibull_dataframe = pd.concat([weibull_dataframe, weibull_params_df], ignore_index=True)
|
# weibull_dataframe = pd.concat([weibull_dataframe, weibull_params_df], ignore_index=True)
|
||||||
|
|
||||||
@@ -154,23 +202,23 @@ if __name__ == "__main__":
|
|||||||
# normal_params_df = pd.DataFrame(normal_params,index=[0])
|
# normal_params_df = pd.DataFrame(normal_params,index=[0])
|
||||||
# normal_dataframe = pd.concat([normal_dataframe, normal_params_df], ignore_index=True)
|
# normal_dataframe = pd.concat([normal_dataframe, normal_params_df], ignore_index=True)
|
||||||
|
|
||||||
# create pandas dataframe for gumbel_r parameters and concatenate it with the existing dataframe
|
# # create pandas dataframe for gumbel_r parameters and concatenate it with the existing dataframe
|
||||||
# gumbel_l_params = {f'gumbel_l_{param}': fitter._dist['gumbel_l'].fit_result_params[i] for i, param in enumerate(['loc', 'scale'])}
|
# gumbel_l_params = {f'gumbel_l_{param}': fitter_aic._dist['gumbel_l'].fit_result_params[i] for i, param in enumerate(['loc', 'scale'])}
|
||||||
# gumbel_l_params_df = pd.DataFrame(gumbel_l_params,index=[0])
|
# gumbel_l_params_df = pd.DataFrame(gumbel_l_params,index=[0])
|
||||||
# gumbel_l_dataframe = pd.concat([gumbel_l_dataframe, gumbel_l_params_df], ignore_index=True)
|
# gumbel_l_dataframe = pd.concat([gumbel_l_dataframe, gumbel_l_params_df], ignore_index=True)
|
||||||
|
|
||||||
# create pandas dataframe for genextreme parameters and concatenate it with the existing dataframe
|
# # create pandas dataframe for genextreme parameters and concatenate it with the existing dataframe
|
||||||
# genextreme_params = {f'genextreme_{param}': fitter._dist['genextreme'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
# genextreme_params = {f'genextreme_{param}': fitter_aic._dist['genextreme'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
||||||
# genextreme_params_df = pd.DataFrame(genextreme_params,index=[0])
|
# genextreme_params_df = pd.DataFrame(genextreme_params,index=[0])
|
||||||
# genextreme_dataframe = pd.concat([genextreme_dataframe, genextreme_params_df], ignore_index=True)
|
# genextreme_dataframe = pd.concat([genextreme_dataframe, genextreme_params_df], ignore_index=True)
|
||||||
|
|
||||||
# create pandas dataframe for gamma parameters and concatenate it with the existing dataframe
|
# # create pandas dataframe for gamma parameters and concatenate it with the existing dataframe
|
||||||
# gamma_params = {f'gamma_{param}': fitter._dist['gamma'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
# gamma_params = {f'gamma_{param}': fitter_aic._dist['gamma'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
||||||
# gamma_params_df = pd.DataFrame(gamma_params,index=[0])
|
# gamma_params_df = pd.DataFrame(gamma_params,index=[0])
|
||||||
# gamma_dataframe = pd.concat([gamma_dataframe, gamma_params_df], ignore_index=True)
|
# gamma_dataframe = pd.concat([gamma_dataframe, gamma_params_df], ignore_index=True)
|
||||||
|
|
||||||
# create pandas dataframe for gompertz parameters and concatenate it with the existing dataframe
|
# # create pandas dataframe for gompertz parameters and concatenate it with the existing dataframe
|
||||||
# gompertz_params = {f'gompertz_{param}': fitter._dist['gompertz'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
# gompertz_params = {f'gompertz_{param}': fitter_aic._dist['gompertz'].fit_result_params[i] for i, param in enumerate(['shape', 'loc', 'scale'])}
|
||||||
# gompertz_params_df = pd.DataFrame(gompertz_params,index=[0])
|
# gompertz_params_df = pd.DataFrame(gompertz_params,index=[0])
|
||||||
# gompertz_dataframe = pd.concat([gompertz_dataframe, gompertz_params_df], ignore_index=True)
|
# gompertz_dataframe = pd.concat([gompertz_dataframe, gompertz_params_df], ignore_index=True)
|
||||||
|
|
||||||
@@ -178,16 +226,15 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
# path to save the dataframes and the figure
|
# path to save the dataframes and the figure
|
||||||
|
|
||||||
statistics_dataframe.to_csv(f'{DATA_PATH}/distribution_fit_pvalue_statistics.csv', index=False)
|
|
||||||
# rayleigh_dataframe.to_csv(f'{DATA_PATH}/rayleigh_parameters.csv', index=False)
|
|
||||||
#weibull_dataframe.to_csv(f'{DATA_PATH}/weibull_parameters.csv', index=False)
|
|
||||||
# normal_dataframe.to_csv(f'{DATA_PATH}/normal_parameters.csv', index=False)
|
|
||||||
# gumbel_l_dataframe.to_csv(f'{DATA_PATH}/gumbel_l_parameters.csv', index=False)
|
|
||||||
# genextreme_dataframe.to_csv(f'{DATA_PATH}/genextreme_parameters.csv', index=False)
|
|
||||||
# gamma_dataframe.to_csv(f'{DATA_PATH}/gamma_parameters.csv', index=False)
|
|
||||||
# gompertz_dataframe.to_csv(f'{DATA_PATH}/gompertz_parameters.csv', index=False)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
statistics_dataframe_aic.to_csv(f"{DATA_FOLDER}/distribution_fit_aic_statistics.csv", index=False)
|
||||||
|
statistics_dataframe_bic.to_csv(f"{DATA_FOLDER}/distribution_fit_bic_statistics.csv", index=False)
|
||||||
|
statistics_dataframe_ks.to_csv(f"{DATA_FOLDER}/distribution_fit_ks_statistics.csv", index=False)
|
||||||
|
statistics_dataframe_cvm.to_csv(f"{DATA_FOLDER}/distribution_fit_cvm_statistics.csv", index=False)
|
||||||
|
# rayleigh_dataframe.to_csv(f'{DATA_FOLDER}/rayleigh_parameters.csv', index=False)
|
||||||
|
# weibull_dataframe.to_csv(f'{DATA_FOLDER}/weibull_parameters.csv', index=False)
|
||||||
|
# normal_dataframe.to_csv(f'{DATA_FOLDER}/normal_parameters.csv', index=False)
|
||||||
|
# gumbel_l_dataframe.to_csv(f'{DATA_FOLDER}/gumbel_l_parameters.csv', index=False)
|
||||||
|
# genextreme_dataframe.to_csv(f'{DATA_FOLDER}/genextreme_parameters.csv', index=False)
|
||||||
|
# gamma_dataframe.to_csv(f'{DATA_FOLDER}/gamma_parameters.csv', index=False)
|
||||||
|
# gompertz_dataframe.to_csv(f'{DATA_FOLDER}/gompertz_parameters.csv', index=False)
|
||||||
|
|||||||
Reference in New Issue
Block a user