I'm trying to implement dask. I'm currently using joblib and it works perfectly, it uses the entire CPU, which seems perfect to me but I want to add more resources
Now trying to implement dask it's going much slower, I don't know what I'm doing wrong.
def evaluate_params(params, train, holidays, initial, periode, horizon,parallel:str="processes"):
mae_ = 999999
rmse_ = 999999
df_p = pd.DataFrame()
try:
model = Prophet(**params,
holidays=holidays)
model.add_country_holidays(country_name='PA')
model.add_regressor('weekendOrPayday')
model.fit(train)
# Usando el paralelismo nativo de Dask en Prophet
df_cv = cross_validation(
model,
initial=f"{initial} days",
period=f"{periode} days",
horizon=f"{horizon} days",
parallel=parallel # change "processes" a "dask"
)
df_p = performance_metrics(df_cv, rolling_window=1)
except Exception as e:
error = f"Params {params}: {repr(e)}"
print(error, flush=True)
if not df_p.empty:
mae_ = df_p["mae"].values[0]
rmse_ = df_p["rmse"].values[0]
return {
'params': params,
'mae': mae_,
'rmse': rmse_
}
def optimize_prophet(train, holidays, initial_days, periode_days, horizon_days):
param_grid = {
'changepoint_range': [0.8, 0.90, 0.95],
"changepoint_prior_scale": [
x for x in np.arange(0.01, 0.06, 0.01, dtype=float)
],
"seasonality_prior_scale": [
x for x in np.arange(1, 10.1, 1, dtype=float)
],
"seasonality_mode": ["additive", "multiplicative"],
'holidays_prior_scale': [x for x in np.arange(1, 10.1, 1, dtype=float)],
}
all_params = [
dict(zip(param_grid.keys(), v))
for v in itertools.product(*param_grid.values())
]
if daskcluster == True:
results = []
## with this code no run
"""future = delayed(evaluate_params)(
params,
train,
holidays,
initial_days,
periode_days,
horizon_days,
"dask"
)
results = self.client.compute(results)
results = self.client.gather(results)
"""
for params in all_params:
future = evaluate_params(
params,
train,
holidays,
initial_days,
periode_days,
horizon_days,
"dask"
)
results.append(future)
client.close()
cluster.close()
else:
results = Parallel(n_jobs=self.n_jobs)(
delayed(evaluate_params)(params, train, holidays,
initial_days, periode_days, horizon_days)
for params in all_params
)
best_result = min(results, key=lambda x: x['rmse'])
best_params = best_result['params']
minimal_mae = best_result['mae']
return best_params, minimal_mae