Implementing a Dask cluster with Prophet

Question

I'm trying to switch to Dask. I'm currently using joblib and it works perfectly: it uses the entire CPU, which is fine for me, but I want to be able to add more resources.

Now that I'm trying to use Dask, everything runs much slower, and I don't know what I'm doing wrong.

def evaluate_params(params, train, holidays, initial, periode, horizon,
                    parallel: str = "processes"):
    """Fit a Prophet model with ``params`` and score it by cross-validation.

    Args:
        params: Keyword arguments forwarded to ``Prophet(...)``.
        train: Training data passed to ``model.fit``. Because the regressor
            ``'weekendOrPayday'`` is registered, it presumably must contain
            a column with that name (verify against the caller).
        holidays: Forwarded to ``Prophet`` as its ``holidays`` argument.
        initial: Length of the initial training window, in days.
        periode: Spacing between cross-validation cutoffs, in days.
        horizon: Forecast horizon, in days.
        parallel: Backend name forwarded to ``cross_validation`` (for
            example ``"processes"`` or ``"dask"``).

    Returns:
        A dict with the keys ``'params'``, ``'mae'`` and ``'rmse'``. If
        fitting or cross-validation raises, or the metrics table is empty,
        both scores are the sentinel ``999999`` so that the parameter set
        loses any "lowest score wins" comparison.
    """
    failure_score = 999999
    mae_ = failure_score
    rmse_ = failure_score
    df_p = pd.DataFrame()

    try:
        model = Prophet(**params, holidays=holidays)

        model.add_country_holidays(country_name='PA')
        model.add_regressor('weekendOrPayday')
        model.fit(train)

        # Use the parallel backend built into Prophet's cross-validation.
        df_cv = cross_validation(
            model,
            initial=f"{initial} days",
            period=f"{periode} days",
            horizon=f"{horizon} days",
            parallel=parallel,  # pass "dask" instead of "processes" to use Dask
        )

        df_p = performance_metrics(df_cv, rolling_window=1)

    except Exception as e:
        # Deliberate best-effort: one bad parameter set must not abort the
        # whole search, so the failure is reported and sentinels are kept.
        error = f"Params {params}: {repr(e)}"
        print(error, flush=True)

    if not df_p.empty:
        # Only the first row of the metrics table is read.
        mae_ = df_p["mae"].values[0]
        rmse_ = df_p["rmse"].values[0]

    return {
        'params': params,
        'mae': mae_,
        'rmse': rmse_,
    }


def optimize_prophet(train, holidays, initial_days, periode_days, horizon_days):
    """Grid-search Prophet hyperparameters and return the best combination.

    Every combination of the values in ``param_grid`` is scored with
    ``evaluate_params``; the combination with the lowest RMSE wins.

    Args:
        train: Training data forwarded to ``evaluate_params``.
        holidays: Holidays definition forwarded to ``evaluate_params``.
        initial_days: Initial training window for cross-validation, in days.
        periode_days: Spacing between cross-validation cutoffs, in days.
        horizon_days: Forecast horizon, in days.

    Returns:
        A tuple ``(best_params, minimal_mae)``. ``minimal_mae`` is the MAE
        of the lowest-RMSE result, which is not necessarily the smallest MAE
        seen during the search.
    """
    param_grid = {
        'changepoint_range': [0.8, 0.90, 0.95],
        "changepoint_prior_scale": list(
            np.arange(0.01, 0.06, 0.01, dtype=float)
        ),
        "seasonality_prior_scale": list(np.arange(1, 10.1, 1, dtype=float)),
        "seasonality_mode": ["additive", "multiplicative"],
        'holidays_prior_scale': list(np.arange(1, 10.1, 1, dtype=float)),
    }

    # Cartesian product of all value lists, one dict per combination.
    all_params = [
        dict(zip(param_grid.keys(), v))
        for v in itertools.product(*param_grid.values())
    ]

    # NOTE(review): `daskcluster`, `client`, `cluster` and `self` are not
    # parameters or locals of this function; they are assumed to come from
    # an enclosing scope that is not visible here -- confirm.
    # The explicit comparison is kept because the flag's type is not visible.
    if daskcluster == True:
        # Earlier attempt, reported as not running (kept for reference):
        #     future = delayed(evaluate_params)(
        #         params, train, holidays,
        #         initial_days, periode_days, horizon_days, "dask")
        #     results = self.client.compute(results)
        #     results = self.client.gather(results)

        # The parameter sets are evaluated one after another in this
        # process; only the work inside each cross-validation call is handed
        # to the "dask" backend. The joblib branch below instead runs whole
        # evaluations concurrently.
        try:
            results = [
                evaluate_params(
                    params,
                    train,
                    holidays,
                    initial_days,
                    periode_days,
                    horizon_days,
                    "dask",
                )
                for params in all_params
            ]
        finally:
            # Release the Dask resources even if an evaluation raises.
            client.close()
            cluster.close()

    else:
        results = Parallel(n_jobs=self.n_jobs)(
            delayed(evaluate_params)(params, train, holidays,
                                     initial_days, periode_days, horizon_days)
            for params in all_params
        )

    best_result = min(results, key=lambda x: x['rmse'])
    best_params = best_result['params']
    minimal_mae = best_result['mae']

    return best_params, minimal_mae

Implementing a Dask cluster with Prophet

0 Answers0