0

I'm trying to implement dask. I'm currently using joblib and it works perfectly, it uses the entire CPU, which seems perfect to me but I want to add more resources

Now trying to implement dask it's going much slower, I don't know what I'm doing wrong.

def evaluate_params(params, train, holidays, initial, periode, horizon,parallel:str="processes"):
    mae_ = 999999
    rmse_ = 999999
    df_p = pd.DataFrame()
try:
    model = Prophet(**params,
                   holidays=holidays)

    model.add_country_holidays(country_name='PA')
    model.add_regressor('weekendOrPayday')
    model.fit(train)

    # Usando el paralelismo nativo de Dask en Prophet
    df_cv = cross_validation(
        model,
        initial=f"{initial} days",
        period=f"{periode} days",
        horizon=f"{horizon} days",
        parallel=parallel # change "processes" a "dask"
    )

    df_p = performance_metrics(df_cv, rolling_window=1)

except Exception as e:
    error = f"Params {params}: {repr(e)}"
    print(error, flush=True)

if not df_p.empty:
    mae_ = df_p["mae"].values[0]
    rmse_ = df_p["rmse"].values[0]

return {
    'params': params,
    'mae': mae_,
    'rmse': rmse_
}

def optimize_prophet(train, holidays, initial_days, periode_days, horizon_days):

param_grid = {
    'changepoint_range': [0.8, 0.90, 0.95],
    "changepoint_prior_scale": [
        x for x in np.arange(0.01, 0.06, 0.01, dtype=float)
    ],
    "seasonality_prior_scale": [
        x for x in np.arange(1, 10.1, 1, dtype=float)
    ],
    "seasonality_mode": ["additive", "multiplicative"],
    'holidays_prior_scale': [x for x in np.arange(1, 10.1, 1, dtype=float)],
}

all_params = [
    dict(zip(param_grid.keys(), v))
    for v in itertools.product(*param_grid.values())
]


if daskcluster == True:
    results = []
    ## with this code no run
    """future = delayed(evaluate_params)(
                    params, 
                    train, 
                    holidays,
                    initial_days, 
                    periode_days, 
                    horizon_days,
                    "dask"   
                )

    results = self.client.compute(results)
    results = self.client.gather(results)           
    """


    for params in all_params:
        future = evaluate_params(
            params, 
            train, 
            holidays,
            initial_days, 
            periode_days, 
            horizon_days,
            "dask"
        )
        results.append(future)
    client.close()
    cluster.close()

else:
    results = Parallel(n_jobs=self.n_jobs)(
                delayed(evaluate_params)(params, train, holidays,
                                        initial_days, periode_days, horizon_days)
                for params in all_params
            )


best_result = min(results, key=lambda x: x['rmse'])
best_params = best_result['params']
minimal_mae = best_result['mae']



return best_params, minimal_mae

Bryro
  • 101

0 Answers0