-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Description
pycaret version checks
-
I have checked that this issue has not already been reported here.
-
I have confirmed this bug exists on the latest version of pycaret.
-
I have confirmed this bug exists on the master branch of pycaret (pip install -U git+https://github.com/pycaret/pycaret.git@master).
Issue Description
When using fold_strategy='timeseries' in setup(), I get an error:
ValueError: Invalid value for the fold_strategy parameter. 'timeseries' requires 'data_split_shuffle' and 'fold_shuffle' to be False as it can lead to unexpected data split.
Adding data_split_shuffle='False', fold_shuffle='False' to the setup() call gives the same error.
Update:
I also tested this in Google Colab, and it gives the same error.
Reproducible Example
from pycaret.datasets import get_data
from pycaret.classification import setup
data = get_data('diabetes')
s = setup(data, target = 'Class variable', fold_strategy='timeseries')
Expected Behavior
.
Actual Results
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[4], line 1
----> 1 s = setup(data, target = 'Class variable', fold_strategy='timeseries')
2 #, fold_shuffle=False)
File ~\anaconda3\lib\site-packages\pycaret\classification\functional.py:595, in setup(data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, verbose, memory, profile, profile_kwargs)
593 exp = _EXPERIMENT_CLASS()
594 set_current_experiment(exp)
--> 595 return exp.setup(
596 data=data,
597 data_func=data_func,
598 target=target,
599 index=index,
600 train_size=train_size,
601 test_data=test_data,
602 ordinal_features=ordinal_features,
603 numeric_features=numeric_features,
604 categorical_features=categorical_features,
605 date_features=date_features,
606 text_features=text_features,
607 ignore_features=ignore_features,
608 keep_features=keep_features,
609 preprocess=preprocess,
610 create_date_columns=create_date_columns,
611 imputation_type=imputation_type,
612 numeric_imputation=numeric_imputation,
613 categorical_imputation=categorical_imputation,
614 iterative_imputation_iters=iterative_imputation_iters,
615 numeric_iterative_imputer=numeric_iterative_imputer,
616 categorical_iterative_imputer=categorical_iterative_imputer,
617 text_features_method=text_features_method,
618 max_encoding_ohe=max_encoding_ohe,
619 encoding_method=encoding_method,
620 rare_to_value=rare_to_value,
621 rare_value=rare_value,
622 polynomial_features=polynomial_features,
623 polynomial_degree=polynomial_degree,
624 low_variance_threshold=low_variance_threshold,
625 group_features=group_features,
626 drop_groups=drop_groups,
627 remove_multicollinearity=remove_multicollinearity,
628 multicollinearity_threshold=multicollinearity_threshold,
629 bin_numeric_features=bin_numeric_features,
630 remove_outliers=remove_outliers,
631 outliers_method=outliers_method,
632 outliers_threshold=outliers_threshold,
633 fix_imbalance=fix_imbalance,
634 fix_imbalance_method=fix_imbalance_method,
635 transformation=transformation,
636 transformation_method=transformation_method,
637 normalize=normalize,
638 normalize_method=normalize_method,
639 pca=pca,
640 pca_method=pca_method,
641 pca_components=pca_components,
642 feature_selection=feature_selection,
643 feature_selection_method=feature_selection_method,
644 feature_selection_estimator=feature_selection_estimator,
645 n_features_to_select=n_features_to_select,
646 custom_pipeline=custom_pipeline,
647 custom_pipeline_position=custom_pipeline_position,
648 data_split_shuffle=data_split_shuffle,
649 data_split_stratify=data_split_stratify,
650 fold_strategy=fold_strategy,
651 fold=fold,
652 fold_shuffle=fold_shuffle,
653 fold_groups=fold_groups,
654 n_jobs=n_jobs,
655 use_gpu=use_gpu,
656 html=html,
657 session_id=session_id,
658 system_log=system_log,
659 log_experiment=log_experiment,
660 experiment_name=experiment_name,
661 experiment_custom_tags=experiment_custom_tags,
662 log_plots=log_plots,
663 log_profile=log_profile,
664 log_data=log_data,
665 verbose=verbose,
666 memory=memory,
667 profile=profile,
668 profile_kwargs=profile_kwargs,
669 )
File ~\anaconda3\lib\site-packages\pycaret\classification\oop.py:750, in ClassificationExperiment.setup(self, data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, fix_imbalance, fix_imbalance_method, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, engine, verbose, memory, profile, profile_kwargs)
747 self.data_split_stratify = data_split_stratify
748 self.data_split_shuffle = data_split_shuffle
--> 750 self._prepare_folds(
751 fold_strategy=fold_strategy,
752 fold=fold,
753 fold_shuffle=fold_shuffle,
754 fold_groups=fold_groups,
755 data_split_shuffle=data_split_shuffle,
756 )
758 self._prepare_train_test(
759 train_size=train_size,
760 test_data=test_data,
761 data_split_stratify=data_split_stratify,
762 data_split_shuffle=data_split_shuffle,
763 )
765 self._prepare_column_types(
766 ordinal_features=ordinal_features,
767 numeric_features=numeric_features,
(...)
772 keep_features=keep_features,
773 )
File ~\anaconda3\lib\site-packages\pycaret\internal\preprocess\preprocessor.py:350, in Preprocessor._prepare_folds(self, fold_strategy, fold, fold_shuffle, fold_groups, data_split_shuffle)
348 if fold_strategy == "timeseries" or isinstance(fold_strategy, TimeSeriesSplit):
349 if fold_shuffle or data_split_shuffle:
--> 350 raise ValueError(
351 "Invalid value for the fold_strategy parameter. 'timeseries' "
352 "requires 'data_split_shuffle' and 'fold_shuffle' to be False "
353 "as it can lead to unexpected data split."
354 )
356 if isinstance(fold_groups, str):
357 if fold_groups in self.X.columns:
ValueError: Invalid value for the fold_strategy parameter. 'timeseries' requires 'data_split_shuffle' and 'fold_shuffle' to be False as it can lead to unexpected data split.
Installed Versions
PyCaret required dependencies:
pip: 23.2.1
setuptools: 68.2.0
pycaret: 3.0.4
IPython: 8.15.0
ipywidgets: 8.1.0
tqdm: 4.66.1
numpy: 1.25.2
pandas: 2.1.0
jinja2: 3.1.2
scipy: 1.12.0.dev0+1612.6f49f12
joblib: 1.4.dev0
sklearn: 1.3.0
pyod: 1.1.0
imblearn: 0.12.0.dev0
category_encoders: 2.6.2
lightgbm: 4.0.0.99
numba: 0.58.0rc1
requests: 2.31.0
matplotlib: 3.8.0.dev1689+g9bba195394
scikitplot: 0.3.7
yellowbrick: 1.5
plotly: 5.16.1
plotly-resampler: Not installed
kaleido: 0.2.1
schemdraw: 0.17
statsmodels: 0.15.0.dev53+g56e9c569f
sktime: 0.22.0
tbats: 1.1.3
pmdarima: 2.0.3
psutil: 5.9.5
markupsafe: 2.1.3
pickle5: Not installed
cloudpickle: 2.2.1
deprecation: 2.1.0
xxhash: 3.3.0
wurlitzer: Not installed
PyCaret optional dependencies:
shap: 0.42.1
interpret: 0.4.4
umap: 0.5.3
ydata_profiling: 4.5.1
explainerdashboard: 0.4.3
autoviz: 0.1.730
fairlearn: 0.9.0
deepchecks: 0.17.4
xgboost: 2.1.0-dev
catboost: 1.2.1
kmodes: 0.12.2
mlxtend: 0.22.0
statsforecast: 1.6.0
tune_sklearn: 0.4.6
ray: 2.6.3
hyperopt: 0.2.7
optuna: 3.3.0
skopt: 0.9.0
mlflow: Installed but version unavailable
gradio: 3.43.2
fastapi: 0.103.1
uvicorn: 0.23.2
m2cgen: 0.10.0
evidently: 0.4.4
fugue: 0.8.7
streamlit: Not installed
prophet: Not installed