-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Description
pycaret version checks
-
I have checked that this issue has not already been reported here.
-
I have confirmed this bug exists on the latest version of pycaret.
-
I have confirmed this bug exists on the master branch of pycaret (pip install -U git+https://github.com/pycaret/pycaret.git@master).
Issue Description
I ran compare_models and returned the top 4 models. I then saved these 4 models to disk.
Now, when I load these saved models and pass them to tune_model, I get a
ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.
The model currently being tuned in the loop is the xgboost model, which is in PyCaret's model library, so I don't understand the source of this ValueError.
Here is the relevant part of the code
# Comparing models and returning the top 4
models = exp.compare_models(include=models_to_ignore,n_select=4)
# Saving the models
from pathlib import Path
import os
model_dir = Path('../models')
for index, model in enumerate(models):
model_file_name = models_metrics.index[index]
print(f'Saving {model_file_name} model')
save_model(model, os.path.join(model_dir, model_file_name))
#Loading and tuning the saved models
from pathlib import Path
from glob import glob
import os
model_dir = Path('../models')
for model in glob(os.path.join(model_dir, '*.pkl')):
tuned_model_file = (os.path.basename(model))
tuned_model_file = os.path.splitext(tuned_model_file)[0]
model = os.path.splitext(model)[0]
print(f'Started tuning {model} model')
model = load_model(model)
print(model)
tuned_model = tune_model(
model,
n_iter=20,
optimize='RMSLE',
search_library="tune-sklearn",
search_algorithm="optuna",
early_stopping='asha',
return_tuner=True,
return_train_score=True
)
print(f'Saving Model to {tuned_model_file}')
save_model(tuned_model, os.path.join(model_dir, tuned_file_name))
# Error output
Started tuning ../models/xgboost model
Transformation Pipeline and Model Successfully Loaded
Pipeline(memory=FastMemory(location=/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/joblib),
steps=[('target_transformation',
TransformerWrapperWithInverse(transformer=TargetTransformer(estimator=PowerTransformer(standardize=False)))),
('numerical_imputer',
TransformerWrapper(include=['onpromotion', 'oil_price'],
transformer=SimpleImputer())),
('categori...
feature_types=None, gamma=None, gpu_id=None,
grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None,
max_bin=None, max_cat_threshold=None,
max_cat_to_onehot=None, max_delta_step=None,
max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan,
monotone_constraints=None, n_estimators=100,
n_jobs=2, num_parallel_tree=None, predictor=None,
random_state=4758, ...))])
Initiated . . . . . . . . . . . . . . . . . . 16:12:40
Status . . . . . . . . . . . . . . . . . . Loading Dependencies
Estimator . . . . . . . . . . . . . . . . . . Compiling Library
Processing: 0%
0/7 [00:00<?, ?it/s]
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/ipykernel_12938/1177041359.py in <cell line: 5>()
13 print(model)
14
---> 15 tuned_model = tune_model(
16 model
17 # n_iter=20,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/utils/generic.py in wrapper(*args, **kwargs)
963 if globals_d[name] is None:
964 raise ValueError(message)
--> 965 return func(*args, **kwargs)
966
967 return wrapper
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/functional.py in tune_model(estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
1203 """
1204
-> 1205 return _CURRENT_EXPERIMENT.tune_model(
1206 estimator=estimator,
1207 fold=fold,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/oop.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
1503 """
1504
-> 1505 return super().tune_model(
1506 estimator=estimator,
1507 fold=fold,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/internal/pycaret_experiment/supervised_experiment.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
2238 if estimator_id is None:
2239 if custom_grid is None:
-> 2240 raise ValueError(
2241 "When passing a model not in PyCaret's model library, the custom_grid parameter must be provided."
2242 )
ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.
Reproducible Example
### load sample dataset from pycaret dataset module
from pycaret.datasets import get_data
data = get_data('insurance')
# import RegressionExperiment and init the class
from pycaret.regression import RegressionExperiment
exp = RegressionExperiment()
# compare baseline models
models = compare_models(n_select=4)
from pathlib import Path
import os
model_dir = Path('../models_bug')
for index, model in enumerate(models):
model_file_name = models_metrics.index[index]
print(f'Saving {model_file_name} model')
save_model(model, os.path.join(model_dir, model_file_name))
from pathlib import Path
from glob import glob
import os
model_dir = Path('../models_bug')
for model in glob(os.path.join(model_dir, '*.pkl')):
tuned_model_file = (os.path.basename(model))
tuned_model_file = os.path.splitext(tuned_model_file)[0]
model = os.path.splitext(model)[0]
print(f'Started tuning {model} model')
model = load_model(model)
print(model)
tuned_model = tune_model(
model,
n_iter=20,
optimize='RMSLE',
search_library="tune-sklearn",
search_algorithm="optuna",
early_stopping='asha',
return_tuner=True,
return_train_score=True
)
print(f'Saving Model to {tuned_model_file}')
save_model(tuned_model, os.path.join(model_dir, tuned_file_name))
Expected Behavior
I can't figure out why it is asking for custom_grid and raising a ValueError. Since I am just loading a saved model and not using a custom model, tune_model should just tune it using the saved pipeline.
Actual Results
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/ipykernel_22138/678589870.py in <cell line: 5>()
13 print(model)
14
---> 15 tuned_model = tune_model(
16 model,
17 n_iter=20,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/utils/generic.py in wrapper(*args, **kwargs)
963 if globals_d[name] is None:
964 raise ValueError(message)
--> 965 return func(*args, **kwargs)
966
967 return wrapper
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/functional.py in tune_model(estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
1203 """
1204
-> 1205 return _CURRENT_EXPERIMENT.tune_model(
1206 estimator=estimator,
1207 fold=fold,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/oop.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
1503 """
1504
-> 1505 return super().tune_model(
1506 estimator=estimator,
1507 fold=fold,
/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/internal/pycaret_experiment/supervised_experiment.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
2238 if estimator_id is None:
2239 if custom_grid is None:
-> 2240 raise ValueError(
2241 "When passing a model not in PyCaret's model library, the custom_grid parameter must be provided."
2242 )
ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.
Installed Versions
PyCaret required dependencies:
pip: 23.2.1
setuptools: 68.0.0
pycaret: 3.0.4
IPython: 7.34.0
ipywidgets: 8.1.0
tqdm: 4.65.0
numpy: 1.23.5
pandas: 1.5.3
jinja2: 3.1.2
scipy: 1.10.1
joblib: 1.3.0
sklearn: 1.2.2
pyod: 1.1.0
imblearn: 0.11.0
category_encoders: 2.6.1
lightgbm: 4.0.0
numba: 0.57.1
requests: 2.31.0
matplotlib: 3.7.2
scikitplot: 0.3.7
yellowbrick: 1.5
plotly: 5.15.0
plotly-resampler: Not installed
kaleido: 0.2.1
schemdraw: 0.15
statsmodels: 0.14.0
sktime: 0.21.0
tbats: 1.1.3
pmdarima: 2.0.3
psutil: 5.9.5
markupsafe: 2.1.3
pickle5: Not installed
cloudpickle: 2.2.1
deprecation: 2.1.0
xxhash: 3.2.0
wurlitzer: 3.0.3
PyCaret optional dependencies:
shap: 0.42.1
interpret: 0.4.2
umap: 0.5.3
pandas_profiling: 3.6.6
explainerdashboard: 0.4.3
autoviz: 0.1.730
fairlearn: 0.7.0
deepchecks: 0.17.4
xgboost: 1.7.4
catboost: 1.1.1
kmodes: 0.12.2
mlxtend: 0.22.0
statsforecast: 1.5.0
tune_sklearn: 0.4.6
ray: 2.6.1
hyperopt: 0.2.7
optuna: 3.2.0
skopt: 0.9.0
mlflow: 1.30.1
gradio: 3.39.0
fastapi: 0.100.1
uvicorn: 0.23.2
m2cgen: 0.10.0
evidently: 0.2.8
fugue: 0.8.6
streamlit: Not installed
prophet: Not installed