Skip to content

[BUG]: Load and Tune Models #3691

@ganeshmorye

Description

@ganeshmorye

pycaret version checks

Issue Description

I ran compare_models and returned the top 4 models. I then saved these 4 models to disk.
Now, when I load these saved models and try to tune them with tune_model, I get:

ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.

The model currently being tuned in the loop is the xgboost model, which is in PyCaret's model library, so I don't understand the source of this ValueError.
Here is the relevant part of the code:

# Comparing models and returning the top 4
models = exp.compare_models(include=models_to_ignore,n_select=4)

# Saving the models
from pathlib import Path
import os
model_dir = Path('../models')
for index, model in enumerate(models):
    model_file_name = models_metrics.index[index]
    print(f'Saving {model_file_name} model')
    save_model(model, os.path.join(model_dir, model_file_name))
#Loading and tuning the saved models
from pathlib import Path
from glob import glob
import os
model_dir = Path('../models')
for model in glob(os.path.join(model_dir, '*.pkl')):
    
    tuned_model_file = (os.path.basename(model))
    tuned_model_file = os.path.splitext(tuned_model_file)[0]
    
    model = os.path.splitext(model)[0]
    print(f'Started tuning {model} model')
    model = load_model(model)
    print(model)
    
    tuned_model = tune_model(
        model, 
        n_iter=20, 
        optimize='RMSLE',
        search_library="tune-sklearn",
        search_algorithm="optuna",
        early_stopping='asha',
        return_tuner=True,
        return_train_score=True
    )
    
    print(f'Saving Model to {tuned_model_file}')
    save_model(tuned_model, os.path.join(model_dir, tuned_file_name))
# Error output
Started tuning ../models/xgboost model
Transformation Pipeline and Model Successfully Loaded
Pipeline(memory=FastMemory(location=/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/joblib),
         steps=[('target_transformation',
                 TransformerWrapperWithInverse(transformer=TargetTransformer(estimator=PowerTransformer(standardize=False)))),
                ('numerical_imputer',
                 TransformerWrapper(include=['onpromotion', 'oil_price'],
                                    transformer=SimpleImputer())),
                ('categori...
                              feature_types=None, gamma=None, gpu_id=None,
                              grow_policy=None, importance_type=None,
                              interaction_constraints=None, learning_rate=None,
                              max_bin=None, max_cat_threshold=None,
                              max_cat_to_onehot=None, max_delta_step=None,
                              max_depth=None, max_leaves=None,
                              min_child_weight=None, missing=nan,
                              monotone_constraints=None, n_estimators=100,
                              n_jobs=2, num_parallel_tree=None, predictor=None,
                              random_state=4758, ...))])
Initiated	. . . . . . . . . . . . . . . . . .	16:12:40
Status	. . . . . . . . . . . . . . . . . .	Loading Dependencies
Estimator	. . . . . . . . . . . . . . . . . .	Compiling Library
Processing: 0%
0/7 [00:00<?, ?it/s]
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/ipykernel_12938/1177041359.py in <cell line: 5>()
     13     print(model)
     14 
---> 15     tuned_model = tune_model(
     16         model
     17         # n_iter=20,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/utils/generic.py in wrapper(*args, **kwargs)
    963                 if globals_d[name] is None:
    964                     raise ValueError(message)
--> 965             return func(*args, **kwargs)
    966 
    967         return wrapper

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/functional.py in tune_model(estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   1203     """
   1204 
-> 1205     return _CURRENT_EXPERIMENT.tune_model(
   1206         estimator=estimator,
   1207         fold=fold,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/oop.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   1503         """
   1504 
-> 1505         return super().tune_model(
   1506             estimator=estimator,
   1507             fold=fold,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/internal/pycaret_experiment/supervised_experiment.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   2238         if estimator_id is None:
   2239             if custom_grid is None:
-> 2240                 raise ValueError(
   2241                     "When passing a model not in PyCaret's model library, the custom_grid parameter must be provided."
   2242                 )

ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.

Reproducible Example

### load sample dataset from pycaret dataset module
from pycaret.datasets import get_data
data = get_data('insurance')

# import RegressionExperiment and init the class
from pycaret.regression import RegressionExperiment
exp = RegressionExperiment()

# compare baseline models
models = compare_models(n_select=4)

from pathlib import Path
import os
model_dir = Path('../models_bug')
for index, model in enumerate(models):
    model_file_name = models_metrics.index[index]
    print(f'Saving {model_file_name} model')
    save_model(model, os.path.join(model_dir, model_file_name))

from pathlib import Path
from glob import glob
import os
model_dir = Path('../models_bug')
for model in glob(os.path.join(model_dir, '*.pkl')):
    
    tuned_model_file = (os.path.basename(model))
    tuned_model_file = os.path.splitext(tuned_model_file)[0]
    
    model = os.path.splitext(model)[0]
    print(f'Started tuning {model} model')
    model = load_model(model)
    print(model)
    
    tuned_model = tune_model(
        model, 
        n_iter=20, 
        optimize='RMSLE',
        search_library="tune-sklearn",
        search_algorithm="optuna",
        early_stopping='asha',
        return_tuner=True,
        return_train_score=True
    )
    
    print(f'Saving Model to {tuned_model_file}')
    save_model(tuned_model, os.path.join(model_dir, tuned_file_name))

Expected Behavior

I can't figure out why it is asking for custom_grid and raising a ValueError. Since I am just loading a saved model and not using a custom model, tune_model should simply tune it using the saved pipeline.

Actual Results

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/var/folders/vc/262dx0q508d6dv26phhy52ph0000gn/T/ipykernel_22138/678589870.py in <cell line: 5>()
     13     print(model)
     14 
---> 15     tuned_model = tune_model(
     16         model,
     17         n_iter=20,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/utils/generic.py in wrapper(*args, **kwargs)
    963                 if globals_d[name] is None:
    964                     raise ValueError(message)
--> 965             return func(*args, **kwargs)
    966 
    967         return wrapper

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/functional.py in tune_model(estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   1203     """
   1204 
-> 1205     return _CURRENT_EXPERIMENT.tune_model(
   1206         estimator=estimator,
   1207         fold=fold,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/regression/oop.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   1503         """
   1504 
-> 1505         return super().tune_model(
   1506             estimator=estimator,
   1507             fold=fold,

/opt/anaconda3/envs/store_sales/lib/python3.10/site-packages/pycaret/internal/pycaret_experiment/supervised_experiment.py in tune_model(self, estimator, fold, round, n_iter, custom_grid, optimize, custom_scorer, search_library, search_algorithm, early_stopping, early_stopping_max_iters, choose_better, fit_kwargs, groups, return_tuner, verbose, tuner_verbose, return_train_score, **kwargs)
   2238         if estimator_id is None:
   2239             if custom_grid is None:
-> 2240                 raise ValueError(
   2241                     "When passing a model not in PyCaret's model library, the custom_grid parameter must be provided."
   2242                 )

ValueError: When passing a model not in PyCaret's model library, the custom_grid parameter must be provided.

Installed Versions

System: python: 3.10.12 | packaged by conda-forge | (main, Jun 23 2023, 22:39:40) [Clang 15.0.7 ] executable: /opt/anaconda3/envs/store_sales/bin/python machine: macOS-14.0-x86_64-i386-64bit

PyCaret required dependencies:
pip: 23.2.1
setuptools: 68.0.0
pycaret: 3.0.4
IPython: 7.34.0
ipywidgets: 8.1.0
tqdm: 4.65.0
numpy: 1.23.5
pandas: 1.5.3
jinja2: 3.1.2
scipy: 1.10.1
joblib: 1.3.0
sklearn: 1.2.2
pyod: 1.1.0
imblearn: 0.11.0
category_encoders: 2.6.1
lightgbm: 4.0.0
numba: 0.57.1
requests: 2.31.0
matplotlib: 3.7.2
scikitplot: 0.3.7
yellowbrick: 1.5
plotly: 5.15.0
plotly-resampler: Not installed
kaleido: 0.2.1
schemdraw: 0.15
statsmodels: 0.14.0
sktime: 0.21.0
tbats: 1.1.3
pmdarima: 2.0.3
psutil: 5.9.5
markupsafe: 2.1.3
pickle5: Not installed
cloudpickle: 2.2.1
deprecation: 2.1.0
xxhash: 3.2.0
wurlitzer: 3.0.3

PyCaret optional dependencies:
shap: 0.42.1
interpret: 0.4.2
umap: 0.5.3
pandas_profiling: 3.6.6
explainerdashboard: 0.4.3
autoviz: 0.1.730
fairlearn: 0.7.0
deepchecks: 0.17.4
xgboost: 1.7.4
catboost: 1.1.1
kmodes: 0.12.2
mlxtend: 0.22.0
statsforecast: 1.5.0
tune_sklearn: 0.4.6
ray: 2.6.1
hyperopt: 0.2.7
optuna: 3.2.0
skopt: 0.9.0
mlflow: 1.30.1
gradio: 3.39.0
fastapi: 0.100.1
uvicorn: 0.23.2
m2cgen: 0.10.0
evidently: 0.2.8
fugue: 0.8.6
streamlit: Not installed
prophet: Not installed

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Relationships

None yet

Development

No branches or pull requests

Issue actions