Skip to content

CPP error in grid_search #1741

@acrofales

Description

@acrofales

Problem: CatBoost's `grid_search` raises the following error: `CatBoostError: library/cpp/json/writer/json_value.cpp:457: Not a string`
catboost version: 0.25.1
Operating System: linux (Google Compute Engine)
CPU: 4 vCPUs type Intel Haswell

GPU: N/A

Here is the initial call, followed by the full stack trace:

params = {'depth': [4, 7, 10],
         'per_float_feature_quantization':[None, [f'{n25_index}:border_count=1024'], [f'{n75_index}:border_count=1024'],[f'{n25_index}:border_count=1024', f'{n75_index}:border_count=1024']],
         'l2_leaf_reg': [1, 3, 5, 7, 9],
         'iterations': [2000],
         'learning_rate':[0.3],
         'verbose': [100]
         }
        

cbr = CatBoostRegressor()
cb_model = cbr.grid_search(params,
            trainpool,
            cv=3,
            partition_random_seed=42,
            calc_cv_statistics=True,
            search_by_train_test_split=True,
            refit=True,
            shuffle=True,
            stratified=None,
            train_size=0.8, 
            verbose=5,
            plot=True)


---------------------------------------------------------------------------
CatBoostError                             Traceback (most recent call last)
<ipython-input-141-be7bb715851e> in <module>
     24             train_size=0.8,
     25             verbose=5,
---> 26             plot=True)

/opt/conda/lib/python3.7/site-packages/catboost/core.py in grid_search(self, param_grid, X, y, cv, partition_random_seed, calc_cv_statistics, search_by_train_test_split, refit, shuffle, stratified, train_size, verbose, plot)
   3618             partition_random_seed=partition_random_seed, calc_cv_statistics=calc_cv_statistics,
   3619             search_by_train_test_split=search_by_train_test_split, refit=refit, shuffle=shuffle,
-> 3620             stratified=stratified, train_size=train_size, verbose=verbose, plot=plot
   3621         )
   3622 

/opt/conda/lib/python3.7/site-packages/catboost/core.py in _tune_hyperparams(self, param_grid, X, y, cv, n_iter, partition_random_seed, calc_cv_statistics, search_by_train_test_split, refit, shuffle, stratified, train_size, verbose, plot)
   3526             assert not self.is_fitted()
   3527             self.set_params(**cv_result['params'])
-> 3528             self.fit(X, y, silent=True)
   3529         return cv_result
   3530 

/opt/conda/lib/python3.7/site-packages/catboost/core.py in fit(self, X, y, cat_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
   5087                          use_best_model, eval_set, verbose, logging_level, plot, column_description,
   5088                          verbose_eval, metric_period, silent, early_stopping_rounds,
-> 5089                          save_snapshot, snapshot_file, snapshot_interval, init_model)
   5090 
   5091     def predict(self, data, prediction_type=None, ntree_start=0, ntree_end=0, thread_count=-1, verbose=None):

/opt/conda/lib/python3.7/site-packages/catboost/core.py in _fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
   1909             column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
   1910             silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
-> 1911             snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model
   1912         )
   1913         params = train_params["params"]

/opt/conda/lib/python3.7/site-packages/catboost/core.py in _prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
   1834         _check_param_types(params)
   1835         params = _params_type_cast(params)
-> 1836         _check_train_params(params)
   1837 
   1838         eval_set_list = eval_set if isinstance(eval_set, list) else [eval_set]

_catboost.pyx in _catboost._check_train_params()

_catboost.pyx in _catboost._check_train_params()

CatBoostError: library/cpp/json/writer/json_value.cpp:457: Not a string

This effectively breaks the grid_search method shipped with catboost. I'm happy to use sklearn's gridsearch, but it would have been nice to know before starting the hyperparameter tuning...

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions