-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Closed
Description
Problem: catboost grid_search throws the following error: CatBoostError: library/cpp/json/writer/json_value.cpp:457: Not a string
catboost version: 0.25.1
Operating System: linux (Google Compute Engine)
CPU: 4 vCPUs type Intel Haswell
GPU: N/A
Here is the initial call, followed by the full stack trace:
params = {'depth': [4, 7, 10],
'per_float_feature_quantization':[None, [f'{n25_index}:border_count=1024'], [f'{n75_index}:border_count=1024'],[f'{n25_index}:border_count=1024', f'{n75_index}:border_count=1024']],
'l2_leaf_reg': [1, 3, 5, 7, 9],
'iterations': [2000],
'learning_rate':[0.3],
'verbose': [100]
}
cbr = CatBoostRegressor()
cb_model = cbr.grid_search(params,
trainpool,
cv=3,
partition_random_seed=42,
calc_cv_statistics=True,
search_by_train_test_split=True,
refit=True,
shuffle=True,
stratified=None,
train_size=0.8,
verbose=5,
plot=True)
---------------------------------------------------------------------------
CatBoostError Traceback (most recent call last)
<ipython-input-141-be7bb715851e> in <module>
24 train_size=0.8,
25 verbose=5,
---> 26 plot=True)
/opt/conda/lib/python3.7/site-packages/catboost/core.py in grid_search(self, param_grid, X, y, cv, partition_random_seed, calc_cv_statistics, search_by_train_test_split, refit, shuffle, stratified, train_size, verbose, plot)
3618 partition_random_seed=partition_random_seed, calc_cv_statistics=calc_cv_statistics,
3619 search_by_train_test_split=search_by_train_test_split, refit=refit, shuffle=shuffle,
-> 3620 stratified=stratified, train_size=train_size, verbose=verbose, plot=plot
3621 )
3622
/opt/conda/lib/python3.7/site-packages/catboost/core.py in _tune_hyperparams(self, param_grid, X, y, cv, n_iter, partition_random_seed, calc_cv_statistics, search_by_train_test_split, refit, shuffle, stratified, train_size, verbose, plot)
3526 assert not self.is_fitted()
3527 self.set_params(**cv_result['params'])
-> 3528 self.fit(X, y, silent=True)
3529 return cv_result
3530
/opt/conda/lib/python3.7/site-packages/catboost/core.py in fit(self, X, y, cat_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
5087 use_best_model, eval_set, verbose, logging_level, plot, column_description,
5088 verbose_eval, metric_period, silent, early_stopping_rounds,
-> 5089 save_snapshot, snapshot_file, snapshot_interval, init_model)
5090
5091 def predict(self, data, prediction_type=None, ntree_start=0, ntree_end=0, thread_count=-1, verbose=None):
/opt/conda/lib/python3.7/site-packages/catboost/core.py in _fit(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
1909 column_description=column_description, verbose_eval=verbose_eval, metric_period=metric_period,
1910 silent=silent, early_stopping_rounds=early_stopping_rounds, save_snapshot=save_snapshot,
-> 1911 snapshot_file=snapshot_file, snapshot_interval=snapshot_interval, init_model=init_model
1912 )
1913 params = train_params["params"]
/opt/conda/lib/python3.7/site-packages/catboost/core.py in _prepare_train_params(self, X, y, cat_features, text_features, embedding_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
1834 _check_param_types(params)
1835 params = _params_type_cast(params)
-> 1836 _check_train_params(params)
1837
1838 eval_set_list = eval_set if isinstance(eval_set, list) else [eval_set]
_catboost.pyx in _catboost._check_train_params()
_catboost.pyx in _catboost._check_train_params()
CatBoostError: library/cpp/json/writer/json_value.cpp:457: Not a string
This effectively breaks the grid_search method shipped with catboost. I'm happy to use sklearn's gridsearch, but it would have been nice to know before starting the hyperparameter tuning...
dertty