Skip to content

Bug with tweedie loss training on GPU #2812

@dubovikmaster

Description

@dubovikmaster

The following simple example demonstrates a bug that occurs when using Tweedie as the loss function while training on GPU.

I use CatBoost v1.2.7 with Python 3.11.

from catboost import (
    CatBoostRegressor, 
    Pool
)
from sklearn.datasets import make_regression
from sklearn import metrics


# Synthetic regression data: 100,000 samples with 10 features and a fixed seed.
# NOTE(review): make_regression targets are presumably centred on zero (so many
# are negative), while Tweedie loss is normally defined for non-negative
# targets -- confirm whether this is what produces the `inf` in the error below.
X, y = make_regression(n_samples=100000, n_features=10, noise=1, random_state=42)

# Wrap the features and target in a CatBoost Pool for training.
train_pool = Pool(X, y)

# Regressor using Tweedie loss (variance_power=1.1) on the GPU, with verbose=False.
model = CatBoostRegressor(loss_function='Tweedie:variance_power=1.1', task_type='GPU',  verbose=False)

# Fit on the pool with plotting enabled; per the report, this call raises the CatBoostError.
model.fit(train_pool, plot=True)

Error

CatBoostError                             Traceback (most recent call last)
Cell In[186], line 1
----> 1 model.fit(train_pool, plot=True)

File /opt/conda/envs/ltv/lib/python3.11/site-packages/catboost/core.py:5873, in CatBoostRegressor.fit(self, X, y, cat_features, text_features, embedding_features, graph, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   5871 if 'loss_function' in params:
   5872     CatBoostRegressor._check_is_compatible_loss(params['loss_function'])
-> 5873 return self._fit(X, y, cat_features, text_features, embedding_features, None, graph, sample_weight, None, None, None, None, baseline,
   5874                  use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description,
   5875                  verbose_eval, metric_period, silent, early_stopping_rounds,
   5876                  save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)

File /opt/conda/envs/ltv/lib/python3.11/site-packages/catboost/core.py:2410, in CatBoost._fit(self, X, y, cat_features, text_features, embedding_features, pairs, graph, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, plot_file, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model, callbacks, log_cout, log_cerr)
   2407 allow_clear_pool = train_params["allow_clear_pool"]
   2409 with plot_wrapper(plot, plot_file, 'Training plots', [_get_train_dir(self.get_params())]):
-> 2410     self._train(
   2411         train_pool,
   2412         train_params["eval_sets"],
   2413         params,
   2414         allow_clear_pool,
   2415         train_params["init_model"]
   2416     )
   2418 # Have property feature_importance possibly set
   2419 loss = self._object._get_loss_function_name()

File /opt/conda/envs/ltv/lib/python3.11/site-packages/catboost/core.py:1790, in _CatBoostBase._train(self, train_pool, test_pool, params, allow_clear_pool, init_model)
   1789 def _train(self, train_pool, test_pool, params, allow_clear_pool, init_model):
-> 1790     self._object._train(train_pool, test_pool, params, allow_clear_pool, init_model._object if init_model else None)
   1791     self._set_trained_model_attributes()

File _catboost.pyx:5017, in _catboost._CatBoost._train()

File _catboost.pyx:5066, in _catboost._CatBoost._train()

CatBoostError: library/cpp/json/writer/json.cpp:259: JSON writer: invalid float value: inf

Metadata

Metadata

Assignees

No one assigned

    Type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions