-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Description
pycaret version checks
-
I have checked that this issue has not already been reported here.
-
I have confirmed this bug exists on the latest version of pycaret.
-
I have confirmed this bug exists on the master branch of pycaret (pip install -U git+https://github.com/pycaret/pycaret.git@master).
Issue Description
I am passing a custom preprocessor via custom_pipeline=preprocessor,
which includes a CatBoostEncoder (from category_encoders).
During setup, I get this error:
TypeError: get_feature_names_out() takes 1 positional argument but 2 were given
When I comment out the CatBoostEncoder step it runs smoothly.
Reproducible Example
import numpy as np
import pandas as pd
from pycaret.datasets import get_data
from pycaret.regression import RegressionExperiment
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import RobustScaler, PowerTransformer, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import category_encoders as ce
# Minimal reproducer: build a scikit-learn ColumnTransformer that contains a
# category_encoders CatBoostEncoder step and hand it to PyCaret's
# RegressionExperiment.setup() through `custom_pipeline`.
# Load PyCaret's bundled "automobile" dataset.
df = get_data("automobile")
# df.loc[[0, 5, 131, 141], "make"] = np.nan
# df.loc[[1, 6, 132, 142], "body-style"] = np.nan
# df.loc[[2, 7, 133, 143], "normalized-losses"] = np.nan
# Unshuffled split: the first 130 rows are used for training, and the rows
# from index label 130 onward (label-based .loc slice) for validation.
df_train = df.head(130)
df_valid = df.loc[130:]
# 5-fold time-series CV; each test fold is one tenth of the training rows.
tscv = TimeSeriesSplit(test_size=int(df_train.shape[0] / 10), n_splits=5)
# NOTE: TARGET is defined here, but setup() below passes the literal "price".
TARGET = "price"
# One column per preprocessing branch keeps the example small.
columns_numeric = ["normalized-losses"]
columns_cardinal = ["make"]
columns_onehot = ["body-style"]
# Numeric branch: median imputation -> robust scaling -> Yeo-Johnson transform.
pipe_numeric = Pipeline(
[
("imputer", SimpleImputer(strategy="median")),
("scaler", RobustScaler()),
("transformer", PowerTransformer(method="yeo-johnson")),
]
)
# Cardinal branch: most-frequent imputation -> CatBoostEncoder.
# The report states that commenting out the CatBoostEncoder step makes
# setup() run without the error.
pipe_cardinal = Pipeline(
[
("imputer", SimpleImputer(strategy="most_frequent")),
("cardinal", ce.CatBoostEncoder()),
]
)
# One-hot branch: most-frequent imputation -> dense one-hot encoding.
pipe_categorical = Pipeline(
[
("imputer", SimpleImputer(strategy="most_frequent")),
(
"onehot",
OneHotEncoder(sparse_output=False, handle_unknown="infrequent_if_exist"),
),
]
)
# Route each column list to its branch; columns not listed are dropped.
preprocessor = ColumnTransformer(
transformers=[
("numeric", pipe_numeric, columns_numeric),
("categorical", pipe_categorical, columns_onehot),
("cardinal", pipe_cardinal, columns_cardinal),
],
remainder="drop",
)
# xx = preprocessor.fit_transform(df_train.drop('price',axis=1),df_train['price'])
# pd.DataFrame(xx).isnull().sum()
rgr = RegressionExperiment()
# PyCaret's built-in preprocessing is switched off (preprocess=False) and the
# ColumnTransformer above is supplied as the custom pipeline.
# This is the call reported to raise:
#   TypeError: get_feature_names_out() takes 1 positional argument but 2 were given
rgr.setup(
data=df_train,
target="price",
test_data=df_valid,
preprocess=False,
custom_pipeline=preprocessor,
data_split_shuffle=False,
fold_strategy=tscv,
)
Expected Behavior
<pycaret.regression.oop.RegressionExperiment at 0x282358a07f0>
Actual Results
TypeError Traceback (most recent call last)
c:\Users\takis\sckool\pycaret-tutorial\example.py in line 50
57 # xx = preprocessor.fit_transform(df_train.drop('price',axis=1),df_train['price'])
58 # pd.DataFrame(xx).isnull().sum()
60 rgr = RegressionExperiment()
---> 61 rgr.setup(
62 data=df_train,
63 target="price",
64 test_data=df_valid,
65 preprocess=False,
66 custom_pipeline=preprocessor,
67 data_split_shuffle=False,
68 fold_strategy=tscv,
69 )
File c:\Users\user\anaconda3\envs\caretenv\lib\site-packages\pycaret\regression\oop.py:861, in RegressionExperiment.setup(self, data, data_func, target, index, train_size, test_data, ordinal_features, numeric_features, categorical_features, date_features, text_features, ignore_features, keep_features, preprocess, create_date_columns, imputation_type, numeric_imputation, categorical_imputation, iterative_imputation_iters, numeric_iterative_imputer, categorical_iterative_imputer, text_features_method, max_encoding_ohe, encoding_method, rare_to_value, rare_value, polynomial_features, polynomial_degree, low_variance_threshold, group_features, group_names, drop_groups, remove_multicollinearity, multicollinearity_threshold, bin_numeric_features, remove_outliers, outliers_method, outliers_threshold, transformation, transformation_method, normalize, normalize_method, pca, pca_method, pca_components, feature_selection, feature_selection_method, feature_selection_estimator, n_features_to_select, transform_target, transform_target_method, custom_pipeline, custom_pipeline_position, data_split_shuffle, data_split_stratify, fold_strategy, fold, fold_shuffle, fold_groups, n_jobs, use_gpu, html, session_id, system_log, log_experiment, experiment_name, experiment_custom_tags, log_plots, log_profile, log_data, engine, verbose, memory, profile, profile_kwargs)
858 if ("placeholder", None) in self.pipeline.steps and len(self.pipeline) > 1:
859 self.pipeline.steps.remove(("placeholder", None))
--> 861 self.pipeline.fit(self.X_train, self.y_train)
863 self.logger.info("Finished creating preprocessing pipeline.")
864 self.logger.info(f"Pipeline: {self.pipeline}")
File c:\Users\user\anaconda3\envs\caretenv\lib\site-packages\pycaret\internal\pipeline.py:255, in Pipeline.fit(self, X, y, **fit_params)
253 def fit(self, X=None, y=None, **fit_params):
...
755 )
--> 756 feature_names_out = transform.get_feature_names_out(feature_names_out)
757 return feature_names_out
TypeError: get_feature_names_out() takes 1 positional argument but 2 were given
Installed Versions
System:
python: 3.8.16 (default, Jan 17 2023, 22:25:28) [MSC v.1916 64 bit (AMD64)]
executable: c:\Users\user\anaconda3\envs\caretenv\python.exe
machine: Windows-10-10.0.22621-SP0
PyCaret required dependencies:
pip: 22.3.1
setuptools: 60.10.0
pycaret: 3.0.0
IPython: 8.10.0
ipywidgets: 8.0.4
tqdm: 4.64.1
numpy: 1.23.5
pandas: 1.5.3
jinja2: 3.1.2
scipy: 1.10.1
joblib: 1.2.0
sklearn: 1.2.1
pyod: 1.0.9
imblearn: 0.10.1
category_encoders: 2.6.0
lightgbm: 3.3.5
numba: 0.56.4
requests: 2.28.2
matplotlib: 3.7.0
scikitplot: 0.3.7
yellowbrick: 1.5
plotly: 5.13.1
kaleido: 0.2.1
statsmodels: 0.13.5
sktime: 0.16.1
tbats: 1.1.2
pmdarima: 2.0.2
psutil: 5.9.4
PyCaret optional dependencies:
shap: Not installed
interpret: Not installed
umap: Not installed
pandas_profiling: Not installed
explainerdashboard: Not installed
autoviz: 0.1.58
fairlearn: Not installed
xgboost: 1.7.4
catboost: Not installed
kmodes: Not installed
mlxtend: Not installed
statsforecast: Not installed
tune_sklearn: Not installed
ray: Not installed
hyperopt: Not installed
optuna: 3.1.0
skopt: Not installed
mlflow: 1.30.0
gradio: 3.19.1
fastapi: 0.92.0
uvicorn: 0.20.0
m2cgen: 0.10.0
evidently: 0.2.5
fugue: Not installed
streamlit: Not installed
prophet: Not installed