-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Closed
Description
Problem:
I'm trying out catboost-spark with CatBoost version 0.26.1, Spark version 2.4, Scala version 2.11, and catboost-spark version 0.26.1. My operating system is Linux.
I'm running the binary classification example using PySpark, and I get an error at `model = classifier.fit(trainPool, [evalPool])`:
Py4JJavaError Traceback (most recent call last)
<ipython-input-10-2defab1ef68c> in <module>
1 # train model
----> 2 model = classifier.fit(trainPool, [evalPool])
<some_path>/ai.catboost_catboost-spark_2.4_2.11-0.26.1.jar/catboost_spark/core.py in fit(self, trainDataset, evalDatasets)
4987 evalDatasetsAsJavaObject[i] = _py2java(sc, evalDatasets[i])
4988 self._transfer_params_to_java()
-> 4989 java_model = self._java_obj.fit(_py2java(sc, trainDataset), evalDatasetsAsJavaObject)
4990 return CatBoostClassificationModel(java_model)
4991
<some_path>/spark/2/4/3/python/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
<some_path>/spark/2/4/3/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()
<some_path>/spark/2/4/3/python/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o142.fit.
: java.lang.RuntimeException: Error while executing master
at ai.catboost.spark.impl.Helpers$.checkOneFutureAndWaitForOther(Helpers.scala:30)
at ai.catboost.spark.CatBoostPredictorTrait$class.fit(CatBoostPredictor.scala:208)
at ai.catboost.spark.CatBoostClassifier.fit(CatBoostClassifier.scala:362)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:566)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.util.concurrent.ExecutionException: java.io.IOException: Cannot run program "<some_path>/java": error=7, Argument list too long
at java.base/java.util.concurrent.FutureTask.report(FutureTask.java:122)
at java.base/java.util.concurrent.FutureTask.get(FutureTask.java:191)
at ai.catboost.spark.impl.Helpers$.checkOneFutureAndWaitForOther(Helpers.scala:26)
... 13 more
Caused by: java.io.IOException: Cannot run program "<some_path>/java": error=7, Argument list too long
at java.base/java.lang.ProcessBuilder.start(ProcessBuilder.java:1128)
at java.base/java.lang.ProcessBuilder.start(ProcessBuilder.java:1071)
at ai.catboost.spark.impl.RunClassInNewProcess$.apply(RunClassInNewProcess.scala:70)
at ai.catboost.spark.impl.CatBoostMasterWrapper.trainCallback(Master.scala:170)
at ai.catboost.spark.CatBoostPredictorTrait$$anonfun$4.apply(CatBoostPredictor.scala:178)
at ai.catboost.spark.CatBoostPredictorTrait$$anonfun$4.apply(CatBoostPredictor.scala:178)
at ai.catboost.spark.TrainingDriver.run(TrainingDriver.scala:221)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
... 1 more
Caused by: java.io.IOException: error=7, Argument list too long
at java.base/java.lang.ProcessImpl.forkAndExec(Native Method)
at java.base/java.lang.ProcessImpl.<init>(ProcessImpl.java:340)
at java.base/java.lang.ProcessImpl.start(ProcessImpl.java:271)
at java.base/java.lang.ProcessBuilder.start(ProcessBuilder.java:1107)
... 13 more