Skip to content

Should joblib warn about float n_jobs? #1539

@auderson

Description

@auderson
from joblib import Parallel, delayed


@delayed
def f():
    pass


Parallel(n_jobs=10.0)(f() for i in range(10))
Empty                                     Traceback (most recent call last)
File ~/mambaforge/envs/py39_2/lib/python3.9/site-packages/joblib/parallel.py:1423, in Parallel.dispatch_one_batch(self, iterator)
   1422 try:
-> 1423     tasks = self._ready_batches.get(block=False)
   1424 except queue.Empty:
   1425     # slice the iterator n_jobs * batchsize items at a time. If the
   1426     # slice returns less than that, then the current batchsize puts
   (...)
   1429     # accordingly to distribute evenly the last items between all
   1430     # workers.

File ~/mambaforge/envs/py39_2/lib/python3.9/queue.py:168, in Queue.get(self, block, timeout)
    167     if not self._qsize():
--> 168         raise Empty
    169 elif timeout is None:

Empty: 

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
Cell In[7], line 9
      4 @delayed
      5 def f():
      6     pass
----> 9 Parallel(n_jobs=10.0)(f() for i in range(10))

File ~/mambaforge/envs/py39_2/lib/python3.9/site-packages/joblib/parallel.py:1950, in Parallel.__call__(self, iterable)
   1944 self._call_ref = weakref.ref(output)
   1946 # The first item from the output is blank, but it makes the interpreter
   1947 # progress until it enters the Try/Except block of the generator and
   1948 # reach the first `yield` statement. This starts the aynchronous
   1949 # dispatch of the tasks to the workers.
-> 1950 next(output)
   1952 return output if self.return_generator else list(output)

File ~/mambaforge/envs/py39_2/lib/python3.9/site-packages/joblib/parallel.py:1588, in Parallel._get_outputs(self, iterator, pre_dispatch)
   1586 detach_generator_exit = False
   1587 try:
-> 1588     self._start(iterator, pre_dispatch)
   1589     # first yield returns None, for internal use only. This ensures
   1590     # that we enter the try/except block and start dispatching the
   1591     # tasks.
   1592     yield

File ~/mambaforge/envs/py39_2/lib/python3.9/site-packages/joblib/parallel.py:1571, in Parallel._start(self, iterator, pre_dispatch)
   1562 def _start(self, iterator, pre_dispatch):
   1563     # Only set self._iterating to True if at least a batch
   1564     # was dispatched. In particular this covers the edge
   (...)
   1568     # was very quick and its callback already dispatched all the
   1569     # remaining jobs.
   1570     self._iterating = False
-> 1571     if self.dispatch_one_batch(iterator):
   1572         self._iterating = self._original_iterator is not None
   1574     while self.dispatch_one_batch(iterator):

File ~/mambaforge/envs/py39_2/lib/python3.9/site-packages/joblib/parallel.py:1434, in Parallel.dispatch_one_batch(self, iterator)
   1431 n_jobs = self._cached_effective_n_jobs
   1432 big_batch_size = batch_size * n_jobs
-> 1434 islice = list(itertools.islice(iterator, big_batch_size))
   1435 if len(islice) == 0:
   1436     return False

ValueError: Stop argument for islice() must be None or an integer: 0 <= x <= sys.maxsize.

A float value for n_jobs can easily arise in practice, for example when the user takes the value from Ray's num_cpus. The resulting exception is raised deep inside dispatch_one_batch (from itertools.islice) and does not mention n_jobs, so the root cause is not obvious and is hard to debug. It would therefore be helpful for joblib to either issue a warning when n_jobs is not an integer or raise a clear ValueError up front in such cases.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions