-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Closed
Labels
Description
I can smoothly calculate percentiles using linear interpolation on a Dask array backed by NumPy, but not on a Dask array backed by CuPy. From a brief step through the debugger, the issue arises because a NumPy array full of individual CuPy 0-d array scalars does not work in `np.interp` within `merge_percentiles`.
Note that if we don't go down this code path (for example, by using "lower" as the interpolation value), we do not get an error, but the result is a 0-d CuPy scalar inside a NumPy array, which may be less than ideal as the output.
import dask.array as da
import numpy as np
import cupy as cp
rs = da.random.RandomState(RandomState=np.random.RandomState)
x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
x = x.flatten()
da.percentile(x, 0.05).compute()
array([8.61981942])
import dask.array as da
import numpy as np
import cupy as cp
rs = da.random.RandomState(RandomState=cp.random.RandomState)
x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
x = x.flatten()
da.percentile(x, 0.05).compute()
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-43-52bfcedf4158> in <module>
6 x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
7 x = x.flatten()
----> 8 da.percentile(x, 0.05).compute()
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/base.py in compute(self, **kwargs)
165 dask.base.compute
166 """
--> 167 (result,) = compute(self, traverse=False, **kwargs)
168 return result
169
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
450 postcomputes.append(x.__dask_postcompute__())
451
--> 452 results = schedule(dsk, keys, **kwargs)
453 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
454
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/array/percentile.py in merge_percentiles(finalq, qs, vals, interpolation, Ns)
242 # investigated further.
243 if interpolation == "linear":
--> 244 rv = np.interp(desired_q, combined_q, combined_vals)
245 else:
246 left = np.searchsorted(combined_q, desired_q, side="left")
<__array_function__ internals> in interp(*args, **kwargs)
/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py in interp(x, xp, fp, left, right, period)
1421 fp = np.concatenate((fp[-1:], fp, fp[0:1]))
1422
-> 1423 return interp_func(x, xp, fp, left, right)
1424
1425
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py(1423)interp()
1421 fp = np.concatenate((fp[-1:], fp, fp[0:1]))
1422
-> 1423 return interp_func(x, xp, fp, left, right)
1424
1425
ipdb> fp.dtype
dtype('O')
ipdb> fp[:5]
array([array(8.07582234), array(8.08897682), array(8.19900077),
array(8.24083655), array(8.28087683)], dtype=object)
ipdb> left
ipdb> right
ipdb> type(fp[0])
<class 'cupy.core.core.ndarray'>
ipdb> type(fp)
<class 'numpy.ndarray'>
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py(1423)interp()
1421 fp = np.concatenate((fp[-1:], fp, fp[0:1]))
1422
-> 1423 return interp_func(x, xp, fp, left, right)
1424
1425
ipdb> up
> <__array_function__ internals>(6)interp()
ipdb> up
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/array/percentile.py(244)merge_percentiles()
242 # investigated further.
243 if interpolation == "linear":
--> 244 rv = np.interp(desired_q, combined_q, combined_vals)
245 else:
246 left = np.searchsorted(combined_q, desired_q, side="left")
ipdb> type(combined_vals), type(combined_vals[0])
(<class 'numpy.ndarray'>, <class 'cupy.core.core.ndarray'>)
!conda list | grep "dask\|numpy\|cupy"
cupy 7.8.0 py37h940342b_1 conda-forge
dask 2.30.0 py_0 conda-forge
dask-core 2.30.0 py_0 conda-forge
dask-cuda 0.17.0a201202 py37_49 rapidsai-nightly
dask-cudf 0.17.0a201202 py37_g42644cc23a_365 rapidsai-nightly
numpy 1.19.4 py37h7e9df27_1 conda-forge