Skip to content

Cannot calculate percentiles for cupy backed array with linear interpolation #6942

@beckernick

Description

@beckernick

I can calculate percentiles with linear interpolation on a Dask array backed by NumPy without any problem, but the same call fails on a Dask array backed by CuPy. From briefly stepping through the debugger, the issue arises because an object-dtype NumPy array full of individual CuPy 0-d arrays (scalars) does not work with np.interp inside merge_percentiles.

Note that if we don't go down this codepath (such as by using "lower" as the interpolation value) we do not get an error but the result is a 0-d cupy scalar inside a numpy array (which may be less than ideal as the output).

import dask.array as da
import numpy as np
import cupy as cp

rs = da.random.RandomState(RandomState=np.random.RandomState)
x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
x = x.flatten()
da.percentile(x, 0.05).compute()
array([8.61981942])
import dask.array as da
import numpy as np
import cupy as cp

rs = da.random.RandomState(RandomState=cp.random.RandomState)
x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
x = x.flatten()
da.percentile(x, 0.05).compute()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-43-52bfcedf4158> in <module>
      6 x = rs.normal(10, 0.5, size=(1000, 10), chunks=(100, 10))
      7 x = x.flatten()
----> 8 da.percentile(x, 0.05).compute()

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/base.py in compute(self, **kwargs)
    165         dask.base.compute
    166         """
--> 167         (result,) = compute(self, traverse=False, **kwargs)
    168         return result
    169 

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/base.py in compute(*args, **kwargs)
    450         postcomputes.append(x.__dask_postcompute__())
    451 
--> 452     results = schedule(dsk, keys, **kwargs)
    453     return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
    454 

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
     82         get_id=_thread_get_id,
     83         pack_exception=pack_exception,
---> 84         **kwargs
     85     )
     86 

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
    484                         _execute_task(task, data)  # Re-execute locally
    485                     else:
--> 486                         raise_exception(exc, tb)
    487                 res, worker_id = loads(res_info)
    488                 state["cache"][key] = res

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in reraise(exc, tb)
    314     if exc.__traceback__ is not tb:
    315         raise exc.with_traceback(tb)
--> 316     raise exc
    317 
    318 

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    220     try:
    221         task, data = loads(task_info)
--> 222         result = _execute_task(task, data)
    223         id = get_id()
    224         result = dumps((result, id))

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
    119         # temporaries by their reference count and can execute certain
    120         # operations in-place.
--> 121         return func(*(_execute_task(a, cache) for a in args))
    122     elif not ishashable(arg):
    123         return arg

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/array/percentile.py in merge_percentiles(finalq, qs, vals, interpolation, Ns)
    242     # investigated further.
    243     if interpolation == "linear":
--> 244         rv = np.interp(desired_q, combined_q, combined_vals)
    245     else:
    246         left = np.searchsorted(combined_q, desired_q, side="left")

<__array_function__ internals> in interp(*args, **kwargs)

/raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py in interp(x, xp, fp, left, right, period)
   1421         fp = np.concatenate((fp[-1:], fp, fp[0:1]))
   1422 
-> 1423     return interp_func(x, xp, fp, left, right)
   1424 
   1425 

TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py(1423)interp()
   1421         fp = np.concatenate((fp[-1:], fp, fp[0:1]))
   1422 
-> 1423     return interp_func(x, xp, fp, left, right)
   1424 
   1425 

ipdb>  fp.dtype
dtype('O')
ipdb>  fp[:5]
array([array(8.07582234), array(8.08897682), array(8.19900077),
       array(8.24083655), array(8.28087683)], dtype=object)
ipdb>  left
ipdb>  right
ipdb>  type(fp[0])
<class 'cupy.core.core.ndarray'>
ipdb>  type(fp)
<class 'numpy.ndarray'>
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/numpy/lib/function_base.py(1423)interp()
   1421         fp = np.concatenate((fp[-1:], fp, fp[0:1]))
   1422 
-> 1423     return interp_func(x, xp, fp, left, right)
   1424 
   1425 

ipdb>  up
> <__array_function__ internals>(6)interp()

ipdb>  up
> /raid/nicholasb/miniconda3/envs/rapids-tpcxbb-20201202/lib/python3.7/site-packages/dask/array/percentile.py(244)merge_percentiles()
    242     # investigated further.
    243     if interpolation == "linear":
--> 244         rv = np.interp(desired_q, combined_q, combined_vals)
    245     else:
    246         left = np.searchsorted(combined_q, desired_q, side="left")

ipdb>  type(combined_vals), type(combined_vals[0])
(<class 'numpy.ndarray'>, <class 'cupy.core.core.ndarray'>)
!conda list | grep "dask\|numpy\|cupy"
cupy                      7.8.0            py37h940342b_1    conda-forge
dask                      2.30.0                     py_0    conda-forge
dask-core                 2.30.0                     py_0    conda-forge
dask-cuda                 0.17.0a201202           py37_49    rapidsai-nightly
dask-cudf                 0.17.0a201202   py37_g42644cc23a_365    rapidsai-nightly
numpy                     1.19.4           py37h7e9df27_1    conda-forge

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions