-
Notifications
You must be signed in to change notification settings - Fork 349
Closed
Closed
Copy link
Labels
Milestone
Description
Description
We want to support in-place element updates on tiles, using syntax like `t[i, j] += scalar`,
where `t` is a tile.
Context
This would enable patterns such as a simple parallel reduction:
# Number of elements in each tile; also passed as `block_dim` at launch.
# The halving reduction in `compute` starts at BLOCK_DIM // 2 and halves each
# step, so it only folds every element in when this is a power of two.
BLOCK_DIM = 16
# Per-chunk sum of `x`: each launch index `i` loads BLOCK_DIM consecutive
# values of `x` into a tile, folds them into tile element 0 with a halving
# reduction, and stores that one element into `y`.
#
# x: input values; chunk `i` starts at offset BLOCK_DIM * i.
# y: output; receives one value per chunk, positioned by `i`.
@wp.kernel
def compute(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    # `i` selects the chunk of `x` (and the slot of `y`); `j` selects the tile
    # element this thread updates.
    # NOTE(review): presumably `i` is the block index and `j` the thread index
    # within the block under wp.launch_tiled -- confirm against the Warp docs.
    i, j = wp.tid()

    t = wp.tile_load(x, shape=BLOCK_DIM, offset=BLOCK_DIM * i)

    # Halving reduction: at each step the lower `k` elements absorb the upper
    # `k` elements, so after the last step element 0 holds the chunk total.
    # NOTE(review): this relies on the writes of one step being visible to all
    # threads before the next step reads them -- confirm that in-place tile
    # element updates provide that synchronization.
    k = BLOCK_DIM // 2
    while k > 0:
        if j < k:
            # In-place tile element update (the syntax this issue asks for).
            t[j] += t[j + k]
        # Must run for every thread, not just those with j < k; otherwise the
        # inactive threads would never leave the loop.
        k //= 2

    # Store a one-element view of the tile (element 0) into `y`, positioned by `i`.
    wp.tile_store(y, wp.tile_view(t, offset=(0,), shape=(1,)), i)
# Host-side check: run the reduction over four BLOCK_DIM-sized chunks and
# compare the combined result against a NumPy reference.
N = BLOCK_DIM * 4

# Input is 0, 1, ..., N-1 as float32; the output has one slot per chunk.
x_np = np.arange(N, dtype=np.float32)
x = wp.array(x_np, dtype=float, device=device)
y = wp.zeros(4, dtype=float, device=device)

wp.launch_tiled(
    compute,
    dim=4,
    inputs=[x],
    outputs=[y],
    block_dim=BLOCK_DIM,
    device=device,
)

# The per-chunk sums added together must equal the sum of the whole input.
assert_np_equal(y.numpy().sum(), x_np.sum())