-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Closed
Description
This code:
#include "Halide.h"
using namespace Halide;
// Minimal reproducer: building this three-stage pipeline fails during codegen
// with "Symbol not found: f3.s0.y.yi". The exact expressions and the order of
// the scheduling calls are part of the repro and should not be simplified.
int main() {
// Input image parameter declared with element type UInt(8) and 2 dimensions.
ImageParam input(UInt(8), 2);
Var x{"x"}, y{"y"}, yo{"yo"}, yi{"yi"};
Func f[3];
// Stage 0: the input wrapped in a repeat_edge boundary condition.
f[0] = BoundaryConditions::repeat_edge(input);
// Stage 1: adds f[0] sampled at halved coordinates (x / 2, y / 2) to f[0]
// sampled at unit-stride coordinates, so the footprint of f[0] needed by one
// point of f[1] is itself a function of x and y.
f[1](x, y) = ((f[0]((x / 2) + 2, (y / 2) + 2) + f[0](x / 2, (y / 2) + -2)) + (f[0](x + 1, y) + f[0](x + -1, y + -1)));
// Stage 2: adds f[1] sampled at doubled coordinates (x * 2, y * 2) to f[1]
// sampled at unit-stride coordinates.
f[2](x, y) = ((f[1](x * 2, (y * 2) + -2) + f[1]((x * 2) + 1, (y * 2) + 2)) + (f[1](x + -1, y + -1) + f[1](x + -2, y + -1)));
// Split the output's y dimension into yo (outer) and yi (inner), factor 16.
f[2].split(y, yo, yi, 16);
// f[0]: computed at f[1]'s x loop, with its storage hoisted to f[2]'s yo loop.
// The hoisted allocation therefore sits outside f[2]'s yi loop.
f[0].hoist_storage(f[2], yo).compute_at(f[1], x);
// f[1]: computed at f[2]'s yi loop, with its storage hoisted to the root level.
f[1].hoist_storage_root().compute_at(f[2], yi);
// JIT-compile the pipeline; this is the call during which the error is reported.
f[2].compile_jit();
// Only reached if compilation succeeds.
printf("Success!\n");
return 0;
}
This produces the following compile-time error during codegen:
Symbol not found: f3.s0.y.yi
The IR after storage flattening has the following size for the allocation of f[0]:
allocate repeat_edge[uint8 * ((max(((max((f3.extent.0 + f3.min.0)*2, (f3.extent.0 + f3.min.0) + -1) + -1)/2) + 1, max((f3.extent.0 + f3.min.0)*2, (f3.extent.0 + f3.min.0) + -1) + -1) - min(min(f3.min.0*2, f3.min.0 + -2)/2, min(f3.min.0*2, f3.min.0 + -2) + -1)) + 2) * ((max(((((let t43 = min(f3.s0.y.yo*16, f3.extent.1 + -16) in (max((((f3.min.1 + t43) + f3.s0.y.yi)*2) + 3, (f3.min.1 + t43) + f3.s0.y.yi) - min(((f3.min.1 + t43) + f3.s0.y.yi)*2, ((f3.min.1 + t43) + f3.s0.y.yi) + 1))) + min((f3.s0.y.yi + f3.s0.y.yi.base)*2, (f3.s0.y.yi + f3.s0.y.yi.base) + 1)) + -1)/2) + 2, ((let t43 = min(f3.s0.y.yo*16, f3.extent.1 + -16) in (max((((f3.min.1 + t43) + f3.s0.y.yi)*2) + 3, (f3.min.1 + t43) + f3.s0.y.yi) - min(((f3.min.1 + t43) + f3.s0.y.yi)*2, ((f3.min.1 + t43) + f3.s0.y.yi) + 1))) + min((f3.s0.y.yi + f3.s0.y.yi.base)*2, (f3.s0.y.yi + f3.s0.y.yi.base) + 1)) + -1) - min((min((f3.s0.y.yi + f3.s0.y.yi.base)*2, (f3.s0.y.yi + f3.s0.y.yi.base) + 1) + -2)/2, (min((f3.s0.y.yi + f3.s0.y.yi.base)*2, (f3.s0.y.yi + f3.s0.y.yi.base) + 1) + -2) + 1)) + 3)]
The allocation size includes references to f3.s0.y.yi, but the allocation itself is outside of that loop. Note that f[1] calls f[0] in a weird way - it adds f[0] to an upsampled version of f[0]. This means the size of the region of f[0] required by one value of f[1] is a function of x and y.
The storage hoisting code looks like it explicitly tries to account for this, so I'm not sure what's going on.