-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Closed
Labels
Description
The following .ll compiles to 16 scalar ops with control flow. This means that our float -> int casts in Halide are pretty terrible when we compile to wasm.
; llc wasm_float_cast.ll -mtriple=wasm32-unknown--wasm -mattr=+simd128 -o -
; Reproducer: load 16 floats from %in, convert each one to an unsigned 8-bit
; integer with a single vector fptoui, and store the 16 resulting bytes to %out.
; The vector width is 16 so that the IR matches the reported llc output, which
; reads 64 bytes of input and fills all 16 lanes of an i8x16 result.
define void @test(ptr noalias nocapture noundef readonly %in, ptr noalias nocapture noundef writeonly %out) {
entry:
  ; 16 x f32 = 64 bytes, i.e. four 128-bit vectors on wasm simd128.
  %fv.0.copyload = load <16 x float>, ptr %in, align 16
  ; Element-wise float -> unsigned int conversion straight down to i8.
  %conv = fptoui <16 x float> %fv.0.copyload to <16 x i8>
  ; 16 x i8 = 16 bytes, i.e. exactly one 128-bit vector store.
  store <16 x i8> %conv, ptr %out, align 16
  ret void
}
# llc output for @test (WebAssembly text assembly).
# The function reads 16 f32 values through its first pointer argument and
# writes 16 byte lanes through its second pointer argument. Every element is
# handled by its own scalar sequence (range check with a branch, scalar
# i32.trunc_f32_u, lane insert); no vector conversion instruction appears.
.text
.file "wasm_float_cast.ll"
.functype test (i32, i32) -> ()
.section .text.test,"",@
.globl test # -- Begin function test
.type test,@function
test: # @test
.functype test (i32, i32) -> ()
# Local indices: 0 and 1 are the two i32 parameters (0 is used as the load
# address, 1 as the store address). The declared locals follow:
#   2 = v128, the group of four floats currently being converted
#   3 = f32,  the element currently being range-checked
#   4 = i32,  converted element (5 = i32, second converted element)
#   6 = v128, the i8x16 result being assembled lane by lane
.local v128, f32, i32, i32, v128
# %bb.0: # %entry
# Element 1 of the floats at offset 0:
# local 4 = (x < 2^32 and x >= 0) ? i32.trunc_f32_u(x) : 0.
# NOTE(review): the guard presumably exists because i32.trunc_f32_u traps on
# out-of-range or NaN input -- confirm against the WebAssembly spec.
block
block
local.get 0
v128.load 0
local.tee 2
f32x4.extract_lane 1
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label1
# %bb.1: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label0
.LBB0_2: # %entry
end_block # label1:
i32.const 0
local.set 4
.LBB0_3: # %entry
end_block # label0:
# Element 0 of the same vector, same check-and-convert pattern, into local 5.
block
block
local.get 2
f32x4.extract_lane 0
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label3
# %bb.4: # %entry
local.get 3
i32.trunc_f32_u
local.set 5
br 1 # 1: down to label2
.LBB0_5: # %entry
end_block # label3:
i32.const 0
local.set 5
.LBB0_6: # %entry
end_block # label2:
# Start the result vector: splat element 0 into all byte lanes, then put
# element 1 into lane 1. Later lanes are overwritten one at a time below.
local.get 5
i8x16.splat
local.get 4
i8x16.replace_lane 1
local.set 6
# Element 2 of the floats at offset 0 -> result lane 2.
block
block
local.get 2
f32x4.extract_lane 2
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label5
# %bb.7: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label4
.LBB0_8: # %entry
end_block # label5:
i32.const 0
local.set 4
.LBB0_9: # %entry
end_block # label4:
local.get 6
local.get 4
i8x16.replace_lane 2
local.set 6
# Element 3 of the floats at offset 0 -> result lane 3.
block
block
local.get 2
f32x4.extract_lane 3
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label7
# %bb.10: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label6
.LBB0_11: # %entry
end_block # label7:
i32.const 0
local.set 4
.LBB0_12: # %entry
end_block # label6:
local.get 6
local.get 4
i8x16.replace_lane 3
local.set 6
# Second group of four floats (load offset 16); element 0 -> result lane 4.
block
block
local.get 0
v128.load 16
local.tee 2
f32x4.extract_lane 0
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label9
# %bb.13: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label8
.LBB0_14: # %entry
end_block # label9:
i32.const 0
local.set 4
.LBB0_15: # %entry
end_block # label8:
local.get 6
local.get 4
i8x16.replace_lane 4
local.set 6
# Second group, element 1 -> result lane 5.
block
block
local.get 2
f32x4.extract_lane 1
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label11
# %bb.16: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label10
.LBB0_17: # %entry
end_block # label11:
i32.const 0
local.set 4
.LBB0_18: # %entry
end_block # label10:
local.get 6
local.get 4
i8x16.replace_lane 5
local.set 6
# Second group, element 2 -> result lane 6.
block
block
local.get 2
f32x4.extract_lane 2
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label13
# %bb.19: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label12
.LBB0_20: # %entry
end_block # label13:
i32.const 0
local.set 4
.LBB0_21: # %entry
end_block # label12:
local.get 6
local.get 4
i8x16.replace_lane 6
local.set 6
# Second group, element 3 -> result lane 7.
block
block
local.get 2
f32x4.extract_lane 3
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label15
# %bb.22: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label14
.LBB0_23: # %entry
end_block # label15:
i32.const 0
local.set 4
.LBB0_24: # %entry
end_block # label14:
local.get 6
local.get 4
i8x16.replace_lane 7
local.set 6
# Third group of four floats (load offset 32); element 0 -> result lane 8.
block
block
local.get 0
v128.load 32
local.tee 2
f32x4.extract_lane 0
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label17
# %bb.25: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label16
.LBB0_26: # %entry
end_block # label17:
i32.const 0
local.set 4
.LBB0_27: # %entry
end_block # label16:
local.get 6
local.get 4
i8x16.replace_lane 8
local.set 6
# Third group, element 1 -> result lane 9.
block
block
local.get 2
f32x4.extract_lane 1
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label19
# %bb.28: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label18
.LBB0_29: # %entry
end_block # label19:
i32.const 0
local.set 4
.LBB0_30: # %entry
end_block # label18:
local.get 6
local.get 4
i8x16.replace_lane 9
local.set 6
# Third group, element 2 -> result lane 10.
block
block
local.get 2
f32x4.extract_lane 2
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label21
# %bb.31: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label20
.LBB0_32: # %entry
end_block # label21:
i32.const 0
local.set 4
.LBB0_33: # %entry
end_block # label20:
local.get 6
local.get 4
i8x16.replace_lane 10
local.set 6
# Third group, element 3 -> result lane 11.
block
block
local.get 2
f32x4.extract_lane 3
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label23
# %bb.34: # %entry
local.get 3
i32.trunc_f32_u
local.set 4
br 1 # 1: down to label22
.LBB0_35: # %entry
end_block # label23:
i32.const 0
local.set 4
.LBB0_36: # %entry
end_block # label22:
local.get 6
local.get 4
i8x16.replace_lane 11
local.set 6
# Fourth group of four floats: the address is computed as local 0 + 48 and
# loaded with offset 0. Local 0 is not read as an address after this point,
# so the remaining blocks reuse it to hold the converted element.
# Element 0 -> result lane 12.
block
block
local.get 0
i32.const 48
i32.add
v128.load 0
local.tee 2
f32x4.extract_lane 0
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label25
# %bb.37: # %entry
local.get 3
i32.trunc_f32_u
local.set 0
br 1 # 1: down to label24
.LBB0_38: # %entry
end_block # label25:
i32.const 0
local.set 0
.LBB0_39: # %entry
end_block # label24:
local.get 6
local.get 0
i8x16.replace_lane 12
local.set 6
# Fourth group, element 1 -> result lane 13.
block
block
local.get 2
f32x4.extract_lane 1
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label27
# %bb.40: # %entry
local.get 3
i32.trunc_f32_u
local.set 0
br 1 # 1: down to label26
.LBB0_41: # %entry
end_block # label27:
i32.const 0
local.set 0
.LBB0_42: # %entry
end_block # label26:
local.get 6
local.get 0
i8x16.replace_lane 13
local.set 6
# Fourth group, element 2 -> result lane 14.
block
block
local.get 2
f32x4.extract_lane 2
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label29
# %bb.43: # %entry
local.get 3
i32.trunc_f32_u
local.set 0
br 1 # 1: down to label28
.LBB0_44: # %entry
end_block # label29:
i32.const 0
local.set 0
.LBB0_45: # %entry
end_block # label28:
local.get 6
local.get 0
i8x16.replace_lane 14
local.set 6
# Fourth group, element 3 -> result lane 15.
block
block
local.get 2
f32x4.extract_lane 3
local.tee 3
f32.const 0x1p32
f32.lt
local.get 3
f32.const 0x0p0
f32.ge
i32.and
i32.eqz
br_if 0 # 0: down to label31
# %bb.46: # %entry
local.get 3
i32.trunc_f32_u
local.set 0
br 1 # 1: down to label30
.LBB0_47: # %entry
end_block # label31:
i32.const 0
local.set 0
.LBB0_48: # %entry
end_block # label30:
# Insert the last lane and store the finished i8x16 vector at the address in
# local 1 (the second parameter).
local.get 1
local.get 6
local.get 0
i8x16.replace_lane 15
v128.store 0
# fallthrough-return
end_function
# -- End function
# Custom section named "target_features", emitted as raw bytes.
# NOTE(review): layout presumably follows the WebAssembly tool-conventions
# target-features section (entry count, then prefix/length/name per entry) --
# confirm against that document.
.section .custom_section.target_features,"",@
# Three entries follow.
.int8 3
# Entry 1: byte 43 (ASCII '+'), name length 15, name.
.int8 43
.int8 15
.ascii "mutable-globals"
# Entry 2: byte 43 (ASCII '+'), name length 8, name.
.int8 43
.int8 8
.ascii "sign-ext"
# Entry 3: byte 43 (ASCII '+'), name length 7, name.
.int8 43
.int8 7
.ascii "simd128"
# Switch back to the section that holds the function test.
.section .text.test,"",@