Skip to content

float -> int casts on wasm scalarize #7972

@abadams

Description

@abadams

The following .ll compiles to 16 scalar conversions, each wrapped in its own control flow, instead of vector instructions. This means that our float -> int casts in Halide are pretty terrible when we compile to wasm.

; llc wasm_float_cast.ll -mtriple=wasm32-unknown--wasm -mattr=+simd128 -o -
;
; Reproducer: one vector fptoui from 16 floats to 16 unsigned bytes.
; The vector width is 16 so that the IR matches the llc output quoted in this
; report, which performs four v128 loads (64 bytes of f32 input) and fills
; i8x16 lanes 0-15 before a single v128 store.
;
; %in  - pointer to the 16 input floats (read only)
; %out - pointer to the 16 output bytes (write only)

define void @test(ptr noalias nocapture noundef readonly %in, ptr noalias nocapture noundef writeonly %out) {
entry:
  %fv.0.copyload = load <16 x float>, ptr %in, align 16
  ; fptoui yields poison for lanes whose value does not fit the destination type.
  %conv = fptoui <16 x float> %fv.0.copyload to <16 x i8>
  store <16 x i8> %conv, ptr %out, align 16
  ret void
}
	.text
	.file	"wasm_float_cast.ll"
	.functype	test (i32, i32) -> ()
	.section	.text.test,"",@
	.globl	test                            # -- Begin function test
	.type	test,@function
#-----------------------------------------------------------------------
# test(i32 in, i32 out) -> ()          (llc output, wasm32 + simd128)
#
# What the code below does:
#   * loads four v128 values from `in` at byte offsets 0, 16, 32 and 48,
#   * extracts each of the 16 f32 lanes one at a time,
#   * converts each lane to an integer with a scalar i32.trunc_f32_u that
#     is guarded by a range check (see below),
#   * inserts the 16 results into lanes 0..15 of an i8x16 value
#     (i8x16.splat / i8x16.replace_lane keep the low 8 bits of the i32),
#   * stores that v128 to `out` at offset 0.
#
# Per-lane guard (repeated 16 times):
#     if (x < 0x1p32 && x >= 0) result = i32.trunc_f32_u(x); else result = 0;
#   i32.trunc_f32_u traps on NaN and out-of-range operands, so the
#   conversion is only executed when the check passes; for NaN both
#   comparisons are false, so the zero path is taken.
#   The two nested `block`s form the if/else: `br_if 0` jumps to the
#   "result = 0" arm, `br 1` skips over it.
#
# Locals (0 and 1 are the parameters):
#   0  in pointer; reused as the scalar result for lanes 12..15 once the
#      last load has been issued
#   1  out pointer
#   2  v128  input vector currently being unpacked
#   3  f32   lane currently being tested / converted
#   4  i32   converted scalar for the current lane (lanes 1..11)
#   5  i32   converted scalar for lane 0
#   6  v128  output vector under construction
#-----------------------------------------------------------------------
test:                                   # @test
	.functype	test (i32, i32) -> ()
	.local  	v128, f32, i32, i32, v128
# %bb.0:                                # %entry
# ---- input bytes 0..15, f32 lane 1 -> local 4 (becomes output lane 1) ----
	block   	
	block   	
	local.get	0
	v128.load	0
	local.tee	2
	f32x4.extract_lane	1
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label1
# %bb.1:                                # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label0
.LBB0_2:                                # %entry
	end_block                               # label1:
	i32.const	0
	local.set	4
.LBB0_3:                                # %entry
	end_block                               # label0:
# ---- input bytes 0..15, f32 lane 0 -> local 5 (becomes output lane 0) ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	0
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label3
# %bb.4:                                # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	5
	br      	1                               # 1: down to label2
.LBB0_5:                                # %entry
	end_block                               # label3:
	i32.const	0
	local.set	5
.LBB0_6:                                # %entry
	end_block                               # label2:
# Start the output vector: splat lane 0's result, then overwrite lane 1.
	local.get	5
	i8x16.splat
	local.get	4
	i8x16.replace_lane	1
	local.set	6
# ---- input bytes 0..15, f32 lane 2 -> output lane 2 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	2
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label5
# %bb.7:                                # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label4
.LBB0_8:                                # %entry
	end_block                               # label5:
	i32.const	0
	local.set	4
.LBB0_9:                                # %entry
	end_block                               # label4:
	local.get	6
	local.get	4
	i8x16.replace_lane	2
	local.set	6
# ---- input bytes 0..15, f32 lane 3 -> output lane 3 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	3
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label7
# %bb.10:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label6
.LBB0_11:                               # %entry
	end_block                               # label7:
	i32.const	0
	local.set	4
.LBB0_12:                               # %entry
	end_block                               # label6:
	local.get	6
	local.get	4
	i8x16.replace_lane	3
	local.set	6
# ---- input bytes 16..31 (second v128 load), f32 lane 0 -> output lane 4 ----
	block   	
	block   	
	local.get	0
	v128.load	16
	local.tee	2
	f32x4.extract_lane	0
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label9
# %bb.13:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label8
.LBB0_14:                               # %entry
	end_block                               # label9:
	i32.const	0
	local.set	4
.LBB0_15:                               # %entry
	end_block                               # label8:
	local.get	6
	local.get	4
	i8x16.replace_lane	4
	local.set	6
# ---- input bytes 16..31, f32 lane 1 -> output lane 5 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	1
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label11
# %bb.16:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label10
.LBB0_17:                               # %entry
	end_block                               # label11:
	i32.const	0
	local.set	4
.LBB0_18:                               # %entry
	end_block                               # label10:
	local.get	6
	local.get	4
	i8x16.replace_lane	5
	local.set	6
# ---- input bytes 16..31, f32 lane 2 -> output lane 6 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	2
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label13
# %bb.19:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label12
.LBB0_20:                               # %entry
	end_block                               # label13:
	i32.const	0
	local.set	4
.LBB0_21:                               # %entry
	end_block                               # label12:
	local.get	6
	local.get	4
	i8x16.replace_lane	6
	local.set	6
# ---- input bytes 16..31, f32 lane 3 -> output lane 7 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	3
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label15
# %bb.22:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label14
.LBB0_23:                               # %entry
	end_block                               # label15:
	i32.const	0
	local.set	4
.LBB0_24:                               # %entry
	end_block                               # label14:
	local.get	6
	local.get	4
	i8x16.replace_lane	7
	local.set	6
# ---- input bytes 32..47 (third v128 load), f32 lane 0 -> output lane 8 ----
	block   	
	block   	
	local.get	0
	v128.load	32
	local.tee	2
	f32x4.extract_lane	0
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label17
# %bb.25:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label16
.LBB0_26:                               # %entry
	end_block                               # label17:
	i32.const	0
	local.set	4
.LBB0_27:                               # %entry
	end_block                               # label16:
	local.get	6
	local.get	4
	i8x16.replace_lane	8
	local.set	6
# ---- input bytes 32..47, f32 lane 1 -> output lane 9 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	1
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label19
# %bb.28:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label18
.LBB0_29:                               # %entry
	end_block                               # label19:
	i32.const	0
	local.set	4
.LBB0_30:                               # %entry
	end_block                               # label18:
	local.get	6
	local.get	4
	i8x16.replace_lane	9
	local.set	6
# ---- input bytes 32..47, f32 lane 2 -> output lane 10 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	2
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label21
# %bb.31:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label20
.LBB0_32:                               # %entry
	end_block                               # label21:
	i32.const	0
	local.set	4
.LBB0_33:                               # %entry
	end_block                               # label20:
	local.get	6
	local.get	4
	i8x16.replace_lane	10
	local.set	6
# ---- input bytes 32..47, f32 lane 3 -> output lane 11 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	3
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label23
# %bb.34:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	4
	br      	1                               # 1: down to label22
.LBB0_35:                               # %entry
	end_block                               # label23:
	i32.const	0
	local.set	4
.LBB0_36:                               # %entry
	end_block                               # label22:
	local.get	6
	local.get	4
	i8x16.replace_lane	11
	local.set	6
# ---- input bytes 48..63 (fourth load, address = in + 48), f32 lane 0 -> output lane 12 ----
# This is the last read of the `in` pointer; from here on local 0 is reused
# to hold the converted scalar instead of local 4.
	block   	
	block   	
	local.get	0
	i32.const	48
	i32.add 
	v128.load	0
	local.tee	2
	f32x4.extract_lane	0
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label25
# %bb.37:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	0
	br      	1                               # 1: down to label24
.LBB0_38:                               # %entry
	end_block                               # label25:
	i32.const	0
	local.set	0
.LBB0_39:                               # %entry
	end_block                               # label24:
	local.get	6
	local.get	0
	i8x16.replace_lane	12
	local.set	6
# ---- input bytes 48..63, f32 lane 1 -> output lane 13 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	1
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label27
# %bb.40:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	0
	br      	1                               # 1: down to label26
.LBB0_41:                               # %entry
	end_block                               # label27:
	i32.const	0
	local.set	0
.LBB0_42:                               # %entry
	end_block                               # label26:
	local.get	6
	local.get	0
	i8x16.replace_lane	13
	local.set	6
# ---- input bytes 48..63, f32 lane 2 -> output lane 14 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	2
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label29
# %bb.43:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	0
	br      	1                               # 1: down to label28
.LBB0_44:                               # %entry
	end_block                               # label29:
	i32.const	0
	local.set	0
.LBB0_45:                               # %entry
	end_block                               # label28:
	local.get	6
	local.get	0
	i8x16.replace_lane	14
	local.set	6
# ---- input bytes 48..63, f32 lane 3 -> output lane 15 ----
	block   	
	block   	
	local.get	2
	f32x4.extract_lane	3
	local.tee	3
	f32.const	0x1p32
	f32.lt  
	local.get	3
	f32.const	0x0p0
	f32.ge  
	i32.and 
	i32.eqz
	br_if   	0                               # 0: down to label31
# %bb.46:                               # %entry
	local.get	3
	i32.trunc_f32_u
	local.set	0
	br      	1                               # 1: down to label30
.LBB0_47:                               # %entry
	end_block                               # label31:
	i32.const	0
	local.set	0
.LBB0_48:                               # %entry
	end_block                               # label30:
# Insert the final lane and write the completed 16-byte vector to `out`.
	local.get	1
	local.get	6
	local.get	0
	i8x16.replace_lane	15
	v128.store	0
                                        # fallthrough-return
	end_function
                                        # -- End function
	.section	.custom_section.target_features,"",@
# Contents of the "target_features" custom section, emitted as raw bytes.
# Layout visible below: one count byte, then for each feature a prefix
# byte, a name-length byte and the name itself.
# First byte: number of feature entries that follow (three).
	.int8	3
# Entry 1: prefix 43 (ASCII '+'), name length 15, name "mutable-globals".
	.int8	43
	.int8	15
	.ascii	"mutable-globals"
# Entry 2: prefix 43 (ASCII '+'), name length 8, name "sign-ext".
	.int8	43
	.int8	8
	.ascii	"sign-ext"
# Entry 3: prefix 43 (ASCII '+'), name length 7, name "simd128"
# (the feature requested with -mattr=+simd128 on the llc command line).
	.int8	43
	.int8	7
	.ascii	"simd128"
# Switch back to the function's text section.
	.section	.text.test,"",@

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions