Hello! I am compiling a Scheme, and in some cases primitive ops such as #%heap-object? are emitted separately from the branch during compilation, so Cranelift produces code like this:
47d: 41 80 fa 12 cmp $0x12,%r10b
481: 0f 94 c1 sete %cl
484: 84 c9 test %cl,%cl
486: 0f 84 84 00 00 00 je 510 <fn0:false+0x220>
Is there any way to avoid that and get something more direct, i.e. a cmp followed immediately by a je? Or is my only option to make changes to my codegen?
Would it be possible to extract the CLIF that leads to this codegen? This might be a missing pattern in optimizations or a missing pattern in lowering, and the CLIF can help narrow it down as to which
Alex Crichton said:
Would it be possible to extract the CLIF that leads to this codegen? This might be a missing pattern in optimizations or a missing pattern in lowering, and the CLIF can help narrow it down as to which
I would not be able to extract it at the moment, but it looks like this during codegen:
/// Emits IR that normalises `val` to one of the two boolean immediates.
///
/// The returned value is the `Value::VALUE_FALSE` constant when `val`
/// compares equal to `Value::VALUE_FALSE`, and the `Value::VALUE_TRUE`
/// constant otherwise. Four instructions are appended to the current
/// block, in this order: `icmp_imm`, `iconst`, `iconst`, `select`.
fn is_false(ssa: &mut SSABuilder, val: ir::Value) -> ir::Value {
    let b = &mut ssa.builder;
    // Condition: does `val` hold the false immediate?
    let cond = b.ins().icmp_imm(IntCC::Equal, val, Value::VALUE_FALSE);
    // Materialise both possible results as 64-bit constants.
    let if_equal = b.ins().iconst(types::I64, Value::VALUE_FALSE);
    let if_not_equal = b.ins().iconst(types::I64, Value::VALUE_TRUE);
    // `select` yields its second operand when `cond` is non-zero.
    b.ins().select(cond, if_equal, if_not_equal)
}
/// Emits the conditional branch for an `if` whose condition lives in `var`.
///
/// The condition is first normalised through `is_false`, the result is
/// compared against `Value::VALUE_FALSE`, and that comparison feeds `brif`.
///
/// NOTE(review): `kcons` / `kalt` are presumably the consequent and
/// alternative continuations; the branch targets are elided (`...`) in
/// this sketch, so how they are used cannot be confirmed from here.
fn emit_if(ssa: &mut SSABuilder, var: LVar, kcons: LVar, kalt: LVar) {
    // Read the variable before handing `ssa` to `is_false`: evaluating
    // `ssa.var(var)` inside the argument list would borrow `*ssa` while it
    // is already reborrowed mutably for the first argument.
    let val = ssa.var(var);
    let check = is_false(ssa, val);
    // Compare the computed `check` value. The bare name `is_false` at this
    // point still refers to the function item, not to an IR value.
    let is_false = ssa.builder.ins().icmp_imm(IntCC::Equal, check, Value::VALUE_FALSE);
    ssa.builder.ins().brif(is_false, ...)
}
so it seems there's no rewrite for select + icmp + brif
I have a half-working solution at the moment, but I was expecting Cranelift to be able to rewrite such a combination of select + brif
A naive attempt:
function %a(i32) -> i64 {
block0(v0: i32):
v1 = icmp_imm eq v0, 0
v2 = iconst.i64 0
v3 = iconst.i64 1
v4 = select v1, v2, v3
v5 = icmp_imm eq v4, 0
brif v5, block1, block2
block1:
v6 = iconst.i64 1
return v6
block2:
v7 = iconst.i64 2
return v7
}
shows what I would expect with cargo run -p cranelift-tools compile foo.clif --target x86_64 -D --set opt_level=speed:
Disassembly of 32 bytes <%a>:
0: 55 pushq %rbp
1: 48 89 e5 movq %rsp, %rbp
4: 85 ff testl %edi, %edi
6: 0f 84 0a 00 00 00 je 0x16
c: b8 02 00 00 00 movl $2, %eax
11: 48 89 ec movq %rbp, %rsp
14: 5d popq %rbp
15: c3 retq
16: b8 01 00 00 00 movl $1, %eax
1b: 48 89 ec movq %rbp, %rsp
1e: 5d popq %rbp
1f: c3 retq
To confirm, are you enabling optimizations in Cranelift?
yes, I do. Let me actually replicate this behavior again
Okay so here's the IR:
block2:
v21 = get_pinned_reg.i64
v50 = iconst.i64 -562949953421310
v24 = band.i64 v9, v50 ; v50 = -562949953421310
v49 = iconst.i64 0
v25 = icmp ne v24, v49 ; v49 = 0
v23 = iconst.i8 0
brif v25, block7(v23), block6 ; v23 = 0
and corresponding assembly:
844: 4d 89 ee mov %r13,%r14
847: 4c 89 ff mov %r15,%rdi
84a: 45 33 c9 xor %r9d,%r9d
84d: 48 85 35 d4 00 00 00 test %rsi,0xd4(%rip) # 928 <cont1:after_call17+0x110>
854: 0f 85 0a 00 00 00 jne 864 <cont1:after_call17+0x4c>
85a: 48 0f b6 06 movzbq (%rsi),%rax
85e: 3c 12 cmp $0x12,%al
860: 41 0f 94 c1 sete %r9b
864: 45 84 c9 test %r9b,%r9b
867: 0f 84 84 00 00 00 je 8f1 <cont1:after_call17+0xd9>
oh wait, I have trouble mapping the assembly to IR, let me simplify it further
Okay, I can't really reduce it, this is what I can get though:
block2:
v21 = get_pinned_reg.i64
v22 = call fn5(v21)
v23 = symbol_value.i64 gv0
store v22, v23
v28 = get_pinned_reg.i64
v27 = func_addr.i64 fn10
v81 = iconst.i64 0
v20 = iconst.i64 1
v29 = call fn9(v28, v27, v81, v20) ; v81 = 0, v20 = 1
v30 = load.i64 v29+16
v31 = symbol_value.i64 gv1
v32 = load.i64 v31
v33 = get_pinned_reg.i64
v34 = call fn6(v33, v32, v29)
v35 = symbol_value.i64 gv2
v36 = load.i64 v35
v79 = iconst.i64 -562949953421310
v37 = band v36, v79 ; v79 = -562949953421310
v38 = icmp eq v37, v81 ; v81 = 0
v40 = iconst.i64 7
v39 = iconst.i64 6
v41 = select v38, v40, v39 ; v40 = 7, v39 = 6
brif v41, block6, block7
371: 4c 8b 1d 50 2c 00 00 mov 0x2c50(%rip),%r11 # 2fc8 <capy_define@Base>
378: 48 89 c2 mov %rax,%rdx
37b: 41 ff d3 call *%r11
37e: 48 8d 1d 8b 2c 00 00 lea 0x2c8b(%rip),%rbx # 3010 <cache_cell1>
385: 4c 8b 1b mov (%rbx),%r11
388: 41 ba 06 00 00 00 mov $0x6,%r10d
38e: 4c 85 1d 0b 01 00 00 test %r11,0x10b(%rip) # 4a0 <fn0:false+0x1b0>
395: 4c 0f 44 15 0b 01 00 cmove 0x10b(%rip),%r10 # 4a8 <fn0:false+0x1b8>
39c: 00
39d: 4d 85 d2 test %r10,%r10
3a0: 0f 85 4b 00 00 00 jne 3f1 <fn0:false+0x101>
Would you be able to further extract that to a standalone function?
yea, let me try that
How can I dump the assembly with cranelift-jit, though? Or is my only option to use cranelift-object?
ah that I'm not sure sorry
function u0:0(i64, i64, i64) -> i64 tail {
sig0 = (i64, i64) -> i64 tail
sig1 = (i64) -> i64 tail
block0(v0: i64, v1: i64, v2: i64):
v3 = band_imm v2, -562949953421310
v4 = icmp_imm eq v3, 0
v5 = iconst.i64 6
v6 = iconst.i64 7
v7 = select v4, v6, v5 ; v6 = 7, v5 = 6
v8 = icmp_imm eq v7, 6
brif v8, block2, block1
block1:
return_call_indirect.i64 sig0, v0(v1, v2)
block2:
return_call_indirect.i64 sig1, v0(v2)
0000000000000000 <.Lfn0>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 49 89 fa mov %rdi,%r10
7: 41 b9 06 00 00 00 mov $0x6,%r9d
d: 48 85 15 2c 00 00 00 test %rdx,0x2c(%rip) # 40 <.Lfn0+0x40>
14: 49 89 d3 mov %rdx,%r11
17: 4c 0f 44 0d 29 00 00 cmove 0x29(%rip),%r9 # 48 <.Lfn0+0x48>
1e: 00
1f: 49 83 f9 06 cmp $0x6,%r9
23: 0f 84 0d 00 00 00 je 36 <.Lfn0+0x36>
29: 48 89 f7 mov %rsi,%rdi
2c: 4c 89 de mov %r11,%rsi
2f: 48 89 ec mov %rbp,%rsp
32: 5d pop %rbp
33: 41 ff e2 jmp *%r10
36: 4c 89 df mov %r11,%rdi
39: 48 89 ec mov %rbp,%rsp
3c: 5d pop %rbp
3d: 41 ff e2 jmp *%r10
thanks! Simplified a tiny bit more to:
function %a(i64) -> i64 {
block0(v2: i64):
v3 = band_imm v2, -562949953421310
v4 = icmp_imm eq v3, 0
v5 = iconst.i64 6
v6 = iconst.i64 7
v7 = select v4, v6, v5 ; v6 = 7, v5 = 6
v8 = icmp_imm eq v7, 6
brif v8, block2, block1
block1:
v9 = iconst.i64 100
jump block3(v9)
block2:
v10 = iconst.i64 101
jump block3(v10)
block3(v11: i64):
return v11
}
Looks like we don't have an optimization rule for this, no, but shouldn't be too hard to add! Mind filing an issue for this?
sure, will do
https://github.com/bytecodealliance/wasmtime/issues/11578
Last updated: Dec 06 2025 at 06:05 UTC