Stream: cranelift

Topic: How do I mitigate code duplication after optimizations?


view this post on Zulip Dwayne Slater (Jul 16 2025 at 19:33):

I'm writing a JIT for Lua 5.4 using Cranelift. After optimization I'm seeing my IR values duplicated on all of the cold paths (and I assume the resulting assembly duplicates them as well).

I'm unsure if this is expected or not. Should I be using variables instead of SSA values here?

Here's the IR before and after optimization:
Pre-opt:

; Pre-optimization CLIF for u0:0. Takes four i64 parameters (only v0 is
; referenced in the body) and returns an i64: 0 from the final block, or
; -1 / -2 / -3 from the three cold exit blocks.
; NOTE(review): v0 looks like a base pointer to an array of slots holding an
; i64 payload with an i8 tag 8 bytes after it, and 3 looks like a type tag --
; confirm against the JIT front end that emitted this.
function u0:0(i64, i64, i64, i64) -> i64 system_v {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
    ; Load the i64 at v0+160 and the i8 at v0+168, then test the byte against 3.
    v4 = load.i64 notrap aligned can_move v0+160
    v5 = load.i8 notrap aligned can_move v0+168
    v6 = icmp_imm eq v5, 3
    brif v6, block2, block1

block1 cold:
    ; Byte at v0+168 was not 3: return -1 without storing anything.
    v7 = iconst.i64 -1
    return v7  ; v7 = -1

block2:
    ; Split v4: v8 is its low 8 bits, v10 is v4 arithmetically shifted right by 8.
    ; v9 and v11 are the constant 3 later stored at v0+184 and v0+168.
    v8 = band_imm.i64 v4, 255
    v9 = iconst.i8 3
    v10 = sshr_imm.i64 v4, 8
    v11 = iconst.i8 3
    ; Load the i64 at v0+32 and the i8 at v0+40, then test the byte against 3.
    v12 = load.i64 notrap aligned can_move v0+32
    v13 = load.i8 notrap aligned can_move v0+40
    v14 = icmp_imm eq v13, 3
    brif v14, block4, block3

block3 cold:
    ; Byte at v0+40 was not 3: store v10/v8 (each followed by the constant 3
    ; eight bytes later) at v0+160 and v0+176, then return -2.
    store.i64 notrap aligned can_move v10, v0+160
    store.i8 notrap aligned can_move v11, v0+168  ; v11 = 3
    store.i64 notrap aligned can_move v8, v0+176
    store.i8 notrap aligned can_move v9, v0+184  ; v9 = 3
    v15 = iconst.i64 -2
    return v15  ; v15 = -2

block4:
    ; v18 = (v12 xor v8) * 0x0100_0000_01b3.
    ; NOTE(review): the multiplier equals the 64-bit FNV prime, so this is
    ; presumably one hash step -- confirm with the source program.
    ; v17 is not referenced anywhere else in this function; v19 is the
    ; constant 3 later stored at v0+40.
    v16 = bxor.i64 v12, v8
    v17 = iconst.i8 3
    v18 = imul_imm v16, 0x0100_0000_01b3
    v19 = iconst.i8 3
    ; Load the i64 at v0+48 and the i8 at v0+56, then test the byte against 3.
    v20 = load.i64 notrap aligned can_move v0+48
    v21 = load.i8 notrap aligned can_move v0+56
    v22 = icmp_imm eq v21, 3
    brif v22, block6, block5

block5 cold:
    ; Byte at v0+56 was not 3: store v18 at v0+32 (with 3 at v0+40), then
    ; perform the same four stores as block3 and return -3.
    store.i64 notrap aligned can_move v18, v0+32
    store.i8 notrap aligned can_move v19, v0+40  ; v19 = 3
    store.i64 notrap aligned can_move v10, v0+160
    store.i8 notrap aligned can_move v11, v0+168  ; v11 = 3
    store.i64 notrap aligned can_move v8, v0+176
    store.i8 notrap aligned can_move v9, v0+184  ; v9 = 3
    v23 = iconst.i64 -3
    return v23  ; v23 = -3

block6:
    ; All three byte checks passed: v24 = v20 + 1, then store v18, v24, v10
    ; and v8 (each followed by the constant 3 eight bytes later) and return 0.
    v24 = iadd_imm.i64 v20, 1
    v25 = iconst.i8 3
    store.i64 notrap aligned can_move v18, v0+32
    store.i8 notrap aligned can_move v19, v0+40  ; v19 = 3
    store notrap aligned can_move v24, v0+48
    store notrap aligned can_move v25, v0+56  ; v25 = 3
    store.i64 notrap aligned can_move v10, v0+160
    store.i8 notrap aligned can_move v11, v0+168  ; v11 = 3
    store.i64 notrap aligned can_move v8, v0+176
    store.i8 notrap aligned can_move v9, v0+184  ; v9 = 3
    v26 = iconst.i64 0
    return v26  ; v26 = 0
}

Post-opt:

; Post-optimization CLIF for u0:0. Same signature and the same return values
; (0, -1, -2, -3) as the pre-optimization listing of this function.
; Differences visible in this listing:
;   * each *_imm instruction now appears as an iconst followed by the plain
;     instruction (icmp, band, sshr, imul, iadd);
;   * block2 and block4 contain only loads plus the compare-and-branch; the
;     band / sshr / bxor / imul results are computed inside the blocks that
;     store them (block3, block5, block6), each copy with its own value numbers.
function u0:0(i64, i64, i64, i64) -> i64 system_v {
block0(v0: i64, v1: i64, v2: i64, v3: i64):
    ; Load the i64 at v0+160 and the i8 at v0+168, then test the byte against 3.
    v4 = load.i64 notrap aligned can_move v0+160
    v5 = load.i8 notrap aligned can_move v0+168
    v27 = iconst.i8 3
    v6 = icmp eq v5, v27  ; v27 = 3
    brif v6, block2, block1

block1 cold:
    ; Byte at v0+168 was not 3: return -1 without storing anything.
    v7 = iconst.i64 -1
    return v7  ; v7 = -1

block2:
    ; Load the i64 at v0+32 and the i8 at v0+40, then test the byte against 3.
    v12 = load.i64 notrap aligned can_move v0+32
    v13 = load.i8 notrap aligned can_move v0+40
    v34 = iconst.i8 3
    v35 = icmp eq v13, v34  ; v34 = 3
    brif v35, block4, block3

block3 cold:
    ; First copy of the v4 split: v10 = v4 >> 8 (arithmetic), v8 = v4 & 255.
    ; A single constant 3 (v36) is reused for both byte stores. Returns -2.
    v29 = iconst.i64 8
    v10 = sshr.i64 v4, v29  ; v29 = 8
    store notrap aligned can_move v10, v0+160
    v36 = iconst.i8 3
    store notrap aligned can_move v36, v0+168  ; v36 = 3
    v28 = iconst.i64 255
    v8 = band.i64 v4, v28  ; v28 = 255
    store notrap aligned can_move v8, v0+176
    store notrap aligned can_move v36, v0+184  ; v36 = 3
    v15 = iconst.i64 -2
    return v15  ; v15 = -2

block4:
    ; Load the i64 at v0+48 and the i8 at v0+56, then test the byte against 3.
    v20 = load.i64 notrap aligned can_move v0+48
    v21 = load.i8 notrap aligned can_move v0+56
    v37 = iconst.i8 3
    v38 = icmp eq v21, v37  ; v37 = 3
    brif v38, block6, block5

block5 cold:
    ; Second copy of the v4 split (v40 = v4 & 255, v43 = v4 >> 8), plus
    ; v18 = (v12 xor v40) * 0x0100_0000_01b3. A single constant 3 (v41) is
    ; reused for all three byte stores. Returns -3.
    v39 = iconst.i64 255
    v40 = band.i64 v4, v39  ; v39 = 255
    v16 = bxor.i64 v12, v40
    v31 = iconst.i64 0x0100_0000_01b3
    v18 = imul v16, v31  ; v31 = 0x0100_0000_01b3
    store notrap aligned can_move v18, v0+32
    v41 = iconst.i8 3
    store notrap aligned can_move v41, v0+40  ; v41 = 3
    v42 = iconst.i64 8
    v43 = sshr.i64 v4, v42  ; v42 = 8
    store notrap aligned can_move v43, v0+160
    store notrap aligned can_move v41, v0+168  ; v41 = 3
    store notrap aligned can_move v40, v0+176
    store notrap aligned can_move v41, v0+184  ; v41 = 3
    v23 = iconst.i64 -3
    return v23  ; v23 = -3

block6:
    ; Third copy of the v4 split (v45 = v4 & 255, v51 = v4 >> 8) and second
    ; copy of the xor/multiply (v46, v48), plus v24 = v20 + 1. A single
    ; constant 3 (v49) is reused for all four byte stores. Returns 0.
    v44 = iconst.i64 255
    v45 = band.i64 v4, v44  ; v44 = 255
    v46 = bxor.i64 v12, v45
    v47 = iconst.i64 0x0100_0000_01b3
    v48 = imul v46, v47  ; v47 = 0x0100_0000_01b3
    store notrap aligned can_move v48, v0+32
    v49 = iconst.i8 3
    store notrap aligned can_move v49, v0+40  ; v49 = 3
    v33 = iconst.i64 1
    v24 = iadd.i64 v20, v33  ; v33 = 1
    store notrap aligned can_move v24, v0+48
    store notrap aligned can_move v49, v0+56  ; v49 = 3
    v50 = iconst.i64 8
    v51 = sshr.i64 v4, v50  ; v50 = 8
    store notrap aligned can_move v51, v0+160
    store notrap aligned can_move v49, v0+168  ; v49 = 3
    store notrap aligned can_move v45, v0+176
    store notrap aligned can_move v49, v0+184  ; v49 = 3
    v26 = iconst.i64 0
    return v26  ; v26 = 0
}

Last updated: Dec 06 2025 at 07:03 UTC