Stream: git-wasmtime

Topic: wasmtime / Issue #1448 Cranelift: clz miscompilation when...


view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 19:09):

bjorn3 opened Issue #1448:

I tried to switch to the haswell preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS clz returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically

fn main() {
    assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32);
}

in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.

<details><summary>Clif ir before compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2() system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64) system_v
    sig1 = (i64) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0:
                                    nop
                                    jump block1

                                block1:
                                    nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002                               v0 = iconst.i64 0x0020_0000_0800_4000
@0002                               v1 = iconst.i64 0x0020_8000_0000
@0002                               v2 = iconcat v0, v1
@0002                               v3, v4 = isplit v2
@0002                               v5 = clz v3
@0002                               v6 = clz v4
@0002                               v7 = icmp_imm eq v4, 0
@0002                               v8 = iadd_imm v5, 64
@0002                               v9 = select v7, v8, v6
@0002                               v10 = uextend.i128 v9
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002                               jump block2

                                block2:
@0002                               nop
@0001                               v12, v13 = isplit.i128 v11
@0001                               v14 = ireduce.i32 v12
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004                               v15 = iconst.i32 26
@0004                               v16 = icmp ne v14, v15
@0004                               v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006                               v18 = uextend.i32 v17
@0006                               brz v18, block3
@0006                               jump block4

                                block3:
@0006                               nop
;
; return
@0008                               return

                                block4:
@0008                               nop
@0009                               v19 = global_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009                               v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a                               call fn0(v20)
@000a                               v21 = global_value.i64 gv1
@000a                               call fn1(v21)
@000a                               trap unreachable
}

</details>

<details><summary>Clif ir after compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    ss0 = incoming_arg 16, offset -16
    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64 [%rdi]) system_v
    sig1 = (i64 [%rdi]) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0(v24: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v24
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[-]                                 nop
[-]                                 fallthrough block1

                                block1:
[-]                                 nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002 [RexOp1pu_iq#80b8,%rax]       v0 = iconst.i64 0x0020_0000_0800_4000
                                    v3 -> v0
@0002 [RexOp1pu_iq#80b8,%rcx]       v1 = iconst.i64 0x0020_8000_0000
                                    v4 -> v1
@0002 [-,-]                         v2 = iconcat v0, v1
@0002 [RexMp2urm#86bd,%rax]         v5 = clz v0
@0002 [RexMp2urm#86bd,%rdx]         v6 = clz v1
@0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0
@0002 [DynRexOp1r_ib#8083,%rax]     v8 = iadd_imm v5, 64
@0002 [RexOp1t8jccb#75]             brnz v7, block5(v8)
[-]                                 fallthrough block7

                                block7:
@0002 [DynRexOp1umr#8089,%rax]      v23 = copy.i64 v6
@0002 [-]                           fallthrough block5(v23)

                                block5(v9: i64 [%rax]):
                                    v12 -> v9
@0002 [RexOp1pu_id#b8,%rcx]         v22 = iconst.i64 0
                                    v13 -> v22
@0002 [-,-]                         v10 = iconcat v9, v22
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002 [-]                           fallthrough block2

                                block2:
@0002 [-]                           nop
@0001 [null#00,%rax]                v14 = ireduce.i32 v9
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004 [RexOp1pu_id#b8,%rcx]         v15 = iconst.i32 26
@0004 [DynRexOp1icscc#39,%rax]      v16 = icmp ne v14, v15
@0004 [RexOp2urm_noflags#4b6,%rax]  v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006 [RexOp2urm_noflags#4b6,%rax]  v18 = uextend.i32 v17
;                                                            ;  val1@%rax
@0006 [RexOp1tjccb#74]              brz v18, block3
@0006 [Op1jmpb#eb]                  jump block4

                                block3:
@0006 [-]                           nop
;
; return
[RexOp1popq#58,%rbp]                v25 = x86_pop.i64
@0008 [Op1ret#c3]                   return v25

                                block4:
@0008 [-]                           nop
@0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009 [RexOp1ld#808b,%rax]          v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a [RexOp1rmov#8089]             regmove v20, %rax -> %rdi
;                                                            ;  val4@%rdi
@000a [Op1call_plt_id#e8]           call fn0(v20)
;                                                            ;  val4☠
@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1
@000a [RexOp1rmov#8089]             regmove v21, %rax -> %rdi
@000a [Op1call_plt_id#e8]           call fn1(v21)
@000a [Op2trap#40b]                 trap unreachable
;                                                            ;  val1☠
}

</details>

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 19:09):

bjorn3 labeled Issue #1448:

I tried to switch to the haswell preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS clz returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically

fn main() {
    assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32);
}

in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.

<details><summary>Clif ir before compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2() system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64) system_v
    sig1 = (i64) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0:
                                    nop
                                    jump block1

                                block1:
                                    nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002                               v0 = iconst.i64 0x0020_0000_0800_4000
@0002                               v1 = iconst.i64 0x0020_8000_0000
@0002                               v2 = iconcat v0, v1
@0002                               v3, v4 = isplit v2
@0002                               v5 = clz v3
@0002                               v6 = clz v4
@0002                               v7 = icmp_imm eq v4, 0
@0002                               v8 = iadd_imm v5, 64
@0002                               v9 = select v7, v8, v6
@0002                               v10 = uextend.i128 v9
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002                               jump block2

                                block2:
@0002                               nop
@0001                               v12, v13 = isplit.i128 v11
@0001                               v14 = ireduce.i32 v12
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004                               v15 = iconst.i32 26
@0004                               v16 = icmp ne v14, v15
@0004                               v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006                               v18 = uextend.i32 v17
@0006                               brz v18, block3
@0006                               jump block4

                                block3:
@0006                               nop
;
; return
@0008                               return

                                block4:
@0008                               nop
@0009                               v19 = global_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009                               v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a                               call fn0(v20)
@000a                               v21 = global_value.i64 gv1
@000a                               call fn1(v21)
@000a                               trap unreachable
}

</details>

<details><summary>Clif ir after compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    ss0 = incoming_arg 16, offset -16
    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64 [%rdi]) system_v
    sig1 = (i64 [%rdi]) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0(v24: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v24
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[-]                                 nop
[-]                                 fallthrough block1

                                block1:
[-]                                 nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002 [RexOp1pu_iq#80b8,%rax]       v0 = iconst.i64 0x0020_0000_0800_4000
                                    v3 -> v0
@0002 [RexOp1pu_iq#80b8,%rcx]       v1 = iconst.i64 0x0020_8000_0000
                                    v4 -> v1
@0002 [-,-]                         v2 = iconcat v0, v1
@0002 [RexMp2urm#86bd,%rax]         v5 = clz v0
@0002 [RexMp2urm#86bd,%rdx]         v6 = clz v1
@0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0
@0002 [DynRexOp1r_ib#8083,%rax]     v8 = iadd_imm v5, 64
@0002 [RexOp1t8jccb#75]             brnz v7, block5(v8)
[-]                                 fallthrough block7

                                block7:
@0002 [DynRexOp1umr#8089,%rax]      v23 = copy.i64 v6
@0002 [-]                           fallthrough block5(v23)

                                block5(v9: i64 [%rax]):
                                    v12 -> v9
@0002 [RexOp1pu_id#b8,%rcx]         v22 = iconst.i64 0
                                    v13 -> v22
@0002 [-,-]                         v10 = iconcat v9, v22
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002 [-]                           fallthrough block2

                                block2:
@0002 [-]                           nop
@0001 [null#00,%rax]                v14 = ireduce.i32 v9
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004 [RexOp1pu_id#b8,%rcx]         v15 = iconst.i32 26
@0004 [DynRexOp1icscc#39,%rax]      v16 = icmp ne v14, v15
@0004 [RexOp2urm_noflags#4b6,%rax]  v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006 [RexOp2urm_noflags#4b6,%rax]  v18 = uextend.i32 v17
;                                                            ;  val1@%rax
@0006 [RexOp1tjccb#74]              brz v18, block3
@0006 [Op1jmpb#eb]                  jump block4

                                block3:
@0006 [-]                           nop
;
; return
[RexOp1popq#58,%rbp]                v25 = x86_pop.i64
@0008 [Op1ret#c3]                   return v25

                                block4:
@0008 [-]                           nop
@0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009 [RexOp1ld#808b,%rax]          v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a [RexOp1rmov#8089]             regmove v20, %rax -> %rdi
;                                                            ;  val4@%rdi
@000a [Op1call_plt_id#e8]           call fn0(v20)
;                                                            ;  val4☠
@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1
@000a [RexOp1rmov#8089]             regmove v21, %rax -> %rdi
@000a [Op1call_plt_id#e8]           call fn1(v21)
@000a [Op2trap#40b]                 trap unreachable
;                                                            ;  val1☠
}

</details>

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 19:09):

bjorn3 labeled Issue #1448:

I tried to switch to the haswell preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS clz returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically

fn main() {
    assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32);
}

in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.

<details><summary>Clif ir before compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2() system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64) system_v
    sig1 = (i64) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0:
                                    nop
                                    jump block1

                                block1:
                                    nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002                               v0 = iconst.i64 0x0020_0000_0800_4000
@0002                               v1 = iconst.i64 0x0020_8000_0000
@0002                               v2 = iconcat v0, v1
@0002                               v3, v4 = isplit v2
@0002                               v5 = clz v3
@0002                               v6 = clz v4
@0002                               v7 = icmp_imm eq v4, 0
@0002                               v8 = iadd_imm v5, 64
@0002                               v9 = select v7, v8, v6
@0002                               v10 = uextend.i128 v9
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002                               jump block2

                                block2:
@0002                               nop
@0001                               v12, v13 = isplit.i128 v11
@0001                               v14 = ireduce.i32 v12
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004                               v15 = iconst.i32 26
@0004                               v16 = icmp ne v14, v15
@0004                               v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006                               v18 = uextend.i32 v17
@0006                               brz v18, block3
@0006                               jump block4

                                block3:
@0006                               nop
;
; return
@0008                               return

                                block4:
@0008                               nop
@0009                               v19 = global_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009                               v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a                               call fn0(v20)
@000a                               v21 = global_value.i64 gv1
@000a                               call fn1(v21)
@000a                               trap unreachable
}

</details>

<details><summary>Clif ir after compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    ss0 = incoming_arg 16, offset -16
    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64 [%rdi]) system_v
    sig1 = (i64 [%rdi]) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0(v24: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v24
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[-]                                 nop
[-]                                 fallthrough block1

                                block1:
[-]                                 nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002 [RexOp1pu_iq#80b8,%rax]       v0 = iconst.i64 0x0020_0000_0800_4000
                                    v3 -> v0
@0002 [RexOp1pu_iq#80b8,%rcx]       v1 = iconst.i64 0x0020_8000_0000
                                    v4 -> v1
@0002 [-,-]                         v2 = iconcat v0, v1
@0002 [RexMp2urm#86bd,%rax]         v5 = clz v0
@0002 [RexMp2urm#86bd,%rdx]         v6 = clz v1
@0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0
@0002 [DynRexOp1r_ib#8083,%rax]     v8 = iadd_imm v5, 64
@0002 [RexOp1t8jccb#75]             brnz v7, block5(v8)
[-]                                 fallthrough block7

                                block7:
@0002 [DynRexOp1umr#8089,%rax]      v23 = copy.i64 v6
@0002 [-]                           fallthrough block5(v23)

                                block5(v9: i64 [%rax]):
                                    v12 -> v9
@0002 [RexOp1pu_id#b8,%rcx]         v22 = iconst.i64 0
                                    v13 -> v22
@0002 [-,-]                         v10 = iconcat v9, v22
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002 [-]                           fallthrough block2

                                block2:
@0002 [-]                           nop
@0001 [null#00,%rax]                v14 = ireduce.i32 v9
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004 [RexOp1pu_id#b8,%rcx]         v15 = iconst.i32 26
@0004 [DynRexOp1icscc#39,%rax]      v16 = icmp ne v14, v15
@0004 [RexOp2urm_noflags#4b6,%rax]  v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006 [RexOp2urm_noflags#4b6,%rax]  v18 = uextend.i32 v17
;                                                            ;  val1@%rax
@0006 [RexOp1tjccb#74]              brz v18, block3
@0006 [Op1jmpb#eb]                  jump block4

                                block3:
@0006 [-]                           nop
;
; return
[RexOp1popq#58,%rbp]                v25 = x86_pop.i64
@0008 [Op1ret#c3]                   return v25

                                block4:
@0008 [-]                           nop
@0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009 [RexOp1ld#808b,%rax]          v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a [RexOp1rmov#8089]             regmove v20, %rax -> %rdi
;                                                            ;  val4@%rdi
@000a [Op1call_plt_id#e8]           call fn0(v20)
;                                                            ;  val4☠
@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1
@000a [RexOp1rmov#8089]             regmove v21, %rax -> %rdi
@000a [Op1call_plt_id#e8]           call fn1(v21)
@000a [Op2trap#40b]                 trap unreachable
;                                                            ;  val1☠
}

</details>

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 19:15):

github-actions[bot] commented on Issue #1448:

Subscribe to Label Action

This issue or pull request has been labeled: "cranelift"

<details> <summary>Users Subscribed to "cranelift"</summary>

</details>

To subscribe or unsubscribe from this label, edit the <code>.github/subscribe-to-label.json</code> configuration file.

Learn more.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:33):

abrown commented on Issue #1448:

That is very weird; what are the chances that has_lzcnt is not getting enabled in the settings for some reason? I've had that problem before. Maybe the #[cfg...] is preventing that function from being compiled on the macOS target?

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:41):

bjorn3 commented on Issue #1448:

I am not using cranelift-native. Instead I take the target triple from rustc, parse it and pass it to cranelift_codegen::isa::lookup. In the failing PR I then tried to enable the haswell preset.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:47):

abrown commented on Issue #1448:

Yeah, but if you are running test compile filetests then you might be: see the TargetIsa passed in to TestCompile::run?

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:58):

bjorn3 commented on Issue #1448:

I haven't run it as test compile. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:58):

bjorn3 commented on Issue #1448:

I haven't run it as test compile. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 31 2020 at 20:58):

bjorn3 deleted a comment on Issue #1448:

I haven't run it as test compile. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.

view this post on Zulip Wasmtime GitHub notifications bot (Jul 25 2020 at 13:31):

bjorn3 commented on Issue #1448:

Found the problem: The macOS CI uses an "Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz". This is an Ivy Bridge EP, which predates Haswell and therefore does not support lzcnt.

view this post on Zulip Wasmtime GitHub notifications bot (Jul 25 2020 at 13:31):

bjorn3 closed Issue #1448:

I tried to switch to the haswell preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS clz returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically

fn main() {
    assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32);
}

in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.

<details><summary>Clif ir before compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2() system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64) system_v
    sig1 = (i64) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0:
                                    nop
                                    jump block1

                                block1:
                                    nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002                               v0 = iconst.i64 0x0020_0000_0800_4000
@0002                               v1 = iconst.i64 0x0020_8000_0000
@0002                               v2 = iconcat v0, v1
@0002                               v3, v4 = isplit v2
@0002                               v5 = clz v3
@0002                               v6 = clz v4
@0002                               v7 = icmp_imm eq v4, 0
@0002                               v8 = iadd_imm v5, 64
@0002                               v9 = select v7, v8, v6
@0002                               v10 = uextend.i128 v9
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002                               jump block2

                                block2:
@0002                               nop
@0001                               v12, v13 = isplit.i128 v11
@0001                               v14 = ireduce.i32 v12
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004                               v15 = iconst.i32 26
@0004                               v16 = icmp ne v14, v15
@0004                               v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006                               v18 = uextend.i32 v17
@0006                               brz v18, block3
@0006                               jump block4

                                block3:
@0006                               nop
;
; return
@0008                               return

                                block4:
@0008                               nop
@0009                               v19 = global_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009                               v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a                               call fn0(v20)
@000a                               v21 = global_value.i64 gv1
@000a                               call fn1(v21)
@000a                               trap unreachable
}

</details>

<details><summary>Clif ir after compilation</summary>

test compile
set is_pic
set enable_simd
target x86_64-apple-macosx10.7.0 haswell

function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E
; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] }
; sig ([]; c_variadic: false)->()

; kind  loc.idx   param    pass mode                            ty
; ret   _0      -          NoPass                               ()

; kind  local ty                              size align (abi,pref)
; ssa   _1    bool                              1b 1, 1
; ssa   _2    u32                               4b 4, 4
; ssa   _3    u128                             16b 8, 8
; ssa   _4    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _5    &(&str, &str, u32, u32)           8b 8, 8
; ssa   _6    &(&str, &str, u32, u32)           8b 8, 8

    ss0 = incoming_arg 16, offset -16
    gv0 = symbol colocated u1:0
    gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned
    sig0 = (i64 [%rdi]) system_v
    sig1 = (i64 [%rdi]) system_v
    fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] }
    fn1 = u0:4 sig1 ; puts

                                block0(v24: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v24
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[-]                                 nop
[-]                                 fallthrough block1

                                block1:
[-]                                 nop
;
; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128)
@0002 [RexOp1pu_iq#80b8,%rax]       v0 = iconst.i64 0x0020_0000_0800_4000
                                    v3 -> v0
@0002 [RexOp1pu_iq#80b8,%rcx]       v1 = iconst.i64 0x0020_8000_0000
                                    v4 -> v1
@0002 [-,-]                         v2 = iconcat v0, v1
@0002 [RexMp2urm#86bd,%rax]         v5 = clz v0
@0002 [RexMp2urm#86bd,%rdx]         v6 = clz v1
@0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0
@0002 [DynRexOp1r_ib#8083,%rax]     v8 = iadd_imm v5, 64
@0002 [RexOp1t8jccb#75]             brnz v7, block5(v8)
[-]                                 fallthrough block7

                                block7:
@0002 [DynRexOp1umr#8089,%rax]      v23 = copy.i64 v6
@0002 [-]                           fallthrough block5(v23)

                                block5(v9: i64 [%rax]):
                                    v12 -> v9
@0002 [RexOp1pu_id#b8,%rcx]         v22 = iconst.i64 0
                                    v13 -> v22
@0002 [-,-]                         v10 = iconcat v9, v22
                                    v11 -> v10
; write_cvalue: Var(_3): u128 <- ByVal(v10): u128
@0002 [-]                           fallthrough block2

                                block2:
@0002 [-]                           nop
@0001 [null#00,%rax]                v14 = ireduce.i32 v9
; write_cvalue: Var(_2): u32 <- ByVal(v14): u32
@0004 [RexOp1pu_id#b8,%rcx]         v15 = iconst.i32 26
@0004 [DynRexOp1icscc#39,%rax]      v16 = icmp ne v14, v15
@0004 [RexOp2urm_noflags#4b6,%rax]  v17 = bint.i8 v16
; write_cvalue: Var(_1): bool <- ByVal(v17): bool
;
; switchInt(_1)
@0006 [RexOp2urm_noflags#4b6,%rax]  v18 = uextend.i32 v17
;                                                            ;  val1@%rax
@0006 [RexOp1tjccb#74]              brz v18, block3
@0006 [Op1jmpb#eb]                  jump block4

                                block3:
@0006 [-]                           nop
;
; return
[RexOp1popq#58,%rbp]                v25 = x86_pop.i64
@0008 [Op1ret#c3]                   return v25

                                block4:
@0008 [-]                           nop
@0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0
; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32)
@0009 [RexOp1ld#808b,%rax]          v20 = load.i64 v19
; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32)
;
; const mini_core::panic(move _4)
@000a [RexOp1rmov#8089]             regmove v20, %rax -> %rdi
;                                                            ;  val4@%rdi
@000a [Op1call_plt_id#e8]           call fn0(v20)
;                                                            ;  val4☠
@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1
@000a [RexOp1rmov#8089]             regmove v21, %rax -> %rdi
@000a [Op1call_plt_id#e8]           call fn1(v21)
@000a [Op2trap#40b]                 trap unreachable
;                                                            ;  val1☠
}

</details>


Last updated: Jan 24 2025 at 00:11 UTC