bjorn3 opened Issue #1448:
I tried to switch to the
haswell
preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS `clz`
returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically `fn main() { assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32); }` in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.
<details><summary>Clif ir before compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2() system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64) system_v sig1 = (i64) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0: nop jump block1 block1: nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 v0 = iconst.i64 0x0020_0000_0800_4000 @0002 v1 = iconst.i64 0x0020_8000_0000 @0002 v2 = iconcat v0, v1 @0002 v3, v4 = isplit v2 @0002 v5 = clz v3 @0002 v6 = clz v4 @0002 v7 = icmp_imm eq v4, 0 @0002 v8 = iadd_imm v5, 64 @0002 v9 = select v7, v8, v6 @0002 v10 = uextend.i128 v9 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 jump block2 block2: @0002 nop @0001 v12, v13 = isplit.i128 v11 @0001 v14 = ireduce.i32 v12 ; write_cvalue: Var(_2): u32 <- ByVal(v14): u32 @0004 v15 = iconst.i32 26 @0004 v16 = icmp ne v14, v15 @0004 v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 v18 = uextend.i32 v17 @0006 brz v18, block3 @0006 jump block4 block3: @0006 nop ; ; return @0008 return block4: @0008 nop @0009 v19 = global_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): 
&(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a call fn0(v20) @000a v21 = global_value.i64 gv1 @000a call fn1(v21) @000a trap unreachable }</details>
<details><summary>Clif ir after compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 ss0 = incoming_arg 16, offset -16 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64 [%rdi]) system_v sig1 = (i64 [%rdi]) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0(v24: i64 [%rbp]): [RexOp1pushq#50] x86_push v24 [RexOp1copysp#8089] copy_special %rsp -> %rbp [-] nop [-] fallthrough block1 block1: [-] nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 [RexOp1pu_iq#80b8,%rax] v0 = iconst.i64 0x0020_0000_0800_4000 v3 -> v0 @0002 [RexOp1pu_iq#80b8,%rcx] v1 = iconst.i64 0x0020_8000_0000 v4 -> v1 @0002 [-,-] v2 = iconcat v0, v1 @0002 [RexMp2urm#86bd,%rax] v5 = clz v0 @0002 [RexMp2urm#86bd,%rdx] v6 = clz v1 @0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0 @0002 [DynRexOp1r_ib#8083,%rax] v8 = iadd_imm v5, 64 @0002 [RexOp1t8jccb#75] brnz v7, block5(v8) [-] fallthrough block7 block7: @0002 [DynRexOp1umr#8089,%rax] v23 = copy.i64 v6 @0002 [-] fallthrough block5(v23) block5(v9: i64 [%rax]): v12 -> v9 @0002 [RexOp1pu_id#b8,%rcx] v22 = iconst.i64 0 v13 -> v22 @0002 [-,-] v10 = iconcat v9, v22 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 [-] fallthrough block2 block2: @0002 [-] nop @0001 [null#00,%rax] v14 = ireduce.i32 v9 ; write_cvalue: Var(_2): 
u32 <- ByVal(v14): u32 @0004 [RexOp1pu_id#b8,%rcx] v15 = iconst.i32 26 @0004 [DynRexOp1icscc#39,%rax] v16 = icmp ne v14, v15 @0004 [RexOp2urm_noflags#4b6,%rax] v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 [RexOp2urm_noflags#4b6,%rax] v18 = uextend.i32 v17 ; ; val1@%rax @0006 [RexOp1tjccb#74] brz v18, block3 @0006 [Op1jmpb#eb] jump block4 block3: @0006 [-] nop ; ; return [RexOp1popq#58,%rbp] v25 = x86_pop.i64 @0008 [Op1ret#c3] return v25 block4: @0008 [-] nop @0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 [RexOp1ld#808b,%rax] v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a [RexOp1rmov#8089] regmove v20, %rax -> %rdi ; ; val4@%rdi @000a [Op1call_plt_id#e8] call fn0(v20) ; ; val4☠@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1 @000a [RexOp1rmov#8089] regmove v21, %rax -> %rdi @000a [Op1call_plt_id#e8] call fn1(v21) @000a [Op2trap#40b] trap unreachable ; ; val1☠}</details>
- Which Cranelift version / commit hash / branch are you using? ac7cd4c46abdc9d4f3ef3230386afd52177e5f7c
- If relevant, can you include some extra information about your environment? x86_64 macOS
bjorn3 labeled Issue #1448:
I tried to switch to the
haswell
preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS `clz`
returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically `fn main() { assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32); }` in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.
<details><summary>Clif ir before compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2() system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64) system_v sig1 = (i64) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0: nop jump block1 block1: nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 v0 = iconst.i64 0x0020_0000_0800_4000 @0002 v1 = iconst.i64 0x0020_8000_0000 @0002 v2 = iconcat v0, v1 @0002 v3, v4 = isplit v2 @0002 v5 = clz v3 @0002 v6 = clz v4 @0002 v7 = icmp_imm eq v4, 0 @0002 v8 = iadd_imm v5, 64 @0002 v9 = select v7, v8, v6 @0002 v10 = uextend.i128 v9 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 jump block2 block2: @0002 nop @0001 v12, v13 = isplit.i128 v11 @0001 v14 = ireduce.i32 v12 ; write_cvalue: Var(_2): u32 <- ByVal(v14): u32 @0004 v15 = iconst.i32 26 @0004 v16 = icmp ne v14, v15 @0004 v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 v18 = uextend.i32 v17 @0006 brz v18, block3 @0006 jump block4 block3: @0006 nop ; ; return @0008 return block4: @0008 nop @0009 v19 = global_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): 
&(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a call fn0(v20) @000a v21 = global_value.i64 gv1 @000a call fn1(v21) @000a trap unreachable }</details>
<details><summary>Clif ir after compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 ss0 = incoming_arg 16, offset -16 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64 [%rdi]) system_v sig1 = (i64 [%rdi]) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0(v24: i64 [%rbp]): [RexOp1pushq#50] x86_push v24 [RexOp1copysp#8089] copy_special %rsp -> %rbp [-] nop [-] fallthrough block1 block1: [-] nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 [RexOp1pu_iq#80b8,%rax] v0 = iconst.i64 0x0020_0000_0800_4000 v3 -> v0 @0002 [RexOp1pu_iq#80b8,%rcx] v1 = iconst.i64 0x0020_8000_0000 v4 -> v1 @0002 [-,-] v2 = iconcat v0, v1 @0002 [RexMp2urm#86bd,%rax] v5 = clz v0 @0002 [RexMp2urm#86bd,%rdx] v6 = clz v1 @0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0 @0002 [DynRexOp1r_ib#8083,%rax] v8 = iadd_imm v5, 64 @0002 [RexOp1t8jccb#75] brnz v7, block5(v8) [-] fallthrough block7 block7: @0002 [DynRexOp1umr#8089,%rax] v23 = copy.i64 v6 @0002 [-] fallthrough block5(v23) block5(v9: i64 [%rax]): v12 -> v9 @0002 [RexOp1pu_id#b8,%rcx] v22 = iconst.i64 0 v13 -> v22 @0002 [-,-] v10 = iconcat v9, v22 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 [-] fallthrough block2 block2: @0002 [-] nop @0001 [null#00,%rax] v14 = ireduce.i32 v9 ; write_cvalue: Var(_2): 
u32 <- ByVal(v14): u32 @0004 [RexOp1pu_id#b8,%rcx] v15 = iconst.i32 26 @0004 [DynRexOp1icscc#39,%rax] v16 = icmp ne v14, v15 @0004 [RexOp2urm_noflags#4b6,%rax] v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 [RexOp2urm_noflags#4b6,%rax] v18 = uextend.i32 v17 ; ; val1@%rax @0006 [RexOp1tjccb#74] brz v18, block3 @0006 [Op1jmpb#eb] jump block4 block3: @0006 [-] nop ; ; return [RexOp1popq#58,%rbp] v25 = x86_pop.i64 @0008 [Op1ret#c3] return v25 block4: @0008 [-] nop @0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 [RexOp1ld#808b,%rax] v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a [RexOp1rmov#8089] regmove v20, %rax -> %rdi ; ; val4@%rdi @000a [Op1call_plt_id#e8] call fn0(v20) ; ; val4☠@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1 @000a [RexOp1rmov#8089] regmove v21, %rax -> %rdi @000a [Op1call_plt_id#e8] call fn1(v21) @000a [Op2trap#40b] trap unreachable ; ; val1☠}</details>
- Which Cranelift version / commit hash / branch are you using? ac7cd4c46abdc9d4f3ef3230386afd52177e5f7c
- If relevant, can you include some extra information about your environment? x86_64 macOS
bjorn3 labeled Issue #1448:
I tried to switch to the
haswell
preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS `clz`
returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically `fn main() { assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32); }` in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.
<details><summary>Clif ir before compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2() system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64) system_v sig1 = (i64) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0: nop jump block1 block1: nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 v0 = iconst.i64 0x0020_0000_0800_4000 @0002 v1 = iconst.i64 0x0020_8000_0000 @0002 v2 = iconcat v0, v1 @0002 v3, v4 = isplit v2 @0002 v5 = clz v3 @0002 v6 = clz v4 @0002 v7 = icmp_imm eq v4, 0 @0002 v8 = iadd_imm v5, 64 @0002 v9 = select v7, v8, v6 @0002 v10 = uextend.i128 v9 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 jump block2 block2: @0002 nop @0001 v12, v13 = isplit.i128 v11 @0001 v14 = ireduce.i32 v12 ; write_cvalue: Var(_2): u32 <- ByVal(v14): u32 @0004 v15 = iconst.i32 26 @0004 v16 = icmp ne v14, v15 @0004 v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 v18 = uextend.i32 v17 @0006 brz v18, block3 @0006 jump block4 block3: @0006 nop ; ; return @0008 return block4: @0008 nop @0009 v19 = global_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): 
&(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a call fn0(v20) @000a v21 = global_value.i64 gv1 @000a call fn1(v21) @000a trap unreachable }</details>
<details><summary>Clif ir after compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 ss0 = incoming_arg 16, offset -16 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64 [%rdi]) system_v sig1 = (i64 [%rdi]) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0(v24: i64 [%rbp]): [RexOp1pushq#50] x86_push v24 [RexOp1copysp#8089] copy_special %rsp -> %rbp [-] nop [-] fallthrough block1 block1: [-] nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 [RexOp1pu_iq#80b8,%rax] v0 = iconst.i64 0x0020_0000_0800_4000 v3 -> v0 @0002 [RexOp1pu_iq#80b8,%rcx] v1 = iconst.i64 0x0020_8000_0000 v4 -> v1 @0002 [-,-] v2 = iconcat v0, v1 @0002 [RexMp2urm#86bd,%rax] v5 = clz v0 @0002 [RexMp2urm#86bd,%rdx] v6 = clz v1 @0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0 @0002 [DynRexOp1r_ib#8083,%rax] v8 = iadd_imm v5, 64 @0002 [RexOp1t8jccb#75] brnz v7, block5(v8) [-] fallthrough block7 block7: @0002 [DynRexOp1umr#8089,%rax] v23 = copy.i64 v6 @0002 [-] fallthrough block5(v23) block5(v9: i64 [%rax]): v12 -> v9 @0002 [RexOp1pu_id#b8,%rcx] v22 = iconst.i64 0 v13 -> v22 @0002 [-,-] v10 = iconcat v9, v22 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 [-] fallthrough block2 block2: @0002 [-] nop @0001 [null#00,%rax] v14 = ireduce.i32 v9 ; write_cvalue: Var(_2): 
u32 <- ByVal(v14): u32 @0004 [RexOp1pu_id#b8,%rcx] v15 = iconst.i32 26 @0004 [DynRexOp1icscc#39,%rax] v16 = icmp ne v14, v15 @0004 [RexOp2urm_noflags#4b6,%rax] v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 [RexOp2urm_noflags#4b6,%rax] v18 = uextend.i32 v17 ; ; val1@%rax @0006 [RexOp1tjccb#74] brz v18, block3 @0006 [Op1jmpb#eb] jump block4 block3: @0006 [-] nop ; ; return [RexOp1popq#58,%rbp] v25 = x86_pop.i64 @0008 [Op1ret#c3] return v25 block4: @0008 [-] nop @0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 [RexOp1ld#808b,%rax] v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a [RexOp1rmov#8089] regmove v20, %rax -> %rdi ; ; val4@%rdi @000a [Op1call_plt_id#e8] call fn0(v20) ; ; val4☠@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1 @000a [RexOp1rmov#8089] regmove v21, %rax -> %rdi @000a [Op1call_plt_id#e8] call fn1(v21) @000a [Op2trap#40b] trap unreachable ; ; val1☠}</details>
- Which Cranelift version / commit hash / branch are you using? ac7cd4c46abdc9d4f3ef3230386afd52177e5f7c
- If relevant, can you include some extra information about your environment? x86_64 macOS
github-actions[bot] commented on Issue #1448:
Subscribe to Label Action
This issue or pull request has been labeled: "cranelift"
<details> <summary>Users Subscribed to "cranelift"</summary>
- @bnjbvr
</details>
To subscribe or unsubscribe from this label, edit the <code>.github/subscribe-to-label.json</code> configuration file.
abrown commented on Issue #1448:
That is very weird; what are the chances that
has_lzcnt
is not getting enabled in the settings for some reason? I've had that problem before. Maybe the `#[cfg...]`
is preventing that function from being compiled on the macOS target?
bjorn3 commented on Issue #1448:
I am not using
cranelift-native
. Instead I take the target triple from rustc, parse it and pass it to `cranelift_codegen::isa::lookup`
. In the failing PR I then tried to enable the `haswell`
preset.
abrown commented on Issue #1448:
Yeah, but if you are running
test compile
filetests then you might be: see the `TargetIsa`
passed in to `TestCompile::run`
?
bjorn3 commented on Issue #1448:
I haven't run it as
test compile
. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.
bjorn3 commented on Issue #1448:
I haven't run it as
test compile
. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.
bjorn3 deleted a comment on Issue #1448:
I haven't run it as
test compile
. I just made the clif ir printer in cg_clif always include that directive to make running it that way faster. Due to the several minute CI feedback loop, I haven't made a standalone repro yet.
bjorn3 commented on Issue #1448:
Found the problem: The macOS CI uses an "Intel(R) Xeon(R) CPU E5-1650 v2 @ 3.50GHz". This is an Ivy Bridge EP, which means that
lzcnt
is not yet supported.
bjorn3 closed Issue #1448:
I tried to switch to the
haswell
preset for SIMD support in https://github.com/bjorn3/rustc_codegen_cranelift/pull/951. On Linux everything is fine, however on macOS `clz`
returns the wrong result. (At least on github actions) As I currently don't have access to a mac, I used github actions to bisect the change from https://github.com/bjorn3/rustc_codegen_cranelift/pull/951 and to minimize the test case to basically `fn main() { assert_eq!(intrinsics::ctlz(0b0000000000000000000000000010000010000000000000000000000000000000_0000000000100000000000000000000000001000000000000100000000000000u128) as u32, 26u32); }`
in 5f58262982a8b36b4c5cf5b7f1c515c77ac0c65b.
<details><summary>Clif ir before compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2() system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64) system_v sig1 = (i64) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0: nop jump block1 block1: nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 v0 = iconst.i64 0x0020_0000_0800_4000 @0002 v1 = iconst.i64 0x0020_8000_0000 @0002 v2 = iconcat v0, v1 @0002 v3, v4 = isplit v2 @0002 v5 = clz v3 @0002 v6 = clz v4 @0002 v7 = icmp_imm eq v4, 0 @0002 v8 = iadd_imm v5, 64 @0002 v9 = select v7, v8, v6 @0002 v10 = uextend.i128 v9 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 jump block2 block2: @0002 nop @0001 v12, v13 = isplit.i128 v11 @0001 v14 = ireduce.i32 v12 ; write_cvalue: Var(_2): u32 <- ByVal(v14): u32 @0004 v15 = iconst.i32 26 @0004 v16 = icmp ne v14, v15 @0004 v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 v18 = uextend.i32 v17 @0006 brz v18, block3 @0006 jump block4 block3: @0006 nop ; ; return @0008 return block4: @0008 nop @0009 v19 = global_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): 
&(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a call fn0(v20) @000a v21 = global_value.i64 gv1 @000a call fn1(v21) @000a trap unreachable }
</details>
<details><summary>Clif ir after compilation</summary>
test compile set is_pic set enable_simd target x86_64-apple-macosx10.7.0 haswell function u0:2(i64 fp [%rbp]) -> i64 fp [%rbp] system_v { ; symbol _ZN21mini_core_hello_world4main17h0e7acfb4fe46b048E ; instance Instance { def: Item(DefId(0:11 ~ mini_core_hello_world[317d]::main[0])), substs: [] } ; sig ([]; c_variadic: false)->() ; kind loc.idx param pass mode ty ; ret _0 - NoPass () ; kind local ty size align (abi,pref) ; ssa _1 bool 1b 1, 1 ; ssa _2 u32 4b 4, 4 ; ssa _3 u128 16b 8, 8 ; ssa _4 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _5 &(&str, &str, u32, u32) 8b 8, 8 ; ssa _6 &(&str, &str, u32, u32) 8b 8, 8 ss0 = incoming_arg 16, offset -16 gv0 = symbol colocated u1:0 gv1 = symbol colocated u1:1 ; [corruption] Diverging function returned sig0 = (i64 [%rdi]) system_v sig1 = (i64 [%rdi]) system_v fn0 = u0:3 sig0 ; Instance { def: Item(DefId(1:226 ~ mini_core[8787]::panic[0])), substs: [] } fn1 = u0:4 sig1 ; puts block0(v24: i64 [%rbp]): [RexOp1pushq#50] x86_push v24 [RexOp1copysp#8089] copy_special %rsp -> %rbp [-] nop [-] fallthrough block1 block1: [-] nop ; ; _3 = const mini_core::intrinsics::ctlz::<u128>(const 2574915281713599978989567361024u128) @0002 [RexOp1pu_iq#80b8,%rax] v0 = iconst.i64 0x0020_0000_0800_4000 v3 -> v0 @0002 [RexOp1pu_iq#80b8,%rcx] v1 = iconst.i64 0x0020_8000_0000 v4 -> v1 @0002 [-,-] v2 = iconcat v0, v1 @0002 [RexMp2urm#86bd,%rax] v5 = clz v0 @0002 [RexMp2urm#86bd,%rdx] v6 = clz v1 @0002 [DynRexOp1icscc_ib#f083,%rcx] v7 = icmp_imm eq v1, 0 @0002 [DynRexOp1r_ib#8083,%rax] v8 = iadd_imm v5, 64 @0002 [RexOp1t8jccb#75] brnz v7, block5(v8) [-] fallthrough block7 block7: @0002 [DynRexOp1umr#8089,%rax] v23 = copy.i64 v6 @0002 [-] fallthrough block5(v23) block5(v9: i64 [%rax]): v12 -> v9 @0002 [RexOp1pu_id#b8,%rcx] v22 = iconst.i64 0 v13 -> v22 @0002 [-,-] v10 = iconcat v9, v22 v11 -> v10 ; write_cvalue: Var(_3): u128 <- ByVal(v10): u128 @0002 [-] fallthrough block2 block2: @0002 [-] nop @0001 [null#00,%rax] v14 = ireduce.i32 v9 ; write_cvalue: Var(_2): 
u32 <- ByVal(v14): u32 @0004 [RexOp1pu_id#b8,%rcx] v15 = iconst.i32 26 @0004 [DynRexOp1icscc#39,%rax] v16 = icmp ne v14, v15 @0004 [RexOp2urm_noflags#4b6,%rax] v17 = bint.i8 v16 ; write_cvalue: Var(_1): bool <- ByVal(v17): bool ; ; switchInt(_1) @0006 [RexOp2urm_noflags#4b6,%rax] v18 = uextend.i32 v17 ; ; val1@%rax @0006 [RexOp1tjccb#74] brz v18, block3 @0006 [Op1jmpb#eb] jump block4 block3: @0006 [-] nop ; ; return [RexOp1popq#58,%rbp] v25 = x86_pop.i64 @0008 [Op1ret#c3] return v25 block4: @0008 [-] nop @0009 [RexOp1pcrel_gvaddr8#808d,%rax] v19 = symbol_value.i64 gv0 ; write_cvalue: Var(_6): &(&str, &str, u32, u32) <- ByRef(Pointer { base: Addr(v19), offset: Offset32(0) }, None): &(&str, &str, u32, u32) @0009 [RexOp1ld#808b,%rax] v20 = load.i64 v19 ; write_cvalue: Var(_5): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; write_cvalue: Var(_4): &(&str, &str, u32, u32) <- ByVal(v20): &(&str, &str, u32, u32) ; ; const mini_core::panic(move _4) @000a [RexOp1rmov#8089] regmove v20, %rax -> %rdi ; ; val4@%rdi @000a [Op1call_plt_id#e8] call fn0(v20) ; ; val4☠@000a [RexOp1pcrel_gvaddr8#808d,%rax] v21 = symbol_value.i64 gv1 @000a [RexOp1rmov#8089] regmove v21, %rax -> %rdi @000a [Op1call_plt_id#e8] call fn1(v21) @000a [Op2trap#40b] trap unreachable ; ; val1☠}
</details>
- Which Cranelift version / commit hash / branch are you using? ac7cd4c46abdc9d4f3ef3230386afd52177e5f7c
- If relevant, can you include some extra information about your environment? x86_64 macOS
Last updated: Dec 23 2024 at 12:05 UTC