wasmtime / Issue #1423 Cranelift: x86_pextr encoding usin... · git-wasmtime

Stream: git-wasmtime

Topic: wasmtime / Issue #1423 Cranelift: x86_pextr encoding usin...

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:52):

I compiled

test compile
set enable_simd
target x86_64-unknown-linux-gnu haswell

function u0:0() -> i32 system_v {
    ss0 = explicit_slot 32
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

block0:
    v0 = stack_addr.i64 ss0
    v1 = load.i32x4 v0
    call fn0()
    v2 = extractlane.i32x4 v1, 1
    return v2
}

this results in the following ir after regalloc:

function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v {
    ss0 = explicit_slot 32, offset -48
    ss1 = spill_slot 16, offset -64
    ss2 = incoming_arg 16, offset -16
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

                                block0(v5: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v5
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 48
[RexOp1spaddr8_id#808d,%rax]        v0 = stack_addr.i64 ss0
[DynRexOp2fld#410,%xmm0]            v3 = load.i32x4 v0
[RexOp2fspillSib32#411,ss1]         v1 = spill v3
[Op1call_id#e8]                     call fn0()
[RexOp2ffillSib32#410,%xmm15]       v4 = fill v1
[DynRexMp3r_ib_unsigned_gpr#d16,%rax] v2 = x86_pextr v4, 1
[RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 48
[RexOp1popq#58,%rbp]                v6 = x86_pop.i64
[Op1ret#c3]                         return v2, v6
}

The value passed to x86_pextr is assigned to %xmm15, and the fill correctly loads it into %xmm15; however, the resulting asm for pextrd reads it from %xmm7 instead:

   0:   40 55                   push    rbp
   2:   48 89 e5                mov     rbp, rsp
   5:   48 83 ec 30             sub     rsp, 0x30
   9:   48 8d 84 24 10 00 00 00 lea     rax, [rsp + 0x10]
  11:   0f 10 00                movups  xmm0, xmmword ptr [rax]
  14:   40 0f 11 84 24 00 00 00 00
                                movups  xmmword ptr [rsp], xmm0
  1d:   e8 00 00 00 00          call    0x22
  22:   44 0f 10 bc 24 00 00 00 00
                                movups  xmm15, xmmword ptr [rsp]
  2b:   66 41 0f 3a 16 f8 01    pextrd  r8d, xmm7, 1
  32:   48 83 c4 30             add     rsp, 0x30
  36:   40 5d                   pop     rbp
  38:   c3                      ret

(Experimenting with SIMD support for cg_clif)

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:52):

bjorn3 opened Issue #1423:

I compiled

test compile
set enable_simd
target x86_64-unknown-linux-gnu haswell

function u0:0() -> i32 system_v {
    ss0 = explicit_slot 32
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

block0:
    v0 = stack_addr.i64 ss0
    v1 = load.i32x4 v0
    call fn0()
    v2 = extractlane.i32x4 v1, 1
    return v2
}

this results in the following ir after regalloc:

function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v {
    ss0 = explicit_slot 32, offset -48
    ss1 = spill_slot 16, offset -64
    ss2 = incoming_arg 16, offset -16
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

                                block0(v5: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v5
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 48
[RexOp1spaddr8_id#808d,%rax]        v0 = stack_addr.i64 ss0
[DynRexOp2fld#410,%xmm0]            v3 = load.i32x4 v0
[RexOp2fspillSib32#411,ss1]         v1 = spill v3
[Op1call_id#e8]                     call fn0()
[RexOp2ffillSib32#410,%xmm15]       v4 = fill v1
[DynRexMp3r_ib_unsigned_gpr#d16,%rax] v2 = x86_pextr v4, 1
[RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 48
[RexOp1popq#58,%rbp]                v6 = x86_pop.i64
[Op1ret#c3]                         return v2, v6
}

The value passed to x86_pextr is assigned to %xmm15, and the fill correctly loads it into %xmm15; however, the resulting asm for pextrd reads it from %xmm7 instead:

   0:   40 55                   push    rbp
   2:   48 89 e5                mov     rbp, rsp
   5:   48 83 ec 30             sub     rsp, 0x30
   9:   48 8d 84 24 10 00 00 00 lea     rax, [rsp + 0x10]
  11:   0f 10 00                movups  xmm0, xmmword ptr [rax]
  14:   40 0f 11 84 24 00 00 00 00
                                movups  xmmword ptr [rsp], xmm0
  1d:   e8 00 00 00 00          call    0x22
  22:   44 0f 10 bc 24 00 00 00 00
                                movups  xmm15, xmmword ptr [rsp]
  2b:   66 41 0f 3a 16 f8 01    pextrd  r8d, xmm7, 1
  32:   48 83 c4 30             add     rsp, 0x30
  36:   40 5d                   pop     rbp
  38:   c3                      ret

(Experimenting with SIMD support for cg_clif)

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:52):

bjorn3 labeled Issue #1423:

I compiled

test compile
set enable_simd
target x86_64-unknown-linux-gnu haswell

function u0:0() -> i32 system_v {
    ss0 = explicit_slot 32
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

block0:
    v0 = stack_addr.i64 ss0
    v1 = load.i32x4 v0
    call fn0()
    v2 = extractlane.i32x4 v1, 1
    return v2
}

this results in the following ir after regalloc:

function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v {
    ss0 = explicit_slot 32, offset -48
    ss1 = spill_slot 16, offset -64
    ss2 = incoming_arg 16, offset -16
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

                                block0(v5: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v5
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 48
[RexOp1spaddr8_id#808d,%rax]        v0 = stack_addr.i64 ss0
[DynRexOp2fld#410,%xmm0]            v3 = load.i32x4 v0
[RexOp2fspillSib32#411,ss1]         v1 = spill v3
[Op1call_id#e8]                     call fn0()
[RexOp2ffillSib32#410,%xmm15]       v4 = fill v1
[DynRexMp3r_ib_unsigned_gpr#d16,%rax] v2 = x86_pextr v4, 1
[RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 48
[RexOp1popq#58,%rbp]                v6 = x86_pop.i64
[Op1ret#c3]                         return v2, v6
}

The value passed to x86_pextr is assigned to %xmm15, and the fill correctly loads it into %xmm15; however, the resulting asm for pextrd reads it from %xmm7 instead:

   0:   40 55                   push    rbp
   2:   48 89 e5                mov     rbp, rsp
   5:   48 83 ec 30             sub     rsp, 0x30
   9:   48 8d 84 24 10 00 00 00 lea     rax, [rsp + 0x10]
  11:   0f 10 00                movups  xmm0, xmmword ptr [rax]
  14:   40 0f 11 84 24 00 00 00 00
                                movups  xmmword ptr [rsp], xmm0
  1d:   e8 00 00 00 00          call    0x22
  22:   44 0f 10 bc 24 00 00 00 00
                                movups  xmm15, xmmword ptr [rsp]
  2b:   66 41 0f 3a 16 f8 01    pextrd  r8d, xmm7, 1
  32:   48 83 c4 30             add     rsp, 0x30
  36:   40 5d                   pop     rbp
  38:   c3                      ret

(Experimenting with SIMD support for cg_clif)

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:52):

github-actions[bot] commented on Issue #1423:

Subscribe to Label Action

This issue or pull request has been labeled: "cranelift"

<details> <summary>Users Subscribed to "cranelift"</summary>

@bnjbvr

</details>

To subscribe or unsubscribe from this label, edit the <code>.github/subscribe-to-label.json</code> configuration file.

Learn more.

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:56):

abrown commented on Issue #1423:

What version or commit of Cranelift?

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:56):

bjorn3 commented on Issue #1423:

0d4bde4ab30f202c888888db7a8eb2d905c0119f

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:56):

bjorn3 edited a comment on Issue #1423:

0d4bde4ab30f202c888888db7a8eb2d905c0119f (4 days ago)

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:56):

bjorn3 edited a comment on Issue #1423:

0d4bde4ab30f202c888888db7a8eb2d905c0119f (from 4 days ago)

Wasmtime GitHub notifications bot (Mar 27 2020 at 19:57):

abrown commented on Issue #1423:

Hm, so x86_pextr should be able to infer that it needs the REX prefix: https://github.com/bytecodealliance/wasmtime/blob/master/cranelift/codegen/meta/src/isa/x86/encodings.rs#L1761-L1778

Wasmtime GitHub notifications bot (Mar 27 2020 at 20:03):

abrown commented on Issue #1423:

Ah, the operands are flipped, right? `66 41 0f 3a 16 f8 01    pextrd  r8d, xmm7, 1` is in Intel syntax (I think) and should actually read `pextrd xmm15, rax, 1`. The REX prefix is there and being applied, but the operands are flipped in the recipe or something like that.

Wasmtime GitHub notifications bot (Mar 27 2020 at 20:05):

bjorn3 commented on Issue #1423:

This is the default Capstone syntax; I just ran `clif-util compile -Dp`. And the output should indeed be in %rax; I didn't notice that. I was too focused on the input :)

Wasmtime GitHub notifications bot (Mar 27 2020 at 20:06):

bjorn3 commented on Issue #1423:

From the encoding:

modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte

Wasmtime GitHub notifications bot (Mar 27 2020 at 20:07):

bjorn3 commented on Issue #1423:

Flipped the rex2 arguments too and the problem was fixed. Will open a PR in a moment.

Wasmtime GitHub notifications bot (Mar 27 2020 at 20:14):

abrown commented on Issue #1423:

So my comment above was incorrect:

> Ah, the operands are flipped, right?

The operands shouldn't be flipped; it should still be `pextrd rax, xmm15, 1`, since rax is the write register and thus in the R/M slot. It's the REX bits that need to be flipped: good catch that we need to flip the operands that we pass to `rex2`. Could you add an `x86_pextr` binemit test (or let me know and I can add one to that PR)? The REX coverage is thin...

Wasmtime GitHub notifications bot (Mar 27 2020 at 21:29):

abrown closed Issue #1423:

I compiled

test compile
set enable_simd
target x86_64-unknown-linux-gnu haswell

function u0:0() -> i32 system_v {
    ss0 = explicit_slot 32
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

block0:
    v0 = stack_addr.i64 ss0
    v1 = load.i32x4 v0
    call fn0()
    v2 = extractlane.i32x4 v1, 1
    return v2
}

this results in the following ir after regalloc:

function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v {
    ss0 = explicit_slot 32, offset -48
    ss1 = spill_slot 16, offset -64
    ss2 = incoming_arg 16, offset -16
    sig0 = () system_v
    fn0 = colocated u0:2 sig0

                                block0(v5: i64 [%rbp]):
[RexOp1pushq#50]                    x86_push v5
[RexOp1copysp#8089]                 copy_special %rsp -> %rbp
[RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 48
[RexOp1spaddr8_id#808d,%rax]        v0 = stack_addr.i64 ss0
[DynRexOp2fld#410,%xmm0]            v3 = load.i32x4 v0
[RexOp2fspillSib32#411,ss1]         v1 = spill v3
[Op1call_id#e8]                     call fn0()
[RexOp2ffillSib32#410,%xmm15]       v4 = fill v1
[DynRexMp3r_ib_unsigned_gpr#d16,%rax] v2 = x86_pextr v4, 1
[RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 48
[RexOp1popq#58,%rbp]                v6 = x86_pop.i64
[Op1ret#c3]                         return v2, v6
}

The value passed to x86_pextr is assigned to %xmm15, and the fill correctly loads it into %xmm15; however, the resulting asm for pextrd reads it from %xmm7 instead:

   0:   40 55                   push    rbp
   2:   48 89 e5                mov     rbp, rsp
   5:   48 83 ec 30             sub     rsp, 0x30
   9:   48 8d 84 24 10 00 00 00 lea     rax, [rsp + 0x10]
  11:   0f 10 00                movups  xmm0, xmmword ptr [rax]
  14:   40 0f 11 84 24 00 00 00 00
                                movups  xmmword ptr [rsp], xmm0
  1d:   e8 00 00 00 00          call    0x22
  22:   44 0f 10 bc 24 00 00 00 00
                                movups  xmm15, xmmword ptr [rsp]
  2b:   66 41 0f 3a 16 f8 01    pextrd  r8d, xmm7, 1
  32:   48 83 c4 30             add     rsp, 0x30
  36:   40 5d                   pop     rbp
  38:   c3                      ret

(Experimenting with SIMD support for cg_clif)

Last updated: Apr 18 2025 at 07:03 UTC