Stream: git-wasmtime

Topic: wasmtime / issue #5962 Change `bitselect`'s condition mas...


view this post on Zulip Wasmtime GitHub notifications bot (Mar 08 2023 at 03:27):

alexcrichton labeled issue #5962:

This came up during https://github.com/bytecodealliance/wasmtime/pull/5918 and the removal of the old vselect instruction. Currently the bitselect instruction uses the same type for all of its inputs and its output, but that means that for a floating-point selection the condition mask is also a floating-point value which is typically not the case. For example an icmp would produce an integer vector which could be used to bitselect the results of the comparisons.

Using an integral mask would make the NaN-canonicalization for vectors slightly simpler and additionally make it easier to re-add the optimizations removed in https://github.com/bytecodealliance/wasmtime/pull/5918 where bitselect-of-fcmp for float vectors could be optimized into f{min,max} where possible.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 08 2023 at 03:27):

alexcrichton opened issue #5962:

This came up during https://github.com/bytecodealliance/wasmtime/pull/5918 and the removal of the old vselect instruction. Currently the bitselect instruction uses the same type for all of its inputs and its output, but that means that for a floating-point selection the condition mask is also a floating-point value which is typically not the case. For example an icmp would produce an integer vector which could be used to bitselect the results of the comparisons.

Using an integral mask would make the NaN-canonicalization for vectors slightly simpler and additionally make it easier to re-add the optimizations removed in https://github.com/bytecodealliance/wasmtime/pull/5918 where bitselect-of-fcmp for float vectors could be optimized into f{min,max} where possible.

view this post on Zulip Wasmtime GitHub notifications bot (Mar 08 2023 at 16:44):

alexcrichton commented on issue #5962:

I attempted this naively with:

diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 372407965..136e8f1bd 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1449,7 +1449,15 @@ pub(crate) fn define(
         .side_effects_idempotent(),
     );

-    let c = &Operand::new("c", Any).with_doc("Controlling value to test");
+    let BitselectTruthy = &TypeVar::new(
+        "BitselectTruthy",
+        "A truthy type for the `bitselect` instruction",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
+    let c = &Operand::new("c", BitselectTruthy).with_doc("Controlling value to test");
     ig.push(
         Inst::new(
             "bitselect",
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index b4449a967..de531b1e8 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -187,8 +187,8 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: f32x4, v1: f32x4, v2: f32x4):
+function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: i32x4, v1: f32x4, v2: f32x4):
     v3 = bitselect v0, v1, v2
     return v3
 }
@@ -203,8 +203,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: f64x2, v1: f64x2, v2: f64x2):
+function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: i64x2, v1: f64x2, v2: f64x2):
     v3 = bitselect v0, v1, v2
     return v3
 }

but that's not quite what we want I think since it's allowing i32 as a mask type for f32x4 bitselects. I don't know how to require that the type size of the condition equals the type size of the mask

view this post on Zulip Wasmtime GitHub notifications bot (Mar 08 2023 at 16:52):

alexcrichton edited a comment on issue #5962:

I attempted this naively with:

diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 372407965..136e8f1bd 100755
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1449,7 +1449,15 @@ pub(crate) fn define(
         .side_effects_idempotent(),
     );

-    let c = &Operand::new("c", Any).with_doc("Controlling value to test");
+    let BitselectTruthy = &TypeVar::new(
+        "BitselectTruthy",
+        "A truthy type for the `bitselect` instruction",
+        TypeSetBuilder::new()
+            .ints(Interval::All)
+            .simd_lanes(Interval::All)
+            .build(),
+    );
+    let c = &Operand::new("c", BitselectTruthy).with_doc("Controlling value to test");
     ig.push(
         Inst::new(
             "bitselect",
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index b4449a967..de531b1e8 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -187,8 +187,8 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f32x4(f32x4, f32x4, f32x4) -> f32x4 {
-block0(v0: f32x4, v1: f32x4, v2: f32x4):
+function %vselect_f32x4(i32x4, f32x4, f32x4) -> f32x4 {
+block0(v0: i32x4, v1: f32x4, v2: f32x4):
     v3 = bitselect v0, v1, v2
     return v3
 }
@@ -203,8 +203,8 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4):
 ;   bsl v0.16b, v1.16b, v2.16b
 ;   ret

-function %vselect_f64x2(f64x2, f64x2, f64x2) -> f64x2 {
-block0(v0: f64x2, v1: f64x2, v2: f64x2):
+function %vselect_f64x2(i64x2, f64x2, f64x2) -> f64x2 {
+block0(v0: i64x2, v1: f64x2, v2: f64x2):
     v3 = bitselect v0, v1, v2
     return v3
 }

but that's not quite what we want I think since it's allowing i32 as a mask type for f32x4 bitselects. I don't know how to require that the type size of the condition equals the type size of the oeprands


Last updated: Nov 22 2024 at 17:03 UTC