MarinPostma opened PR #10076 from MarinPostma:addr-compute-refactor to bytecodealliance:main:
This PR factors out heap address computation from the `CodeGen` struct, so that it can be passed as a callback to the masm. The reason for this refactor is that it avoids an unconditional spill on x64 for atomic operations that have specific requirements on some registers. The spill happens because the address for those instructions sits under the operands on the value stack, forcing us to pop the operands to registers, compute the address, and push the operands back on the stack. This dance made it almost certain that the required registers were already allocated when needed, thus forcing a spill. By factoring out address computation, we can pass it as a callback to the macro assembler, which can pop arguments in their natural order, reserving specific registers as necessary.
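For illustration, here is a minimal, self-contained sketch of the callback shape described above; all names in it (`Reg`, `Context`, `Masm`, `compute_addr`) are illustrative stand-ins, not the PR's actual `CodeGen`/`MacroAssembler` signatures:

```rust
#[derive(Debug, Clone, Copy)]
struct Reg(u8);

struct Context {
    // Value stack: registers holding the wasm operands, top of stack at the end.
    stack: Vec<Reg>,
}

impl Context {
    fn pop_to_reg(&mut self) -> Reg {
        self.stack.pop().expect("missing value on the stack")
    }
}

struct Masm;

impl Masm {
    // Instead of receiving a precomputed address, the masm receives a callback.
    // It can pop `replacement` and `expected` in their natural order (reserving
    // any fixed registers it needs first) and only then ask for the heap
    // address, so nothing has to be popped, pushed back, and spilled around the
    // address computation.
    fn atomic_cas<F>(&mut self, cx: &mut Context, compute_addr: F)
    where
        F: FnOnce(&mut Masm, &mut Context) -> Reg,
    {
        let replacement = cx.pop_to_reg();
        let expected = cx.pop_to_reg();
        let addr = compute_addr(self, cx);
        println!("cmpxchg [{addr:?}], expected {expected:?}, replacement {replacement:?}");
    }
}

fn main() {
    // Stack layout for a CAS: [index, expected, replacement], top at the right.
    let mut cx = Context {
        stack: vec![Reg(2), Reg(1), Reg(0)],
    };
    let mut masm = Masm;
    masm.atomic_cas(&mut cx, |_masm, cx| {
        // The callback owns the address computation; in the real code
        // generator, bounds and alignment checks would happen here.
        cx.pop_to_reg()
    });
}
```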
Running a few benchmarks with sightglass (not all of the test suite is supported yet), there's no performance regression in doing so:
```
compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  Δ = 19296541.11 ± 5094859.49 (confidence = 99%)

  refacto.so is 1.01x to 1.01x faster than main.so!

  [2361419777 2391240753.01 2441310853] main.so
  [2340893254 2371944211.90 2421142219] refacto.so

instantiation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  No difference in performance.

  [371434 422720.96 560161] main.so
  [341377 417196.75 494199] refacto.so

execution :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  No difference in performance.

  [905704442 928015775.03 981552142] main.so
  [905318861 932491387.89 998574294] refacto.so

-------------------------------------------------------------------------

instantiation :: cycles :: benchmarks/bz2/benchmark.wasm

  No difference in performance.

  [103974 128183.43 181890] main.so
  [105307 131811.77 177160] refacto.so

compilation :: cycles :: benchmarks/bz2/benchmark.wasm

  No difference in performance.

  [21460612 25083573.56 51889605] main.so
  [21764794 25410240.69 53043037] refacto.so

execution :: cycles :: benchmarks/bz2/benchmark.wasm

  No difference in performance.

  [108341768 110171123.02 119094004] main.so
  [107632268 109609358.98 114981054] refacto.so

-------------------------------------------------------------------------

execution :: cycles :: benchmarks/regex/benchmark.wasm

  Δ = 2168975.90 ± 1429157.42 (confidence = 99%)

  refacto.so is 1.00x to 1.02x faster than main.so!

  [230279362 234821937.90 252109774] main.so
  [229225690 232652962.00 243118560] refacto.so

instantiation :: cycles :: benchmarks/regex/benchmark.wasm

  No difference in performance.

  [310116 341486.65 583080] main.so
  [298291 337779.62 385882] refacto.so

compilation :: cycles :: benchmarks/regex/benchmark.wasm

  No difference in performance.

  [227528308 234328619.97 266022424] main.so
  [227071476 232270717.37 262020199] refacto.so
```
Those benches do not indicate an improvement in performance, but that's probably because they don't rely on atomic operations. Looking at the generated asm in the `disas` tests, we can see that a good number of instructions are shaved off: they correspond to the spills.
github-actions[bot] commented on PR #10076:
Subscribe to Label Action
cc @saulecabrera
<details>
This issue or pull request has been labeled: "winch"

Thus the following users have been cc'd because of the following labels:
- saulecabrera: winch
To subscribe or unsubscribe from this label, edit the <code>.github/subscribe-to-label.json</code> configuration file.
Learn more.
</details>
saulecabrera commented on PR #10076:
Thanks for this and thanks for providing benchmarks.
I'm personally not entirely convinced that the improvements in terms of code generation are worth the complexity of this change, for multiple reasons, but to name a few that come to mind:
- The provided benchmarks are probably not representative enough to warrant some of the complexity in the `CodeGen` module. Even though in the code generation output we can see that some instructions are improved, and that there's no change in compilation time, I think it'd be beneficial to see a more real-world benchmark, one that actually uses the instructions for which this refactor is introduced. I'm aware that there are probably none currently, which makes me think that it might be worth waiting until we have some real-world benchmarks that we can use to make an informed decision: it'd be beneficial to compare concrete % improvements for runtime performance and similar for compilation performance.
- I fear that instruction-focused optimizations, like in this case, might start compromising the compiler's simplicity design principle; as a rule of thumb, I think it's worth considering new architectural patterns that can be adopted throughout the compiler, but we should probably take a step back when considering changes that have the potential to improve certain instructions at the cost of higher complexity.
That said though, I really appreciate you taking the time to explore this; I decided to explore a slightly different alternative. My goal with this exploration was to achieve a similar result, but with potentially fewer changes required in the `CodeGen` module. The high-level idea:
- Refactor `emit_compute_heap_addr` to take in an `Index`; this lifts the responsibility of having to pop the index, and gives more flexibility to the caller to decide when the heap address calculation should take place.
- Introduce an `atomic_cas_clobbers` method at the MacroAssembler, which will inform the caller which registers are expected to be clobbered by a particular instruction.
- When preparing the operands for `atomic_cas`, exclude any clobbers, via `CodeGenContext::without` (see the sketch after this list).
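To make the role of `CodeGenContext::without` concrete, here is a minimal, self-contained sketch of what a `without`-style combinator might look like; the `Reg` and `Context` types and the exact signature are assumptions for illustration, not winch's actual API:

```rust
use std::collections::HashSet;

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct Reg(u8);

struct Context {
    // Registers currently available to the allocator.
    free: HashSet<Reg>,
}

impl Context {
    // Hand out any currently available register.
    fn any_gpr(&mut self) -> Reg {
        let r = *self.free.iter().next().expect("no free registers");
        self.free.remove(&r);
        r
    }

    // Run `f` while `clobbers` are excluded from allocation, so values
    // popped inside `f` can never land in a register the upcoming
    // instruction implicitly clobbers.
    fn without<T, F>(&mut self, clobbers: &[Reg], f: F) -> T
    where
        F: FnOnce(&mut Context) -> T,
    {
        let reserved: Vec<Reg> = clobbers
            .iter()
            .copied()
            .filter(|r| self.free.remove(r))
            .collect();
        let result = f(self);
        self.free.extend(reserved);
        result
    }
}

fn main() {
    let rax = Reg(0);
    let mut cx = Context {
        free: HashSet::from([Reg(0), Reg(1), Reg(2)]),
    };
    // On x64, `cmpxchg` implicitly uses rax, so exclude it while the
    // operands are popped to registers.
    let operand = cx.without(&[rax], |cx| cx.any_gpr());
    assert_ne!(operand, rax);
    println!("popped operand into {operand:?} without touching rax");
}
```

The effect is that a register the instruction implicitly clobbers (rax for `cmpxchg` on x64, for example) is never handed out while the operands are being popped, which is what removes the forced spill.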
Refer to the details section below for an (incomplete) diff of this idea:
<details>
```diff
diff --git a/winch/codegen/src/codegen/mod.rs b/winch/codegen/src/codegen/mod.rs
index f2eb2f976..6648bc5c9 100644
--- a/winch/codegen/src/codegen/mod.rs
+++ b/winch/codegen/src/codegen/mod.rs
@@ -647,6 +647,7 @@ where
         &mut self,
         memarg: &MemArg,
         access_size: OperandSize,
+        index: Index,
     ) -> Result<Option<Reg>> {
         let ptr_size: OperandSize = self.env.ptr_type().try_into()?;
         let enable_spectre_mitigation = self.env.heap_access_spectre_mitigation();
@@ -656,7 +657,6 @@ where
         let memory_index = MemoryIndex::from_u32(memarg.memory);
         let heap = self.env.resolve_heap(memory_index);
-        let index = Index::from_typed_reg(self.context.pop_to_reg(self.masm, None)?);
         let offset = bounds::ensure_index_and_offset(
             self.masm,
             index,
@@ -680,6 +680,7 @@ where
             self.emit_fuel_increment()?;
             self.masm.trap(TrapCode::HEAP_OUT_OF_BOUNDS)?;
             self.context.reachable = false;
+            self.context.free_reg(index.as_typed_reg().reg);
             None
         } else if !can_elide_bounds_check {
             // Account for the general case for bounds-checked memories. The
@@ -840,38 +841,30 @@ where
     }
 
     /// Emit checks to ensure that the address at `memarg` is correctly aligned for `size`.
-    fn emit_check_align(&mut self, memarg: &MemArg, size: OperandSize) -> Result<()> {
-        if size.bytes() > 1 {
-            // Peek addr from top of the stack by popping and pushing.
-            let addr = *self
-                .context
-                .stack
-                .peek()
-                .ok_or_else(|| CodeGenError::missing_values_in_stack())?;
-            let tmp = self.context.any_gpr(self.masm)?;
-            self.context.move_val_to_reg(&addr, tmp, self.masm)?;
+    fn emit_check_align(&mut self, memarg: &MemArg, size: OperandSize, index: Index) -> Result<()> {
+        if size.bytes() < 2 {
+            return Ok(());
+        }
 
-            if memarg.offset != 0 {
-                self.masm.add(
-                    writable!(tmp),
-                    tmp,
-                    RegImm::Imm(Imm::I64(memarg.offset)),
-                    size,
-                )?;
-            }
+        let tmp = self.context.any_gpr(self.masm)?;
+        let index_reg = index.as_typed_reg().reg;
 
-            self.masm.and(
+        self.masm
+            .mov(writable!(tmp), RegImm::reg(index_reg), size)?;
+
+        if memarg.offset != 0 {
+            self.masm.add(
                 writable!(tmp),
                 tmp,
-                RegImm::Imm(Imm::I32(size.bytes() - 1)),
+                RegImm::Imm(Imm::I64(memarg.offset)),
                 size,
             )?;
-
-            self.masm.cmp(tmp, RegImm::Imm(Imm::i64(0)), size)?;
-            self.masm.trapif(IntCmpKind::Ne, TRAP_HEAP_MISALIGNED)?;
-            self.context.free_reg(tmp);
         }
 
+        self.masm.cmp(tmp, RegImm::Imm(Imm::i64(0)), size)?;
+        self.masm.trapif(IntCmpKind::Ne, TRAP_HEAP_MISALIGNED)?;
+        self.context.free_reg(tmp);
+
         Ok(())
     }
 
@@ -879,9 +872,10 @@ where
         &mut self,
         memarg: &MemArg,
         access_size: OperandSize,
+        index: Index,
     ) -> Result<Option<Reg>> {
-        self.emit_check_align(memarg, access_size)?;
-        self.emit_compute_heap_address(memarg, access_size)
+        self.emit_check_align(memarg, access_size, index)?;
+        self.emit_compute_heap_address(memarg, access_size, index)
     }
 
     /// Emit a WebAssembly load.
@@ -891,13 +885,16 @@ where
         target_type: WasmValType,
         kind: LoadKind,
         op_kind: MemOpKind,
+        index: Index,
     ) -> Result<()> {
         let maybe_addr = match op_kind {
-            MemOpKind::Atomic => {
-                self.emit_compute_heap_address_align_checked(&arg, kind.derive_operand_size())?
-            }
+            MemOpKind::Atomic => self.emit_compute_heap_address_align_checked(
+                &arg,
+                kind.derive_operand_size(),
+                index,
+            )?,
             MemOpKind::Normal => {
-                self.emit_compute_heap_address(&arg, kind.derive_operand_size())?
+                self.emit_compute_heap_address(&arg, kind.derive_operand_size(), index)?
             }
         };
@@ -926,12 +923,13 @@ where
         arg: &MemArg,
         size: OperandSize,
         op_kind: MemOpKind,
+        index: Index,
     ) -> Result<()> {
         let src = self.context.pop_to_reg(self.masm, None)?;
 
         let maybe_addr = match op_kind {
-            MemOpKind::Atomic => self.emit_compute_heap_address_align_checked(&arg, size)?,
-            MemOpKind::Normal => self.emit_compute_heap_address(&arg, size)?,
+            MemOpKind::Atomic => self.emit_compute_heap_address_align_checked(&arg, size, index)?,
+            MemOpKind::Normal => self.emit_compute_heap_address(&arg, size, index)?,
         };
 
         if let Some(addr) = maybe_addr {
@@ -1391,27 +1389,35 @@ where
         size: OperandSize,
         extend: Option<Extend<Zero>>,
     ) -> Result<()> {
-        // Emission for this instruction is a bit trickier. The address for the CAS is the 3rd from
-        // the top of the stack, and we must emit instruction to compute the actual address with
-        // `emit_compute_heap_address_align_checked`, while we still have access to self. However,
-        // some ISAs have requirements with regard to the registers used for some arguments, so we
-        // need to pass the context to the masm. To solve this issue, we pop the two first
-        // arguments from the stack, compute the address, push back the arguments, and hand over
-        // the control to masm. The implementer of `atomic_cas` can expect to find `expected` and
-        // `replacement` at the top the context's stack.
+        let clobbers = M::atomic_cas_clobbers();
 
-        // pop the args
-        let replacement = self.context.pop_to_reg(self.masm, None)?;
-        let expected = self.context.pop_to_reg(self.masm, None)?;
-
-        if let Some(addr) = self.emit_compute_heap_address_align_checked(arg, size)? {
-            // push back the args
-            self.context.stack.push(expected.into());
-            self.context.stack.push(replacement.into());
+        let (replacement, expected, index) =
+            self.context
+                .without::<Result<(TypedReg, TypedReg, TypedReg)>, _, _>(
+                    &clobbers,
+                    self.masm,
+                    |context, masm| {
+                        Ok((
+                            context.pop_to_reg(masm, None)?,
+                            context.pop_to_reg(masm, None)?,
+                            context.pop_to_reg(masm, None)?,
+                        ))
+                    },
+                )??;
 
+        if let Some(addr) =
+            self.emit_compute_heap_address_align_checked(arg, size, Index::from_typed_reg(index))?
+        {
             let src = self.masm.address_at_reg(addr, 0)?;
 
-            self.masm
-                .atomic_cas(&mut self.context, src, size, UNTRUSTED_FLAGS, extend)?;
```
[message truncated]
MarinPostma closed without merge PR #10076.
MarinPostma commented on PR #10076:
Your implementation is indeed more elegant than what I came up with. But generally I agree with you. This would only be beneficial for atomic operations, which by themselves are accepted to be somewhat costly. I don't think the cost of the added complexity outweighs the benefits. When I started this, I hoped that it would benefit more instructions, but that was a bad intuition.
I'll close this one, but thanks for your feedback! :)