fitzgen added the pulley label to Issue #9163.
fitzgen opened issue #9163:
We should rewrite the core of the interpreter and its main loop such that we can easily flip a cargo feature on to start using nightly Rust's
feature(explicit_tail_calls)
.The way I am imagining this would be done (warning: super rough ideas incoming) is something like this:
// Define a few different things used by the tail-call-agnostic parts of the // interpreter, based on whether or not we are using explicit tail calls: // // 1. A macro that the rest of the crate uses to define opcode handlers. Users // of the macro provide do the following: // // * Take machine state and PC as "arguments". // * Decode their associated opcode's immediates and operands (the PC has // already been decoded). // * Return either `Ok(new_pc)` or // `Err(Continuation::{Trap,ReturnToHost,HostCall})` to break out of the // interpreter loop. // // 2. An `OpcodeHandler` function type alias. // // 3. A `run` function implementing the innermost interpreter loop. // Version that does NOT use explicit tail calls. #[cfg(not(feature = "explicit_tail_calls"))] mod no_tail_calls { use super::*; // A handler function returns the next handler function to call, or else // updates the `continuation` to be `Continuation::Trap` or whatever, as // appropriate. pub type OpcodeHandler = fn(&mut MachineState, &mut UnsafeBytecodeStream, &mut Continuation) -> NextOpcodeHandler; // A newtype because Rust doesn't like cyclic type aliases, even though it // shouldn't be an issue in this particular case. pub struct NextOpcodeHandler(pub OpcodeHandler); macro_rules! define_opcode_handler { ( fn $name:ident ( $state:pat : &mut MachineState, $pc:pat : UnsafeBytecodeStream, ) { $body:expr } ) => { fn $name( state: &mut MachineState, pc: &mut UnsafeBytecodeStream, continuation: &mut Continuation, ) -> Option<OpcodeHandler> { match (|$state, $pat| $body)(state, *pc) { Ok(mut new_pc) => { // Decode the next handler and return it so that `run` // can call it. let next_opcode = Opcode::decode(&mut new_pc).unwrap(); *pc = new_pc; let next_handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; Some(next_handler) } Err(k) => { debug_assert_ne!(k, Continuation::Continue); *continuation = k; None } } } }; } pub fn run(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Result<(), *mut u8> { let mut continuation = Continuation::Continue; let opcode = Opcode::decode(&mut pc).unwrap(); let mut handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; // As tight as we can get the interpreter loop without tail calls: while // the handlers keep returning the next handler to call, call it. while let Some(NextOpcodeHandler(next_handler)) = handler(&mut vm.state, &mut pc, &mut continuation) { handler = next_handler; } // When a handler doesn't return the next handler to call, then we are // doing something exceptional. match continuation { Continuation::Trap => vm.trap(pc), Continuation::ReturnToHost => vm.return_to_host(), Continuation::HostCall => vm.host_call(), Continuation::Continue => unreachable!(), } } } #[cfg(not(feature = "explicit_tail_calls"))] pub use no_tail_calls::*; // Version that DOES use explicit tail calls. #[cfg(feature = "explicit_tail_calls")] mod tail_calls { use super::*; // A handler function tail calls to the next handler, if any, and ultimately // updates the `continuation` to be `Continuation::Trap` or whatever when it // is finally time to break out of the interpreter loop, as appropriate. pub type OpcodeHandler = fn(&mut MachineState, &mut UnsafeBytecodeStream, &mut Continuation); macro_rules! define_opcode_handler { ( fn $name:ident ( $state:pat : &mut MachineState, $pc:pat : UnsafeBytecodeStream, ) { $body:expr } ) => { fn $name( state: &mut MachineState, pc: &mut UnsafeBytecodeStream, continuation: &mut Continuation, ) { match (|$state, $pc| $body)(state, *pc) { Ok(mut new_pc) => { // Decode the next opcode and tail call to the next handler. let next_opcode = Opcode::decode(&mut new_pc).unwrap(); *pc = new_pc; let next_handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; become next_handler(state, pc, continuation); } Err(k) => { debug_assert_ne!(k, Continuation::Continue); *continuation = k; None } } } }; } pub fn run(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Result<(), *mut u8> { let mut continuation = Continuation::Continue; let opcode = Opcode::decode(&mut pc).unwrap(); let mut handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; // The ideal interpreter loop: a bunch of opcode handlers tail calling // each other! handler(&mut vm.state, &mut pc, &mut continuation); match continuation { Continuation::Trap => vm.trap(pc), Continuation::ReturnToHost => vm.return_to_host(), Continuation::HostCall => vm.host_call(), Continuation::Continue => unreachable!(), } } } #[cfg(feature = "explicit_tail_calls")] pub use tail_calls::*; /// Define the table of opcode handlers. macro_rules! opcode_handler_table_entry { // ... } static OPCODE_HANDLER_TABLE: &[OpcodeHandler] = &[ for_each_op!(opcode_handler_table_entry), extended_op_handler, ]; static EXTENDED_OPCODE_HANDLER_TABLE: &[OpcodeHandler] = &[for_each_extended_op!(opcode_handler_table_entry)]; // A few examples of defining opcode handlers: define_opcode_handler! { fn xadd32( state: &mut MachineState, mut pc: UnsafeBytecodeStream, ) { // The `decode` module will need to be extended with methods to decode // an instructions immediates and operands, assuming that the associated // opcode has already been deecoded. let BinaryOperands { dst, src1, src2 } = decode::xadd32_imms_and_operands(&mut pc).unwrap(); let a = state[src1].get_i32(); let b = state[src[src2]].get_i32(); state[dst].set_i32(a.wrapping_add(b)); Ok(pc) } } define_opcode_handler! { fn br_if( state: &mut MachineState, mut pc: UnsafeBytecodeStream, ) { let (cond, offset) = decode::br_if_imms_and_operands(&mut pc).unwrap(); if state[cond].get_u64() != 0 { let new_pc = pc_rel_jump(pc, offset, 6); Ok(new_pc) } else { Ok(pc) } } } define_opcode_handler! { fn trap( _state: &mut MachineState, _pc: UnsafeBytecodeStream, ) { Err(Continuation::Trap) } }
cc @Kmeakin, since you've been doing some Pulley stuff
github-actions[bot] commented on issue #9163:
Subscribe to Label Action
cc @fitzgen
<details>
This issue or pull request has been labeled: "pulley"Thus the following users have been cc'd because of the following labels:
- fitzgen: pulley
To subscribe or unsubscribe from this label, edit the <code>.github/subscribe-to-label.json</code> configuration file.
Learn more.
</details>
Kmeakin commented on issue #9163:
define_opcode_handler! {
fn xadd32(
state: &mut MachineState,
mut pc: UnsafeBytecodeStream,
) {
// Thedecode
module will need to be extended with methods to decode
// an instructions immediates and operands, assuming that the associated
// opcode has already been deecoded.
let BinaryOperands { dst, src1, src2 } = decode::xadd32_imms_and_operands(&mut pc).unwrap();let a = state[src1].get_i32(); let b = state[src[src2]].get_i32(); state[dst].set_i32(a.wrapping_add(b)); Ok(pc) }
}
Why is a separate decoder function for the arguments needed? Why can't the existing decoder functions be used?
Kmeakin commented on issue #9163:
Oh and how can we verify that the tailcall optimization does indeed result in the desired assembly (ie each opcode handler does an indirect branch to the next handler, rather than a branch back to the top of the loop)? Copy/pasting the code into Compiler Explorer and looking at the output is doable, but not automatable.
alexcrichton commented on issue #9163:
For the verification, I think that's a property of the
become
keyword and the compiler implementation? There's no actual loop in the source itself andbecome
is defined as always performing a tail call, so pending compiler bugs I think we can probably skip the automated verification and just spot-check some disassembly of profiles perhaps to confirm it's happening?
fitzgen commented on issue #9163:
Why is a separate decoder function for the arguments needed? Why can't the existing decoder functions be used?
Just so that we don't have to manually remember "
xadd32
takesBinaryOperands<XReg>
as its only operands and no immediates" and can have that stuff stay in sync with the instruction definition by construction. Especially if/when we tweak an instruction's definition so that the compiler tells us all the places we need to update, rather than trying to remember and hoping we got all the right places.I fully expect these would be macro-generated like most stuff in this crate and defer to the underlying
Decode
implementations.Does that make senes?
fitzgen edited a comment on issue #9163:
Why is a separate decoder function for the arguments needed? Why can't the existing decoder functions be used?
Just so that we don't have to manually remember "
xadd32
takesBinaryOperands<XReg>
as its only operands and no immediates" and can have that stuff stay in sync with the instruction definition by construction. Especially if/when we tweak an instruction's definition so that the compiler tells us all the places we need to update, rather than trying to remember and hoping we got all the right places.I fully expect these would be macro-generated like most stuff in this crate and defer to the underlying
Decode
implementations.Does that make sense?
alexcrichton closed issue #9163:
We should rewrite the core of the interpreter and its main loop such that we can easily flip a cargo feature on to start using nightly Rust's
feature(explicit_tail_calls)
.The way I am imagining this would be done (warning: super rough ideas incoming) is something like this:
// Define a few different things used by the tail-call-agnostic parts of the // interpreter, based on whether or not we are using explicit tail calls: // // 1. A macro that the rest of the crate uses to define opcode handlers. Users // of the macro provide do the following: // // * Take machine state and PC as "arguments". // * Decode their associated opcode's immediates and operands (the PC has // already been decoded). // * Return either `Ok(new_pc)` or // `Err(Continuation::{Trap,ReturnToHost,HostCall})` to break out of the // interpreter loop. // // 2. An `OpcodeHandler` function type alias. // // 3. A `run` function implementing the innermost interpreter loop. // Version that does NOT use explicit tail calls. #[cfg(not(feature = "explicit_tail_calls"))] mod no_tail_calls { use super::*; // A handler function returns the next handler function to call, or else // updates the `continuation` to be `Continuation::Trap` or whatever, as // appropriate. pub type OpcodeHandler = fn(&mut MachineState, &mut UnsafeBytecodeStream, &mut Continuation) -> NextOpcodeHandler; // A newtype because Rust doesn't like cyclic type aliases, even though it // shouldn't be an issue in this particular case. pub struct NextOpcodeHandler(pub OpcodeHandler); macro_rules! define_opcode_handler { ( fn $name:ident ( $state:pat : &mut MachineState, $pc:pat : UnsafeBytecodeStream, ) { $body:expr } ) => { fn $name( state: &mut MachineState, pc: &mut UnsafeBytecodeStream, continuation: &mut Continuation, ) -> Option<OpcodeHandler> { match (|$state, $pat| $body)(state, *pc) { Ok(mut new_pc) => { // Decode the next handler and return it so that `run` // can call it. let next_opcode = Opcode::decode(&mut new_pc).unwrap(); *pc = new_pc; let next_handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; Some(next_handler) } Err(k) => { debug_assert_ne!(k, Continuation::Continue); *continuation = k; None } } } }; } pub fn run(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Result<(), *mut u8> { let mut continuation = Continuation::Continue; let opcode = Opcode::decode(&mut pc).unwrap(); let mut handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; // As tight as we can get the interpreter loop without tail calls: while // the handlers keep returning the next handler to call, call it. while let Some(NextOpcodeHandler(next_handler)) = handler(&mut vm.state, &mut pc, &mut continuation) { handler = next_handler; } // When a handler doesn't return the next handler to call, then we are // doing something exceptional. match continuation { Continuation::Trap => vm.trap(pc), Continuation::ReturnToHost => vm.return_to_host(), Continuation::HostCall => vm.host_call(), Continuation::Continue => unreachable!(), } } } #[cfg(not(feature = "explicit_tail_calls"))] pub use no_tail_calls::*; // Version that DOES use explicit tail calls. #[cfg(feature = "explicit_tail_calls")] mod tail_calls { use super::*; // A handler function tail calls to the next handler, if any, and ultimately // updates the `continuation` to be `Continuation::Trap` or whatever when it // is finally time to break out of the interpreter loop, as appropriate. pub type OpcodeHandler = fn(&mut MachineState, &mut UnsafeBytecodeStream, &mut Continuation); macro_rules! define_opcode_handler { ( fn $name:ident ( $state:pat : &mut MachineState, $pc:pat : UnsafeBytecodeStream, ) { $body:expr } ) => { fn $name( state: &mut MachineState, pc: &mut UnsafeBytecodeStream, continuation: &mut Continuation, ) { match (|$state, $pc| $body)(state, *pc) { Ok(mut new_pc) => { // Decode the next opcode and tail call to the next handler. let next_opcode = Opcode::decode(&mut new_pc).unwrap(); *pc = new_pc; let next_handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; become next_handler(state, pc, continuation); } Err(k) => { debug_assert_ne!(k, Continuation::Continue); *continuation = k; None } } } }; } pub fn run(vm: &mut Vm, mut pc: UnsafeBytecodeStream) -> Result<(), *mut u8> { let mut continuation = Continuation::Continue; let opcode = Opcode::decode(&mut pc).unwrap(); let mut handler = OPCODE_HANDLER_TABLE[next_opcode as u8]; // The ideal interpreter loop: a bunch of opcode handlers tail calling // each other! handler(&mut vm.state, &mut pc, &mut continuation); match continuation { Continuation::Trap => vm.trap(pc), Continuation::ReturnToHost => vm.return_to_host(), Continuation::HostCall => vm.host_call(), Continuation::Continue => unreachable!(), } } } #[cfg(feature = "explicit_tail_calls")] pub use tail_calls::*; /// Define the table of opcode handlers. macro_rules! opcode_handler_table_entry { // ... } static OPCODE_HANDLER_TABLE: &[OpcodeHandler] = &[ for_each_op!(opcode_handler_table_entry), extended_op_handler, ]; static EXTENDED_OPCODE_HANDLER_TABLE: &[OpcodeHandler] = &[for_each_extended_op!(opcode_handler_table_entry)]; // A few examples of defining opcode handlers: define_opcode_handler! { fn xadd32( state: &mut MachineState, mut pc: UnsafeBytecodeStream, ) { // The `decode` module will need to be extended with methods to decode // an instructions immediates and operands, assuming that the associated // opcode has already been deecoded. let BinaryOperands { dst, src1, src2 } = decode::xadd32_imms_and_operands(&mut pc).unwrap(); let a = state[src1].get_i32(); let b = state[src[src2]].get_i32(); state[dst].set_i32(a.wrapping_add(b)); Ok(pc) } } define_opcode_handler! { fn br_if( state: &mut MachineState, mut pc: UnsafeBytecodeStream, ) { let (cond, offset) = decode::br_if_imms_and_operands(&mut pc).unwrap(); if state[cond].get_u64() != 0 { let new_pc = pc_rel_jump(pc, offset, 6); Ok(new_pc) } else { Ok(pc) } } } define_opcode_handler! { fn trap( _state: &mut MachineState, _pc: UnsafeBytecodeStream, ) { Err(Continuation::Trap) } }
cc @Kmeakin, since you've been doing some Pulley stuff
alexcrichton commented on issue #9163:
This is implemented now :+1:
Last updated: Jan 24 2025 at 00:11 UTC