alexcrichton opened Issue #1295:
Investigating a spurious test failure I'm able to reproduce this locally on Linux with a script that looks like this:
```rust
use std::os::unix::prelude::*;
use std::process::Command;

fn main() {
    let s = Command::new("cargo").arg("build").status().unwrap();
    assert!(s.success());
    let threads = (0..28)
        .map(|_| {
            std::thread::spawn(|| {
                for _ in 0..1_000_000 {
                    let s = Command::new("./target/debug/wasmtime")
                        .arg("--disable-cache")
                        .arg("./tests/wasm/unreachable.wat")
                        .output()
                        .unwrap();
                    if s.status.signal().is_some() {
                        println!("{}", s.status);
                        println!("{:?}", s.status.signal());
                        println!("{:?}", s);
                        std::process::exit(1);
                    }
                }
            })
        })
        .collect::<Vec<_>>();
    for t in threads {
        t.join().unwrap()
    }
}
```
I'm not really sure why concurrent execution of wasmtime is necessary, but I wasn't able to reproduce this with only one wasmtime process at a time.
This almost instantly has one of the subprocesses segfault for me locally, and the core dump shows the stack trace looks like:
```
#0  _dl_runtime_resolve_xsavec () at ../sysdeps/x86_64/dl-trampoline.h:92
#1  0x00007f01cec4163e in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#2  0x00007f01cec41bd6 in _Unwind_Find_FDE () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#3  0x00007f01cec3e183 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#4  0x00007f01cec3f360 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#5  0x00007f01cec40068 in _Unwind_Backtrace () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#6  0x000056042eee2d7d in backtrace::backtrace::libunwind::trace (cb=...) at /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.44/src/backtrace/libunwind.rs:86
#7  backtrace::backtrace::trace_unsynchronized (cb=...) at /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.44/src/backtrace/mod.rs:66
#8  0x000056042eee2cfe in backtrace::backtrace::trace (cb=...) at /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.44/src/backtrace/mod.rs:53
#9  0x000056042eed573d in backtrace::capture::Backtrace::create (ip=94575967164064) at /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.44/src/capture.rs:164
#10 0x000056042eed56b4 in backtrace::capture::Backtrace::new_unresolved () at /home/alex/.cargo/registry/src/github.com-1ecc6299db9ec823/backtrace-0.3.44/src/capture.rs:158
#11 0x000056042eeceb08 in wasmtime_runtime::traphandlers::CallThreadState::handle_trap (self=0x7fff0284edd0, pc=0x7f01cf687005 "\017\v", reset_guard_page=false, call_handler=...) at crates/runtime/src/traphandlers.rs:343
#12 0x000056042eecef60 in wasmtime_runtime::traphandlers::HandleTrap::{{closure}} (info=...) at crates/runtime/src/traphandlers.rs:36
#13 0x0000000000000000 in ?? ()
```
cc @yurydelendik it looks like this is something related to FDE information perhaps?
alexcrichton commented on Issue #1295:
Oh also for posterity, the `unreachable.wat` file looks like:
```wat
(module
  (func (export "_start")
    unreachable
  )
)
```
alexcrichton commented on Issue #1295:
@sunfishcode pointed me at https://github.com/rust-lang/rust/issues/69533 which looks about right. The failing instruction is `mov %rax,(%rsp)`, which looks like a stack overflow, so sorry @yurydelendik, you can probably disregard this; it looks purely stack-related.
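For anyone who wants to see that signature in isolation, here is a tiny standalone demo (hypothetical, not wasmtime code; assumes a Linux host): deliberately overflowing the stack and inspecting the fault in gdb shows the same pattern, a push or a store through the stack pointer landing in the guard page.

```rust
// Standalone stack-overflow demo (sketch only, not part of wasmtime).
// Run under gdb; the SIGSEGV lands on a push or a `mov ..., (%rsp)`-style
// store once the guard page below the thread's stack is reached.
fn recurse(depth: u64) -> u64 {
    // A sizeable local keeps each frame large so the guard page is reached
    // quickly; using it after the recursive call avoids a tail-call shape.
    let pad = [depth; 1024];
    recurse(depth + 1) + pad[depth as usize % 1024]
}

fn main() {
    // This recurses forever on purpose (the compiler will warn about it).
    println!("{}", recurse(0));
}
```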
alexcrichton commented on Issue #1295:
It looks like this isn't isolated to debug builds; even release builds are failing with a stack trace that looks like:
```
#0  0x00007f08b8faf242 in do_lookup_x (undef_name=undef_name@entry=0x7f08b8763d69 "free", new_hash=new_hash@entry=2090266759, old_hash=old_hash@entry=0x7f08b91ca080, ref=0x7f08b8762718, result=result@entry=0x7f08b91ca090, scope=0x7f08b91ce428, i=0, version=0x7f08b91aa7b0, flags=5, skip=0x0, type_class=1, undef_map=0x7f08b91acf00) at dl-lookup.c:338
#1  0x00007f08b8fb01ef in _dl_lookup_symbol_x (undef_name=0x7f08b8763d69 "free", undef_map=0x7f08b91acf00, ref=ref@entry=0x7f08b91ca128, symbol_scope=0x7f08b91ad258, version=0x7f08b91aa7b0, type_class=type_class@entry=1, flags=5, skip_map=<optimized out>) at dl-lookup.c:813
#2  0x00007f08b8fb4ec3 in _dl_fixup (l=<optimized out>, reloc_arg=<optimized out>) at ../elf/dl-runtime.c:112
#3  0x00007f08b8fbc7ca in _dl_runtime_resolve_xsavec () at ../sysdeps/x86_64/dl-trampoline.h:125
#4  0x00007f08b877463e in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#5  0x00007f08b8774bd6 in _Unwind_Find_FDE () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#6  0x00007f08b8771183 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#7  0x00007f08b8772360 in ?? () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#8  0x00007f08b8773068 in _Unwind_Backtrace () from /lib/x86_64-linux-gnu/libgcc_s.so.1
#9  0x0000000000000000 in ?? ()
```
which is actually sort of crazy! We try to avoid having the dynamic symbol loader activate while in a signal handler, but this is resolving the symbol `free`, which is causing the problem. Who knew!
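One mitigation worth noting (separate from the real fix discussed below; the wrapper shown here is purely illustrative): glibc's lazy PLT binding is what drags `_dl_fixup`/`do_lookup_x` into the handler in the first place, and ld.so(8) documents `LD_BIND_NOW` as a way to resolve all symbols at program startup instead, so nothing like `free` gets resolved inside a signal handler.

```rust
// Hypothetical launcher sketch: re-run wasmtime with eager symbol binding so
// the dynamic loader never performs lazy resolution (_dl_fixup) from inside a
// signal handler. This only sidesteps the symptom; the stack exhaustion that
// triggers it still needs a real fix.
use std::process::Command;

fn main() {
    let status = Command::new("./target/debug/wasmtime")
        .env("LD_BIND_NOW", "1") // documented by ld.so(8): resolve all symbols at startup
        .arg("--disable-cache")
        .arg("./tests/wasm/unreachable.wat")
        .status()
        .expect("failed to spawn wasmtime");
    std::process::exit(status.code().unwrap_or(1));
}
```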
alexcrichton commented on Issue #1295:
This is perhaps another point in favor of https://github.com/bytecodealliance/wasmtime/issues/900, where we don't execute segfault code on the sigaltstack. We're clearly executing quite a lot of code that has blown the sigaltstack set up by libstd (which is pretty small by default). I think it would be best to have segfault handlers execute on the main execution stack, allowing us to sidestep these sorts of issues.
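For a rough idea of what that looks like at the libc level, here is a minimal sketch assuming the `libc` crate (not the actual wasmtime patch): installing the SIGSEGV handler without `SA_ONSTACK` makes it run on the faulting thread's regular stack rather than the small sigaltstack that libstd installs.

```rust
// Minimal sketch (assumes the `libc` crate; not the actual wasmtime change):
// a SIGSEGV handler registered *without* SA_ONSTACK runs on the faulting
// thread's normal stack instead of the sigaltstack. Trade-off: if the fault
// is itself a stack overflow on that thread, there is no room left for the
// handler to run.
use std::mem;

extern "C" fn handle_segv(_sig: libc::c_int) {
    // Real handlers must restrict themselves to async-signal-safe operations.
    unsafe { libc::_exit(1) };
}

fn install_segv_handler() {
    unsafe {
        let mut sa: libc::sigaction = mem::zeroed();
        sa.sa_sigaction = handle_segv as usize;
        sa.sa_flags = 0; // deliberately no SA_ONSTACK (and no SA_SIGINFO in this sketch)
        libc::sigemptyset(&mut sa.sa_mask);
        libc::sigaction(libc::SIGSEGV, &sa, std::ptr::null_mut());
    }
}

fn main() {
    install_segv_handler();
}
```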
alexcrichton commented on Issue #1295:
Fixed in https://github.com/bytecodealliance/wasmtime/pull/1315
alexcrichton closed Issue #1295: