From 19e0ce2c03d8e0baa16998b086665664c420c1df Mon Sep 17 00:00:00 2001 From: Jade Lovelace Date: Sat, 24 Aug 2024 21:11:30 -0700 Subject: main: log stack traces for std::terminate These stack traces kind of suck for the reasons mentioned on the CppTrace page here (no symbols for inline functions is a major one): https://github.com/jeremy-rifkin/cpptrace I would consider using CppTrace if it were packaged, but to be honest, I think that the more reasonable option is actually to move entirely to out-of-process crash handling and symbolization. The reason for this is that if you want to generate anything of substance on SIGSEGV or really any deadly signal, you are stuck in async-signal-safe land, which is not a place to be trying to run a symbolizer. LLVM does it anyway, probably carefully, and chromium *can* do it on debug builds but in general uses crashpad: https://source.chromium.org/chromium/chromium/src/+/main:base/debug/stack_trace_posix.cc;l=974;drc=82dff63dbf9db05e9274e11d9128af7b9f51ceaa;bpv=1;bpt=1 However, some stack traces are better than *no* stack traces when we get mystery exceptions falling out the bottom of the program. I've also promoted the path for "mystery exceptions falling out the bottom of the program" to hard crash and generate a core dump because although there's been some months since the last one of these, these are nonetheless always *atrociously* diagnosed. We can't improve the crash handling further until either we use Crashpad (which involves more C++ deps, no thanks) or we put in the ostensibly work in progress Rust minidump infrastructure, in which case we need to finish full support for Rust in libutil first. Sample report: Lix crashed. This is a bug. We would appreciate if you report it at https://git.lix.systems/lix-project/lix/issues with the following information included: Exception: std::runtime_error: lol Stack trace: 0# nix::printStackTrace() in /home/jade/lix/lix3/build/src/nix/../libutil/liblixutil.so 1# 0x000073C9862331F2 in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so 2# 0x000073C985F2E21A in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 3# 0x000073C985F2E285 in /nix/store/p44qan69linp3ii0xrviypsw2j4qdcp2-gcc-13.2.0-lib/lib/libstdc++.so.6 4# nix::handleExceptions(std::__cxx11::basic_string, std::allocator > const&, std::function) in /home/jade/lix/lix3/build/src/nix/../libmain/liblixmain.so 5# 0x00005CF65B6B048B in /home/jade/lix/lix3/build/src/nix/nix 6# 0x000073C985C8810E in /nix/store/dbcw19dshdwnxdv5q2g6wldj6syyvq7l-glibc-2.39-52/lib/libc.so.6 7# __libc_start_main in /nix/store/dbcw19dshdwnxdv5q2g6wldj6syyvq7l-glibc-2.39-52/lib/libc.so.6 8# 0x00005CF65B610335 in /home/jade/lix/lix3/build/src/nix/nix Change-Id: I1a9f6d349b617fd7145a37159b78ecb9382cb4e9 --- src/libmain/crash-handler.cc | 41 +++++++++++++++++++++++++++++++++++++++++ src/libmain/crash-handler.hh | 21 +++++++++++++++++++++ src/libmain/meson.build | 2 ++ src/libmain/shared.cc | 14 ++++++++++---- src/libmain/shared.hh | 2 +- src/libstore/globals.cc | 2 +- 6 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 src/libmain/crash-handler.cc create mode 100644 src/libmain/crash-handler.hh (limited to 'src') diff --git a/src/libmain/crash-handler.cc b/src/libmain/crash-handler.cc new file mode 100644 index 000000000..3f1b9f7d8 --- /dev/null +++ b/src/libmain/crash-handler.cc @@ -0,0 +1,41 @@ +#include "crash-handler.hh" +#include "fmt.hh" + +#include +#include + +namespace nix { + +namespace { +void onTerminate() +{ + std::cerr << "Lix crashed. This is a bug. We would appreciate if you report it along with what caused it at https://git.lix.systems/lix-project/lix/issues with the following information included:\n\n"; + try { + std::exception_ptr eptr = std::current_exception(); + if (eptr) { + std::rethrow_exception(eptr); + } else { + std::cerr << "std::terminate() called without exception\n"; + } + } catch (const std::exception & ex) { + std::cerr << "Exception: " << boost::core::demangle(typeid(ex).name()) << ": " << ex.what() << "\n"; + } catch (...) { + std::cerr << "Unknown exception! Spooky.\n"; + } + + std::cerr << "Stack trace:\n"; + nix::printStackTrace(); + + std::abort(); +} +} + +void registerCrashHandler() +{ + // DO NOT use this for signals. Boost stacktrace is very much not + // async-signal-safe, and in a world with ASLR, addr2line is pointless. + // + // If you want signals, set up a minidump system and do it out-of-process. + std::set_terminate(onTerminate); +} +} diff --git a/src/libmain/crash-handler.hh b/src/libmain/crash-handler.hh new file mode 100644 index 000000000..4c5641b8c --- /dev/null +++ b/src/libmain/crash-handler.hh @@ -0,0 +1,21 @@ +#pragma once +/// @file Crash handler for Lix that prints back traces (hopefully in instances where it is not just going to crash the process itself). +/* + * Author's note: This will probably be partially/fully supplanted by a + * minidump writer like the following once we get our act together on crashes a + * little bit more: + * https://github.com/rust-minidump/minidump-writer + * https://github.com/EmbarkStudios/crash-handling + * (out of process implementation *should* be able to be done on-demand) + * + * Such an out-of-process implementation could then both make minidumps and + * print stack traces for arbitrarily messed-up process states such that we can + * safely give out backtraces for SIGSEGV and other deadly signals. + */ + +namespace nix { + +/** Registers the Lix crash handler for std::terminate (currently; will support more crashes later). See also detectStackOverflow(). */ +void registerCrashHandler(); + +} diff --git a/src/libmain/meson.build b/src/libmain/meson.build index a7cce287c..a1a888c16 100644 --- a/src/libmain/meson.build +++ b/src/libmain/meson.build @@ -1,5 +1,6 @@ libmain_sources = files( 'common-args.cc', + 'crash-handler.cc', 'loggers.cc', 'progress-bar.cc', 'shared.cc', @@ -8,6 +9,7 @@ libmain_sources = files( libmain_headers = files( 'common-args.hh', + 'crash-handler.hh', 'loggers.hh', 'progress-bar.hh', 'shared.hh', diff --git a/src/libmain/shared.cc b/src/libmain/shared.cc index bc9548e09..64bd00606 100644 --- a/src/libmain/shared.cc +++ b/src/libmain/shared.cc @@ -1,3 +1,4 @@ +#include "crash-handler.hh" #include "globals.hh" #include "shared.hh" #include "store-api.hh" @@ -118,6 +119,8 @@ static void sigHandler(int signo) { } void initNix() { + registerCrashHandler(); + /* Turn on buffering for cerr. */ static char buf[1024]; std::cerr.rdbuf()->pubsetbuf(buf, sizeof(buf)); @@ -335,12 +338,15 @@ int handleExceptions(const std::string & programName, std::function fun) } catch (BaseError & e) { logError(e.info()); return e.info().status; - } catch (std::bad_alloc & e) { + } catch (const std::bad_alloc & e) { printError(error + "out of memory"); return 1; - } catch (std::exception & e) { - printError(error + e.what()); - return 1; + } catch (const std::exception & e) { + // Random exceptions bubbling into main are cause for bug reports, crash + std::terminate(); + } catch (...) { + // Explicitly do not tolerate non-std exceptions escaping. + std::terminate(); } return 0; diff --git a/src/libmain/shared.hh b/src/libmain/shared.hh index b41efe567..49b72a54e 100644 --- a/src/libmain/shared.hh +++ b/src/libmain/shared.hh @@ -111,7 +111,7 @@ struct PrintFreed /** - * Install a SIGSEGV handler to detect stack overflows. + * Install a SIGSEGV handler to detect stack overflows. See also registerCrashHandler(). */ void detectStackOverflow(); diff --git a/src/libstore/globals.cc b/src/libstore/globals.cc index ffc2543ef..f43b759d2 100644 --- a/src/libstore/globals.cc +++ b/src/libstore/globals.cc @@ -443,7 +443,7 @@ static bool initLibStoreDone = false; void assertLibStoreInitialized() { if (!initLibStoreDone) { printError("The program must call nix::initNix() before calling any libstore library functions."); - abort(); + std::terminate(); }; } -- cgit v1.2.3