Skip to main content

running_process/broker/lifecycle/
crash_dump.rs

1//! Crash diagnostics for the broker process.
2//!
3//! This installs a process-wide panic hook early in broker startup so
4//! unexpected Rust panics leave a small text crash report even when the
5//! process is daemonized or launched by a service manager.
6
7use std::backtrace::Backtrace;
8use std::fs::{self, OpenOptions};
9use std::io::{self, Write};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
12use std::time::{SystemTime, UNIX_EPOCH};
13
/// Environment variable that overrides the crash report directory.
///
/// An unset or empty value is ignored and the temp-dir default is used.
pub const CRASH_DUMP_DIR_ENV: &str = "RUNNING_PROCESS_BROKER_CRASH_DUMP_DIR";

// Flipped to `true` by the first `install` call that passes validation and
// directory creation; later calls see `true` and leave the panic hook alone.
static INSTALLED: AtomicBool = AtomicBool::new(false);
// Incremented once per panic handled by the hook and embedded in the report
// filename, alongside the pid and a millisecond timestamp.
static CRASH_SEQUENCE: AtomicU64 = AtomicU64::new(0);
19
/// Errors returned while installing crash diagnostics.
#[derive(Debug, thiserror::Error)]
pub enum CrashDumpError {
    /// The component name was rejected.
    ///
    /// Component names become part of crash report filenames, so only
    /// 1-64 ASCII letters, digits, `-` or `_` are accepted.
    #[error(
        "invalid broker crash dump component name {component:?}; use 1-64 ASCII letters, digits, '-' or '_'"
    )]
    InvalidComponent {
        /// Invalid component name supplied by the caller.
        component: String,
    },
    /// The crash report directory could not be created.
    #[error("failed to create crash dump directory {path:?}: {source}")]
    Directory {
        /// Directory that could not be created.
        path: PathBuf,
        /// Underlying I/O error reported by the filesystem call.
        source: io::Error,
    },
}
40
41/// Install the broker crash diagnostics hook.
42///
43/// The hook is process-wide and idempotent. It writes panic reports to
44/// [`CRASH_DUMP_DIR_ENV`] when set, otherwise to
45/// `std::env::temp_dir()/running-process/crash-dumps`.
46pub fn install(component: &str) -> Result<(), CrashDumpError> {
47    validate_component(component)?;
48    let dir = default_crash_dump_dir();
49    fs::create_dir_all(&dir).map_err(|source| CrashDumpError::Directory {
50        path: dir.clone(),
51        source,
52    })?;
53
54    if INSTALLED.swap(true, Ordering::AcqRel) {
55        return Ok(());
56    }
57
58    let component = component.to_string();
59    let previous_hook = std::panic::take_hook();
60    std::panic::set_hook(Box::new(move |info| {
61        let sequence = CRASH_SEQUENCE.fetch_add(1, Ordering::AcqRel);
62        let timestamp_millis = current_unix_timestamp_millis();
63        let path = crash_report_path(
64            &dir,
65            &component,
66            std::process::id(),
67            timestamp_millis,
68            sequence,
69        );
70        if let Err(err) = write_panic_report(&path, &component, info) {
71            let _ = writeln!(
72                std::io::stderr(),
73                "failed to write broker crash report to {path:?}: {err}"
74            );
75        }
76        previous_hook(info);
77    }));
78
79    Ok(())
80}
81
82fn default_crash_dump_dir() -> PathBuf {
83    if let Some(path) = std::env::var_os(CRASH_DUMP_DIR_ENV) {
84        if !path.as_os_str().is_empty() {
85            return PathBuf::from(path);
86        }
87    }
88    std::env::temp_dir()
89        .join("running-process")
90        .join("crash-dumps")
91}
92
93fn validate_component(component: &str) -> Result<(), CrashDumpError> {
94    if component_is_valid(component) {
95        Ok(())
96    } else {
97        Err(CrashDumpError::InvalidComponent {
98            component: component.to_string(),
99        })
100    }
101}
102
/// Report whether `component` is safe to embed in a crash report filename.
///
/// Accepts 1 to 64 bytes, each an ASCII letter, ASCII digit, `-` or `_`.
fn component_is_valid(component: &str) -> bool {
    // Length is measured in bytes; any non-ASCII character is rejected by the
    // per-byte check below anyway.
    if component.is_empty() || component.len() > 64 {
        return false;
    }
    component
        .bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_'))
}
110
/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// Yields `0` when the clock reports a time before the epoch.
fn current_unix_timestamp_millis() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        // Clock is set before 1970: report zero rather than failing.
        Err(_) => 0,
    }
}
117
/// Build the full path of a crash report inside `dir`.
///
/// The filename is `{component}-{pid}-{timestamp_millis}-{sequence}.panic.txt`.
fn crash_report_path(
    dir: &Path,
    component: &str,
    pid: u32,
    timestamp_millis: u128,
    sequence: u64,
) -> PathBuf {
    let file_name = format!("{component}-{pid}-{timestamp_millis}-{sequence}.panic.txt");
    let mut report = dir.to_path_buf();
    report.push(file_name);
    report
}
129
130fn write_panic_report(
131    path: &Path,
132    component: &str,
133    info: &std::panic::PanicHookInfo<'_>,
134) -> io::Result<()> {
135    if let Some(parent) = path.parent() {
136        fs::create_dir_all(parent)?;
137    }
138
139    let mut file = OpenOptions::new().write(true).create_new(true).open(path)?;
140    let thread = std::thread::current();
141    let thread_name = thread.name().unwrap_or("<unnamed>");
142
143    writeln!(file, "component: {component}")?;
144    writeln!(file, "pid: {}", std::process::id())?;
145    writeln!(file, "thread: {thread_name}")?;
146    writeln!(
147        file,
148        "timestamp_millis: {}",
149        current_unix_timestamp_millis()
150    )?;
151    match info.location() {
152        Some(location) => {
153            writeln!(
154                file,
155                "location: {}:{}:{}",
156                location.file(),
157                location.line(),
158                location.column()
159            )?;
160        }
161        None => {
162            writeln!(file, "location: <unknown>")?;
163        }
164    }
165    writeln!(file, "payload: {}", panic_payload(info))?;
166    writeln!(file)?;
167    writeln!(file, "backtrace:")?;
168    writeln!(file, "{}", Backtrace::force_capture())?;
169    Ok(())
170}
171
/// Render the panic payload as text for the crash report.
///
/// Handles `&str` and `String` payloads; any other payload type is reported
/// as `<non-string panic payload>`.
fn panic_payload(info: &std::panic::PanicHookInfo<'_>) -> String {
    let payload = info.payload();
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "<non-string panic payload>".to_string())
}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Accepts letters, digits, `-` and `_`; rejects everything else and
    /// enforces the 1-64 byte length window on both edges.
    #[test]
    fn component_names_are_filename_safe() {
        assert!(component_is_valid("broker"));
        assert!(component_is_valid("broker_v1"));
        assert!(component_is_valid("broker-v1"));
        // Both inclusive length bounds are accepted.
        assert!(component_is_valid("a"));
        assert!(component_is_valid(&"a".repeat(64)));
        assert!(!component_is_valid(""));
        assert!(!component_is_valid("../broker"));
        assert!(!component_is_valid("broker v1"));
        assert!(!component_is_valid("broker.v1"));
        // Non-ASCII letters are not filename-safe for this purpose.
        assert!(!component_is_valid("br\u{f6}ker"));
        assert!(!component_is_valid(&"a".repeat(65)));
    }

    /// The filename encodes every identifying field in a fixed order.
    #[test]
    fn crash_report_path_includes_component_pid_timestamp_and_sequence() {
        let path = crash_report_path(Path::new("/tmp/dumps"), "broker", 42, 1234, 7);
        assert_eq!(
            path,
            Path::new("/tmp/dumps").join("broker-42-1234-7.panic.txt")
        );
    }

    /// The error carries the rejected name verbatim so callers can show it.
    #[test]
    fn invalid_component_reports_original_value() {
        let err = validate_component("bad/name").unwrap_err();
        assert!(matches!(
            err,
            CrashDumpError::InvalidComponent { component } if component == "bad/name"
        ));
    }
}