running_process/broker/lifecycle/
crash_dump.rs1use std::backtrace::Backtrace;
8use std::fs::{self, OpenOptions};
9use std::io::{self, Write};
10use std::path::{Path, PathBuf};
11use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
12use std::time::{SystemTime, UNIX_EPOCH};
13
/// Name of the environment variable that, when set to a non-empty value,
/// overrides the directory crash reports are written to.
pub const CRASH_DUMP_DIR_ENV: &str = "RUNNING_PROCESS_BROKER_CRASH_DUMP_DIR";

/// Flipped to `true` by the first `install` call that reaches the hook
/// replacement step; later calls observe it and return early.
static INSTALLED: AtomicBool = AtomicBool::new(false);
/// Incremented once per panic handled by the installed hook; the value read
/// before the increment becomes part of the report file name.
static CRASH_SEQUENCE: AtomicU64 = AtomicU64::new(0);
19
20#[derive(Debug, thiserror::Error)]
22pub enum CrashDumpError {
23 #[error(
25 "invalid broker crash dump component name {component:?}; use 1-64 ASCII letters, digits, '-' or '_'"
26 )]
27 InvalidComponent {
28 component: String,
30 },
31 #[error("failed to create crash dump directory {path:?}: {source}")]
33 Directory {
34 path: PathBuf,
36 source: io::Error,
38 },
39}
40
41pub fn install(component: &str) -> Result<(), CrashDumpError> {
47 validate_component(component)?;
48 let dir = default_crash_dump_dir();
49 fs::create_dir_all(&dir).map_err(|source| CrashDumpError::Directory {
50 path: dir.clone(),
51 source,
52 })?;
53
54 if INSTALLED.swap(true, Ordering::AcqRel) {
55 return Ok(());
56 }
57
58 let component = component.to_string();
59 let previous_hook = std::panic::take_hook();
60 std::panic::set_hook(Box::new(move |info| {
61 let sequence = CRASH_SEQUENCE.fetch_add(1, Ordering::AcqRel);
62 let timestamp_millis = current_unix_timestamp_millis();
63 let path = crash_report_path(
64 &dir,
65 &component,
66 std::process::id(),
67 timestamp_millis,
68 sequence,
69 );
70 if let Err(err) = write_panic_report(&path, &component, info) {
71 let _ = writeln!(
72 std::io::stderr(),
73 "failed to write broker crash report to {path:?}: {err}"
74 );
75 }
76 previous_hook(info);
77 }));
78
79 Ok(())
80}
81
82fn default_crash_dump_dir() -> PathBuf {
83 if let Some(path) = std::env::var_os(CRASH_DUMP_DIR_ENV) {
84 if !path.as_os_str().is_empty() {
85 return PathBuf::from(path);
86 }
87 }
88 std::env::temp_dir()
89 .join("running-process")
90 .join("crash-dumps")
91}
92
93fn validate_component(component: &str) -> Result<(), CrashDumpError> {
94 if component_is_valid(component) {
95 Ok(())
96 } else {
97 Err(CrashDumpError::InvalidComponent {
98 component: component.to_string(),
99 })
100 }
101}
102
/// Returns `true` when `component` is 1 to 64 bytes long and every byte is an
/// ASCII letter, an ASCII digit, `-` or `_`.
fn component_is_valid(component: &str) -> bool {
    const MAX_LEN: usize = 64;

    if component.is_empty() || component.len() > MAX_LEN {
        return false;
    }

    component
        .bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'_'))
}
110
/// Returns the number of milliseconds between the Unix epoch and the current
/// system time, or `0` when the system time is earlier than the epoch.
fn current_unix_timestamp_millis() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        // The clock reads before the epoch; report zero rather than fail.
        Err(_) => 0,
    }
}
117
/// Builds the path of a crash report inside `dir`.
///
/// The file name has the form
/// `{component}-{pid}-{timestamp_millis}-{sequence}.panic.txt`.
fn crash_report_path(
    dir: &Path,
    component: &str,
    pid: u32,
    timestamp_millis: u128,
    sequence: u64,
) -> PathBuf {
    let file_name = format!("{component}-{pid}-{timestamp_millis}-{sequence}.panic.txt");
    let mut report = dir.to_path_buf();
    report.push(file_name);
    report
}
129
130fn write_panic_report(
131 path: &Path,
132 component: &str,
133 info: &std::panic::PanicHookInfo<'_>,
134) -> io::Result<()> {
135 if let Some(parent) = path.parent() {
136 fs::create_dir_all(parent)?;
137 }
138
139 let mut file = OpenOptions::new().write(true).create_new(true).open(path)?;
140 let thread = std::thread::current();
141 let thread_name = thread.name().unwrap_or("<unnamed>");
142
143 writeln!(file, "component: {component}")?;
144 writeln!(file, "pid: {}", std::process::id())?;
145 writeln!(file, "thread: {thread_name}")?;
146 writeln!(
147 file,
148 "timestamp_millis: {}",
149 current_unix_timestamp_millis()
150 )?;
151 match info.location() {
152 Some(location) => {
153 writeln!(
154 file,
155 "location: {}:{}:{}",
156 location.file(),
157 location.line(),
158 location.column()
159 )?;
160 }
161 None => {
162 writeln!(file, "location: <unknown>")?;
163 }
164 }
165 writeln!(file, "payload: {}", panic_payload(info))?;
166 writeln!(file)?;
167 writeln!(file, "backtrace:")?;
168 writeln!(file, "{}", Backtrace::force_capture())?;
169 Ok(())
170}
171
/// Extracts the panic message from `info` as an owned string.
///
/// Payloads of type `&str` and `String` are returned as-is; any other payload
/// type yields the placeholder `<non-string panic payload>`.
fn panic_payload(info: &std::panic::PanicHookInfo<'_>) -> String {
    let payload = info.payload();
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_owned())
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "<non-string panic payload>".to_owned())
}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Component names end up inside a file name, so only short names made of
    /// ASCII letters, digits, `-` and `_` may be accepted.
    #[test]
    fn component_names_are_filename_safe() {
        assert!(component_is_valid("broker"));
        assert!(component_is_valid("broker_v1"));
        assert!(component_is_valid("broker-v1"));
        assert!(!component_is_valid(""));
        assert!(!component_is_valid("../broker"));
        assert!(!component_is_valid("broker v1"));
        // Length boundary: 64 bytes is the longest accepted name, 65 is rejected.
        assert!(component_is_valid(&"a".repeat(64)));
        assert!(!component_is_valid(&"a".repeat(65)));
        // Non-ASCII letters and other separator characters are rejected too.
        assert!(!component_is_valid("br\u{f6}ker"));
        assert!(!component_is_valid("broker.v1"));
        assert!(!component_is_valid("broker\\v1"));
    }

    /// The report file name encodes every identifying field in a fixed order.
    #[test]
    fn crash_report_path_includes_component_pid_timestamp_and_sequence() {
        let path = crash_report_path(Path::new("/tmp/dumps"), "broker", 42, 1234, 7);
        assert_eq!(
            path,
            Path::new("/tmp/dumps").join("broker-42-1234-7.panic.txt")
        );
    }

    /// The error carries the rejected name unchanged so callers can report it.
    #[test]
    fn invalid_component_reports_original_value() {
        let err = validate_component("bad/name").unwrap_err();
        assert!(matches!(
            err,
            CrashDumpError::InvalidComponent { component } if component == "bad/name"
        ));
    }
}