Skip to main content

calltrace/
crash_handler.rs

//! Crash Handler Module
//!
//! This module provides crash detection and detailed crash reporting functionality.
//! It installs signal handlers for common crash signals, overriding any handlers that are
//! already present (the previous handlers are saved so they can be restored later),
//! and generates comprehensive crash reports including the current call stack.
6
7use once_cell::sync::Lazy;
8use std::arch::asm;
9use std::collections::HashMap;
10use std::ffi::{c_void, CStr};
11use std::sync::atomic::{AtomicBool, Ordering};
12use std::sync::{Arc, Mutex, Once};
13
14use libc::{
15    dladdr, sigaction, sigaddset, sigemptyset, siginfo_t, ucontext_t, Dl_info, SA_NODEFER,
16    SA_RESTART, SA_SIGINFO, SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP,
17};
18
19use crate::error::{CallTraceError, Result};
20use crate::json_output::JsonOutputGenerator;
21use crate::register_reader::RegisterContext;
22
23/// Global crash handler state
24static CRASH_HANDLER_INIT: Once = Once::new();
25static CRASH_HANDLER_ENABLED: AtomicBool = AtomicBool::new(false);
26
27/// Storage for original signal handlers
28static ORIGINAL_HANDLERS: Lazy<Arc<Mutex<HashMap<i32, libc::sigaction>>>> =
29    once_cell::sync::Lazy::new(|| Arc::new(Mutex::new(HashMap::new())));
30
31/// Crash information structure
32#[derive(Debug, Clone)]
33pub struct CrashInfo {
34    pub signal: i32,
35    pub signal_name: String,
36    pub thread_id: u64,
37    pub fault_address: Option<u64>,
38    pub instruction_pointer: Option<u64>,
39    pub stack_pointer: Option<u64>,
40    pub register_context: Option<RegisterContext>,
41    pub backtrace: Vec<StackFrame>,
42    pub call_tree_snapshot: Option<String>,
43    pub crash_time: std::time::SystemTime,
44}
45
46/// Stack frame information
47#[derive(Debug, Clone, serde::Serialize)]
48pub struct StackFrame {
49    pub address: u64,
50    pub function_name: Option<String>,
51    pub library_name: Option<String>,
52    pub offset: Option<u64>,
53}
54
55/// Signals to handle for crash detection
56const CRASH_SIGNALS: &[i32] = &[
57    SIGSEGV, // Segmentation fault
58    SIGABRT, // Abort signal
59    SIGILL,  // Illegal instruction
60    SIGFPE,  // Floating point exception
61    SIGBUS,  // Bus error
62    SIGTRAP, // Trace/breakpoint trap
63];
64
65/// Initialize crash handler
66pub fn init_crash_handler() -> Result<()> {
67    CRASH_HANDLER_INIT.call_once(|| {
68        if let Err(e) = setup_crash_handlers() {
69            eprintln!("CallTrace: Failed to initialize crash handler: {:?}", e);
70        } else {
71            CRASH_HANDLER_ENABLED.store(true, Ordering::Relaxed);
72            if std::env::var("CALLTRACE_DEBUG").is_ok() {
73                eprintln!("CallTrace: Crash handler initialized successfully");
74            }
75        }
76    });
77    Ok(())
78}
79
80/// Setup signal handlers for crash detection
81fn setup_crash_handlers() -> Result<()> {
82    let mut original_handlers = ORIGINAL_HANDLERS.lock().map_err(|_| {
83        CallTraceError::InitializationError("Failed to lock original handlers".to_string())
84    })?;
85
86    for &signal in CRASH_SIGNALS {
87        // Store the current handler for restoration during cleanup
88        let mut existing_action: libc::sigaction = unsafe { std::mem::zeroed() };
89        let result = unsafe { sigaction(signal, std::ptr::null(), &mut existing_action) };
90
91        if result != 0 {
92            continue; // Skip this signal if we can't query it
93        }
94
95        // Store the original handler for restoration later
96        original_handlers.insert(signal, existing_action);
97
98        // Install our crash handler (override any existing handler)
99        let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() };
100        new_action.sa_sigaction = crash_signal_handler as *const () as usize;
101        new_action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
102
103        // Initialize signal mask
104        unsafe {
105            sigemptyset(&mut new_action.sa_mask);
106            for &other_signal in CRASH_SIGNALS {
107                if other_signal != signal {
108                    sigaddset(&mut new_action.sa_mask, other_signal);
109                }
110            }
111        }
112
113        let install_result = unsafe { sigaction(signal, &new_action, std::ptr::null_mut()) };
114
115        if install_result != 0 {
116            eprintln!("CallTrace: Failed to install handler for signal {}", signal);
117        } else if std::env::var("CALLTRACE_DEBUG").is_ok() {
118            eprintln!("CallTrace: Installed crash handler for signal {} (overriding any existing handler)", signal);
119        }
120    }
121
122    Ok(())
123}
124
125/// Main crash signal handler
126extern "C" fn crash_signal_handler(signal: i32, siginfo: *mut siginfo_t, context: *mut c_void) {
127    // Avoid recursive crashes in the handler
128    static HANDLER_RUNNING: AtomicBool = AtomicBool::new(false);
129
130    if HANDLER_RUNNING.load(Ordering::Acquire) {
131        unsafe {
132            libc::_exit(128 + signal);
133        }
134    }
135    HANDLER_RUNNING.store(true, Ordering::Release);
136
137    // Generate crash report
138    let crash_info = match generate_crash_info(signal, siginfo, context) {
139        Ok(info) => info,
140        Err(e) => {
141            eprintln!("CallTrace: Failed to generate crash info: {:?}", e);
142            unsafe {
143                libc::_exit(128 + signal);
144            }
145        }
146    };
147
148    // Output crash report
149    if let Err(e) = output_crash_report(&crash_info) {
150        eprintln!("CallTrace: Failed to output crash report: {:?}", e);
151    }
152
153    // Restore original handler and re-raise the signal
154    restore_and_reraise(signal);
155}
156
157/// Generate comprehensive crash information
158fn generate_crash_info(
159    signal: i32,
160    siginfo: *mut siginfo_t,
161    context: *mut c_void,
162) -> Result<CrashInfo> {
163    let signal_name = get_signal_name(signal);
164
165    // Capture current thread ID
166    let thread_id = unsafe {
167        #[cfg(target_os = "linux")]
168        {
169            libc::gettid() as u64
170        }
171        #[cfg(not(target_os = "linux"))]
172        {
173            // Fallback to pthread thread ID on other systems
174            libc::pthread_self() as u64
175        }
176    };
177
178    // Extract fault address and context information
179    let (fault_address, instruction_pointer, stack_pointer) =
180        extract_context_info(siginfo, context);
181
182    // Capture current register context (if possible)
183    let register_context = unsafe { RegisterContext::capture().ok() };
184
185    // Generate backtrace
186    let backtrace = generate_backtrace()?;
187
188    // Capture current call tree state
189    let call_tree_snapshot = capture_call_tree_snapshot();
190
191    Ok(CrashInfo {
192        signal,
193        signal_name,
194        thread_id,
195        fault_address,
196        instruction_pointer,
197        stack_pointer,
198        register_context,
199        backtrace,
200        call_tree_snapshot,
201        crash_time: std::time::SystemTime::now(),
202    })
203}
204
205/// Extract context information from signal info and ucontext
206fn extract_context_info(
207    siginfo: *mut siginfo_t,
208    context: *mut c_void,
209) -> (Option<u64>, Option<u64>, Option<u64>) {
210    if siginfo.is_null() || context.is_null() {
211        return (None, None, None);
212    }
213
214    let fault_address = unsafe {
215        let info = &*siginfo;
216        if info.si_signo == SIGSEGV || info.si_signo == SIGBUS {
217            // Access fault address from siginfo_t
218            #[cfg(target_arch = "x86_64")]
219            {
220                // On x86_64, fault address is in si_addr field
221                // For simplicity, we'll try to access it directly
222                // This may not work on all platforms - a real implementation would use proper bindings
223                None // Simplified for now - would need platform-specific offset calculation
224            }
225            #[cfg(not(target_arch = "x86_64"))]
226            {
227                None
228            }
229        } else {
230            None
231        }
232    };
233
234    let (instruction_pointer, stack_pointer) = unsafe {
235        #[cfg(target_arch = "x86_64")]
236        {
237            let ucontext = &*(context as *const ucontext_t);
238            // Access registers from ucontext - this is platform specific
239            // On Linux x86_64, we can access them through mcontext
240            let _mcontext_ptr = &ucontext.uc_mcontext as *const _ as *const u8;
241
242            // These offsets are architecture and OS specific
243            // For a production implementation, we'd use proper bindings
244            // For now, we'll try a simpler approach
245            (None, None) // Simplified for now
246        }
247
248        #[cfg(not(target_arch = "x86_64"))]
249        {
250            (None, None)
251        }
252    };
253
254    (fault_address, instruction_pointer, stack_pointer)
255}
256
257/// Generate stack backtrace
258/// This is a simplified implementation that captures basic stack information
259fn generate_backtrace() -> Result<Vec<StackFrame>> {
260    // For now, we'll implement a basic backtrace using frame pointers
261    // In a production implementation, this would use libunwind or similar
262    let mut frames = Vec::new();
263
264    // Try to get some basic stack frames using unsafe frame pointer walking
265    unsafe {
266        let mut frame_ptr: *const *const c_void;
267
268        // Get current frame pointer
269        #[cfg(target_arch = "x86_64")]
270        {
271            asm!("mov {}, rbp", out(reg) frame_ptr, options(nomem, nostack));
272        }
273
274        #[cfg(not(target_arch = "x86_64"))]
275        {
276            // For non-x86_64, we can't easily walk the stack
277            return Ok(frames);
278        }
279
280        // Walk up to 32 frames
281        for _ in 0..32 {
282            if frame_ptr.is_null() {
283                break;
284            }
285
286            // Get return address (next pointer in frame)
287            let return_addr_ptr = frame_ptr.offset(1);
288            if return_addr_ptr.is_null() {
289                break;
290            }
291
292            let return_addr = *return_addr_ptr as u64;
293            if return_addr == 0 {
294                break;
295            }
296
297            // Create frame info
298            let frame = resolve_stack_frame(return_addr);
299            frames.push(frame);
300
301            // Move to next frame
302            frame_ptr = *frame_ptr as *const *const c_void;
303
304            // Safety check to prevent infinite loops
305            if (frame_ptr as u64) < 0x1000 || (frame_ptr as u64) > 0x7fffffffffff {
306                break;
307            }
308        }
309    }
310
311    Ok(frames)
312}
313
314/// Resolve stack frame information using dladdr
315fn resolve_stack_frame(address: u64) -> StackFrame {
316    let mut dl_info: Dl_info = unsafe { std::mem::zeroed() };
317    let result = unsafe { dladdr(address as *const c_void, &mut dl_info) };
318
319    let (function_name, library_name, offset) = if result != 0 {
320        let func_name = if !dl_info.dli_sname.is_null() {
321            let raw_name = unsafe {
322                CStr::from_ptr(dl_info.dli_sname)
323                    .to_string_lossy()
324                    .into_owned()
325            };
326
327            // Try to demangle C++ function names
328            Some(crate::dwarf_analyzer::demangle_function_name(&raw_name))
329        } else {
330            None
331        };
332
333        let lib_name = if !dl_info.dli_fname.is_null() {
334            unsafe {
335                Some(
336                    CStr::from_ptr(dl_info.dli_fname)
337                        .to_string_lossy()
338                        .into_owned(),
339                )
340            }
341        } else {
342            None
343        };
344
345        let offset_val = if !dl_info.dli_saddr.is_null() {
346            Some(address - (dl_info.dli_saddr as u64))
347        } else {
348            None
349        };
350
351        (func_name, lib_name, offset_val)
352    } else {
353        (None, None, None)
354    };
355
356    StackFrame {
357        address,
358        function_name,
359        library_name,
360        offset,
361    }
362}
363
364/// Capture current call tree state as JSON string
365fn capture_call_tree_snapshot() -> Option<String> {
366    use crate::CALL_TREE_MANAGER;
367
368    match JsonOutputGenerator::new().generate_output(&CALL_TREE_MANAGER) {
369        Ok(trace_session) => serde_json::to_string_pretty(&trace_session).ok(),
370        Err(_) => None,
371    }
372}
373
374/// Output crash report to stderr and optional file
375fn output_crash_report(crash_info: &CrashInfo) -> Result<()> {
376    let report = format_crash_report(crash_info)?;
377
378    // Always output human-readable format to stderr
379    eprintln!("\n{}", "=".repeat(80));
380    eprintln!("CALLTRACE CRASH REPORT");
381    eprintln!("{}", "=".repeat(80));
382    eprintln!("{}", report);
383    eprintln!("{}", "=".repeat(80));
384
385    // Write JSON crash report to file
386    if let Some(base_filename) = crate::get_base_output_filename() {
387        let crash_file = format!("{}.json", base_filename);
388
389        // Convert to JSON format
390        let crash_json = convert_crash_info_to_json(crash_info)?;
391
392        // Serialize to JSON
393        let json_content = match serde_json::to_string_pretty(&crash_json) {
394            Ok(json) => json,
395            Err(e) => {
396                eprintln!("CallTrace: Failed to serialize crash info to JSON: {}", e);
397                return Ok(());
398            }
399        };
400
401        if let Err(e) = std::fs::write(&crash_file, &json_content) {
402            eprintln!(
403                "CallTrace: Failed to write crash report to {}: {}",
404                crash_file, e
405            );
406        } else {
407            eprintln!("CallTrace: Crash report written to {}", crash_file);
408        }
409    }
410
411    Ok(())
412}
413
414/// Format crash report as human-readable text
415fn format_crash_report(crash_info: &CrashInfo) -> Result<String> {
416    let mut report = String::new();
417
418    report.push_str(&format!(
419        "Signal: {} ({})\n",
420        crash_info.signal, crash_info.signal_name
421    ));
422    report.push_str(&format!("Thread ID: {}\n", crash_info.thread_id));
423    report.push_str(&format!(
424        "Time: {}\n",
425        format_system_time(&crash_info.crash_time)
426    ));
427
428    if let Some(fault_addr) = crash_info.fault_address {
429        report.push_str(&format!("Fault Address: 0x{:016x}\n", fault_addr));
430    }
431
432    if let Some(ip) = crash_info.instruction_pointer {
433        report.push_str(&format!("Instruction Pointer: 0x{:016x}\n", ip));
434    }
435
436    if let Some(sp) = crash_info.stack_pointer {
437        report.push_str(&format!("Stack Pointer: 0x{:016x}\n", sp));
438    }
439
440    report.push_str("\nStack Trace:\n");
441    for (i, frame) in crash_info.backtrace.iter().enumerate() {
442        report.push_str(&format!("  #{:2}: 0x{:016x}", i, frame.address));
443
444        if let Some(ref func) = frame.function_name {
445            report.push_str(&format!(" in {}", func));
446            if let Some(offset) = frame.offset {
447                report.push_str(&format!("+0x{:x}", offset));
448            }
449        }
450
451        if let Some(ref lib) = frame.library_name {
452            report.push_str(&format!(" ({})", lib));
453        }
454
455        report.push('\n');
456    }
457
458    if let Some(ref call_tree) = crash_info.call_tree_snapshot {
459        report.push_str("\nCall Tree at Crash:\n");
460        report.push_str(call_tree);
461    }
462
463    Ok(report)
464}
465
466/// Convert CrashInfo to JSON-serializable format with normal trace session data
467fn convert_crash_info_to_json(crash_info: &CrashInfo) -> Result<crate::json_output::TraceSession> {
468    // Generate normal trace session data first
469    let mut trace_session = crate::JsonOutputGenerator::new()
470        .generate_output(&crate::CALL_TREE_MANAGER)
471        .map_err(|e| {
472            CallTraceError::InitializationError(format!(
473                "Failed to generate trace session: {:?}",
474                e
475            ))
476        })?;
477
478    // Format timestamp
479    let (crash_time_str, crash_timestamp) = format_crash_time(&crash_info.crash_time);
480
481    // Convert backtrace
482    let backtrace_json = crash_info
483        .backtrace
484        .iter()
485        .map(|frame| crate::json_output::StackFrame {
486            address: format!("0x{:016x}", frame.address),
487            function_name: frame.function_name.clone(),
488            library_name: frame.library_name.clone(),
489            offset: frame.offset.map(|off| format!("0x{:x}", off)),
490        })
491        .collect();
492
493    // Create crash info
494    let crash_info_json = crate::json_output::CrashInfo {
495        signal: crash_info.signal,
496        signal_name: crash_info.signal_name.clone(),
497        thread_id: crash_info.thread_id,
498        fault_address: crash_info
499            .fault_address
500            .map(|addr| format!("0x{:016x}", addr)),
501        instruction_pointer: crash_info
502            .instruction_pointer
503            .map(|ip| format!("0x{:016x}", ip)),
504        stack_pointer: crash_info.stack_pointer.map(|sp| format!("0x{:016x}", sp)),
505        register_context: crash_info.register_context.clone(),
506        backtrace: backtrace_json,
507        crash_time: crash_time_str,
508        crash_timestamp,
509    };
510
511    // Add crash info to trace session
512    trace_session.crash = Some(crash_info_json);
513
514    Ok(trace_session)
515}
516
517/// Format crash time for JSON output
518fn format_crash_time(system_time: &std::time::SystemTime) -> (String, f64) {
519    match system_time.duration_since(std::time::UNIX_EPOCH) {
520        Ok(duration) => {
521            let total_seconds = duration.as_secs();
522            let microseconds = duration.subsec_micros();
523            let timestamp_decimal = total_seconds as f64 + (microseconds as f64 / 1_000_000.0);
524
525            // Get human readable time
526            let readable_time = get_readable_time(total_seconds);
527
528            (readable_time, timestamp_decimal)
529        }
530        Err(_) => (format!("{:?}", system_time), 0.0),
531    }
532}
533
534/// Format SystemTime to human-readable string
535fn format_system_time(system_time: &std::time::SystemTime) -> String {
536    match system_time.duration_since(std::time::UNIX_EPOCH) {
537        Ok(duration) => {
538            let total_seconds = duration.as_secs();
539            let microseconds = duration.subsec_micros();
540
541            // Convert to local time using UNIX utilities approach
542            // Format as: YYYY-MM-DD HH:MM:SS.uuuuuu (timestamp)
543            let timestamp_decimal = format!("{}.{:06}", total_seconds, microseconds);
544
545            // Try to get human readable time via system command
546            let readable_time = get_readable_time(total_seconds);
547
548            format!("{} ({})", readable_time, timestamp_decimal)
549        }
550        Err(_) => {
551            // Fallback to debug format if time is before UNIX epoch
552            format!("{:?}", system_time)
553        }
554    }
555}
556
/// Get a human-readable time string for a UNIX timestamp (seconds).
///
/// First tries the system `date` command (`date -d @<ts>`), which yields local
/// time including the time-zone abbreviation. When the command is missing,
/// fails, or prints non-UTF-8 output, the timestamp is formatted in UTC by
/// [`format_utc_timestamp`].
fn get_readable_time(timestamp: u64) -> String {
    use std::process::Command;

    // Try to use system 'date' command for proper timezone handling
    let from_date_command = Command::new("date")
        .arg("-d")
        .arg(format!("@{}", timestamp))
        .arg("+%Y-%m-%d %H:%M:%S %Z")
        .output()
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| String::from_utf8(output.stdout).ok());

    match from_date_command {
        Some(time_str) => time_str.trim().to_string(),
        None => format_utc_timestamp(timestamp),
    }
}

/// Format a UNIX timestamp as `YYYY-MM-DD HH:MM:SS UTC` using an exact
/// proleptic-Gregorian calendar conversion (leap years included).
fn format_utc_timestamp(timestamp: u64) -> String {
    const SECONDS_PER_DAY: u64 = 86_400;
    const DAYS_PER_ERA: u64 = 146_097; // one 400-year Gregorian cycle
    const DAYS_FROM_0000_03_01_TO_EPOCH: u64 = 719_468;

    let days_since_epoch = timestamp / SECONDS_PER_DAY;
    let seconds_in_day = timestamp % SECONDS_PER_DAY;

    let hours = seconds_in_day / 3600;
    let minutes = (seconds_in_day % 3600) / 60;
    let seconds = seconds_in_day % 60;

    // Days-to-civil conversion. Years are counted from 1 March so that the
    // leap day is the last day of the (shifted) year.
    let shifted_days = days_since_epoch + DAYS_FROM_0000_03_01_TO_EPOCH;
    let era = shifted_days / DAYS_PER_ERA;
    let day_of_era = shifted_days % DAYS_PER_ERA;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    let shifted_month = (5 * day_of_year + 2) / 153; // 0 = March ... 11 = February
    let day = day_of_year - (153 * shifted_month + 2) / 5 + 1;
    let month = if shifted_month < 10 {
        shifted_month + 3
    } else {
        shifted_month - 9
    };
    // January and February belong to the next civil year.
    let year = year_of_era + era * 400 + u64::from(month <= 2);

    format!(
        "{:04}-{:02}-{:02} {:02}:{:02}:{:02} UTC",
        year, month, day, hours, minutes, seconds
    )
}
605
/// Map a signal number to a descriptive name.
///
/// The handled crash signals get their symbolic name plus a short
/// description; any other number is rendered as `Signal <n>`.
fn get_signal_name(signal: i32) -> String {
    let description = match signal {
        SIGSEGV => "SIGSEGV (Segmentation fault)",
        SIGABRT => "SIGABRT (Abort)",
        SIGILL => "SIGILL (Illegal instruction)",
        SIGFPE => "SIGFPE (Floating point exception)",
        SIGBUS => "SIGBUS (Bus error)",
        SIGTRAP => "SIGTRAP (Trace/breakpoint trap)",
        _ => return format!("Signal {}", signal),
    };
    description.to_string()
}
618
619/// Restore original signal handler and re-raise the signal
620fn restore_and_reraise(signal: i32) {
621    // Restore original handler
622    if let Ok(original_handlers) = ORIGINAL_HANDLERS.lock() {
623        if let Some(original_action) = original_handlers.get(&signal) {
624            unsafe {
625                sigaction(signal, original_action, std::ptr::null_mut());
626            }
627        }
628    }
629
630    // Re-raise the signal to trigger default behavior
631    unsafe {
632        libc::raise(signal);
633    }
634
635    // If raise didn't work, exit with error code
636    unsafe {
637        libc::_exit(128 + signal);
638    }
639}
640
641/// Reinforce crash handlers (reinstall to override any handlers set after initialization)
642pub fn reinforce_crash_handlers() -> Result<()> {
643    if !CRASH_HANDLER_ENABLED.load(Ordering::Relaxed) {
644        return Ok(()); // Not enabled, nothing to do
645    }
646
647    for &signal in CRASH_SIGNALS {
648        // Install our crash handler (override any existing handler)
649        let mut new_action: libc::sigaction = unsafe { std::mem::zeroed() };
650        new_action.sa_sigaction = crash_signal_handler as *const () as usize;
651        new_action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
652
653        // Initialize signal mask
654        unsafe {
655            sigemptyset(&mut new_action.sa_mask);
656            for &other_signal in CRASH_SIGNALS {
657                if other_signal != signal {
658                    sigaddset(&mut new_action.sa_mask, other_signal);
659                }
660            }
661        }
662
663        let install_result = unsafe { sigaction(signal, &new_action, std::ptr::null_mut()) };
664
665        if install_result == 0 {
666            // Only print reinforcement messages during debug mode
667            if std::env::var("CALLTRACE_DEBUG").is_ok() {
668                eprintln!("CallTrace: Reinforced crash handler for signal {}", signal);
669            }
670        }
671    }
672
673    Ok(())
674}
675
676/// Check if crash handler is enabled
677pub fn is_crash_handler_enabled() -> bool {
678    CRASH_HANDLER_ENABLED.load(Ordering::Relaxed)
679}
680
681/// Cleanup crash handler (restore original handlers)
682pub fn cleanup_crash_handler() {
683    if let Ok(original_handlers) = ORIGINAL_HANDLERS.lock() {
684        for (&signal, original_action) in original_handlers.iter() {
685            unsafe {
686                sigaction(signal, original_action, std::ptr::null_mut());
687            }
688        }
689    }
690    CRASH_HANDLER_ENABLED.store(false, Ordering::Relaxed);
691}