seq_runtime/
diagnostics.rs

1//! Runtime diagnostics for production debugging
2//!
3//! Provides a SIGQUIT (kill -3) handler that dumps runtime statistics to stderr,
4//! similar to JVM thread dumps. This is useful for debugging production issues
5//! without stopping the process.
6//!
7//! ## Usage
8//!
9//! Send SIGQUIT to a running Seq process:
10//! ```bash
11//! kill -3 <pid>
12//! ```
13//!
14//! The process will dump diagnostics to stderr and continue running.
15//!
16//! ## Signal Safety
17//!
18//! Signal handlers can only safely call async-signal-safe functions. Our
19//! dump_diagnostics() does I/O and acquires locks, which is NOT safe to call
20//! directly from a signal handler. Instead, we spawn a dedicated thread that
21//! waits for signals using signal-hook's iterator API, making all the I/O
22//! operations safe.
23//!
24//! ## Feature Flag
25//!
26//! This module is only compiled when the `diagnostics` feature is enabled (default).
27//! Disable it for benchmarks to eliminate SystemTime::now() syscalls and strand
28//! registry overhead on every spawn.
29
30#![cfg(feature = "diagnostics")]
31
32use crate::memory_stats::memory_registry;
33use crate::scheduler::{
34    ACTIVE_STRANDS, PEAK_STRANDS, TOTAL_COMPLETED, TOTAL_SPAWNED, strand_registry,
35};
36use std::sync::Once;
37use std::sync::atomic::Ordering;
38
/// One-time guard used by `install_signal_handler`: the initialization
/// closure passed to `call_once` runs at most once per process, which is
/// what makes repeated installation calls harmless.
static SIGNAL_HANDLER_INIT: Once = Once::new();

/// Maximum number of individual strands to display in diagnostics output
/// to avoid overwhelming the output for programs with many strands
const STRAND_DISPLAY_LIMIT: usize = 20;
44
45/// Install the SIGQUIT signal handler for diagnostics
46///
47/// This is called automatically by scheduler_init, but can be called
48/// explicitly if needed. Safe to call multiple times (idempotent).
49///
50/// # Implementation
51///
52/// Uses a dedicated thread to handle signals safely. The signal-hook iterator
53/// API ensures we're not calling non-async-signal-safe functions from within
54/// a signal handler context.
55pub fn install_signal_handler() {
56    SIGNAL_HANDLER_INIT.call_once(|| {
57        #[cfg(unix)]
58        {
59            use signal_hook::consts::SIGQUIT;
60            use signal_hook::iterator::Signals;
61
62            // Create signal iterator - this is safe and doesn't block
63            let mut signals = match Signals::new([SIGQUIT]) {
64                Ok(s) => s,
65                Err(_) => return, // Silently fail if we can't register
66            };
67
68            // Spawn a dedicated thread to handle signals
69            // This thread blocks waiting for signals, then safely calls dump_diagnostics()
70            std::thread::Builder::new()
71                .name("seq-diagnostics".to_string())
72                .spawn(move || {
73                    for sig in signals.forever() {
74                        if sig == SIGQUIT {
75                            dump_diagnostics();
76                        }
77                    }
78                })
79                .ok(); // Silently fail if thread spawn fails
80        }
81
82        #[cfg(not(unix))]
83        {
84            // Signal handling not supported on non-Unix platforms
85            // Diagnostics can still be called directly via dump_diagnostics()
86        }
87    });
88}
89
90/// Dump runtime diagnostics to stderr
91///
92/// This can be called directly from code or triggered via SIGQUIT.
93/// Output goes to stderr to avoid mixing with program output.
94pub fn dump_diagnostics() {
95    use std::io::Write;
96
97    let mut out = std::io::stderr().lock();
98
99    let _ = writeln!(out, "\n=== Seq Runtime Diagnostics ===");
100    let _ = writeln!(out, "Timestamp: {:?}", std::time::SystemTime::now());
101
102    // Strand statistics (global atomics - accurate)
103    let active = ACTIVE_STRANDS.load(Ordering::Relaxed);
104    let total_spawned = TOTAL_SPAWNED.load(Ordering::Relaxed);
105    let total_completed = TOTAL_COMPLETED.load(Ordering::Relaxed);
106    let peak = PEAK_STRANDS.load(Ordering::Relaxed);
107
108    let _ = writeln!(out, "\n[Strands]");
109    let _ = writeln!(out, "  Active:    {}", active);
110    let _ = writeln!(out, "  Spawned:   {} (total)", total_spawned);
111    let _ = writeln!(out, "  Completed: {} (total)", total_completed);
112    let _ = writeln!(out, "  Peak:      {} (high-water mark)", peak);
113
114    // Calculate potential leak indicator
115    // If spawned > completed + active, some strands were lost (panic, etc.)
116    let expected_completed = total_spawned.saturating_sub(active as u64);
117    if total_completed < expected_completed {
118        let lost = expected_completed - total_completed;
119        let _ = writeln!(
120            out,
121            "  WARNING: {} strands may have been lost (panic/abort)",
122            lost
123        );
124    }
125
126    // Active strand details from registry
127    let registry = strand_registry();
128    let overflow = registry.overflow_count.load(Ordering::Relaxed);
129
130    let _ = writeln!(out, "\n[Active Strand Details]");
131    let _ = writeln!(out, "  Registry capacity: {} slots", registry.capacity());
132    if overflow > 0 {
133        let _ = writeln!(
134            out,
135            "  WARNING: {} strands exceeded registry capacity (not tracked)",
136            overflow
137        );
138    }
139
140    // Get current time for duration calculation
141    let now = std::time::SystemTime::now()
142        .duration_since(std::time::UNIX_EPOCH)
143        .map(|d| d.as_secs())
144        .unwrap_or(0);
145
146    // Collect and sort active strands by spawn time (oldest first)
147    let mut strands: Vec<_> = registry.active_strands().collect();
148    strands.sort_by_key(|(_, spawn_time)| *spawn_time);
149
150    if strands.is_empty() {
151        let _ = writeln!(out, "  (no active strands in registry)");
152    } else {
153        let _ = writeln!(out, "  {} strand(s) tracked:", strands.len());
154        for (idx, (strand_id, spawn_time)) in strands.iter().take(STRAND_DISPLAY_LIMIT).enumerate()
155        {
156            let duration = now.saturating_sub(*spawn_time);
157            let _ = writeln!(
158                out,
159                "    [{:2}] Strand #{:<8} running for {}s",
160                idx + 1,
161                strand_id,
162                duration
163            );
164        }
165        if strands.len() > STRAND_DISPLAY_LIMIT {
166            let _ = writeln!(
167                out,
168                "    ... and {} more strands",
169                strands.len() - STRAND_DISPLAY_LIMIT
170            );
171        }
172    }
173
174    // Memory statistics (cross-thread aggregation)
175    let _ = writeln!(out, "\n[Memory]");
176    let mem_stats = memory_registry().aggregate_stats();
177    let _ = writeln!(out, "  Tracked threads: {}", mem_stats.active_threads);
178    let _ = writeln!(
179        out,
180        "  Arena bytes:     {} (across all threads)",
181        format_bytes(mem_stats.total_arena_bytes)
182    );
183    if mem_stats.overflow_count > 0 {
184        let _ = writeln!(
185            out,
186            "  WARNING: {} threads exceeded registry capacity (memory not tracked)",
187            mem_stats.overflow_count
188        );
189        let _ = writeln!(
190            out,
191            "           Consider increasing MAX_THREADS in memory_stats.rs (currently 64)"
192        );
193    }
194
195    // Note: Channel stats are not available with the zero-mutex design.
196    // Channels are passed directly as Value::Channel on the stack with no global registry.
197
198    let _ = writeln!(out, "\n=== End Diagnostics ===\n");
199}
200
/// Format a byte count as a short human-readable string.
///
/// Counts below 1024 are printed exactly as `"<n> B"`. Larger counts are
/// scaled by powers of 1024 and printed with two decimals using the labels
/// `KB`, `MB` and `GB` (`GB` is the largest unit, so very large values stay
/// in `GB`).
///
/// The unit is chosen from the value *as it will be displayed*: a count
/// that would round to `1024.00` in one unit is shown as `1.00` of the next
/// unit instead (e.g. 1_048_575 bytes is `"1.00 MB"`, not `"1024.00 KB"`).
fn format_bytes(bytes: u64) -> String {
    const UNITS: [&str; 3] = ["KB", "MB", "GB"];
    const STEP: f64 = 1024.0;
    // Smallest value that `{:.2}` renders as "1024.00".
    const ROUNDS_UP_TO_STEP: f64 = 1023.995;

    if bytes < 1024 {
        return format!("{} B", bytes);
    }

    // Dividing by a power of two is exact in f64, so stepping one unit at a
    // time yields the same value as a single division by the combined factor.
    let mut value = bytes as f64 / STEP;
    let mut unit = 0;
    while unit + 1 < UNITS.len() && value >= ROUNDS_UP_TO_STEP {
        value /= STEP;
        unit += 1;
    }

    format!("{:.2} {}", value, UNITS[unit])
}
213
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: producing a dump must complete without panicking.
    #[test]
    fn test_dump_diagnostics_runs() {
        dump_diagnostics();
    }

    /// Installing the handler repeatedly must be harmless.
    #[test]
    fn test_install_signal_handler_idempotent() {
        for _ in 0..3 {
            install_signal_handler();
        }
    }
}
231}