Skip to main content

ralph_core/diagnostics/
mod.rs

//! Diagnostic logging system for Ralph orchestration.
//!
//! Captures agent output, orchestration decisions, traces, performance metrics,
//! and errors to structured JSONL files when `RALPH_DIAGNOSTICS=1` is set.
5
6mod agent_output;
7mod errors;
8mod orchestration;
9mod performance;
10mod stream_handler;
11mod trace_layer;
12
13#[cfg(test)]
14mod integration_tests;
15
16pub use agent_output::{AgentOutputContent, AgentOutputEntry, AgentOutputLogger};
17pub use errors::{DiagnosticError, ErrorLogger};
18pub use orchestration::{OrchestrationEvent, OrchestrationLogger};
19pub use performance::{PerformanceLogger, PerformanceMetric};
20pub use stream_handler::DiagnosticStreamHandler;
21pub use trace_layer::{DiagnosticTraceLayer, TraceEntry};
22
23use chrono::Local;
24use std::fs;
25use std::path::{Path, PathBuf};
26use std::sync::{Arc, Mutex};
27
28/// Central coordinator for diagnostic logging.
29///
30/// Checks `RALPH_DIAGNOSTICS` environment variable and creates a timestamped
31/// session directory if enabled.
32pub struct DiagnosticsCollector {
33    enabled: bool,
34    session_dir: Option<PathBuf>,
35    orchestration_logger: Option<Arc<Mutex<orchestration::OrchestrationLogger>>>,
36    performance_logger: Option<Arc<Mutex<performance::PerformanceLogger>>>,
37    error_logger: Option<Arc<Mutex<errors::ErrorLogger>>>,
38}
39
40impl DiagnosticsCollector {
41    /// Creates a new diagnostics collector.
42    ///
43    /// If `RALPH_DIAGNOSTICS=1`, creates `.ralph/diagnostics/<timestamp>/` directory.
44    pub fn new(base_path: &Path) -> std::io::Result<Self> {
45        let enabled = std::env::var("RALPH_DIAGNOSTICS")
46            .map(|v| v == "1")
47            .unwrap_or(false);
48
49        Self::with_enabled(base_path, enabled)
50    }
51
52    /// Creates a diagnostics collector with explicit enabled flag (for testing).
53    pub fn with_enabled(base_path: &Path, enabled: bool) -> std::io::Result<Self> {
54        let (session_dir, orchestration_logger, performance_logger, error_logger) = if enabled {
55            let timestamp = Local::now().format("%Y-%m-%dT%H-%M-%S");
56            let dir = base_path
57                .join(".ralph")
58                .join("diagnostics")
59                .join(timestamp.to_string());
60            fs::create_dir_all(&dir)?;
61
62            let orch_logger = orchestration::OrchestrationLogger::new(&dir)?;
63            let perf_logger = performance::PerformanceLogger::new(&dir)?;
64            let err_logger = errors::ErrorLogger::new(&dir)?;
65            (
66                Some(dir),
67                Some(Arc::new(Mutex::new(orch_logger))),
68                Some(Arc::new(Mutex::new(perf_logger))),
69                Some(Arc::new(Mutex::new(err_logger))),
70            )
71        } else {
72            (None, None, None, None)
73        };
74
75        Ok(Self {
76            enabled,
77            session_dir,
78            orchestration_logger,
79            performance_logger,
80            error_logger,
81        })
82    }
83
84    /// Creates a disabled diagnostics collector without any I/O (for testing).
85    pub fn disabled() -> Self {
86        Self {
87            enabled: false,
88            session_dir: None,
89            orchestration_logger: None,
90            performance_logger: None,
91            error_logger: None,
92        }
93    }
94
95    /// Returns whether diagnostics are enabled.
96    pub fn is_enabled(&self) -> bool {
97        self.enabled
98    }
99
100    /// Returns the session directory if diagnostics are enabled.
101    pub fn session_dir(&self) -> Option<&Path> {
102        self.session_dir.as_deref()
103    }
104
105    /// Wraps a stream handler with diagnostic logging.
106    ///
107    /// Returns the original handler if diagnostics are disabled.
108    pub fn wrap_stream_handler<H>(&self, handler: H) -> Result<DiagnosticStreamHandler<H>, H> {
109        if let Some(session_dir) = &self.session_dir {
110            match AgentOutputLogger::new(session_dir) {
111                Ok(logger) => {
112                    let logger = Arc::new(Mutex::new(logger));
113                    Ok(DiagnosticStreamHandler::new(handler, logger))
114                }
115                Err(_) => Err(handler), // Return original handler on error
116            }
117        } else {
118            Err(handler) // Diagnostics disabled, return original
119        }
120    }
121
122    /// Logs an orchestration event.
123    ///
124    /// Does nothing if diagnostics are disabled.
125    pub fn log_orchestration(&self, iteration: u32, hat: &str, event: OrchestrationEvent) {
126        if let Some(logger) = &self.orchestration_logger
127            && let Ok(mut logger) = logger.lock()
128        {
129            let _ = logger.log(iteration, hat, event);
130        }
131    }
132
133    /// Logs a performance metric.
134    ///
135    /// Does nothing if diagnostics are disabled.
136    pub fn log_performance(&self, iteration: u32, hat: &str, metric: PerformanceMetric) {
137        if let Some(logger) = &self.performance_logger
138            && let Ok(mut logger) = logger.lock()
139        {
140            let _ = logger.log(iteration, hat, metric);
141        }
142    }
143
144    /// Logs an error.
145    ///
146    /// Does nothing if diagnostics are disabled.
147    pub fn log_error(&self, iteration: u32, hat: &str, error: DiagnosticError) {
148        if let Some(logger) = &self.error_logger
149            && let Ok(mut logger) = logger.lock()
150        {
151            logger.set_context(iteration, hat);
152            logger.log(error);
153        }
154    }
155}
156
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a collector rooted at `root`, panicking if construction fails.
    fn collector_in(root: &TempDir, enabled: bool) -> DiagnosticsCollector {
        DiagnosticsCollector::with_enabled(root.path(), enabled).unwrap()
    }

    #[test]
    fn test_diagnostics_disabled_by_default() {
        let temp = TempDir::new().unwrap();
        let collector = collector_in(&temp, false);

        assert!(!collector.is_enabled());
        assert!(collector.session_dir().is_none());
    }

    #[test]
    fn test_diagnostics_enabled_creates_directory() {
        let temp = TempDir::new().unwrap();
        let collector = collector_in(&temp, true);

        assert!(collector.is_enabled());
        assert!(collector.session_dir().is_some());
        assert!(collector.session_dir().unwrap().exists());
    }

    #[test]
    fn test_session_directory_format() {
        let temp = TempDir::new().unwrap();
        let collector = collector_in(&temp, true);

        let name = collector
            .session_dir()
            .unwrap()
            .file_name()
            .unwrap()
            .to_str()
            .unwrap();

        // Expected shape: YYYY-MM-DDTHH-MM-SS, e.g. 2024-01-21T08-49-56.
        assert_eq!(name.len(), 19);
        for (index, separator) in [(4, '-'), (7, '-'), (10, 'T'), (13, '-'), (16, '-')] {
            assert_eq!(name.chars().nth(index), Some(separator));
        }
    }

    #[test]
    fn test_performance_logger_integration() {
        let temp = TempDir::new().unwrap();
        let collector = collector_in(&temp, true);

        // Three metrics, all recorded against iteration 1.
        let samples = [
            (
                "ralph",
                PerformanceMetric::IterationDuration { duration_ms: 1500 },
            ),
            (
                "builder",
                PerformanceMetric::AgentLatency { duration_ms: 800 },
            ),
            (
                "builder",
                PerformanceMetric::TokenCount {
                    input: 1000,
                    output: 500,
                },
            ),
        ];
        for (hat, metric) in samples {
            collector.log_performance(1, hat, metric);
        }

        // The log file must exist in the session directory.
        let path = collector.session_dir().unwrap().join("performance.jsonl");
        assert!(path.exists(), "performance.jsonl should exist");

        // One line per logged metric.
        let text = std::fs::read_to_string(path).unwrap();
        assert_eq!(text.lines().count(), 3, "Should have 3 performance entries");

        // Every line must deserialize as a performance entry.
        for line in text.lines() {
            let _: performance::PerformanceEntry = serde_json::from_str(line).unwrap();
        }
    }

    #[test]
    fn test_error_logger_integration() {
        let temp = TempDir::new().unwrap();
        let collector = collector_in(&temp, true);

        // Two errors of different kinds, on different iterations and hats.
        let samples = [
            (
                1,
                "ralph",
                DiagnosticError::ParseError {
                    source: "agent_output".to_string(),
                    message: "Invalid JSON".to_string(),
                    input: "{invalid".to_string(),
                },
            ),
            (
                2,
                "builder",
                DiagnosticError::ValidationFailure {
                    rule: "tests_required".to_string(),
                    message: "Missing test evidence".to_string(),
                    evidence: "tests: missing".to_string(),
                },
            ),
        ];
        for (iteration, hat, error) in samples {
            collector.log_error(iteration, hat, error);
        }

        // The log file must exist in the session directory.
        let path = collector.session_dir().unwrap().join("errors.jsonl");
        assert!(path.exists(), "errors.jsonl should exist");

        // One line per logged error.
        let text = std::fs::read_to_string(path).unwrap();
        assert_eq!(text.lines().count(), 2, "Should have 2 error entries");

        // Every line must be valid JSON carrying the expected top-level keys.
        for line in text.lines() {
            let parsed: serde_json::Value = serde_json::from_str(line).unwrap();
            for key in ["error_type", "message", "context"] {
                assert!(parsed.get(key).is_some());
            }
        }
    }
}