Skip to main content

repl_core/
execution_monitor.rs

1//! Execution Monitoring and Debugging for Agent Behaviors
2//!
3//! Provides tracing, logging, and debugging capabilities for agent execution.
4
5use crate::dsl::evaluator::{AgentInstance, DslValue};
6use crate::error::{ReplError, Result};
7use serde_json::Value as JsonValue;
8use std::collections::HashMap;
9use std::sync::{Arc, Mutex};
10use std::time::{Duration, Instant};
11use uuid::Uuid;
12
13/// Execution trace entry
14#[derive(Debug, Clone)]
15pub struct TraceEntry {
16    pub id: Uuid,
17    pub timestamp: chrono::DateTime<chrono::Utc>,
18    pub agent_id: Option<Uuid>,
19    pub behavior_name: Option<String>,
20    pub event_type: TraceEventType,
21    pub data: JsonValue,
22    pub duration: Option<Duration>,
23}
24
25/// Types of trace events
26#[derive(Debug, Clone)]
27pub enum TraceEventType {
28    AgentCreated,
29    AgentStarted,
30    AgentStopped,
31    AgentPaused,
32    AgentResumed,
33    AgentDestroyed,
34    BehaviorStarted,
35    BehaviorCompleted,
36    BehaviorFailed,
37    FunctionCalled,
38    CapabilityChecked,
39    PolicyEvaluated,
40    VariableAssigned,
41    ExpressionEvaluated,
42    Error,
43}
44
45/// Execution statistics
46#[derive(Debug, Clone, Default)]
47pub struct ExecutionStats {
48    pub total_executions: u64,
49    pub successful_executions: u64,
50    pub failed_executions: u64,
51    pub average_duration: Duration,
52    pub total_duration: Duration,
53    pub behaviors_executed: HashMap<String, u64>,
54    pub capabilities_checked: HashMap<String, u64>,
55}
56
57/// Execution monitor for tracking and debugging agent behavior
58pub struct ExecutionMonitor {
59    /// Trace entries
60    traces: Arc<Mutex<Vec<TraceEntry>>>,
61    /// Execution statistics
62    stats: Arc<Mutex<ExecutionStats>>,
63    /// Maximum number of trace entries to keep
64    max_traces: usize,
65    /// Active execution contexts
66    active_executions: Arc<Mutex<HashMap<Uuid, ExecutionContext>>>,
67}
68
69/// Active execution context for monitoring
70#[derive(Debug, Clone)]
71pub struct ExecutionContext {
72    pub id: Uuid,
73    pub agent_id: Option<Uuid>,
74    pub behavior_name: Option<String>,
75    pub start_time: Instant,
76    pub stack: Vec<String>,
77    pub variables: HashMap<String, DslValue>,
78}
79
80impl Default for ExecutionMonitor {
81    fn default() -> Self {
82        Self::new()
83    }
84}
85
86impl ExecutionMonitor {
87    /// Create a new execution monitor
88    pub fn new() -> Self {
89        Self {
90            traces: Arc::new(Mutex::new(Vec::new())),
91            stats: Arc::new(Mutex::new(ExecutionStats::default())),
92            max_traces: 1000,
93            active_executions: Arc::new(Mutex::new(HashMap::new())),
94        }
95    }
96
97    /// Start monitoring an execution
98    pub fn start_execution(&self, agent_id: Option<Uuid>, behavior_name: Option<String>) -> Uuid {
99        let execution_id = Uuid::new_v4();
100        let context = ExecutionContext {
101            id: execution_id,
102            agent_id,
103            behavior_name: behavior_name.clone(),
104            start_time: Instant::now(),
105            stack: Vec::new(),
106            variables: HashMap::new(),
107        };
108
109        self.active_executions
110            .lock()
111            .unwrap()
112            .insert(execution_id, context);
113
114        // Add trace entry
115        self.add_trace(TraceEntry {
116            id: Uuid::new_v4(),
117            timestamp: chrono::Utc::now(),
118            agent_id,
119            behavior_name,
120            event_type: TraceEventType::BehaviorStarted,
121            data: serde_json::json!({
122                "execution_id": execution_id,
123                "started_at": chrono::Utc::now()
124            }),
125            duration: None,
126        });
127
128        execution_id
129    }
130
131    /// End monitoring an execution
132    pub fn end_execution(&self, execution_id: Uuid, result: Result<DslValue>) -> Option<Duration> {
133        let context = self.active_executions.lock().unwrap().remove(&execution_id);
134
135        if let Some(context) = context {
136            let duration = context.start_time.elapsed();
137
138            let (event_type, success) = match result {
139                Ok(_) => (TraceEventType::BehaviorCompleted, true),
140                Err(_) => (TraceEventType::BehaviorFailed, false),
141            };
142
143            // Add trace entry
144            self.add_trace(TraceEntry {
145                id: Uuid::new_v4(),
146                timestamp: chrono::Utc::now(),
147                agent_id: context.agent_id,
148                behavior_name: context.behavior_name.clone(),
149                event_type,
150                data: serde_json::json!({
151                    "execution_id": execution_id,
152                    "duration_ms": duration.as_millis(),
153                    "success": success,
154                    "result": match &result {
155                        Ok(value) => value.to_json(),
156                        Err(e) => serde_json::json!({"error": e.to_string()})
157                    }
158                }),
159                duration: Some(duration),
160            });
161
162            // Update statistics
163            self.update_stats(duration, success, &context.behavior_name);
164
165            Some(duration)
166        } else {
167            None
168        }
169    }
170
171    /// Add a trace entry
172    pub fn add_trace(&self, entry: TraceEntry) {
173        let mut traces = self.traces.lock().unwrap();
174        traces.push(entry);
175
176        // Keep only the most recent traces
177        if traces.len() > self.max_traces {
178            let excess = traces.len() - self.max_traces;
179            traces.drain(0..excess);
180        }
181    }
182
183    /// Log agent lifecycle event
184    pub fn log_agent_event(&self, agent: &AgentInstance, event_type: TraceEventType) {
185        self.add_trace(TraceEntry {
186            id: Uuid::new_v4(),
187            timestamp: chrono::Utc::now(),
188            agent_id: Some(agent.id),
189            behavior_name: None,
190            event_type,
191            data: serde_json::json!({
192                "agent_id": agent.id,
193                "agent_name": agent.definition.name,
194                "state": format!("{:?}", agent.state),
195                "timestamp": chrono::Utc::now()
196            }),
197            duration: None,
198        });
199    }
200
201    /// Log function call
202    pub fn log_function_call(
203        &self,
204        execution_id: Option<Uuid>,
205        function_name: &str,
206        args: &[DslValue],
207    ) {
208        if let Some(exec_id) = execution_id {
209            if let Some(context) = self.active_executions.lock().unwrap().get_mut(&exec_id) {
210                context.stack.push(function_name.to_string());
211            }
212        }
213
214        self.add_trace(TraceEntry {
215            id: Uuid::new_v4(),
216            timestamp: chrono::Utc::now(),
217            agent_id: execution_id.and_then(|id| {
218                self.active_executions
219                    .lock()
220                    .unwrap()
221                    .get(&id)
222                    .and_then(|ctx| ctx.agent_id)
223            }),
224            behavior_name: None,
225            event_type: TraceEventType::FunctionCalled,
226            data: serde_json::json!({
227                "function_name": function_name,
228                "argument_count": args.len(),
229                "arguments": args.iter().map(|arg| arg.to_json()).collect::<Vec<_>>()
230            }),
231            duration: None,
232        });
233    }
234
235    /// Log capability check
236    pub fn log_capability_check(&self, agent_id: Option<Uuid>, capability: &str, allowed: bool) {
237        self.add_trace(TraceEntry {
238            id: Uuid::new_v4(),
239            timestamp: chrono::Utc::now(),
240            agent_id,
241            behavior_name: None,
242            event_type: TraceEventType::CapabilityChecked,
243            data: serde_json::json!({
244                "capability": capability,
245                "allowed": allowed
246            }),
247            duration: None,
248        });
249
250        // Update capability statistics
251        let mut stats = self.stats.lock().unwrap();
252        *stats
253            .capabilities_checked
254            .entry(capability.to_string())
255            .or_insert(0) += 1;
256    }
257
258    /// Log variable assignment
259    pub fn log_variable_assignment(
260        &self,
261        execution_id: Option<Uuid>,
262        var_name: &str,
263        value: &DslValue,
264    ) {
265        if let Some(exec_id) = execution_id {
266            if let Some(context) = self.active_executions.lock().unwrap().get_mut(&exec_id) {
267                context
268                    .variables
269                    .insert(var_name.to_string(), value.clone());
270            }
271        }
272
273        self.add_trace(TraceEntry {
274            id: Uuid::new_v4(),
275            timestamp: chrono::Utc::now(),
276            agent_id: execution_id.and_then(|id| {
277                self.active_executions
278                    .lock()
279                    .unwrap()
280                    .get(&id)
281                    .and_then(|ctx| ctx.agent_id)
282            }),
283            behavior_name: None,
284            event_type: TraceEventType::VariableAssigned,
285            data: serde_json::json!({
286                "variable_name": var_name,
287                "value": value.to_json(),
288                "type": value.type_name()
289            }),
290            duration: None,
291        });
292    }
293
294    /// Log error
295    pub fn log_error(&self, agent_id: Option<Uuid>, error: &ReplError) {
296        self.add_trace(TraceEntry {
297            id: Uuid::new_v4(),
298            timestamp: chrono::Utc::now(),
299            agent_id,
300            behavior_name: None,
301            event_type: TraceEventType::Error,
302            data: serde_json::json!({
303                "error": error.to_string(),
304                "error_type": match error {
305                    ReplError::Lexing(_) => "Lexing",
306                    ReplError::Parsing(_) => "Parsing",
307                    ReplError::Execution(_) => "Execution",
308                    ReplError::Security(_) => "Security",
309                    ReplError::Runtime(_) => "Runtime",
310                    ReplError::Evaluation(_) => "Evaluation",
311                    ReplError::PolicyParsing(_) => "PolicyParsing",
312                    ReplError::Io(_) => "Io",
313                    ReplError::Json(_) => "Json",
314                    ReplError::Uuid(_) => "Uuid",
315                }
316            }),
317            duration: None,
318        });
319    }
320
321    /// Get execution traces
322    pub fn get_traces(&self, limit: Option<usize>) -> Vec<TraceEntry> {
323        let traces = self.traces.lock().unwrap();
324        let start_idx = if let Some(limit) = limit {
325            traces.len().saturating_sub(limit)
326        } else {
327            0
328        };
329        traces[start_idx..].to_vec()
330    }
331
332    /// Get traces for a specific agent
333    pub fn get_agent_traces(&self, agent_id: Uuid, limit: Option<usize>) -> Vec<TraceEntry> {
334        let traces = self.traces.lock().unwrap();
335        let agent_traces: Vec<_> = traces
336            .iter()
337            .filter(|trace| trace.agent_id == Some(agent_id))
338            .cloned()
339            .collect();
340
341        if let Some(limit) = limit {
342            let start_idx = agent_traces.len().saturating_sub(limit);
343            agent_traces[start_idx..].to_vec()
344        } else {
345            agent_traces
346        }
347    }
348
349    /// Get execution statistics
350    pub fn get_stats(&self) -> ExecutionStats {
351        self.stats.lock().unwrap().clone()
352    }
353
354    /// Get active executions
355    pub fn get_active_executions(&self) -> Vec<ExecutionContext> {
356        self.active_executions
357            .lock()
358            .unwrap()
359            .values()
360            .cloned()
361            .collect()
362    }
363
364    /// Clear all traces
365    pub fn clear_traces(&self) {
366        self.traces.lock().unwrap().clear();
367    }
368
369    /// Update execution statistics
370    fn update_stats(&self, duration: Duration, success: bool, behavior_name: &Option<String>) {
371        let mut stats = self.stats.lock().unwrap();
372
373        stats.total_executions += 1;
374        if success {
375            stats.successful_executions += 1;
376        } else {
377            stats.failed_executions += 1;
378        }
379
380        stats.total_duration += duration;
381        stats.average_duration = stats.total_duration / stats.total_executions as u32;
382
383        if let Some(behavior) = behavior_name {
384            *stats
385                .behaviors_executed
386                .entry(behavior.clone())
387                .or_insert(0) += 1;
388        }
389    }
390
391    /// Generate execution report
392    pub fn generate_report(&self) -> String {
393        let stats = self.get_stats();
394        let active = self.get_active_executions();
395        let recent_traces = self.get_traces(Some(20));
396
397        let mut report = String::new();
398        report.push_str("Execution Monitor Report\n");
399        report.push_str("========================\n\n");
400
401        // Statistics
402        report.push_str("Execution Statistics:\n");
403        report.push_str(&format!("  Total Executions: {}\n", stats.total_executions));
404        report.push_str(&format!("  Successful: {}\n", stats.successful_executions));
405        report.push_str(&format!("  Failed: {}\n", stats.failed_executions));
406        if stats.total_executions > 0 {
407            let success_rate =
408                (stats.successful_executions as f64 / stats.total_executions as f64) * 100.0;
409            report.push_str(&format!("  Success Rate: {:.1}%\n", success_rate));
410        }
411        report.push_str(&format!(
412            "  Average Duration: {:?}\n",
413            stats.average_duration
414        ));
415        report.push_str(&format!("  Total Duration: {:?}\n", stats.total_duration));
416
417        // Active executions
418        report.push_str(&format!("\nActive Executions: {}\n", active.len()));
419        for exec in &active {
420            let elapsed = exec.start_time.elapsed();
421            report.push_str(&format!(
422                "  {} - {:?} ({}s)\n",
423                exec.id,
424                exec.behavior_name.as_deref().unwrap_or("unknown"),
425                elapsed.as_secs()
426            ));
427        }
428
429        // Top behaviors
430        if !stats.behaviors_executed.is_empty() {
431            report.push_str("\nTop Behaviors:\n");
432            let mut behaviors: Vec<_> = stats.behaviors_executed.iter().collect();
433            behaviors.sort_by(|a, b| b.1.cmp(a.1));
434            for (behavior, count) in behaviors.iter().take(10) {
435                report.push_str(&format!("  {}: {} executions\n", behavior, count));
436            }
437        }
438
439        // Recent traces
440        if !recent_traces.is_empty() {
441            report.push_str("\nRecent Activity:\n");
442            for trace in recent_traces.iter().rev().take(10) {
443                report.push_str(&format!(
444                    "  {} - {:?}\n",
445                    trace.timestamp.format("%H:%M:%S"),
446                    trace.event_type
447                ));
448            }
449        }
450
451        report
452    }
453}
454
455#[cfg(test)]
456mod tests {
457    use super::*;
458    use crate::dsl::evaluator::DslValue;
459
460    #[test]
461    fn test_execution_monitor_basic() {
462        let monitor = ExecutionMonitor::new();
463
464        let exec_id =
465            monitor.start_execution(Some(Uuid::new_v4()), Some("test_behavior".to_string()));
466        assert!(monitor.get_active_executions().len() == 1);
467
468        let duration = monitor.end_execution(exec_id, Ok(DslValue::String("success".to_string())));
469        assert!(duration.is_some());
470        assert!(monitor.get_active_executions().is_empty());
471
472        let traces = monitor.get_traces(None);
473        assert_eq!(traces.len(), 2); // start + end
474    }
475
476    #[test]
477    fn test_execution_monitor_stats() {
478        let monitor = ExecutionMonitor::new();
479
480        // Simulate some executions
481        for i in 0..5 {
482            let exec_id = monitor.start_execution(None, Some(format!("behavior_{}", i)));
483            let result = if i % 2 == 0 {
484                Ok(DslValue::Integer(42))
485            } else {
486                Err(ReplError::Execution("test error".to_string()))
487            };
488            monitor.end_execution(exec_id, result);
489        }
490
491        let stats = monitor.get_stats();
492        assert_eq!(stats.total_executions, 5);
493        assert_eq!(stats.successful_executions, 3);
494        assert_eq!(stats.failed_executions, 2);
495    }
496}