Skip to main content

repl_core/
execution_monitor.rs

1//! Execution Monitoring and Debugging for Agent Behaviors
2//!
3//! Provides tracing, logging, and debugging capabilities for agent execution.
4
5use crate::dsl::evaluator::{AgentInstance, DslValue};
6use crate::error::{ReplError, Result};
7use serde_json::Value as JsonValue;
8use std::collections::HashMap;
9use std::sync::{Arc, Mutex};
10use std::time::{Duration, Instant};
11use uuid::Uuid;
12
13/// Execution trace entry
14#[derive(Debug, Clone)]
15pub struct TraceEntry {
16    pub id: Uuid,
17    pub timestamp: chrono::DateTime<chrono::Utc>,
18    pub agent_id: Option<Uuid>,
19    pub behavior_name: Option<String>,
20    pub event_type: TraceEventType,
21    pub data: JsonValue,
22    pub duration: Option<Duration>,
23}
24
25/// Types of trace events
26#[derive(Debug, Clone)]
27pub enum TraceEventType {
28    AgentCreated,
29    AgentStarted,
30    AgentStopped,
31    AgentPaused,
32    AgentResumed,
33    AgentDestroyed,
34    BehaviorStarted,
35    BehaviorCompleted,
36    BehaviorFailed,
37    FunctionCalled,
38    CapabilityChecked,
39    PolicyEvaluated,
40    VariableAssigned,
41    ExpressionEvaluated,
42    Error,
43}
44
45/// Execution statistics
46#[derive(Debug, Clone, Default)]
47pub struct ExecutionStats {
48    pub total_executions: u64,
49    pub successful_executions: u64,
50    pub failed_executions: u64,
51    pub average_duration: Duration,
52    pub total_duration: Duration,
53    pub behaviors_executed: HashMap<String, u64>,
54    pub capabilities_checked: HashMap<String, u64>,
55}
56
57/// Execution monitor for tracking and debugging agent behavior
58pub struct ExecutionMonitor {
59    /// Trace entries
60    traces: Arc<Mutex<Vec<TraceEntry>>>,
61    /// Execution statistics
62    stats: Arc<Mutex<ExecutionStats>>,
63    /// Maximum number of trace entries to keep
64    max_traces: usize,
65    /// Active execution contexts
66    active_executions: Arc<Mutex<HashMap<Uuid, ExecutionContext>>>,
67}
68
69/// Active execution context for monitoring
70#[derive(Debug, Clone)]
71pub struct ExecutionContext {
72    pub id: Uuid,
73    pub agent_id: Option<Uuid>,
74    pub behavior_name: Option<String>,
75    pub start_time: Instant,
76    pub stack: Vec<String>,
77    pub variables: HashMap<String, DslValue>,
78}
79
80impl Default for ExecutionMonitor {
81    fn default() -> Self {
82        Self::new()
83    }
84}
85
86impl ExecutionMonitor {
87    /// Create a new execution monitor
88    pub fn new() -> Self {
89        Self {
90            traces: Arc::new(Mutex::new(Vec::new())),
91            stats: Arc::new(Mutex::new(ExecutionStats::default())),
92            max_traces: 1000,
93            active_executions: Arc::new(Mutex::new(HashMap::new())),
94        }
95    }
96
97    /// Start monitoring an execution
98    pub fn start_execution(&self, agent_id: Option<Uuid>, behavior_name: Option<String>) -> Uuid {
99        let execution_id = Uuid::new_v4();
100        let context = ExecutionContext {
101            id: execution_id,
102            agent_id,
103            behavior_name: behavior_name.clone(),
104            start_time: Instant::now(),
105            stack: Vec::new(),
106            variables: HashMap::new(),
107        };
108
109        self.active_executions
110            .lock()
111            .unwrap()
112            .insert(execution_id, context);
113
114        // Add trace entry
115        self.add_trace(TraceEntry {
116            id: Uuid::new_v4(),
117            timestamp: chrono::Utc::now(),
118            agent_id,
119            behavior_name,
120            event_type: TraceEventType::BehaviorStarted,
121            data: serde_json::json!({
122                "execution_id": execution_id,
123                "started_at": chrono::Utc::now()
124            }),
125            duration: None,
126        });
127
128        execution_id
129    }
130
131    /// End monitoring an execution
132    pub fn end_execution(&self, execution_id: Uuid, result: Result<DslValue>) -> Option<Duration> {
133        let context = self
134            .active_executions
135            .lock()
136            .unwrap_or_else(|e| e.into_inner())
137            .remove(&execution_id);
138
139        if let Some(context) = context {
140            let duration = context.start_time.elapsed();
141
142            let (event_type, success) = match result {
143                Ok(_) => (TraceEventType::BehaviorCompleted, true),
144                Err(_) => (TraceEventType::BehaviorFailed, false),
145            };
146
147            // Add trace entry
148            self.add_trace(TraceEntry {
149                id: Uuid::new_v4(),
150                timestamp: chrono::Utc::now(),
151                agent_id: context.agent_id,
152                behavior_name: context.behavior_name.clone(),
153                event_type,
154                data: serde_json::json!({
155                    "execution_id": execution_id,
156                    "duration_ms": duration.as_millis(),
157                    "success": success,
158                    "result": match &result {
159                        Ok(value) => value.to_json(),
160                        Err(e) => serde_json::json!({"error": e.to_string()})
161                    }
162                }),
163                duration: Some(duration),
164            });
165
166            // Update statistics
167            self.update_stats(duration, success, &context.behavior_name);
168
169            Some(duration)
170        } else {
171            None
172        }
173    }
174
175    /// Add a trace entry
176    pub fn add_trace(&self, entry: TraceEntry) {
177        let mut traces = self.traces.lock().unwrap_or_else(|e| e.into_inner());
178        traces.push(entry);
179
180        // Keep only the most recent traces
181        if traces.len() > self.max_traces {
182            let excess = traces.len() - self.max_traces;
183            traces.drain(0..excess);
184        }
185    }
186
187    /// Log agent lifecycle event
188    pub fn log_agent_event(&self, agent: &AgentInstance, event_type: TraceEventType) {
189        self.add_trace(TraceEntry {
190            id: Uuid::new_v4(),
191            timestamp: chrono::Utc::now(),
192            agent_id: Some(agent.id),
193            behavior_name: None,
194            event_type,
195            data: serde_json::json!({
196                "agent_id": agent.id,
197                "agent_name": agent.definition.name,
198                "state": format!("{:?}", agent.state),
199                "timestamp": chrono::Utc::now()
200            }),
201            duration: None,
202        });
203    }
204
205    /// Log function call
206    pub fn log_function_call(
207        &self,
208        execution_id: Option<Uuid>,
209        function_name: &str,
210        args: &[DslValue],
211    ) {
212        if let Some(exec_id) = execution_id {
213            if let Some(context) = self
214                .active_executions
215                .lock()
216                .unwrap_or_else(|e| e.into_inner())
217                .get_mut(&exec_id)
218            {
219                context.stack.push(function_name.to_string());
220            }
221        }
222
223        self.add_trace(TraceEntry {
224            id: Uuid::new_v4(),
225            timestamp: chrono::Utc::now(),
226            agent_id: execution_id.and_then(|id| {
227                self.active_executions
228                    .lock()
229                    .unwrap()
230                    .get(&id)
231                    .and_then(|ctx| ctx.agent_id)
232            }),
233            behavior_name: None,
234            event_type: TraceEventType::FunctionCalled,
235            data: serde_json::json!({
236                "function_name": function_name,
237                "argument_count": args.len(),
238                "arguments": args.iter().map(|arg| arg.to_json()).collect::<Vec<_>>()
239            }),
240            duration: None,
241        });
242    }
243
244    /// Log capability check
245    pub fn log_capability_check(&self, agent_id: Option<Uuid>, capability: &str, allowed: bool) {
246        self.add_trace(TraceEntry {
247            id: Uuid::new_v4(),
248            timestamp: chrono::Utc::now(),
249            agent_id,
250            behavior_name: None,
251            event_type: TraceEventType::CapabilityChecked,
252            data: serde_json::json!({
253                "capability": capability,
254                "allowed": allowed
255            }),
256            duration: None,
257        });
258
259        // Update capability statistics
260        let mut stats = self.stats.lock().unwrap_or_else(|e| e.into_inner());
261        *stats
262            .capabilities_checked
263            .entry(capability.to_string())
264            .or_insert(0) += 1;
265    }
266
267    /// Log variable assignment
268    pub fn log_variable_assignment(
269        &self,
270        execution_id: Option<Uuid>,
271        var_name: &str,
272        value: &DslValue,
273    ) {
274        if let Some(exec_id) = execution_id {
275            if let Some(context) = self
276                .active_executions
277                .lock()
278                .unwrap_or_else(|e| e.into_inner())
279                .get_mut(&exec_id)
280            {
281                context
282                    .variables
283                    .insert(var_name.to_string(), value.clone());
284            }
285        }
286
287        self.add_trace(TraceEntry {
288            id: Uuid::new_v4(),
289            timestamp: chrono::Utc::now(),
290            agent_id: execution_id.and_then(|id| {
291                self.active_executions
292                    .lock()
293                    .unwrap()
294                    .get(&id)
295                    .and_then(|ctx| ctx.agent_id)
296            }),
297            behavior_name: None,
298            event_type: TraceEventType::VariableAssigned,
299            data: serde_json::json!({
300                "variable_name": var_name,
301                "value": value.to_json(),
302                "type": value.type_name()
303            }),
304            duration: None,
305        });
306    }
307
308    /// Log error
309    pub fn log_error(&self, agent_id: Option<Uuid>, error: &ReplError) {
310        self.add_trace(TraceEntry {
311            id: Uuid::new_v4(),
312            timestamp: chrono::Utc::now(),
313            agent_id,
314            behavior_name: None,
315            event_type: TraceEventType::Error,
316            data: serde_json::json!({
317                "error": error.to_string(),
318                "error_type": match error {
319                    ReplError::Lexing(_) => "Lexing",
320                    ReplError::Parsing(_) => "Parsing",
321                    ReplError::Execution(_) => "Execution",
322                    ReplError::Security(_) => "Security",
323                    ReplError::Runtime(_) => "Runtime",
324                    ReplError::Evaluation(_) => "Evaluation",
325                    ReplError::PolicyParsing(_) => "PolicyParsing",
326                    ReplError::Io(_) => "Io",
327                    ReplError::Json(_) => "Json",
328                    ReplError::Uuid(_) => "Uuid",
329                }
330            }),
331            duration: None,
332        });
333    }
334
335    /// Get execution traces
336    pub fn get_traces(&self, limit: Option<usize>) -> Vec<TraceEntry> {
337        let traces = self.traces.lock().unwrap_or_else(|e| e.into_inner());
338        let start_idx = if let Some(limit) = limit {
339            traces.len().saturating_sub(limit)
340        } else {
341            0
342        };
343        traces[start_idx..].to_vec()
344    }
345
346    /// Get traces for a specific agent
347    pub fn get_agent_traces(&self, agent_id: Uuid, limit: Option<usize>) -> Vec<TraceEntry> {
348        let traces = self.traces.lock().unwrap_or_else(|e| e.into_inner());
349        let agent_traces: Vec<_> = traces
350            .iter()
351            .filter(|trace| trace.agent_id == Some(agent_id))
352            .cloned()
353            .collect();
354
355        if let Some(limit) = limit {
356            let start_idx = agent_traces.len().saturating_sub(limit);
357            agent_traces[start_idx..].to_vec()
358        } else {
359            agent_traces
360        }
361    }
362
363    /// Get execution statistics
364    pub fn get_stats(&self) -> ExecutionStats {
365        self.stats.lock().unwrap_or_else(|e| e.into_inner()).clone()
366    }
367
368    /// Get active executions
369    pub fn get_active_executions(&self) -> Vec<ExecutionContext> {
370        self.active_executions
371            .lock()
372            .unwrap()
373            .values()
374            .cloned()
375            .collect()
376    }
377
378    /// Clear all traces
379    pub fn clear_traces(&self) {
380        self.traces
381            .lock()
382            .unwrap_or_else(|e| e.into_inner())
383            .clear();
384    }
385
386    /// Update execution statistics
387    fn update_stats(&self, duration: Duration, success: bool, behavior_name: &Option<String>) {
388        let mut stats = self.stats.lock().unwrap_or_else(|e| e.into_inner());
389
390        stats.total_executions += 1;
391        if success {
392            stats.successful_executions += 1;
393        } else {
394            stats.failed_executions += 1;
395        }
396
397        stats.total_duration += duration;
398        stats.average_duration = stats.total_duration / stats.total_executions as u32;
399
400        if let Some(behavior) = behavior_name {
401            *stats
402                .behaviors_executed
403                .entry(behavior.clone())
404                .or_insert(0) += 1;
405        }
406    }
407
408    /// Generate execution report
409    pub fn generate_report(&self) -> String {
410        let stats = self.get_stats();
411        let active = self.get_active_executions();
412        let recent_traces = self.get_traces(Some(20));
413
414        let mut report = String::new();
415        report.push_str("Execution Monitor Report\n");
416        report.push_str("========================\n\n");
417
418        // Statistics
419        report.push_str("Execution Statistics:\n");
420        report.push_str(&format!("  Total Executions: {}\n", stats.total_executions));
421        report.push_str(&format!("  Successful: {}\n", stats.successful_executions));
422        report.push_str(&format!("  Failed: {}\n", stats.failed_executions));
423        if stats.total_executions > 0 {
424            let success_rate =
425                (stats.successful_executions as f64 / stats.total_executions as f64) * 100.0;
426            report.push_str(&format!("  Success Rate: {:.1}%\n", success_rate));
427        }
428        report.push_str(&format!(
429            "  Average Duration: {:?}\n",
430            stats.average_duration
431        ));
432        report.push_str(&format!("  Total Duration: {:?}\n", stats.total_duration));
433
434        // Active executions
435        report.push_str(&format!("\nActive Executions: {}\n", active.len()));
436        for exec in &active {
437            let elapsed = exec.start_time.elapsed();
438            report.push_str(&format!(
439                "  {} - {:?} ({}s)\n",
440                exec.id,
441                exec.behavior_name.as_deref().unwrap_or("unknown"),
442                elapsed.as_secs()
443            ));
444        }
445
446        // Top behaviors
447        if !stats.behaviors_executed.is_empty() {
448            report.push_str("\nTop Behaviors:\n");
449            let mut behaviors: Vec<_> = stats.behaviors_executed.iter().collect();
450            behaviors.sort_by(|a, b| b.1.cmp(a.1));
451            for (behavior, count) in behaviors.iter().take(10) {
452                report.push_str(&format!("  {}: {} executions\n", behavior, count));
453            }
454        }
455
456        // Recent traces
457        if !recent_traces.is_empty() {
458            report.push_str("\nRecent Activity:\n");
459            for trace in recent_traces.iter().rev().take(10) {
460                report.push_str(&format!(
461                    "  {} - {:?}\n",
462                    trace.timestamp.format("%H:%M:%S"),
463                    trace.event_type
464                ));
465            }
466        }
467
468        report
469    }
470}
471
472#[cfg(test)]
473mod tests {
474    use super::*;
475    use crate::dsl::evaluator::DslValue;
476
477    #[test]
478    fn test_execution_monitor_basic() {
479        let monitor = ExecutionMonitor::new();
480
481        let exec_id =
482            monitor.start_execution(Some(Uuid::new_v4()), Some("test_behavior".to_string()));
483        assert!(monitor.get_active_executions().len() == 1);
484
485        let duration = monitor.end_execution(exec_id, Ok(DslValue::String("success".to_string())));
486        assert!(duration.is_some());
487        assert!(monitor.get_active_executions().is_empty());
488
489        let traces = monitor.get_traces(None);
490        assert_eq!(traces.len(), 2); // start + end
491    }
492
493    #[test]
494    fn test_execution_monitor_stats() {
495        let monitor = ExecutionMonitor::new();
496
497        // Simulate some executions
498        for i in 0..5 {
499            let exec_id = monitor.start_execution(None, Some(format!("behavior_{}", i)));
500            let result = if i % 2 == 0 {
501                Ok(DslValue::Integer(42))
502            } else {
503                Err(ReplError::Execution("test error".to_string()))
504            };
505            monitor.end_execution(exec_id, result);
506        }
507
508        let stats = monitor.get_stats();
509        assert_eq!(stats.total_executions, 5);
510        assert_eq!(stats.successful_executions, 3);
511        assert_eq!(stats.failed_executions, 2);
512    }
513}