Skip to main content

forge_agent/
audit.rs

1//! Audit trail system with serde-serializable events and JSON persistence.
2//!
3//! The audit trail provides a complete record of all agent operations for
4//! debugging, compliance, and transaction replay. Every phase transition
5//! is logged with timestamps and relevant data.
6//!
//! # Audit Events
//!
//! Each phase of the agent loop records an audit event:
//! - `Observe`: Context gathering from the graph
//! - `Constrain`: Policy validation results
//! - `Plan`: Execution step generation
//! - `Mutate`: File modifications applied
//! - `Verify`: Validation results
//! - `Commit`: Transaction finalization
//! - `Rollback`: Error recovery with reason
//!
//! Workflow execution is covered by additional `Workflow*` variants of
//! `AuditEvent` (for example `WorkflowStarted`, `WorkflowTaskCompleted`,
//! and `WorkflowRolledBack`).
17//!
18//! # Persistence
19//!
20//! Events are persisted to `.forge/audit/{tx_id}.json` after each phase
21//! for durability and replay capability.
22
23use chrono::{DateTime, Utc};
24use serde::{Deserialize, Serialize};
25use std::path::PathBuf;
26use thiserror::Error;
27use uuid::Uuid;
28
29/// Error types for audit operations.
30#[derive(Error, Debug)]
31pub enum AuditError {
32    /// Failed to serialize audit event
33    #[error("Serialization failed: {0}")]
34    SerializationFailed(#[from] serde_json::Error),
35
36    /// Failed to write audit file
37    #[error("Write failed: {0}")]
38    WriteFailed(#[from] std::io::Error),
39
40    /// Failed to create audit directory
41    #[error("Directory creation failed: {0}")]
42    DirectoryFailed(String),
43}
44
45/// Audit event for phase transitions.
46///
47/// Each event captures the timestamp and phase-specific data for
48/// complete transaction reconstruction.
49#[derive(Clone, Debug, Serialize, Deserialize)]
50pub enum AuditEvent {
51    /// Observation phase - gather context from graph
52    Observe {
53        timestamp: DateTime<Utc>,
54        query: String,
55        symbol_count: usize,
56    },
57    /// Constraint phase - apply policy rules
58    Constrain {
59        timestamp: DateTime<Utc>,
60        policy_count: usize,
61        violations: usize,
62    },
63    /// Plan phase - generate execution steps
64    Plan {
65        timestamp: DateTime<Utc>,
66        step_count: usize,
67        estimated_files: usize,
68    },
69    /// Mutate phase - apply changes
70    Mutate {
71        timestamp: DateTime<Utc>,
72        files_modified: Vec<String>,
73    },
74    /// Verify phase - validate results
75    Verify {
76        timestamp: DateTime<Utc>,
77        passed: bool,
78        diagnostic_count: usize,
79    },
80    /// Commit phase - finalize transaction
81    Commit {
82        timestamp: DateTime<Utc>,
83        transaction_id: String,
84    },
85    /// Rollback occurred with reason
86    Rollback {
87        timestamp: DateTime<Utc>,
88        reason: String,
89        phase: String,
90    },
91    /// Workflow execution started
92    WorkflowStarted {
93        timestamp: DateTime<Utc>,
94        workflow_id: String,
95        task_count: usize,
96    },
97    /// Workflow task started
98    WorkflowTaskStarted {
99        timestamp: DateTime<Utc>,
100        workflow_id: String,
101        task_id: String,
102        task_name: String,
103    },
104    /// Workflow task completed
105    WorkflowTaskCompleted {
106        timestamp: DateTime<Utc>,
107        workflow_id: String,
108        task_id: String,
109        task_name: String,
110        result: String,
111    },
112    /// Workflow task failed
113    WorkflowTaskFailed {
114        timestamp: DateTime<Utc>,
115        workflow_id: String,
116        task_id: String,
117        task_name: String,
118        error: String,
119    },
120    /// Workflow execution completed
121    WorkflowCompleted {
122        timestamp: DateTime<Utc>,
123        workflow_id: String,
124        total_tasks: usize,
125        completed_tasks: usize,
126    },
127    /// Workflow task rolled back
128    WorkflowTaskRolledBack {
129        timestamp: DateTime<Utc>,
130        workflow_id: String,
131        task_id: String,
132        compensation: String,
133    },
134    /// Workflow rolled back
135    WorkflowRolledBack {
136        timestamp: DateTime<Utc>,
137        workflow_id: String,
138        reason: String,
139        rolled_back_tasks: Vec<String>,
140    },
141    /// Workflow cancelled
142    WorkflowCancelled {
143        timestamp: DateTime<Utc>,
144        workflow_id: String,
145    },
146    /// Workflow task timed out
147    WorkflowTaskTimedOut {
148        timestamp: DateTime<Utc>,
149        workflow_id: String,
150        task_id: String,
151        task_name: String,
152        timeout_secs: u64,
153    },
154    /// Workflow tool fallback activated
155    WorkflowToolFallback {
156        timestamp: DateTime<Utc>,
157        workflow_id: String,
158        task_id: String,
159        tool_name: String,
160        error: String,
161        fallback_action: String,
162    },
163    /// Parallel task layer started
164    WorkflowTaskParallelStarted {
165        timestamp: DateTime<Utc>,
166        workflow_id: String,
167        layer_index: usize,
168        task_count: usize,
169    },
170    /// Parallel task layer completed
171    WorkflowTaskParallelCompleted {
172        timestamp: DateTime<Utc>,
173        workflow_id: String,
174        layer_index: usize,
175        task_count: usize,
176    },
177    /// Workflow deadlock check completed
178    WorkflowDeadlockCheck {
179        timestamp: DateTime<Utc>,
180        workflow_id: String,
181        has_cycles: bool,
182        warnings: Vec<String>,
183    },
184    /// Workflow deadlock timeout occurred
185    WorkflowDeadlockTimeout {
186        timestamp: DateTime<Utc>,
187        workflow_id: String,
188        layer_index: usize,
189        timeout_secs: u64,
190    },
191}
192
193/// Audit log for recording and persisting phase transitions.
194///
195/// Each audit log has a unique transaction ID and persists events
196/// to `.forge/audit/{tx_id}.json` for replay capability.
197pub struct AuditLog {
198    /// Unique transaction identifier
199    tx_id: Uuid,
200    /// Accumulated events for this transaction
201    events: Vec<AuditEvent>,
202    /// Directory for audit file storage
203    audit_dir: PathBuf,
204}
205
206impl AuditLog {
207    /// Creates a new audit log with a fresh transaction ID.
208    ///
209    /// The `.forge/audit` directory is created if it doesn't exist.
210    pub fn new() -> Self {
211        Self::with_dir(PathBuf::from(".forge/audit"))
212    }
213
214    /// Creates a new audit log with a custom audit directory.
215    ///
216    /// # Arguments
217    ///
218    /// * `audit_dir` - Directory path for audit file storage
219    pub fn with_dir(audit_dir: PathBuf) -> Self {
220        Self {
221            tx_id: Uuid::new_v4(),
222            events: Vec::new(),
223            audit_dir,
224        }
225    }
226
227    /// Records an audit event and persists to disk.
228    ///
229    /// # Arguments
230    ///
231    /// * `event` - The audit event to record
232    ///
233    /// # Returns
234    ///
235    /// Returns `Ok(())` on success, or `AuditError` on failure.
236    pub async fn record(&mut self, event: AuditEvent) -> Result<(), AuditError> {
237        self.events.push(event);
238        self.persist().await?;
239        Ok(())
240    }
241
242    /// Persists all events to the audit file.
243    ///
244    /// Events are written as formatted JSON to `.forge/audit/{tx_id}.json`.
245    async fn persist(&self) -> Result<(), AuditError> {
246        // Create audit directory if it doesn't exist
247        tokio::fs::create_dir_all(&self.audit_dir)
248            .await
249            .map_err(|e| AuditError::DirectoryFailed(e.to_string()))?;
250
251        // Serialize events to JSON
252        let json = serde_json::to_string_pretty(&self.events)?;
253
254        // Write to audit file
255        let audit_path = self.audit_dir.join(format!("{}.json", self.tx_id));
256        tokio::fs::write(audit_path, json).await?;
257
258        Ok(())
259    }
260
261    /// Returns a replay of all recorded events.
262    ///
263    /// # Returns
264    ///
265    /// A clone of all events for transaction reconstruction.
266    pub fn replay(&self) -> Vec<AuditEvent> {
267        self.events.clone()
268    }
269
270    /// Returns the transaction ID.
271    ///
272    /// # Returns
273    ///
274    /// The UUID identifying this transaction.
275    pub fn tx_id(&self) -> Uuid {
276        self.tx_id
277    }
278
279    /// Converts the audit log into a vector of events.
280    ///
281    /// This consumes the audit log and returns all events.
282    pub fn into_events(self) -> Vec<AuditEvent> {
283        self.events
284    }
285
286    /// Returns the number of events in the log.
287    #[cfg(test)]
288    pub fn len(&self) -> usize {
289        self.events.len()
290    }
291
292    /// Returns true if the audit log has no events.
293    #[cfg(test)]
294    pub fn is_empty(&self) -> bool {
295        self.events.is_empty()
296    }
297}
298
299impl Default for AuditLog {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
305impl Clone for AuditLog {
306    fn clone(&self) -> Self {
307        Self {
308            tx_id: self.tx_id,
309            events: self.events.clone(),
310            audit_dir: self.audit_dir.clone(),
311        }
312    }
313}
314
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds a log that persists into a fresh temporary directory.
    ///
    /// The `TempDir` is returned alongside the log so the caller keeps the
    /// directory alive for the duration of the test; no test that records
    /// events writes into the working directory.
    fn temp_log() -> (TempDir, AuditLog) {
        let temp_dir = TempDir::new().unwrap();
        let log = AuditLog::with_dir(temp_dir.path().to_path_buf());
        (temp_dir, log)
    }

    #[tokio::test]
    async fn test_audit_log_creation() {
        // `new()` performs no I/O, so it is safe to use without a temp dir.
        let log = AuditLog::new();
        assert!(log.is_empty());
        assert_eq!(log.len(), 0);
        assert_ne!(log.tx_id(), Uuid::nil());
    }

    #[tokio::test]
    async fn test_audit_event_serialization() {
        let event = AuditEvent::Observe {
            timestamp: Utc::now(),
            query: "test query".to_string(),
            symbol_count: 42,
        };

        // Serialize and deserialize
        let json = serde_json::to_string(&event).unwrap();
        let deserialized: AuditEvent = serde_json::from_str(&json).unwrap();

        match deserialized {
            AuditEvent::Observe {
                query,
                symbol_count,
                ..
            } => {
                assert_eq!(query, "test query");
                assert_eq!(symbol_count, 42);
            }
            _ => panic!("Wrong event type"),
        }
    }

    #[tokio::test]
    async fn test_audit_log_record_and_replay() {
        let (_temp_dir, mut log) = temp_log();

        log.record(AuditEvent::Observe {
            timestamp: Utc::now(),
            query: "test".to_string(),
            symbol_count: 1,
        })
        .await
        .unwrap();

        let replayed = log.replay();
        assert_eq!(replayed.len(), 1);
        assert!(matches!(replayed[0], AuditEvent::Observe { .. }));
    }

    #[tokio::test]
    async fn test_audit_log_persistence() {
        let temp_dir = TempDir::new().unwrap();
        let audit_dir = temp_dir.path().to_path_buf();

        // The log is dropped at the end of this scope; the file must outlive it.
        let tx_id = {
            let mut log = AuditLog::with_dir(audit_dir.clone());
            let tx = log.tx_id();
            log.record(AuditEvent::Observe {
                timestamp: Utc::now(),
                query: "persistence test".to_string(),
                symbol_count: 0,
            })
            .await
            .unwrap();
            tx
        };

        // Verify file was created
        let audit_file = audit_dir.join(format!("{}.json", tx_id));
        assert!(audit_file.exists());

        // Verify file contents
        let contents = tokio::fs::read_to_string(audit_file).await.unwrap();
        let events: Vec<AuditEvent> = serde_json::from_str(&contents).unwrap();
        assert_eq!(events.len(), 1);
        assert!(matches!(events[0], AuditEvent::Observe { .. }));
    }

    #[tokio::test]
    async fn test_all_audit_event_variants() {
        let (temp_dir, mut log) = temp_log();
        let now = Utc::now();
        let wf = || "workflow-1".to_string();

        // One instance of every `AuditEvent` variant.
        let all_events = vec![
            AuditEvent::Observe {
                timestamp: now,
                query: "test".to_string(),
                symbol_count: 5,
            },
            AuditEvent::Constrain {
                timestamp: now,
                policy_count: 2,
                violations: 0,
            },
            AuditEvent::Plan {
                timestamp: now,
                step_count: 3,
                estimated_files: 2,
            },
            AuditEvent::Mutate {
                timestamp: now,
                files_modified: vec!["test.rs".to_string()],
            },
            AuditEvent::Verify {
                timestamp: now,
                passed: true,
                diagnostic_count: 0,
            },
            AuditEvent::Commit {
                timestamp: now,
                transaction_id: "tx-123".to_string(),
            },
            AuditEvent::Rollback {
                timestamp: now,
                reason: "test error".to_string(),
                phase: "TestPhase".to_string(),
            },
            AuditEvent::WorkflowStarted {
                timestamp: now,
                workflow_id: wf(),
                task_count: 3,
            },
            AuditEvent::WorkflowTaskStarted {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-1".to_string(),
                task_name: "Task 1".to_string(),
            },
            AuditEvent::WorkflowTaskCompleted {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-1".to_string(),
                task_name: "Task 1".to_string(),
                result: "Success".to_string(),
            },
            AuditEvent::WorkflowTaskFailed {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-2".to_string(),
                task_name: "Task 2".to_string(),
                error: "Task failed".to_string(),
            },
            AuditEvent::WorkflowCompleted {
                timestamp: now,
                workflow_id: wf(),
                total_tasks: 3,
                completed_tasks: 2,
            },
            AuditEvent::WorkflowTaskRolledBack {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-2".to_string(),
                compensation: "undo".to_string(),
            },
            AuditEvent::WorkflowRolledBack {
                timestamp: now,
                workflow_id: wf(),
                reason: "Task failed".to_string(),
                rolled_back_tasks: vec!["task-2".to_string()],
            },
            AuditEvent::WorkflowCancelled {
                timestamp: now,
                workflow_id: wf(),
            },
            AuditEvent::WorkflowTaskTimedOut {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-3".to_string(),
                task_name: "Task 3".to_string(),
                timeout_secs: 30,
            },
            AuditEvent::WorkflowToolFallback {
                timestamp: now,
                workflow_id: wf(),
                task_id: "task-3".to_string(),
                tool_name: "tool".to_string(),
                error: "tool failed".to_string(),
                fallback_action: "skip".to_string(),
            },
            AuditEvent::WorkflowTaskParallelStarted {
                timestamp: now,
                workflow_id: wf(),
                layer_index: 0,
                task_count: 2,
            },
            AuditEvent::WorkflowTaskParallelCompleted {
                timestamp: now,
                workflow_id: wf(),
                layer_index: 0,
                task_count: 2,
            },
            AuditEvent::WorkflowDeadlockCheck {
                timestamp: now,
                workflow_id: wf(),
                has_cycles: false,
                warnings: vec!["long chain".to_string()],
            },
            AuditEvent::WorkflowDeadlockTimeout {
                timestamp: now,
                workflow_id: wf(),
                layer_index: 1,
                timeout_secs: 60,
            },
        ];
        let expected = all_events.len();

        for event in all_events {
            log.record(event).await.unwrap();
        }
        assert_eq!(log.replay().len(), expected);

        // Every variant must also survive the JSON round-trip through the file.
        let audit_file = temp_dir.path().join(format!("{}.json", log.tx_id()));
        let contents = tokio::fs::read_to_string(audit_file).await.unwrap();
        let persisted: Vec<AuditEvent> = serde_json::from_str(&contents).unwrap();
        assert_eq!(persisted.len(), expected);
    }

    #[tokio::test]
    async fn test_workflow_event_serialization() {
        let event = AuditEvent::WorkflowStarted {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            task_count: 3,
        };

        // Serialize and deserialize
        let json = serde_json::to_string(&event).unwrap();
        let deserialized: AuditEvent = serde_json::from_str(&json).unwrap();

        match deserialized {
            AuditEvent::WorkflowStarted {
                workflow_id,
                task_count,
                ..
            } => {
                assert_eq!(workflow_id, "workflow-1");
                assert_eq!(task_count, 3);
            }
            _ => panic!("Wrong event type"),
        }
    }

    #[tokio::test]
    async fn test_workflow_task_timed_out_event_serialization() {
        let event = AuditEvent::WorkflowTaskTimedOut {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            task_id: "task-1".to_string(),
            task_name: "Task 1".to_string(),
            timeout_secs: 30,
        };

        // Serialize and deserialize
        let json = serde_json::to_string(&event).unwrap();
        let deserialized: AuditEvent = serde_json::from_str(&json).unwrap();

        match deserialized {
            AuditEvent::WorkflowTaskTimedOut {
                workflow_id,
                task_id,
                task_name,
                timeout_secs,
                ..
            } => {
                assert_eq!(workflow_id, "workflow-1");
                assert_eq!(task_id, "task-1");
                assert_eq!(task_name, "Task 1");
                assert_eq!(timeout_secs, 30);
            }
            _ => panic!("Wrong event type"),
        }
    }

    #[tokio::test]
    async fn test_workflow_execution_audit_trail() {
        let (_temp_dir, mut log) = temp_log();

        // Simulate workflow execution
        log.record(AuditEvent::WorkflowStarted {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            task_count: 2,
        })
        .await
        .unwrap();

        log.record(AuditEvent::WorkflowTaskStarted {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            task_id: "task-1".to_string(),
            task_name: "Task 1".to_string(),
        })
        .await
        .unwrap();

        log.record(AuditEvent::WorkflowTaskCompleted {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            task_id: "task-1".to_string(),
            task_name: "Task 1".to_string(),
            result: "Success".to_string(),
        })
        .await
        .unwrap();

        log.record(AuditEvent::WorkflowCompleted {
            timestamp: Utc::now(),
            workflow_id: "workflow-1".to_string(),
            total_tasks: 2,
            completed_tasks: 2,
        })
        .await
        .unwrap();

        let events = log.replay();

        // Verify workflow started is first
        assert!(matches!(
            events.first(),
            Some(AuditEvent::WorkflowStarted { .. })
        ));

        // Verify workflow completed is last
        assert!(matches!(
            events.last(),
            Some(AuditEvent::WorkflowCompleted { .. })
        ));

        // Count workflow task events
        let task_event_count = events
            .iter()
            .filter(|e| {
                matches!(
                    e,
                    AuditEvent::WorkflowTaskStarted { .. }
                        | AuditEvent::WorkflowTaskCompleted { .. }
                )
            })
            .count();

        assert_eq!(task_event_count, 2);
    }

    #[tokio::test]
    async fn test_audit_log_into_events() {
        let (_temp_dir, mut log) = temp_log();
        log.record(AuditEvent::Constrain {
            timestamp: Utc::now(),
            policy_count: 1,
            violations: 0,
        })
        .await
        .unwrap();

        let events = log.into_events();
        assert_eq!(events.len(), 1);
        assert!(matches!(events[0], AuditEvent::Constrain { .. }));
    }

    #[tokio::test]
    async fn test_audit_log_clone() {
        let (_temp_dir, mut log) = temp_log();
        log.record(AuditEvent::Plan {
            timestamp: Utc::now(),
            step_count: 1,
            estimated_files: 1,
        })
        .await
        .unwrap();

        let cloned = log.clone();
        assert_eq!(cloned.tx_id(), log.tx_id());
        assert_eq!(cloned.len(), log.len());
    }
}