use std::path::Path;
use std::time::{SystemTime, UNIX_EPOCH};

use blake3::Hasher;
use serde::{Deserialize, Serialize};

use super::error::EmbedError;
49
50#[derive(Debug, Clone, Serialize, Deserialize)]
52pub struct AuditEntry {
53 pub sequence: u64,
55
56 pub timestamp: u64,
58
59 pub prev_hash: String,
61
62 pub hash: String,
64
65 pub operation: AuditOperation,
67}
68
69#[derive(Debug, Clone, Serialize, Deserialize)]
71#[serde(tag = "type")]
72pub enum AuditOperation {
73 LogCreated {
75 version: u32,
76 created_by: String,
77 },
78
79 EmbedStart {
81 repo_path: String,
82 settings_hash: String,
83 },
84
85 EmbedComplete {
87 chunks_count: usize,
88 total_tokens: u64,
89 manifest_hash: String,
90 },
91
92 EmbedFailed {
94 error_code: String,
95 error_message: String,
96 },
97
98 ManifestLoaded {
100 path: String,
101 manifest_hash: String,
102 chunks_count: usize,
103 },
104
105 ManifestSaved {
107 path: String,
108 manifest_hash: String,
109 },
110
111 DiffComputed {
113 added: usize,
114 modified: usize,
115 removed: usize,
116 },
117
118 BatchStart {
120 repo_count: usize,
121 total_settings_hash: String,
122 },
123
124 BatchRepoComplete {
126 repo_index: usize,
127 repo_path: String,
128 chunks_count: usize,
129 success: bool,
130 },
131
132 BatchComplete {
134 successful: usize,
135 failed: usize,
136 total_chunks: usize,
137 },
138
139 SecurityScan {
141 findings_count: usize,
142 secrets_redacted: bool,
143 },
144
145 CheckpointCreated {
147 checkpoint_hash: String,
148 files_processed: usize,
149 chunks_generated: usize,
150 },
151
152 ResumeFromCheckpoint {
154 checkpoint_hash: String,
155 files_remaining: usize,
156 },
157
158 Custom {
160 name: String,
161 data: String,
162 },
163}
164
/// Format version written into new logs and into their `LogCreated` entry.
pub const AUDIT_LOG_VERSION: u32 = 1;
167
168#[derive(Debug, Clone, Serialize, Deserialize)]
170pub struct AuditLog {
171 pub version: u32,
173
174 pub entries: Vec<AuditEntry>,
176}
177
178impl Default for AuditLog {
179 fn default() -> Self {
180 Self::new()
181 }
182}
183
184impl AuditLog {
185 pub fn new() -> Self {
187 let mut log = Self {
188 version: AUDIT_LOG_VERSION,
189 entries: Vec::new(),
190 };
191
192 log.record(AuditOperation::LogCreated {
194 version: AUDIT_LOG_VERSION,
195 created_by: format!("infiniloom-engine/{}", env!("CARGO_PKG_VERSION")),
196 });
197
198 log
199 }
200
201 pub fn record(&mut self, operation: AuditOperation) -> String {
205 let sequence = self.entries.len() as u64;
206 let timestamp = SystemTime::now()
207 .duration_since(UNIX_EPOCH)
208 .unwrap_or_default()
209 .as_secs();
210
211 let prev_hash = self
212 .entries
213 .last()
214 .map(|e| e.hash.clone())
215 .unwrap_or_default();
216
217 let hash = compute_entry_hash(sequence, timestamp, &prev_hash, &operation);
219
220 let entry = AuditEntry {
221 sequence,
222 timestamp,
223 prev_hash,
224 hash: hash.clone(),
225 operation,
226 };
227
228 self.entries.push(entry);
229 hash
230 }
231
232 pub fn verify_integrity(&self) -> bool {
236 let mut prev_hash = String::new();
237
238 for entry in &self.entries {
239 if entry.prev_hash != prev_hash {
241 return false;
242 }
243
244 let expected_hash =
246 compute_entry_hash(entry.sequence, entry.timestamp, &prev_hash, &entry.operation);
247
248 if entry.hash != expected_hash {
249 return false;
250 }
251
252 prev_hash = entry.hash.clone();
253 }
254
255 true
256 }
257
258 pub fn verify_integrity_detailed(&self) -> IntegrityReport {
260 let mut errors = Vec::new();
261 let mut prev_hash = String::new();
262
263 for (index, entry) in self.entries.iter().enumerate() {
264 if entry.prev_hash != prev_hash {
266 errors.push(IntegrityError::ChainBroken {
267 entry_index: index,
268 expected_prev: prev_hash.clone(),
269 actual_prev: entry.prev_hash.clone(),
270 });
271 }
272
273 let expected_hash =
275 compute_entry_hash(entry.sequence, entry.timestamp, &prev_hash, &entry.operation);
276
277 if entry.hash != expected_hash {
278 errors.push(IntegrityError::HashMismatch {
279 entry_index: index,
280 expected: expected_hash,
281 actual: entry.hash.clone(),
282 });
283 }
284
285 prev_hash = entry.hash.clone();
286 }
287
288 IntegrityReport {
289 is_valid: errors.is_empty(),
290 entries_checked: self.entries.len(),
291 errors,
292 }
293 }
294
295 pub fn len(&self) -> usize {
297 self.entries.len()
298 }
299
300 pub fn is_empty(&self) -> bool {
302 self.entries.len() <= 1
303 }
304
305 pub fn head_hash(&self) -> Option<&str> {
307 self.entries.last().map(|e| e.hash.as_str())
308 }
309
310 pub fn filter_by_type<F>(&self, predicate: F) -> Vec<&AuditEntry>
312 where
313 F: Fn(&AuditOperation) -> bool,
314 {
315 self.entries
316 .iter()
317 .filter(|e| predicate(&e.operation))
318 .collect()
319 }
320
321 pub fn filter_by_time(&self, start: u64, end: u64) -> Vec<&AuditEntry> {
323 self.entries
324 .iter()
325 .filter(|e| e.timestamp >= start && e.timestamp <= end)
326 .collect()
327 }
328
329 pub fn save(&self, path: &Path) -> Result<(), EmbedError> {
331 let json = serde_json::to_string_pretty(self).map_err(|e| EmbedError::SerializationError {
332 reason: format!("Failed to serialize audit log: {}", e),
333 })?;
334
335 std::fs::write(path, json).map_err(|e| EmbedError::IoError {
336 path: path.to_path_buf(),
337 source: e,
338 })?;
339
340 Ok(())
341 }
342
343 pub fn save_jsonl(&self, path: &Path) -> Result<(), EmbedError> {
347 use std::io::Write;
348
349 let file = std::fs::File::create(path).map_err(|e| EmbedError::IoError {
350 path: path.to_path_buf(),
351 source: e,
352 })?;
353
354 let mut writer = std::io::BufWriter::new(file);
355
356 let header = serde_json::json!({
358 "audit_log_version": self.version,
359 "entry_count": self.entries.len()
360 });
361 writeln!(writer, "{}", header).map_err(|e| EmbedError::IoError {
362 path: path.to_path_buf(),
363 source: e,
364 })?;
365
366 for entry in &self.entries {
368 let line =
369 serde_json::to_string(entry).map_err(|e| EmbedError::SerializationError {
370 reason: format!("Failed to serialize audit entry: {}", e),
371 })?;
372 writeln!(writer, "{}", line).map_err(|e| EmbedError::IoError {
373 path: path.to_path_buf(),
374 source: e,
375 })?;
376 }
377
378 writer.flush().map_err(|e| EmbedError::IoError {
379 path: path.to_path_buf(),
380 source: e,
381 })?;
382
383 Ok(())
384 }
385
386 pub fn load(path: &Path) -> Result<Self, EmbedError> {
388 let content = std::fs::read_to_string(path).map_err(|e| EmbedError::IoError {
389 path: path.to_path_buf(),
390 source: e,
391 })?;
392
393 let log: Self =
394 serde_json::from_str(&content).map_err(|e| EmbedError::DeserializationError {
395 reason: format!("Failed to deserialize audit log: {}", e),
396 })?;
397
398 if !log.verify_integrity() {
400 return Err(EmbedError::ManifestCorrupted {
401 path: path.to_path_buf(),
402 expected: "valid hash chain".to_string(),
403 actual: "hash chain broken".to_string(),
404 });
405 }
406
407 Ok(log)
408 }
409
410 pub fn append_entry_to_file(path: &Path, entry: &AuditEntry) -> Result<(), EmbedError> {
414 use std::io::Write;
415
416 let file = std::fs::OpenOptions::new()
417 .create(true)
418 .append(true)
419 .open(path)
420 .map_err(|e| EmbedError::IoError {
421 path: path.to_path_buf(),
422 source: e,
423 })?;
424
425 let mut writer = std::io::BufWriter::new(file);
426 let line = serde_json::to_string(entry).map_err(|e| EmbedError::SerializationError {
427 reason: format!("Failed to serialize audit entry: {}", e),
428 })?;
429 writeln!(writer, "{}", line).map_err(|e| EmbedError::IoError {
430 path: path.to_path_buf(),
431 source: e,
432 })?;
433
434 writer.flush().map_err(|e| EmbedError::IoError {
435 path: path.to_path_buf(),
436 source: e,
437 })?;
438
439 Ok(())
440 }
441}
442
443fn compute_entry_hash(
445 sequence: u64,
446 timestamp: u64,
447 prev_hash: &str,
448 operation: &AuditOperation,
449) -> String {
450 let mut hasher = Hasher::new();
451
452 hasher.update(&sequence.to_le_bytes());
454
455 hasher.update(×tamp.to_le_bytes());
457
458 hasher.update(prev_hash.as_bytes());
460
461 let op_json = serde_json::to_string(operation).unwrap_or_default();
463 hasher.update(op_json.as_bytes());
464
465 hasher.finalize().to_hex().to_string()
467}
468
469#[derive(Debug, Clone)]
471pub struct IntegrityReport {
472 pub is_valid: bool,
474
475 pub entries_checked: usize,
477
478 pub errors: Vec<IntegrityError>,
480}
481
/// A single inconsistency found while verifying an audit log's hash chain.
#[derive(Debug, Clone)]
pub enum IntegrityError {
    /// The entry's `prev_hash` differs from the stored hash of the entry
    /// before it (or from the empty string for the first entry).
    ChainBroken {
        entry_index: usize,
        expected_prev: String,
        actual_prev: String,
    },

    /// The entry's stored hash differs from the hash recomputed from its
    /// contents.
    HashMismatch {
        entry_index: usize,
        expected: String,
        actual: String,
    },
}

/// Returns at most the first eight characters of `hash`.
///
/// The cut is made on a character boundary. The values shown in error
/// messages may come from a tampered file and are therefore not guaranteed to
/// be ASCII hex; slicing at a fixed byte offset would panic on them.
fn hash_preview(hash: &str) -> &str {
    match hash.char_indices().nth(8) {
        Some((end, _)) => &hash[..end],
        None => hash,
    }
}

/// Human-readable message with every hash shortened to its first eight
/// characters.
impl std::fmt::Display for IntegrityError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::ChainBroken {
                entry_index,
                expected_prev,
                actual_prev,
            } => {
                write!(
                    f,
                    "Chain broken at entry {}: expected prev_hash '{}', got '{}'",
                    entry_index,
                    hash_preview(expected_prev),
                    hash_preview(actual_prev)
                )
            }
            Self::HashMismatch {
                entry_index,
                expected,
                actual,
            } => {
                write!(
                    f,
                    "Hash mismatch at entry {}: expected '{}', got '{}'",
                    entry_index,
                    hash_preview(expected),
                    hash_preview(actual)
                )
            }
        }
    }
}
532
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_audit_log() {
        let log = AuditLog::new();
        assert_eq!(log.version, AUDIT_LOG_VERSION);
        // A fresh log holds exactly the `LogCreated` entry.
        assert_eq!(log.entries.len(), 1);
        assert!(log.verify_integrity());
    }

    #[test]
    fn test_record_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/test/repo".to_string(),
            settings_hash: "abc123".to_string(),
        });

        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "def456".to_string(),
        });

        assert_eq!(log.entries.len(), 3);
        assert!(log.verify_integrity());
    }

    #[test]
    fn test_hash_chain_integrity() {
        let mut log = AuditLog::new();

        for i in 0..10 {
            log.record(AuditOperation::Custom {
                name: format!("test_{}", i),
                data: format!("data_{}", i),
            });
        }

        assert!(log.verify_integrity());

        // Tamper with an entry in the middle of the chain; fail loudly if the
        // entry is not the kind this test expects.
        match &mut log.entries[5].operation {
            AuditOperation::Custom { data, .. } => *data = "tampered".to_string(),
            other => panic!("expected a Custom operation, got {:?}", other),
        }

        assert!(!log.verify_integrity());
    }

    #[test]
    fn test_verify_integrity_detailed() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/test".to_string(),
            settings_hash: "hash".to_string(),
        });

        let report = log.verify_integrity_detailed();
        assert!(report.is_valid);
        assert_eq!(report.entries_checked, 2);
        assert!(report.errors.is_empty());
    }

    #[test]
    fn test_tamper_detection_chain_broken() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom {
            name: "op1".to_string(),
            data: "data1".to_string(),
        });
        log.record(AuditOperation::Custom {
            name: "op2".to_string(),
            data: "data2".to_string(),
        });

        // Break the link between the last entry and its predecessor.
        log.entries[2].prev_hash = "fake_hash".to_string();

        let report = log.verify_integrity_detailed();
        assert!(!report.is_valid);
        assert!(!report.errors.is_empty());
        assert!(matches!(
            report.errors[0],
            IntegrityError::ChainBroken { .. }
        ));
    }

    #[test]
    fn test_tamper_detection_hash_mismatch() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom {
            name: "op1".to_string(),
            data: "data1".to_string(),
        });

        // Replace the stored hash so it no longer matches the entry contents.
        log.entries[1].hash = "fake_hash".to_string();

        let report = log.verify_integrity_detailed();
        assert!(!report.is_valid);
        assert!(report
            .errors
            .iter()
            .any(|e| matches!(e, IntegrityError::HashMismatch { .. })));
    }

    #[test]
    fn test_head_hash() {
        let mut log = AuditLog::new();

        let initial_head = log.head_hash().map(String::from);
        assert!(initial_head.is_some());

        let new_hash = log.record(AuditOperation::Custom {
            name: "test".to_string(),
            data: "data".to_string(),
        });

        assert_eq!(log.head_hash(), Some(new_hash.as_str()));
        assert_ne!(log.head_hash().map(String::from), initial_head);
    }

    #[test]
    fn test_filter_by_type() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/repo1".to_string(),
            settings_hash: "h1".to_string(),
        });
        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "m1".to_string(),
        });
        log.record(AuditOperation::EmbedStart {
            repo_path: "/repo2".to_string(),
            settings_hash: "h2".to_string(),
        });

        let starts = log.filter_by_type(|op| matches!(op, AuditOperation::EmbedStart { .. }));
        assert_eq!(starts.len(), 2);

        let completes = log.filter_by_type(|op| matches!(op, AuditOperation::EmbedComplete { .. }));
        assert_eq!(completes.len(), 1);
    }

    #[test]
    fn test_filter_by_time() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom {
            name: "test".to_string(),
            data: "data".to_string(),
        });

        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let entries = log.filter_by_time(now - 60, now + 60);
        assert!(!entries.is_empty());
    }

    #[test]
    fn test_save_and_load() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let log_path = temp_dir.path().join("audit.json");

        let mut log = AuditLog::new();
        log.record(AuditOperation::EmbedStart {
            repo_path: "/test/repo".to_string(),
            settings_hash: "abc123".to_string(),
        });
        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "def456".to_string(),
        });

        log.save(&log_path).unwrap();

        let loaded = AuditLog::load(&log_path).unwrap();
        assert_eq!(loaded.entries.len(), log.entries.len());
        assert!(loaded.verify_integrity());

        // The round trip must preserve the chain exactly.
        for (original, restored) in log.entries.iter().zip(loaded.entries.iter()) {
            assert_eq!(original.hash, restored.hash);
            assert_eq!(original.prev_hash, restored.prev_hash);
        }
    }

    #[test]
    fn test_save_jsonl() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let log_path = temp_dir.path().join("audit.jsonl");

        let mut log = AuditLog::new();
        log.record(AuditOperation::Custom {
            name: "test".to_string(),
            data: "data".to_string(),
        });

        log.save_jsonl(&log_path).unwrap();

        let content = std::fs::read_to_string(&log_path).unwrap();
        assert!(!content.is_empty());

        // Header line + `LogCreated` entry + the custom entry.
        let lines: Vec<_> = content.lines().collect();
        assert_eq!(lines.len(), 3);
    }

    #[test]
    fn test_security_scan_operation() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::SecurityScan {
            findings_count: 5,
            secrets_redacted: true,
        });

        assert!(log.verify_integrity());

        let scans = log.filter_by_type(|op| matches!(op, AuditOperation::SecurityScan { .. }));
        assert_eq!(scans.len(), 1);

        // Fail rather than silently skip the checks if the variant is wrong.
        match &scans[0].operation {
            AuditOperation::SecurityScan {
                findings_count,
                secrets_redacted,
            } => {
                assert_eq!(*findings_count, 5);
                assert!(*secrets_redacted);
            }
            other => panic!("expected a SecurityScan operation, got {:?}", other),
        }
    }

    #[test]
    fn test_batch_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::BatchStart {
            repo_count: 3,
            total_settings_hash: "settings_hash".to_string(),
        });

        for i in 0..3 {
            log.record(AuditOperation::BatchRepoComplete {
                repo_index: i,
                repo_path: format!("/repo{}", i),
                chunks_count: 100 * (i + 1),
                success: true,
            });
        }

        log.record(AuditOperation::BatchComplete {
            successful: 3,
            failed: 0,
            total_chunks: 600,
        });

        assert!(log.verify_integrity());
        // `LogCreated` + `BatchStart` + 3 repos + `BatchComplete`.
        assert_eq!(log.entries.len(), 6);
    }

    #[test]
    fn test_checkpoint_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::CheckpointCreated {
            checkpoint_hash: "ckpt_abc123".to_string(),
            files_processed: 50,
            chunks_generated: 200,
        });

        log.record(AuditOperation::ResumeFromCheckpoint {
            checkpoint_hash: "ckpt_abc123".to_string(),
            files_remaining: 100,
        });

        assert!(log.verify_integrity());
    }
}