1use std::path::Path;
43use std::time::{SystemTime, UNIX_EPOCH};
44
45use blake3::Hasher;
46use serde::{Deserialize, Serialize};
47
48use super::error::EmbedError;
49
/// A single tamper-evident record in the audit log.
///
/// Entries form a hash chain: each entry's `hash` covers its own fields plus
/// the `hash` of the preceding entry (see `compute_entry_hash`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
    /// Zero-based position of this entry in the log.
    pub sequence: u64,

    /// Creation time in seconds since the Unix epoch (0 if the system clock
    /// was before the epoch when the entry was recorded).
    pub timestamp: u64,

    /// Hex hash of the previous entry; empty string for the genesis entry.
    pub prev_hash: String,

    /// Hex BLAKE3 hash sealing this entry (sequence, timestamp, prev_hash,
    /// and the JSON-serialized operation).
    pub hash: String,

    /// The operation this entry records.
    pub operation: AuditOperation,
}
68
/// Operations recordable in the audit log.
///
/// Serialized as internally tagged JSON: the `"type"` field carries the
/// variant name alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AuditOperation {
    /// Genesis entry, written automatically by `AuditLog::new`.
    LogCreated { version: u32, created_by: String },

    /// An embedding run started for one repository.
    EmbedStart { repo_path: String, settings_hash: String },

    /// An embedding run finished successfully.
    EmbedComplete { chunks_count: usize, total_tokens: u64, manifest_hash: String },

    /// An embedding run failed with the given code/message.
    EmbedFailed { error_code: String, error_message: String },

    /// A manifest was read from disk.
    ManifestLoaded { path: String, manifest_hash: String, chunks_count: usize },

    /// A manifest was written to disk.
    ManifestSaved { path: String, manifest_hash: String },

    /// A diff between manifests was computed.
    DiffComputed { added: usize, modified: usize, removed: usize },

    /// A multi-repository batch run started.
    BatchStart { repo_count: usize, total_settings_hash: String },

    /// One repository within a batch finished (successfully or not).
    BatchRepoComplete { repo_index: usize, repo_path: String, chunks_count: usize, success: bool },

    /// A batch run finished with per-repository success/failure counts.
    BatchComplete { successful: usize, failed: usize, total_chunks: usize },

    /// A security scan ran over the processed content.
    SecurityScan { findings_count: usize, secrets_redacted: bool },

    /// A resumable checkpoint was written.
    CheckpointCreated { checkpoint_hash: String, files_processed: usize, chunks_generated: usize },

    /// Processing resumed from a previously written checkpoint.
    ResumeFromCheckpoint { checkpoint_hash: String, files_remaining: usize },

    /// Escape hatch for callers to record arbitrary named events.
    Custom { name: String, data: String },
}
115
/// Current audit log format version, stored in `AuditLog::version` and in
/// the genesis `LogCreated` entry.
pub const AUDIT_LOG_VERSION: u32 = 1;
118
/// An append-only, hash-chained audit log.
///
/// Integrity is tamper-*evident*, not tamper-proof: entries are linked by
/// BLAKE3 hashes but carry no signature, so a party holding the file could
/// rebuild a consistent chain from any point.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditLog {
    /// Format version written at creation (see [`AUDIT_LOG_VERSION`]).
    pub version: u32,

    /// All entries in record order; index 0 is the genesis entry for logs
    /// created via `AuditLog::new`.
    pub entries: Vec<AuditEntry>,
}
128
129impl Default for AuditLog {
130 fn default() -> Self {
131 Self::new()
132 }
133}
134
135impl AuditLog {
136 pub fn new() -> Self {
138 let mut log = Self { version: AUDIT_LOG_VERSION, entries: Vec::new() };
139
140 log.record(AuditOperation::LogCreated {
142 version: AUDIT_LOG_VERSION,
143 created_by: format!("infiniloom-engine/{}", env!("CARGO_PKG_VERSION")),
144 });
145
146 log
147 }
148
149 pub fn record(&mut self, operation: AuditOperation) -> String {
153 let sequence = self.entries.len() as u64;
154 let timestamp = SystemTime::now()
155 .duration_since(UNIX_EPOCH)
156 .unwrap_or_default()
157 .as_secs();
158
159 let prev_hash = self
160 .entries
161 .last()
162 .map(|e| e.hash.clone())
163 .unwrap_or_default();
164
165 let hash = compute_entry_hash(sequence, timestamp, &prev_hash, &operation);
167
168 let entry = AuditEntry { sequence, timestamp, prev_hash, hash: hash.clone(), operation };
169
170 self.entries.push(entry);
171 hash
172 }
173
174 pub fn verify_integrity(&self) -> bool {
178 let mut prev_hash = String::new();
179
180 for entry in &self.entries {
181 if entry.prev_hash != prev_hash {
183 return false;
184 }
185
186 let expected_hash =
188 compute_entry_hash(entry.sequence, entry.timestamp, &prev_hash, &entry.operation);
189
190 if entry.hash != expected_hash {
191 return false;
192 }
193
194 prev_hash = entry.hash.clone();
195 }
196
197 true
198 }
199
200 pub fn verify_integrity_detailed(&self) -> IntegrityReport {
202 let mut errors = Vec::new();
203 let mut prev_hash = String::new();
204
205 for (index, entry) in self.entries.iter().enumerate() {
206 if entry.prev_hash != prev_hash {
208 errors.push(IntegrityError::ChainBroken {
209 entry_index: index,
210 expected_prev: prev_hash.clone(),
211 actual_prev: entry.prev_hash.clone(),
212 });
213 }
214
215 let expected_hash =
217 compute_entry_hash(entry.sequence, entry.timestamp, &prev_hash, &entry.operation);
218
219 if entry.hash != expected_hash {
220 errors.push(IntegrityError::HashMismatch {
221 entry_index: index,
222 expected: expected_hash,
223 actual: entry.hash.clone(),
224 });
225 }
226
227 prev_hash = entry.hash.clone();
228 }
229
230 IntegrityReport { is_valid: errors.is_empty(), entries_checked: self.entries.len(), errors }
231 }
232
233 pub fn len(&self) -> usize {
235 self.entries.len()
236 }
237
238 pub fn is_empty(&self) -> bool {
240 self.entries.len() <= 1
241 }
242
243 pub fn head_hash(&self) -> Option<&str> {
245 self.entries.last().map(|e| e.hash.as_str())
246 }
247
248 pub fn filter_by_type<F>(&self, predicate: F) -> Vec<&AuditEntry>
250 where
251 F: Fn(&AuditOperation) -> bool,
252 {
253 self.entries
254 .iter()
255 .filter(|e| predicate(&e.operation))
256 .collect()
257 }
258
259 pub fn filter_by_time(&self, start: u64, end: u64) -> Vec<&AuditEntry> {
261 self.entries
262 .iter()
263 .filter(|e| e.timestamp >= start && e.timestamp <= end)
264 .collect()
265 }
266
267 pub fn save(&self, path: &Path) -> Result<(), EmbedError> {
269 let json =
270 serde_json::to_string_pretty(self).map_err(|e| EmbedError::SerializationError {
271 reason: format!("Failed to serialize audit log: {}", e),
272 })?;
273
274 std::fs::write(path, json)
275 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
276
277 Ok(())
278 }
279
280 pub fn save_jsonl(&self, path: &Path) -> Result<(), EmbedError> {
284 use std::io::Write;
285
286 let file = std::fs::File::create(path)
287 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
288
289 let mut writer = std::io::BufWriter::new(file);
290
291 let header = serde_json::json!({
293 "audit_log_version": self.version,
294 "entry_count": self.entries.len()
295 });
296 writeln!(writer, "{}", header)
297 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
298
299 for entry in &self.entries {
301 let line =
302 serde_json::to_string(entry).map_err(|e| EmbedError::SerializationError {
303 reason: format!("Failed to serialize audit entry: {}", e),
304 })?;
305 writeln!(writer, "{}", line)
306 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
307 }
308
309 writer
310 .flush()
311 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
312
313 Ok(())
314 }
315
316 pub fn load(path: &Path) -> Result<Self, EmbedError> {
318 let content = std::fs::read_to_string(path)
319 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
320
321 let log: Self =
322 serde_json::from_str(&content).map_err(|e| EmbedError::DeserializationError {
323 reason: format!("Failed to deserialize audit log: {}", e),
324 })?;
325
326 if !log.verify_integrity() {
328 return Err(EmbedError::ManifestCorrupted {
329 path: path.to_path_buf(),
330 expected: "valid hash chain".to_owned(),
331 actual: "hash chain broken".to_owned(),
332 });
333 }
334
335 Ok(log)
336 }
337
338 pub fn append_entry_to_file(path: &Path, entry: &AuditEntry) -> Result<(), EmbedError> {
342 use std::io::Write;
343
344 let file = std::fs::OpenOptions::new()
345 .create(true)
346 .append(true)
347 .open(path)
348 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
349
350 let mut writer = std::io::BufWriter::new(file);
351 let line = serde_json::to_string(entry).map_err(|e| EmbedError::SerializationError {
352 reason: format!("Failed to serialize audit entry: {}", e),
353 })?;
354 writeln!(writer, "{}", line)
355 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
356
357 writer
358 .flush()
359 .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
360
361 Ok(())
362 }
363}
364
/// Computes the hex BLAKE3 hash that seals one audit entry.
///
/// The exact byte layout fed to the hasher — little-endian `sequence`,
/// little-endian `timestamp`, raw `prev_hash` bytes, then the
/// JSON-serialized operation — is a *persisted format*: changing it in any
/// way invalidates verification of every previously written log.
fn compute_entry_hash(
    sequence: u64,
    timestamp: u64,
    prev_hash: &str,
    operation: &AuditOperation,
) -> String {
    let mut hasher = Hasher::new();

    hasher.update(&sequence.to_le_bytes());

    hasher.update(&timestamp.to_le_bytes());

    // Empty string for the genesis entry (see `AuditLog::record`).
    hasher.update(prev_hash.as_bytes());

    // NOTE(review): a serialization failure silently hashes an empty string
    // here; serde_json on this enum should be infallible, but confirm —
    // a hard error would be safer than two distinct ops hashing alike.
    let op_json = serde_json::to_string(operation).unwrap_or_default();
    hasher.update(op_json.as_bytes());

    hasher.finalize().to_hex().to_string()
}
390
/// Outcome of `AuditLog::verify_integrity_detailed`.
#[derive(Debug, Clone)]
pub struct IntegrityReport {
    /// `true` when no integrity errors were found.
    pub is_valid: bool,

    /// Number of entries examined (the full log length).
    pub entries_checked: usize,

    /// Every problem found, in entry order; empty when `is_valid`.
    pub errors: Vec<IntegrityError>,
}
403
/// A single integrity violation detected while walking the hash chain.
#[derive(Debug, Clone)]
pub enum IntegrityError {
    /// An entry's `prev_hash` does not match the hash of the entry before it.
    ChainBroken { entry_index: usize, expected_prev: String, actual_prev: String },

    /// An entry's stored `hash` does not match the recomputed value.
    HashMismatch { entry_index: usize, expected: String, actual: String },
}
413
414impl std::fmt::Display for IntegrityError {
415 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
416 match self {
417 Self::ChainBroken { entry_index, expected_prev, actual_prev } => {
418 write!(
419 f,
420 "Chain broken at entry {}: expected prev_hash '{}', got '{}'",
421 entry_index,
422 &expected_prev[..8.min(expected_prev.len())],
423 &actual_prev[..8.min(actual_prev.len())]
424 )
425 },
426 Self::HashMismatch { entry_index, expected, actual } => {
427 write!(
428 f,
429 "Hash mismatch at entry {}: expected '{}', got '{}'",
430 entry_index,
431 &expected[..8.min(expected.len())],
432 &actual[..8.min(actual.len())]
433 )
434 },
435 }
436 }
437}
438
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_new_audit_log() {
        let log = AuditLog::new();
        assert_eq!(log.version, AUDIT_LOG_VERSION);
        // A fresh log already contains the genesis `LogCreated` entry.
        assert_eq!(log.entries.len(), 1);
        assert!(log.verify_integrity());
    }

    #[test]
    fn test_record_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/test/repo".to_owned(),
            settings_hash: "abc123".to_owned(),
        });

        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "def456".to_owned(),
        });

        // Genesis entry + the two recorded operations.
        assert_eq!(log.entries.len(), 3);
        assert!(log.verify_integrity());
    }

    #[test]
    fn test_hash_chain_integrity() {
        let mut log = AuditLog::new();

        for i in 0..10 {
            log.record(AuditOperation::Custom {
                name: format!("test_{}", i),
                data: format!("data_{}", i),
            });
        }

        assert!(log.verify_integrity());

        // Tamper with a mid-chain entry's payload; its hash no longer matches.
        if let AuditOperation::Custom { ref mut data, .. } = log.entries[5].operation {
            *data = "tampered".to_owned();
        }

        assert!(!log.verify_integrity());
    }

    #[test]
    fn test_verify_integrity_detailed() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/test".to_owned(),
            settings_hash: "hash".to_owned(),
        });

        let report = log.verify_integrity_detailed();
        assert!(report.is_valid);
        assert_eq!(report.entries_checked, 2);
        assert!(report.errors.is_empty());
    }

    #[test]
    fn test_tamper_detection_chain_broken() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom { name: "op1".to_owned(), data: "data1".to_owned() });
        log.record(AuditOperation::Custom { name: "op2".to_owned(), data: "data2".to_owned() });

        // Break the link between entries 1 and 2.
        log.entries[2].prev_hash = "fake_hash".to_owned();

        let report = log.verify_integrity_detailed();
        assert!(!report.is_valid);
        assert!(!report.errors.is_empty());
        assert!(matches!(report.errors[0], IntegrityError::ChainBroken { .. }));
    }

    #[test]
    fn test_tamper_detection_hash_mismatch() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom { name: "op1".to_owned(), data: "data1".to_owned() });

        // Overwrite a stored hash; the recomputed value will disagree.
        log.entries[1].hash = "fake_hash".to_owned();

        let report = log.verify_integrity_detailed();
        assert!(!report.is_valid);
        assert!(report
            .errors
            .iter()
            .any(|e| matches!(e, IntegrityError::HashMismatch { .. })));
    }

    #[test]
    fn test_head_hash() {
        let mut log = AuditLog::new();

        let initial_head = log.head_hash().map(String::from);
        assert!(initial_head.is_some());

        let new_hash =
            log.record(AuditOperation::Custom { name: "test".to_owned(), data: "data".to_owned() });

        assert_eq!(log.head_hash(), Some(new_hash.as_str()));
        assert_ne!(log.head_hash().map(String::from), initial_head);
    }

    #[test]
    fn test_filter_by_type() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::EmbedStart {
            repo_path: "/repo1".to_owned(),
            settings_hash: "h1".to_owned(),
        });
        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "m1".to_owned(),
        });
        log.record(AuditOperation::EmbedStart {
            repo_path: "/repo2".to_owned(),
            settings_hash: "h2".to_owned(),
        });

        let starts = log.filter_by_type(|op| matches!(op, AuditOperation::EmbedStart { .. }));
        assert_eq!(starts.len(), 2);

        let completes = log.filter_by_type(|op| matches!(op, AuditOperation::EmbedComplete { .. }));
        assert_eq!(completes.len(), 1);
    }

    #[test]
    fn test_filter_by_time() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::Custom { name: "test".to_owned(), data: "data".to_owned() });

        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();

        let entries = log.filter_by_time(now - 60, now + 60);
        assert!(!entries.is_empty());
    }

    #[test]
    fn test_save_and_load() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let log_path = temp_dir.path().join("audit.json");

        let mut log = AuditLog::new();
        log.record(AuditOperation::EmbedStart {
            repo_path: "/test/repo".to_owned(),
            settings_hash: "abc123".to_owned(),
        });
        log.record(AuditOperation::EmbedComplete {
            chunks_count: 100,
            total_tokens: 50000,
            manifest_hash: "def456".to_owned(),
        });

        log.save(&log_path).unwrap();

        let loaded = AuditLog::load(&log_path).unwrap();
        assert_eq!(loaded.entries.len(), log.entries.len());
        assert!(loaded.verify_integrity());

        // The round trip must preserve every hash and chain link exactly.
        for (orig, loaded) in log.entries.iter().zip(loaded.entries.iter()) {
            assert_eq!(orig.hash, loaded.hash);
            assert_eq!(orig.prev_hash, loaded.prev_hash);
        }
    }

    #[test]
    fn test_save_jsonl() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let log_path = temp_dir.path().join("audit.jsonl");

        let mut log = AuditLog::new();
        log.record(AuditOperation::Custom { name: "test".to_owned(), data: "data".to_owned() });

        log.save_jsonl(&log_path).unwrap();

        let content = std::fs::read_to_string(&log_path).unwrap();
        assert!(!content.is_empty());

        // 1 header line + genesis entry + the recorded entry.
        let lines: Vec<_> = content.lines().collect();
        assert_eq!(lines.len(), 3);
    }

    #[test]
    fn test_security_scan_operation() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::SecurityScan { findings_count: 5, secrets_redacted: true });

        assert!(log.verify_integrity());

        let scans = log.filter_by_type(|op| matches!(op, AuditOperation::SecurityScan { .. }));
        assert_eq!(scans.len(), 1);

        if let AuditOperation::SecurityScan { findings_count, secrets_redacted } =
            &scans[0].operation
        {
            assert_eq!(*findings_count, 5);
            assert!(*secrets_redacted);
        }
    }

    #[test]
    fn test_batch_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::BatchStart {
            repo_count: 3,
            total_settings_hash: "settings_hash".to_owned(),
        });

        for i in 0..3 {
            log.record(AuditOperation::BatchRepoComplete {
                repo_index: i,
                repo_path: format!("/repo{}", i),
                chunks_count: 100 * (i + 1),
                success: true,
            });
        }

        log.record(AuditOperation::BatchComplete { successful: 3, failed: 0, total_chunks: 600 });

        assert!(log.verify_integrity());
        // Genesis + BatchStart + 3 repo completions + BatchComplete.
        assert_eq!(log.entries.len(), 6);
    }

    #[test]
    fn test_checkpoint_operations() {
        let mut log = AuditLog::new();

        log.record(AuditOperation::CheckpointCreated {
            checkpoint_hash: "ckpt_abc123".to_owned(),
            files_processed: 50,
            chunks_generated: 200,
        });

        log.record(AuditOperation::ResumeFromCheckpoint {
            checkpoint_hash: "ckpt_abc123".to_owned(),
            files_remaining: 100,
        });

        assert!(log.verify_integrity());
    }
}