Skip to main content

hematite/agent/
redact_audit.rs

1// Redaction audit trail — metadata-only JSONL log.
2//
3// Written to ~/.hematite/redact_audit.jsonl on every MCP tool call when
4// edge redaction is active. Never logs raw output, original values, or
5// summaries — only call metadata and redaction statistics.
6//
7// Each line is a self-contained JSON object (JSONL format).
8
9use std::collections::BTreeMap;
10use std::io::Write;
11use std::path::PathBuf;
12
/// Metadata describing one redaction event, as written to the audit log.
///
/// `try_record` emits each field under a JSON key of the same name and
/// derives a shrink ratio from `input_chars` and `output_chars`.
#[derive(Debug)]
pub struct AuditEntry {
    /// Logged verbatim under the `"topic"` key.
    /// NOTE(review): presumably the MCP tool/topic that was called — confirm with callers.
    pub topic: String,
    /// Redaction mode; logged as its lowercase string name.
    pub mode: RedactMode,
    /// Named counters, logged as a JSON object of name -> count.
    /// NOTE(review): presumably tier-1 pattern name -> number of matches — confirm.
    pub tier1_hits: BTreeMap<String, usize>,
    /// Logged verbatim under the `"semantic_applied"` key.
    pub semantic_applied: bool,
    /// Denominator of the logged shrink ratio.
    /// NOTE(review): presumably the size of the text before redaction — confirm.
    pub input_chars: usize,
    /// Numerator of the logged shrink ratio.
    /// NOTE(review): presumably the size of the text after redaction — confirm.
    pub output_chars: usize,
    /// Logged verbatim under the `"caller_pid"` key.
    pub caller_pid: u32,
}
23
/// Redaction mode attached to an audit entry.
///
/// The enum is fieldless, so it derives `Clone`, `Copy`, `PartialEq` and `Eq`
/// in addition to `Debug`; this lets callers copy and compare modes directly
/// instead of going through their string names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RedactMode {
    /// Written to the audit log as `"none"`.
    None,
    /// Written to the audit log as `"regex"`.
    Regex,
    /// Written to the audit log as `"semantic"`.
    Semantic,
}
30
31impl RedactMode {
32    fn as_str(&self) -> &'static str {
33        match self {
34            RedactMode::None => "none",
35            RedactMode::Regex => "regex",
36            RedactMode::Semantic => "semantic",
37        }
38    }
39}
40
41/// Append one audit entry to ~/.hematite/redact_audit.jsonl.
42/// Failures are logged to stderr and silently ignored — the audit trail
43/// must never block the main request path.
44pub fn record(entry: &AuditEntry) {
45    if let Err(e) = try_record(entry) {
46        eprintln!("[hematite mcp] audit log write failed: {e}");
47    }
48}
49
50fn try_record(entry: &AuditEntry) -> std::io::Result<()> {
51    let path = audit_log_path()?;
52
53    // Ensure parent directory exists
54    if let Some(parent) = path.parent() {
55        std::fs::create_dir_all(parent)?;
56    }
57
58    let shrink_ratio = if entry.input_chars > 0 {
59        entry.output_chars as f64 / entry.input_chars as f64
60    } else {
61        1.0
62    };
63
64    // Build tier1_hits as a plain object
65    let tier1_obj: serde_json::Value = entry
66        .tier1_hits
67        .iter()
68        .map(|(k, v)| (k.clone(), serde_json::Value::from(*v)))
69        .collect::<serde_json::Map<_, _>>()
70        .into();
71
72    let line = serde_json::json!({
73        "ts": chrono_now_utc(),
74        "topic": entry.topic,
75        "mode": entry.mode.as_str(),
76        "tier1_hits": tier1_obj,
77        "semantic_applied": entry.semantic_applied,
78        "input_chars": entry.input_chars,
79        "output_chars": entry.output_chars,
80        "shrink_ratio": (shrink_ratio * 1000.0).round() / 1000.0,
81        "caller_pid": entry.caller_pid,
82        "suspicious_low_shrink": shrink_ratio > 0.9 && entry.mode.as_str() == "semantic",
83    });
84
85    let mut file = std::fs::OpenOptions::new()
86        .create(true)
87        .append(true)
88        .open(&path)?;
89
90    let mut json_str = serde_json::to_string(&line)?;
91    json_str.push('\n');
92    file.write_all(json_str.as_bytes())?;
93    Ok(())
94}
95
/// Resolve the audit log location: `<home>/.hematite/redact_audit.jsonl`.
///
/// The home directory is read from `USERPROFILE`, falling back to `HOME`.
/// A variable that is set but empty is treated as unset; otherwise an empty
/// value would silently produce a path relative to the current working
/// directory.
///
/// # Errors
/// Returns an `ErrorKind::NotFound` error when neither variable holds a
/// non-empty value.
fn audit_log_path() -> std::io::Result<PathBuf> {
    let home = ["USERPROFILE", "HOME"]
        .iter()
        .filter_map(|name| std::env::var_os(name))
        // Skip empty values so the lookup falls through to the next variable.
        .find(|value| !value.is_empty())
        .map(PathBuf::from)
        .ok_or_else(|| {
            std::io::Error::new(std::io::ErrorKind::NotFound, "HOME directory not found")
        })?;
    Ok(home.join(".hematite").join("redact_audit.jsonl"))
}
105
106fn chrono_now_utc() -> String {
107    // Use std::time to avoid a chrono dep; format as ISO 8601 manually.
108    use std::time::{SystemTime, UNIX_EPOCH};
109    let secs = SystemTime::now()
110        .duration_since(UNIX_EPOCH)
111        .unwrap_or_default()
112        .as_secs();
113    // Convert epoch seconds to UTC date-time string (good enough for audit logs)
114    let s = secs % 60;
115    let m = (secs / 60) % 60;
116    let h = (secs / 3600) % 24;
117    let days = secs / 86400;
118    // Days since 1970-01-01
119    let (year, month, day) = days_to_ymd(days);
120    format!("{year:04}-{month:02}-{day:02}T{h:02}:{m:02}:{s:02}Z")
121}
122
123fn days_to_ymd(mut days: u64) -> (u64, u64, u64) {
124    let mut year = 1970u64;
125    loop {
126        let leap = is_leap(year);
127        let days_in_year = if leap { 366 } else { 365 };
128        if days < days_in_year {
129            break;
130        }
131        days -= days_in_year;
132        year += 1;
133    }
134    let leap = is_leap(year);
135    let month_days = [
136        31u64,
137        if leap { 29 } else { 28 },
138        31,
139        30,
140        31,
141        30,
142        31,
143        31,
144        30,
145        31,
146        30,
147        31,
148    ];
149    let mut month = 1u64;
150    for &md in &month_days {
151        if days < md {
152            break;
153        }
154        days -= md;
155        month += 1;
156    }
157    (year, month, days + 1)
158}
159
/// Gregorian leap-year rule: divisible by 400, or divisible by 4 but not by 100.
fn is_leap(year: u64) -> bool {
    match (year % 4, year % 100, year % 400) {
        (_, _, 0) => true,
        (_, 0, _) => false,
        (0, _, _) => true,
        _ => false,
    }
}
163
#[cfg(test)]
mod tests {
    use super::*;

    /// Pins full (year, month, day) results, not just the year, so the
    /// month/day walk and the leap-year handling are covered.
    #[test]
    fn ymd_known_dates() {
        // Day zero is the epoch itself.
        assert_eq!(days_to_ymd(0), (1970, 1, 1));
        // Non-leap year: Feb 28 is followed directly by Mar 1.
        assert_eq!(days_to_ymd(58), (1970, 2, 28));
        assert_eq!(days_to_ymd(59), (1970, 3, 1));
        // Year rollover.
        assert_eq!(days_to_ymd(364), (1970, 12, 31));
        assert_eq!(days_to_ymd(365), (1971, 1, 1));
        // Leap days, including the divisible-by-400 case.
        assert_eq!(days_to_ymd(11016), (2000, 2, 29));
        assert_eq!(days_to_ymd(19782), (2024, 2, 29));
        // 20454 days reach 2026-01-01; 109 more days land on 2026-04-20.
        assert_eq!(days_to_ymd(20563), (2026, 4, 20));
    }

    /// The timestamp must have the fixed-width `YYYY-MM-DDTHH:MM:SSZ` layout.
    #[test]
    fn chrono_now_utc_format() {
        let ts = chrono_now_utc();
        assert_eq!(ts.len(), 20, "expected ISO 8601 format, got: {ts}");
        assert!(ts.ends_with('Z'));
        let bytes = ts.as_bytes();
        assert_eq!(bytes[4], b'-');
        assert_eq!(bytes[7], b'-');
        assert_eq!(bytes[10], b'T');
        assert_eq!(bytes[13], b':');
        assert_eq!(bytes[16], b':');
    }
}