Skip to main content

kaizen/sync/
redact.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Client-side redaction before enqueueing sync outbox rows.
3
4use aho_corasick::AhoCorasick;
5use regex::Regex;
6use serde_json::Value;
7use std::path::Path;
8use std::sync::OnceLock;
9
10fn email_re() -> &'static Regex {
11    static RE: OnceLock<Regex> = OnceLock::new();
12    RE.get_or_init(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap())
13}
14
15fn win_drive_re() -> &'static Regex {
16    static RE: OnceLock<Regex> = OnceLock::new();
17    RE.get_or_init(|| Regex::new(r"(?i)(?P<p>[a-z]):[/\\]").unwrap())
18}
19
/// Literal byte patterns that mark the start of a secret-shaped substring.
///
/// The list is returned as owned byte vectors, in a fixed order, so it can be handed directly
/// to a multi-pattern matcher.
fn secret_needles() -> Vec<Vec<u8>> {
    const NEEDLES: [&[u8]; 7] = [
        b"Bearer ",
        b"Authorization:",
        b"sk-",
        b"ghp_",
        b"gho_",
        b"xoxb-",
        b"AKIA", // AWS key prefix
    ];
    NEEDLES.iter().map(|needle| needle.to_vec()).collect()
}
32
33/// Redact a full outbound event payload tree plus string leaves.
34pub fn redact_payload(value: &mut Value, workspace: &Path, team_salt: &[u8; 32]) {
35    redact_value(value, workspace, team_salt, true);
36}
37
38fn redact_value(v: &mut Value, workspace: &Path, team_salt: &[u8; 32], is_root: bool) {
39    match v {
40        Value::String(s) => {
41            *s = redact_string(s, workspace, team_salt);
42        }
43        Value::Array(items) => {
44            for x in items {
45                redact_value(x, workspace, team_salt, false);
46            }
47        }
48        Value::Object(map) => {
49            map.retain(|k, _| !drop_key(k));
50            let keys: Vec<String> = map.keys().cloned().collect();
51            for k in keys {
52                if let Some(val) = map.get_mut(&k) {
53                    if k.ends_with("_TOKEN") || k.ends_with("_KEY") || k == "env" {
54                        *val = Value::String("<REDACTED:secret>".to_string());
55                    } else if k == "tool_args" || k == "command" {
56                        redact_tool_args(val, workspace, team_salt);
57                    } else {
58                        redact_value(val, workspace, team_salt, false);
59                    }
60                }
61            }
62        }
63        _ => {}
64    }
65    let _ = is_root;
66}
67
/// Returns true when an object entry with key `k` must be removed entirely instead of being
/// redacted. The comparison is exact and case-sensitive.
fn drop_key(k: &str) -> bool {
    const DROPPED: [&str; 5] = [
        "user",
        "git_email",
        "prompt_text",
        "completion_text",
        "email",
    ];
    DROPPED.iter().any(|dropped| *dropped == k)
}
74
75fn redact_tool_args(v: &mut Value, workspace: &Path, team_salt: &[u8; 32]) {
76    match v {
77        Value::Object(m) => {
78            if let Some(Value::String(cmd)) = m.get_mut("command") {
79                let redacted = redact_shell_command(cmd, workspace, team_salt);
80                *cmd = redacted;
81            }
82            let keys: Vec<String> = m.keys().cloned().collect();
83            for k in keys {
84                if k != "command"
85                    && let Some(val) = m.get_mut(&k)
86                {
87                    redact_value(val, workspace, team_salt, false);
88                }
89            }
90        }
91        _ => redact_value(v, workspace, team_salt, false),
92    }
93}
94
95fn redact_shell_command(cmd: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
96    let mut parts = cmd.split_whitespace();
97    let Some(first) = parts.next() else {
98        return String::new();
99    };
100    let rest: Vec<&str> = parts.collect();
101    if rest.is_empty() {
102        return redact_string(first, workspace, team_salt);
103    }
104    let redacted_rest: Vec<String> = rest
105        .iter()
106        .map(|t| {
107            if looks_secret_token(t) {
108                "<REDACTED:arg>".to_string()
109            } else {
110                redact_string(t, workspace, team_salt)
111            }
112        })
113        .collect();
114    format!(
115        "{} {}",
116        redact_string(first, workspace, team_salt),
117        redacted_rest.join(" ")
118    )
119}
120
/// Heuristically decides whether a single command-line token carries a secret.
///
/// A token is flagged when any of the following holds:
/// * it contains `=` together with one of `TOKEN`, `KEY` or `SECRET` (case-sensitive);
/// * it starts with `sk-` or `ghp_`;
/// * it is longer than 40 bytes and consists only of alphanumeric characters and `+/=_-`.
fn looks_secret_token(s: &str) -> bool {
    let is_secret_assignment = s.contains('=')
        && ["TOKEN", "KEY", "SECRET"]
            .iter()
            .any(|marker| s.contains(*marker));
    let has_known_prefix = ["sk-", "ghp_"]
        .iter()
        .any(|prefix| s.starts_with(*prefix));
    let is_long_opaque_blob = s.len() > 40
        && s.chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '+' | '/' | '=' | '_' | '-'));

    is_secret_assignment || has_known_prefix || is_long_opaque_blob
}
129
130pub fn redact_string(s: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
131    let mut out = s.to_string();
132    out = email_re()
133        .replace_all(&out, "<REDACTED:email>")
134        .into_owned();
135    out = replace_path_prefixes(&out, workspace, team_salt);
136    scrub_secrets(&mut out);
137    out
138}
139
140fn replace_path_prefixes(s: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
141    let mut out = s.to_string();
142    loop {
143        let mut replaced = false;
144        for prefix in ["/Users/", "/home/", "/var/folders/", "/private/var/"] {
145            if let Some(idx) = out.find(prefix) {
146                let tail = &out[idx + prefix.len()..];
147                let end = tail
148                    .find(|c: char| c.is_whitespace() || c == '"' || c == '\'' || c == ')')
149                    .unwrap_or(tail.len());
150                let segment = &tail[..end];
151                let placeholder = file_placeholder(workspace, team_salt, segment);
152                out.replace_range(idx..idx + prefix.len() + end, &placeholder);
153                replaced = true;
154                break;
155            }
156        }
157        if !replaced {
158            break;
159        }
160    }
161    out = win_drive_re()
162        .replace_all(&out, |caps: &regex::Captures| {
163            format!("<REDACTED:drive>{}", &caps["p"])
164        })
165        .into_owned();
166    out
167}
168
169fn file_placeholder(workspace: &Path, team_salt: &[u8; 32], abs_tail: &str) -> String {
170    let basename = abs_tail
171        .rsplit('/')
172        .next()
173        .filter(|s| !s.is_empty())
174        .unwrap_or("file");
175    let class = basename_class(basename);
176    let rel_hash = rel_path_hash(workspace, team_salt, abs_tail);
177    format!("<{rel_hash}:{class}>")
178}
179
/// Classifies a path component: `"file"` when the name contains a dot, `"path"` otherwise.
fn basename_class(name: &str) -> &'static str {
    match name.find('.') {
        Some(_) => "file",
        None => "path",
    }
}
183
184fn rel_path_hash(workspace: &Path, team_salt: &[u8; 32], tail_after_prefix: &str) -> String {
185    let synthetic =
186        workspace.to_string_lossy().into_owned() + "/" + tail_after_prefix.trim_start_matches('/');
187    let full = crate::sync::outbound::hash_with_salt(team_salt, synthetic.as_bytes());
188    full.strip_prefix("blake3:")
189        .map(|h| h[..8.min(h.len())].to_string())
190        .unwrap_or_else(|| "hash".to_string())
191}
192
193fn scrub_secrets(s: &mut String) {
194    let ac = AhoCorasick::new(secret_needles()).expect("patterns");
195    let mut cursor = 0usize;
196    while cursor < s.len() {
197        let window = &s.as_bytes()[cursor..];
198        if let Some(m) = ac.find(window) {
199            let start = cursor + m.start();
200            let mut end = start + m.len();
201            while end < s.len() && !s.as_bytes()[end].is_ascii_whitespace() {
202                end += 1;
203            }
204            s.replace_range(start..end, "<REDACTED:token>");
205            cursor = start + "<REDACTED:token>".len();
206        } else {
207            break;
208        }
209    }
210}
211
212/// Returns true if `s` still contains forbidden path markers (for tests / guards).
213fn forbidden_drive_users_re() -> &'static Regex {
214    static RE: OnceLock<Regex> = OnceLock::new();
215    RE.get_or_init(|| Regex::new(r"(?i)[a-z]:\\Users\\").unwrap())
216}
217
218pub fn contains_forbidden_path_markers(s: &str) -> bool {
219    s.contains("/Users/")
220        || s.contains("/home/")
221        || s.contains("/var/folders/")
222        || s.contains("\\Users\\")
223        || forbidden_drive_users_re().is_match(s)
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// An email address inside free text is masked and leaves no `@` behind.
    #[test]
    fn redacts_email() {
        let workspace = Path::new("/proj");
        let redacted = redact_string("contact me at user@example.com ok", workspace, &[1u8; 32]);
        assert!(!redacted.contains('@'));
        assert!(redacted.contains("REDACTED"));
    }

    /// A macOS home-directory path does not survive redaction.
    #[test]
    fn redacts_users_path() {
        let workspace = Path::new("/proj");
        let redacted = redact_string("file /Users/alice/secret.txt", workspace, &[2u8; 32]);
        assert!(!redacted.contains("/Users/"));
    }

    /// The `prompt_text` key is removed from objects altogether.
    #[test]
    fn drops_prompt_from_object() {
        let workspace = Path::new("/w");
        let mut payload = json!({"prompt_text": "x", "ok": true});
        redact_payload(&mut payload, workspace, &[3u8; 32]);
        let object = payload.as_object().unwrap();
        assert!(!object.contains_key("prompt_text"));
    }
}