1use aho_corasick::AhoCorasick;
5use regex::Regex;
6use serde_json::Value;
7use std::path::Path;
8use std::sync::OnceLock;
9
10fn email_re() -> &'static Regex {
11 static RE: OnceLock<Regex> = OnceLock::new();
12 RE.get_or_init(|| Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap())
13}
14
15fn win_drive_re() -> &'static Regex {
16 static RE: OnceLock<Regex> = OnceLock::new();
17 RE.get_or_init(|| Regex::new(r"(?i)(?P<p>[a-z]):[/\\]").unwrap())
18}
19
/// Returns the byte patterns whose presence marks the start of a credential.
///
/// The order of the returned patterns is fixed: `Bearer `, `Authorization:`,
/// `sk-`, `ghp_`, `gho_`, `xoxb-`, `AKIA`.
fn secret_needles() -> Vec<Vec<u8>> {
    const NEEDLES: [&[u8]; 7] = [
        b"Bearer ",
        b"Authorization:",
        b"sk-",
        b"ghp_",
        b"gho_",
        b"xoxb-",
        b"AKIA",
    ];
    NEEDLES.iter().map(|needle| needle.to_vec()).collect()
}
32
33pub fn redact_payload(value: &mut Value, workspace: &Path, team_salt: &[u8; 32]) {
35 redact_value(value, workspace, team_salt, true);
36}
37
38fn redact_value(v: &mut Value, workspace: &Path, team_salt: &[u8; 32], is_root: bool) {
39 match v {
40 Value::String(s) => {
41 *s = redact_string(s, workspace, team_salt);
42 }
43 Value::Array(items) => {
44 for x in items {
45 redact_value(x, workspace, team_salt, false);
46 }
47 }
48 Value::Object(map) => {
49 map.retain(|k, _| !drop_key(k));
50 let keys: Vec<String> = map.keys().cloned().collect();
51 for k in keys {
52 if let Some(val) = map.get_mut(&k) {
53 if k.ends_with("_TOKEN") || k.ends_with("_KEY") || k == "env" {
54 *val = Value::String("<REDACTED:secret>".to_string());
55 } else if k == "tool_args" || k == "command" {
56 redact_tool_args(val, workspace, team_salt);
57 } else {
58 redact_value(val, workspace, team_salt, false);
59 }
60 }
61 }
62 }
63 _ => {}
64 }
65 let _ = is_root;
66}
67
/// Reports whether an object key must be removed from the payload entirely
/// (as opposed to having its value redacted). The comparison is exact and
/// case-sensitive.
fn drop_key(k: &str) -> bool {
    const DROPPED: [&str; 5] = [
        "user",
        "git_email",
        "prompt_text",
        "completion_text",
        "email",
    ];
    DROPPED.contains(&k)
}
74
75fn redact_tool_args(v: &mut Value, workspace: &Path, team_salt: &[u8; 32]) {
76 match v {
77 Value::Object(m) => {
78 if let Some(Value::String(cmd)) = m.get_mut("command") {
79 let redacted = redact_shell_command(cmd, workspace, team_salt);
80 *cmd = redacted;
81 }
82 let keys: Vec<String> = m.keys().cloned().collect();
83 for k in keys {
84 if k != "command"
85 && let Some(val) = m.get_mut(&k)
86 {
87 redact_value(val, workspace, team_salt, false);
88 }
89 }
90 }
91 _ => redact_value(v, workspace, team_salt, false),
92 }
93}
94
95fn redact_shell_command(cmd: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
96 let mut parts = cmd.split_whitespace();
97 let Some(first) = parts.next() else {
98 return String::new();
99 };
100 let rest: Vec<&str> = parts.collect();
101 if rest.is_empty() {
102 return redact_string(first, workspace, team_salt);
103 }
104 let redacted_rest: Vec<String> = rest
105 .iter()
106 .map(|t| {
107 if looks_secret_token(t) {
108 "<REDACTED:arg>".to_string()
109 } else {
110 redact_string(t, workspace, team_salt)
111 }
112 })
113 .collect();
114 format!(
115 "{} {}",
116 redact_string(first, workspace, team_salt),
117 redacted_rest.join(" ")
118 )
119}
120
/// Heuristically decides whether a single command-line token is a secret.
///
/// A token is flagged when any of the following holds:
/// * it contains `=` together with `TOKEN`, `KEY` or `SECRET` (case-sensitive);
/// * it starts with `sk-` or `ghp_`;
/// * it is longer than 40 bytes and consists only of alphanumeric characters
///   and `+ / = _ -`.
fn looks_secret_token(s: &str) -> bool {
    let is_secret_assignment = s.contains('=')
        && ["TOKEN", "KEY", "SECRET"]
            .iter()
            .any(|marker| s.contains(*marker));
    if is_secret_assignment {
        return true;
    }

    let has_known_prefix = ["sk-", "ghp_"].iter().any(|prefix| s.starts_with(*prefix));
    if has_known_prefix {
        return true;
    }

    s.len() > 40
        && s.chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '+' | '/' | '=' | '_' | '-'))
}
129
130pub fn redact_string(s: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
131 let mut out = s.to_string();
132 out = email_re()
133 .replace_all(&out, "<REDACTED:email>")
134 .into_owned();
135 out = replace_path_prefixes(&out, workspace, team_salt);
136 scrub_secrets(&mut out);
137 out
138}
139
140fn replace_path_prefixes(s: &str, workspace: &Path, team_salt: &[u8; 32]) -> String {
141 let mut out = s.to_string();
142 loop {
143 let mut replaced = false;
144 for prefix in ["/Users/", "/home/", "/var/folders/", "/private/var/"] {
145 if let Some(idx) = out.find(prefix) {
146 let tail = &out[idx + prefix.len()..];
147 let end = tail
148 .find(|c: char| c.is_whitespace() || c == '"' || c == '\'' || c == ')')
149 .unwrap_or(tail.len());
150 let segment = &tail[..end];
151 let placeholder = file_placeholder(workspace, team_salt, segment);
152 out.replace_range(idx..idx + prefix.len() + end, &placeholder);
153 replaced = true;
154 break;
155 }
156 }
157 if !replaced {
158 break;
159 }
160 }
161 out = win_drive_re()
162 .replace_all(&out, |caps: ®ex::Captures| {
163 format!("<REDACTED:drive>{}", &caps["p"])
164 })
165 .into_owned();
166 out
167}
168
169fn file_placeholder(workspace: &Path, team_salt: &[u8; 32], abs_tail: &str) -> String {
170 let basename = abs_tail
171 .rsplit('/')
172 .next()
173 .filter(|s| !s.is_empty())
174 .unwrap_or("file");
175 let class = basename_class(basename);
176 let rel_hash = rel_path_hash(workspace, team_salt, abs_tail);
177 format!("<{rel_hash}:{class}>")
178}
179
/// Classifies a path component: `"file"` when the name contains a dot,
/// `"path"` otherwise.
fn basename_class(name: &str) -> &'static str {
    match name.find('.') {
        Some(_) => "file",
        None => "path",
    }
}
183
184fn rel_path_hash(workspace: &Path, team_salt: &[u8; 32], tail_after_prefix: &str) -> String {
185 let synthetic =
186 workspace.to_string_lossy().into_owned() + "/" + tail_after_prefix.trim_start_matches('/');
187 let full = crate::sync::outbound::hash_with_salt(team_salt, synthetic.as_bytes());
188 full.strip_prefix("blake3:")
189 .map(|h| h[..8.min(h.len())].to_string())
190 .unwrap_or_else(|| "hash".to_string())
191}
192
193fn scrub_secrets(s: &mut String) {
194 let ac = AhoCorasick::new(secret_needles()).expect("patterns");
195 let mut cursor = 0usize;
196 while cursor < s.len() {
197 let window = &s.as_bytes()[cursor..];
198 if let Some(m) = ac.find(window) {
199 let start = cursor + m.start();
200 let mut end = start + m.len();
201 while end < s.len() && !s.as_bytes()[end].is_ascii_whitespace() {
202 end += 1;
203 }
204 s.replace_range(start..end, "<REDACTED:token>");
205 cursor = start + "<REDACTED:token>".len();
206 } else {
207 break;
208 }
209 }
210}
211
212fn forbidden_drive_users_re() -> &'static Regex {
214 static RE: OnceLock<Regex> = OnceLock::new();
215 RE.get_or_init(|| Regex::new(r"(?i)[a-z]:\\Users\\").unwrap())
216}
217
218pub fn contains_forbidden_path_markers(s: &str) -> bool {
219 s.contains("/Users/")
220 || s.contains("/home/")
221 || s.contains("/var/folders/")
222 || s.contains("\\Users\\")
223 || forbidden_drive_users_re().is_match(s)
224}
225
226#[cfg(test)]
227mod tests {
228 use super::*;
229 use serde_json::json;
230
231 #[test]
232 fn redacts_email() {
233 let salt = [1u8; 32];
234 let ws = Path::new("/proj");
235 let r = redact_string("contact me at user@example.com ok", ws, &salt);
236 assert!(!r.contains('@'));
237 assert!(r.contains("REDACTED"));
238 }
239
240 #[test]
241 fn redacts_users_path() {
242 let salt = [2u8; 32];
243 let ws = Path::new("/proj");
244 let r = redact_string("file /Users/alice/secret.txt", ws, &salt);
245 assert!(!r.contains("/Users/"));
246 }
247
248 #[test]
249 fn drops_prompt_from_object() {
250 let salt = [3u8; 32];
251 let ws = Path::new("/w");
252 let mut v = json!({"prompt_text": "x", "ok": true});
253 redact_payload(&mut v, ws, &salt);
254 assert!(!v.as_object().unwrap().contains_key("prompt_text"));
255 }
256}