// opensession_core/sanitize.rs

1use crate::trace::{ContentBlock, Event, EventType, Session};
2use regex::Regex;
3use std::sync::LazyLock;
4
/// Options selecting which sanitization passes run over a session.
#[derive(Clone, Debug)]
pub struct SanitizeConfig {
    /// When `true`, home-directory prefixes (e.g. `/Users/<name>`,
    /// `/home/<name>`) found in paths and text are rewritten to `~`.
    pub strip_paths: bool,
    /// When `true`, credential-looking `key=value` / `key: value` pairs are
    /// replaced as a whole with a redaction marker.
    pub strip_env_vars: bool,
    /// Glob-like patterns naming sensitive files. The sanitize functions in
    /// this module do not read this field; it is carried for other consumers
    /// of the config.
    pub exclude_patterns: Vec<String>,
}
15
16impl Default for SanitizeConfig {
17    fn default() -> Self {
18        Self {
19            strip_paths: true,
20            strip_env_vars: true,
21            exclude_patterns: vec![
22                "*.env".to_string(),
23                "*secret*".to_string(),
24                "*credential*".to_string(),
25                "*password*".to_string(),
26                "*token*".to_string(),
27                "*api_key*".to_string(),
28                "*apikey*".to_string(),
29            ],
30        }
31    }
32}
33
34static HOME_DIR_RE: LazyLock<Regex> =
35    LazyLock::new(|| Regex::new(r"(/Users/[^/\s]+|/home/[^/\s]+|C:\\Users\\[^\\\s]+)").unwrap());
36
37static ENV_VAR_RE: LazyLock<Regex> = LazyLock::new(|| {
38    Regex::new(r"(?i)(api[_-]?key|token|secret|password|credential|auth)[=:]\s*\S+").unwrap()
39});
40
41/// Sanitize a session in-place
42pub fn sanitize_session(session: &mut Session, config: &SanitizeConfig) {
43    for event in &mut session.events {
44        sanitize_event(event, config);
45    }
46}
47
48/// Sanitize a single event
49pub fn sanitize_event(event: &mut Event, config: &SanitizeConfig) {
50    // Sanitize event type fields
51    match &mut event.event_type {
52        EventType::FileEdit { path, .. }
53        | EventType::FileCreate { path }
54        | EventType::FileDelete { path } => {
55            if config.strip_paths {
56                *path = strip_home_dir(path);
57            }
58        }
59        EventType::ShellCommand { command, .. } => {
60            if config.strip_env_vars {
61                *command = strip_env_vars(command);
62            }
63            if config.strip_paths {
64                *command = strip_home_dir(command);
65            }
66        }
67        _ => {}
68    }
69
70    // Sanitize content blocks
71    for block in &mut event.content.blocks {
72        sanitize_content_block(block, config);
73    }
74}
75
76fn sanitize_content_block(block: &mut ContentBlock, config: &SanitizeConfig) {
77    match block {
78        ContentBlock::Text { text } => {
79            if config.strip_paths {
80                *text = strip_home_dir(text);
81            }
82            if config.strip_env_vars {
83                *text = strip_env_vars(text);
84            }
85        }
86        ContentBlock::Code { code, .. } => {
87            if config.strip_paths {
88                *code = strip_home_dir(code);
89            }
90            if config.strip_env_vars {
91                *code = strip_env_vars(code);
92            }
93        }
94        ContentBlock::File { path, content, .. } => {
95            if config.strip_paths {
96                *path = strip_home_dir(path);
97            }
98            if let Some(c) = content {
99                if config.strip_env_vars {
100                    *c = strip_env_vars(c);
101                }
102            }
103        }
104        _ => {}
105    }
106}
107
108/// Replace home directory paths with ~
109fn strip_home_dir(text: &str) -> String {
110    HOME_DIR_RE.replace_all(text, "~").to_string()
111}
112
113/// Replace environment variable values with [REDACTED]
114fn strip_env_vars(text: &str) -> String {
115    ENV_VAR_RE
116        .replace_all(text, "[REDACTED_CREDENTIAL]")
117        .to_string()
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::trace::*;
    use chrono::Utc;
    use std::collections::HashMap;

    /// Builds an event with id "e1", the current timestamp, and no task,
    /// duration or attributes, so each test spells out only what it exercises.
    fn make_event(event_type: EventType, content: Content) -> Event {
        Event {
            event_id: String::from("e1"),
            timestamp: Utc::now(),
            event_type,
            task_id: None,
            content,
            duration_ms: None,
            attributes: HashMap::new(),
        }
    }

    #[test]
    fn test_strip_home_dir() {
        let cases = [
            ("/Users/john/projects/foo", "~/projects/foo"),
            ("/home/john/projects/foo", "~/projects/foo"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_home_dir(input), expected);
        }
    }

    #[test]
    fn test_strip_env_vars() {
        let cases = [
            ("API_KEY=sk-1234567890", "[REDACTED_CREDENTIAL]"),
            ("token: abc123def", "[REDACTED_CREDENTIAL]"),
        ];
        for (input, expected) in cases {
            assert_eq!(strip_env_vars(input), expected);
        }
    }

    #[test]
    fn test_sanitize_event() {
        let config = SanitizeConfig::default();
        let mut event = make_event(
            EventType::FileEdit {
                path: "/Users/john/projects/foo/src/main.rs".to_string(),
                diff: None,
            },
            Content::text("Editing /Users/john/projects/foo/src/main.rs"),
        );

        sanitize_event(&mut event, &config);

        let EventType::FileEdit { path, .. } = &event.event_type else {
            panic!("wrong type");
        };
        assert_eq!(path, "~/projects/foo/src/main.rs");

        let ContentBlock::Text { text } = &event.content.blocks[0] else {
            panic!("wrong block type");
        };
        assert!(text.contains("~/projects/foo/src/main.rs"));
        assert!(!text.contains("/Users/john"));
    }

    #[test]
    fn test_sanitize_code_block() {
        let config = SanitizeConfig::default();
        let code_block = ContentBlock::Code {
            code: "let path = \"/Users/alice/project/main.rs\"; API_KEY=sk-abc123".to_string(),
            language: Some("rust".to_string()),
            start_line: None,
        };
        let mut event = make_event(
            EventType::AgentMessage,
            Content {
                blocks: vec![code_block],
            },
        );

        sanitize_event(&mut event, &config);

        let ContentBlock::Code { code, .. } = &event.content.blocks[0] else {
            panic!("expected Code block");
        };
        assert!(!code.contains("/Users/alice"));
        assert!(code.contains("~/project/main.rs"));
        assert!(!code.contains("sk-abc123"));
        assert!(code.contains("[REDACTED_CREDENTIAL]"));
    }

    #[test]
    fn test_sanitize_file_block() {
        let config = SanitizeConfig::default();
        let file_block = ContentBlock::File {
            path: "/home/bob/docs/readme.md".to_string(),
            content: Some("secret=hunter2".to_string()),
        };
        let mut event = make_event(
            EventType::AgentMessage,
            Content {
                blocks: vec![file_block],
            },
        );

        sanitize_event(&mut event, &config);

        let ContentBlock::File { path, content, .. } = &event.content.blocks[0] else {
            panic!("expected File block");
        };
        assert_eq!(path, "~/docs/readme.md");
        let body = content.as_deref().unwrap();
        assert!(!body.contains("hunter2"));
        assert!(body.contains("[REDACTED_CREDENTIAL]"));
    }

    #[test]
    fn test_sanitize_shell_command() {
        let config = SanitizeConfig::default();
        let mut event = make_event(
            EventType::ShellCommand {
                command: "TOKEN=abc123 /Users/alice/bin/run".to_string(),
                exit_code: Some(0),
            },
            Content::text("output"),
        );

        sanitize_event(&mut event, &config);

        let EventType::ShellCommand { command, .. } = &event.event_type else {
            panic!("expected ShellCommand");
        };
        assert!(!command.contains("abc123"));
        assert!(command.contains("[REDACTED_CREDENTIAL]"));
        assert!(!command.contains("/Users/alice"));
        assert!(command.contains("~/bin/run"));
    }

    #[test]
    fn test_sanitize_file_edit() {
        let config = SanitizeConfig::default();
        let mut event = make_event(
            EventType::FileEdit {
                path: "/home/dev/project/src/lib.rs".to_string(),
                diff: Some("+ some diff".to_string()),
            },
            Content::text("edited file"),
        );

        sanitize_event(&mut event, &config);

        let EventType::FileEdit { path, .. } = &event.event_type else {
            panic!("expected FileEdit");
        };
        assert_eq!(path, "~/project/src/lib.rs");
    }

    #[test]
    fn test_sanitize_tool_call() {
        // ToolCall has only a name field; sanitization applies to content blocks
        let config = SanitizeConfig::default();
        let mut event = make_event(
            EventType::ToolCall {
                name: "Bash".to_string(),
            },
            Content::text("Running /Users/alice/scripts/deploy.sh with password=abc123"),
        );

        sanitize_event(&mut event, &config);

        let ContentBlock::Text { text } = &event.content.blocks[0] else {
            panic!("expected Text block");
        };
        assert!(!text.contains("/Users/alice"));
        assert!(text.contains("~/scripts/deploy.sh"));
        assert!(!text.contains("abc123"));
        assert!(text.contains("[REDACTED_CREDENTIAL]"));
    }

    #[test]
    fn test_config_strip_paths_false() {
        let config = SanitizeConfig {
            strip_paths: false,
            strip_env_vars: true,
            exclude_patterns: vec![],
        };
        let mut event = make_event(
            EventType::FileEdit {
                path: "/Users/john/project/main.rs".to_string(),
                diff: None,
            },
            Content::text("Editing /Users/john/project/main.rs"),
        );

        sanitize_event(&mut event, &config);

        let EventType::FileEdit { path, .. } = &event.event_type else {
            panic!("expected FileEdit");
        };
        // Path should NOT be stripped
        assert_eq!(path, "/Users/john/project/main.rs");

        let ContentBlock::Text { text } = &event.content.blocks[0] else {
            panic!("expected Text block");
        };
        assert!(text.contains("/Users/john/project/main.rs"));
    }

    #[test]
    fn test_config_exclude_patterns() {
        // exclude_patterns is a config field for downstream consumers; sanitize_event
        // itself does not filter events — it only transforms content in-place.
        // Verify the default patterns are populated correctly.
        let config = SanitizeConfig::default();
        for expected in ["*.env", "*secret*", "*token*"] {
            assert!(config.exclude_patterns.iter().any(|p| p == expected));
        }
        assert_eq!(config.exclude_patterns.len(), 7);
    }

    #[test]
    fn test_config_strip_env_false() {
        let config = SanitizeConfig {
            strip_paths: true,
            strip_env_vars: false,
            exclude_patterns: vec![],
        };
        let mut event = make_event(
            EventType::ShellCommand {
                command: "API_KEY=sk-12345 /Users/alice/bin/run".to_string(),
                exit_code: Some(0),
            },
            Content::text("token: abc123"),
        );

        sanitize_event(&mut event, &config);

        let EventType::ShellCommand { command, .. } = &event.event_type else {
            panic!("expected ShellCommand");
        };
        // Env vars should NOT be stripped
        assert!(command.contains("API_KEY=sk-12345"));
        // But paths should still be stripped
        assert!(!command.contains("/Users/alice"));
        assert!(command.contains("~/bin/run"));

        let ContentBlock::Text { text } = &event.content.blocks[0] else {
            panic!("expected Text block");
        };
        // Env vars in content should NOT be stripped
        assert!(text.contains("token: abc123"));
    }
}