Skip to main content

opensession_core/
sanitize.rs

1use crate::trace::{ContentBlock, Event, EventType, Session};
2use regex::Regex;
3use std::sync::LazyLock;
4
/// Options controlling which kinds of sensitive data are scrubbed.
///
/// `Default` enables both stripping passes and supplies a list of
/// sensitive-looking glob patterns.
#[derive(Clone, Debug)]
pub struct SanitizeConfig {
    /// When `true`, home-directory prefixes in paths and text are rewritten to `~`.
    pub strip_paths: bool,
    /// When `true`, `key=value` / `key: value` credential assignments are redacted.
    pub strip_env_vars: bool,
    /// Glob-like patterns to exclude.
    ///
    /// NOTE(review): none of the sanitize functions in this file read this
    /// field; confirm where (or whether) it is consumed.
    pub exclude_patterns: Vec<String>,
}
15
16impl Default for SanitizeConfig {
17    fn default() -> Self {
18        Self {
19            strip_paths: true,
20            strip_env_vars: true,
21            exclude_patterns: vec![
22                "*.env".to_string(),
23                "*secret*".to_string(),
24                "*credential*".to_string(),
25                "*password*".to_string(),
26                "*token*".to_string(),
27                "*api_key*".to_string(),
28                "*apikey*".to_string(),
29            ],
30        }
31    }
32}
33
34static HOME_DIR_RE: LazyLock<Regex> =
35    LazyLock::new(|| Regex::new(r"(/Users/[^/\s]+|/home/[^/\s]+|C:\\Users\\[^\\\s]+)").unwrap());
36
37static ENV_VAR_RE: LazyLock<Regex> = LazyLock::new(|| {
38    Regex::new(r"(?i)(api[_-]?key|token|secret|password|credential|auth)[=:]\s*\S+").unwrap()
39});
40
41/// Sanitize a session in-place
42pub fn sanitize_session(session: &mut Session, config: &SanitizeConfig) {
43    for event in &mut session.events {
44        sanitize_event(event, config);
45    }
46}
47
48/// Sanitize a single event
49pub fn sanitize_event(event: &mut Event, config: &SanitizeConfig) {
50    // Sanitize event type fields
51    match &mut event.event_type {
52        EventType::FileEdit { path, .. }
53        | EventType::FileCreate { path }
54        | EventType::FileDelete { path } => {
55            if config.strip_paths {
56                *path = strip_home_dir(path);
57            }
58        }
59        EventType::ShellCommand { command, .. } => {
60            if config.strip_env_vars {
61                *command = strip_env_vars(command);
62            }
63            if config.strip_paths {
64                *command = strip_home_dir(command);
65            }
66        }
67        _ => {}
68    }
69
70    // Sanitize content blocks
71    for block in &mut event.content.blocks {
72        sanitize_content_block(block, config);
73    }
74}
75
76fn sanitize_content_block(block: &mut ContentBlock, config: &SanitizeConfig) {
77    match block {
78        ContentBlock::Text { text } => {
79            if config.strip_paths {
80                *text = strip_home_dir(text);
81            }
82            if config.strip_env_vars {
83                *text = strip_env_vars(text);
84            }
85        }
86        ContentBlock::Code { code, .. } => {
87            if config.strip_paths {
88                *code = strip_home_dir(code);
89            }
90            if config.strip_env_vars {
91                *code = strip_env_vars(code);
92            }
93        }
94        ContentBlock::File { path, content, .. } => {
95            if config.strip_paths {
96                *path = strip_home_dir(path);
97            }
98            if let Some(c) = content {
99                if config.strip_env_vars {
100                    *c = strip_env_vars(c);
101                }
102            }
103        }
104        _ => {}
105    }
106}
107
108/// Replace home directory paths with ~
109fn strip_home_dir(text: &str) -> String {
110    HOME_DIR_RE.replace_all(text, "~").to_string()
111}
112
113/// Replace environment variable values with [REDACTED]
114fn strip_env_vars(text: &str) -> String {
115    ENV_VAR_RE
116        .replace_all(text, "[REDACTED_CREDENTIAL]")
117        .to_string()
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use crate::trace::*;
    use chrono::Utc;
    use std::collections::HashMap;

    /// Both macOS-style and Linux-style home prefixes collapse to `~`.
    #[test]
    fn test_strip_home_dir() {
        let inputs = ["/Users/john/projects/foo", "/home/john/projects/foo"];
        for input in inputs {
            assert_eq!(strip_home_dir(input), "~/projects/foo");
        }
    }

    /// `KEY=value` and `key: value` forms are replaced wholesale.
    #[test]
    fn test_strip_env_vars() {
        let inputs = ["API_KEY=sk-1234567890", "token: abc123def"];
        for input in inputs {
            assert_eq!(strip_env_vars(input), "[REDACTED_CREDENTIAL]");
        }
    }

    /// A file-edit event has both its path and its text content scrubbed.
    #[test]
    fn test_sanitize_event() {
        let mut event = Event {
            event_id: "e1".to_string(),
            timestamp: Utc::now(),
            event_type: EventType::FileEdit {
                path: "/Users/john/projects/foo/src/main.rs".to_string(),
                diff: None,
            },
            task_id: None,
            content: Content::text("Editing /Users/john/projects/foo/src/main.rs"),
            duration_ms: None,
            attributes: HashMap::new(),
        };

        sanitize_event(&mut event, &SanitizeConfig::default());

        let EventType::FileEdit { path, .. } = &event.event_type else {
            panic!("wrong type");
        };
        assert_eq!(path, "~/projects/foo/src/main.rs");

        let ContentBlock::Text { text } = &event.content.blocks[0] else {
            panic!("wrong block type");
        };
        assert!(text.contains("~/projects/foo/src/main.rs"));
        assert!(!text.contains("/Users/john"));
    }
}