Skip to main content

coding_agent_search/pages/
redact.rs

1use regex::Regex;
2use serde_json::{Map, Value};
3use std::collections::HashMap;
4use std::path::PathBuf;
5use std::sync::Mutex;
6use std::sync::atomic::{AtomicUsize, Ordering};
7
/// Settings that select which redaction stages a [`RedactionEngine`] applies.
///
/// Every stage is independent; the engine runs the enabled ones in a fixed
/// order (home paths, usernames, path replacements, emails, URL hostnames,
/// custom patterns, project-name anonymization).
#[derive(Debug, Clone)]
pub struct RedactionConfig {
    /// Redact home directory paths (e.g., /Users/alice -> ~).
    pub redact_home_paths: bool,
    /// Redact usernames in path contexts (after `/Users/`, `/home/` or `\Users\`).
    pub redact_usernames: bool,
    /// Username mappings (real -> fake).
    pub username_map: HashMap<String, String>,
    /// Path prefix replacements, applied as plain substring replacements
    /// in the order listed.
    pub path_replacements: Vec<(String, String)>,
    /// Custom regex patterns; only entries with `enabled == true` are applied.
    pub custom_patterns: Vec<CustomPattern>,
    /// Preserve structure but anonymize project directory names.
    /// Only takes effect through `RedactionEngine::redact_workspace`.
    pub anonymize_project_names: bool,
    /// Redact hostnames (e.g., internal server names) that appear inside
    /// `http(s)://`, `ssh://` and `ws(s)://` URLs.
    pub redact_hostnames: bool,
    /// Redact email addresses.
    pub redact_emails: bool,
    /// Block export if critical secrets are detected (private keys, cloud credentials).
    // NOTE(review): nothing in the visible part of this file reads this flag;
    // presumably the export caller enforces it — confirm.
    pub block_on_critical_secrets: bool,
}
29
30impl Default for RedactionConfig {
31    fn default() -> Self {
32        Self {
33            redact_home_paths: true,
34            redact_usernames: true,
35            username_map: HashMap::new(),
36            path_replacements: Vec::new(),
37            custom_patterns: Vec::new(),
38            anonymize_project_names: false,
39            redact_hostnames: false,
40            redact_emails: true,
41            block_on_critical_secrets: true,
42        }
43    }
44}
45
/// A caller-supplied regex rule applied by the custom-pattern stage.
#[derive(Debug, Clone)]
pub struct CustomPattern {
    /// Identifier for the rule; recorded as `RedactionChange::original`
    /// whenever the rule fires.
    pub name: String,
    /// Regex whose matches are replaced.
    pub pattern: Regex,
    /// Replacement text handed to `Regex::replace_all`.
    // NOTE(review): as a `&str` replacer, `$name`/`$1` sequences are presumably
    // expanded as capture references by the regex crate — confirm before
    // using a literal `$` here.
    pub replacement: String,
    /// Rules with `enabled == false` are skipped entirely.
    pub enabled: bool,
}
53
/// Category of a redaction, one variant per stage of the engine's pipeline.
///
/// Used as the key of `RedactionReport::by_kind`, hence `Eq + Hash`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RedactionKind {
    /// Home directory prefix collapsed to `~`.
    HomePath,
    /// Username replaced inside a home-style path.
    Username,
    /// Email address replaced with a placeholder.
    Email,
    /// Host (and any userinfo) replaced inside a URL.
    Hostname,
    /// Configured substring replacement from `path_replacements`.
    PathReplacement,
    /// Match of an enabled `CustomPattern`.
    CustomPattern,
    /// Last path segment replaced with a generated project name.
    ProjectName,
}
64
65impl RedactionKind {
66    pub fn label(self) -> &'static str {
67        match self {
68            RedactionKind::HomePath => "home_path",
69            RedactionKind::Username => "username",
70            RedactionKind::Email => "email",
71            RedactionKind::Hostname => "hostname",
72            RedactionKind::PathReplacement => "path_replace",
73            RedactionKind::CustomPattern => "custom_pattern",
74            RedactionKind::ProjectName => "project_name",
75        }
76    }
77}
78
/// Record that one redaction stage modified a string.
///
/// One entry is pushed per stage (or per pattern) that fired, not per
/// individual match.
#[derive(Debug, Clone)]
pub struct RedactionChange {
    /// Which stage produced the change.
    pub kind: RedactionKind,
    /// Descriptor of what was replaced. This is not always the matched text:
    /// the engine stores the home directory for `HomePath`, the regex source
    /// for `Username`, the literal `"email"` / `"url_hostname"` for `Email` /
    /// `Hostname`, the pattern name for `CustomPattern`, the `from` string for
    /// `PathReplacement`, and the whole pre-anonymization string for
    /// `ProjectName`.
    pub original: String,
    /// Replacement text (or, for `ProjectName`, the whole rewritten string).
    pub redacted: String,
}
85
/// Result of running the redaction pipeline over one input string.
#[derive(Debug, Clone)]
pub struct RedactedString {
    /// The text after every enabled stage has been applied.
    pub output: String,
    /// Stages that fired, in pipeline order; empty when nothing matched.
    pub changes: Vec<RedactionChange>,
}
91
/// Aggregate statistics collected while redacting many strings.
///
/// Populated through `RedactionReport::record`; the derived `Default` yields
/// `max_samples == 0`, i.e. a report that keeps counters but no samples.
#[derive(Debug, Clone, Default)]
pub struct RedactionReport {
    /// Sum of `changes.len()` over every recorded call.
    pub total_redactions: usize,
    /// Number of recorded changes per redaction kind.
    pub by_kind: HashMap<RedactionKind, usize>,
    /// Up to `max_samples` before/after examples.
    pub samples: Vec<RedactionSample>,
    // NOTE(review): the three fields below are never written by the code
    // visible in this file; presumably the scanning caller maintains them — confirm.
    pub scanned_conversations: usize,
    pub scanned_messages: usize,
    pub truncated: bool,
    /// Upper bound on `samples.len()`.
    max_samples: usize,
}
102
/// One before/after example kept in a `RedactionReport`.
#[derive(Debug, Clone)]
pub struct RedactionSample {
    /// Caller-supplied description of where the text came from.
    pub location: String,
    /// Input text, shortened to at most 140 characters by `RedactionReport::record`.
    pub before: String,
    /// Redacted text, shortened to at most 140 characters by `RedactionReport::record`.
    pub after: String,
    /// Distinct redaction kinds that fired, in first-seen order.
    pub kinds: Vec<RedactionKind>,
}
110
/// Applies the redaction stages selected by a `RedactionConfig`.
pub struct RedactionEngine {
    /// Stage switches and user-supplied rules.
    config: RedactionConfig,
    /// Home directory rendered as a string; `None` when it could not be determined.
    home_str: Option<String>,
    /// Compiled `(pattern, replacement)` pairs for the username stage.
    username_patterns: Vec<(Regex, String)>,
    /// Real project name -> generated `project-N` name, so repeated inputs
    /// map to the same anonymized name.
    project_map: Mutex<HashMap<String, String>>,
    /// Number of project names handed out so far.
    project_counter: AtomicUsize,
}
118
/// Policy name reported in `SwarmEvidenceRedactionReport::redaction_policy`.
pub const SWARM_REDACTION_POLICY: &str = "strict";
/// Returned instead of a mail body snippet when snippets are not opted in.
pub const SWARM_MAIL_BODY_OMITTED: &str = "[MAIL_BODY_OMITTED]";
/// Returned in place of every non-empty environment value.
pub const SWARM_ENV_VALUE_REDACTED: &str = "[ENV_VALUE_REDACTED]";
/// Replacement for a whole `NAME=value` assignment whose name looks secret.
pub const SWARM_SECRET_ENV_ASSIGNMENT_REDACTED: &str = "[SECRET_ENV_REDACTED]";
/// Replacement for bearer tokens and API-key literals.
pub const SWARM_SECRET_LITERAL_REDACTED: &str = "[SECRET_REDACTED]";
124
/// Kind of swarm evidence value being redacted; selects which counter of
/// `SwarmEvidenceRedactionReport` is incremented.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SwarmEvidenceField {
    /// Counts towards `sensitive_paths_scrubbed`.
    SensitivePath,
    /// Counts towards `command_arguments_scrubbed`.
    CommandArgument,
    /// Counts towards `env_values_scrubbed`.
    EnvironmentValue,
    /// Counts towards `mailbox_snippets_omitted`.
    MailboxSnippet,
    /// Counts towards `evidence_references_scrubbed`.
    EvidenceReference,
}
133
/// Opt-in switches for swarm evidence output. The derived `Default` turns
/// both off.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SwarmEvidenceRedactionConfig {
    /// When true, mail body snippets are emitted (still redacted) instead of
    /// being replaced by a fixed placeholder.
    pub include_mail_body_snippets: bool,
    /// Currently ignored by `SwarmEvidenceRedactor::new`, which always reports
    /// raw session content as not included.
    pub include_raw_session_content: bool,
}
139
/// Summary of what a `SwarmEvidenceRedactor` did, suitable for embedding in
/// the emitted evidence.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SwarmEvidenceRedactionReport {
    /// Name of the redaction policy in force.
    pub redaction_policy: &'static str,
    /// Whether raw session content was included.
    pub raw_session_content_included: bool,
    /// Whether mail body snippets were opted in.
    pub mail_body_snippets_included: bool,
    /// True once any value was altered or omitted.
    pub redaction_applied: bool,
    /// Redactions recorded for sensitive paths.
    pub sensitive_paths_scrubbed: usize,
    /// Redactions recorded for command arguments.
    pub command_arguments_scrubbed: usize,
    /// Environment values replaced by a placeholder.
    pub env_values_scrubbed: usize,
    /// Mail body snippets replaced by a placeholder.
    pub mailbox_snippets_omitted: usize,
    /// Redactions recorded for evidence references.
    pub evidence_references_scrubbed: usize,
    /// Human-readable description of what requires explicit opt-in.
    pub opt_in_boundary: &'static str,
}
153
154impl Default for SwarmEvidenceRedactionReport {
155    fn default() -> Self {
156        Self {
157            redaction_policy: SWARM_REDACTION_POLICY,
158            raw_session_content_included: false,
159            mail_body_snippets_included: false,
160            redaction_applied: false,
161            sensitive_paths_scrubbed: 0,
162            command_arguments_scrubbed: 0,
163            env_values_scrubbed: 0,
164            mailbox_snippets_omitted: 0,
165            evidence_references_scrubbed: 0,
166            opt_in_boundary: "mail body snippets require --include-evidence; raw session content is unsupported in cass.swarm.status.v1",
167        }
168    }
169}
170
/// Redacts individual swarm evidence values and keeps a running report of
/// what was scrubbed.
pub struct SwarmEvidenceRedactor {
    /// Engine configured with the swarm evidence rules.
    engine: RedactionEngine,
    /// Counters and flags updated by each `redact_*` call.
    report: SwarmEvidenceRedactionReport,
}
175
176impl SwarmEvidenceRedactor {
177    pub fn strict_default() -> Self {
178        Self::new(SwarmEvidenceRedactionConfig::default())
179    }
180
181    pub fn new(config: SwarmEvidenceRedactionConfig) -> Self {
182        let engine = RedactionEngine::new(swarm_evidence_redaction_config());
183        let report = SwarmEvidenceRedactionReport {
184            raw_session_content_included: false,
185            mail_body_snippets_included: config.include_mail_body_snippets,
186            ..Default::default()
187        };
188        Self { engine, report }
189    }
190
191    pub fn redact_sensitive_path(&mut self, value: &str) -> String {
192        let redacted = self.engine.redact_path(value);
193        self.record(
194            SwarmEvidenceField::SensitivePath,
195            redacted.changes.len(),
196            value != redacted.output,
197        );
198        redacted.output
199    }
200
201    pub fn redact_command_argument(&mut self, value: &str) -> String {
202        let redacted = self.engine.redact_text(value);
203        self.record(
204            SwarmEvidenceField::CommandArgument,
205            redacted.changes.len(),
206            value != redacted.output,
207        );
208        redacted.output
209    }
210
211    pub fn redact_environment_value(&mut self, value: &str) -> String {
212        if value.is_empty() {
213            return String::new();
214        }
215        self.record(SwarmEvidenceField::EnvironmentValue, 1, true);
216        SWARM_ENV_VALUE_REDACTED.to_string()
217    }
218
219    pub fn redact_mail_body_snippet(&mut self, value: &str) -> String {
220        if !self.report.mail_body_snippets_included {
221            self.record(SwarmEvidenceField::MailboxSnippet, 1, true);
222            return SWARM_MAIL_BODY_OMITTED.to_string();
223        }
224
225        let redacted = self.engine.redact_text(value);
226        if !redacted.changes.is_empty() || value != redacted.output {
227            self.report.redaction_applied = true;
228        }
229        redacted.output
230    }
231
232    pub fn redact_evidence_reference(&mut self, value: &str) -> String {
233        let redacted = self.engine.redact_text(value);
234        self.record(
235            SwarmEvidenceField::EvidenceReference,
236            redacted.changes.len(),
237            value != redacted.output,
238        );
239        redacted.output
240    }
241
242    pub fn report(&self) -> SwarmEvidenceRedactionReport {
243        self.report.clone()
244    }
245
246    fn record(&mut self, field: SwarmEvidenceField, change_count: usize, changed: bool) {
247        if change_count == 0 && !changed {
248            return;
249        }
250        self.report.redaction_applied = true;
251        let count = change_count.max(1);
252        match field {
253            SwarmEvidenceField::SensitivePath => self.report.sensitive_paths_scrubbed += count,
254            SwarmEvidenceField::CommandArgument => self.report.command_arguments_scrubbed += count,
255            SwarmEvidenceField::EnvironmentValue => self.report.env_values_scrubbed += count,
256            SwarmEvidenceField::MailboxSnippet => self.report.mailbox_snippets_omitted += count,
257            SwarmEvidenceField::EvidenceReference => {
258                self.report.evidence_references_scrubbed += count;
259            }
260        }
261    }
262}
263
264impl RedactionEngine {
265    pub fn new(config: RedactionConfig) -> Self {
266        let home_dir = directories::UserDirs::new().map(|u| u.home_dir().to_path_buf());
267        let home_str = home_dir.as_ref().map(|p| p.to_string_lossy().to_string());
268
269        let username_patterns = build_username_patterns(
270            config.redact_usernames,
271            &config.username_map,
272            home_dir.as_ref(),
273        );
274
275        Self {
276            config,
277            home_str,
278            username_patterns,
279            project_map: Mutex::new(HashMap::new()),
280            project_counter: AtomicUsize::new(0),
281        }
282    }
283
284    pub fn redact_text(&self, input: &str) -> RedactedString {
285        self.redact_internal(input, false)
286    }
287
288    pub fn redact_path(&self, input: &str) -> RedactedString {
289        self.redact_internal(input, false)
290    }
291
292    pub fn redact_workspace(&self, input: &str) -> RedactedString {
293        self.redact_internal(input, true)
294    }
295
296    fn redact_internal(&self, input: &str, anonymize_project: bool) -> RedactedString {
297        let mut output = input.to_string();
298        let mut changes = Vec::new();
299
300        if self.config.redact_home_paths
301            && let Some(home_str) = &self.home_str
302            && let Some(redacted) = replace_home_path_prefixes(&output, home_str)
303        {
304            output = redacted;
305            changes.push(RedactionChange {
306                kind: RedactionKind::HomePath,
307                original: home_str.clone(),
308                redacted: "~".to_string(),
309            });
310        }
311
312        if self.config.redact_usernames {
313            for (pattern, replacement) in &self.username_patterns {
314                if pattern.is_match(&output) {
315                    let replaced = pattern.replace_all(&output, |caps: &regex::Captures| {
316                        format!("{}{}{}", &caps["prefix"], replacement, &caps["suffix"])
317                    });
318                    output = replaced.to_string();
319                    changes.push(RedactionChange {
320                        kind: RedactionKind::Username,
321                        original: pattern.as_str().to_string(),
322                        redacted: replacement.clone(),
323                    });
324                }
325            }
326        }
327
328        for (from, to) in &self.config.path_replacements {
329            if output.contains(from) {
330                output = output.replace(from, to);
331                changes.push(RedactionChange {
332                    kind: RedactionKind::PathReplacement,
333                    original: from.clone(),
334                    redacted: to.clone(),
335                });
336            }
337        }
338
339        if self.config.redact_emails && EMAIL_RE.is_match(&output) {
340            output = EMAIL_RE
341                .replace_all(&output, "[EMAIL_REDACTED]")
342                .to_string();
343            changes.push(RedactionChange {
344                kind: RedactionKind::Email,
345                original: "email".to_string(),
346                redacted: "[EMAIL_REDACTED]".to_string(),
347            });
348        }
349
350        if self.config.redact_hostnames && URL_HOST_RE.is_match(&output) {
351            output = URL_HOST_RE
352                .replace_all(&output, |caps: &regex::Captures| {
353                    let scheme = caps.name("scheme").map_or("", |m| m.as_str());
354                    let userinfo = caps.name("userinfo").map_or("", |m| m.as_str());
355                    let port = caps.name("port").map_or("", |m| m.as_str());
356                    if userinfo.is_empty() {
357                        format!("{scheme}://[HOST_REDACTED]{port}")
358                    } else {
359                        format!("{scheme}://[USERINFO_REDACTED]@[HOST_REDACTED]{port}")
360                    }
361                })
362                .to_string();
363            changes.push(RedactionChange {
364                kind: RedactionKind::Hostname,
365                original: "url_hostname".to_string(),
366                redacted: "[HOST_REDACTED]".to_string(),
367            });
368        }
369
370        for pattern in &self.config.custom_patterns {
371            if pattern.enabled && pattern.pattern.is_match(&output) {
372                output = pattern
373                    .pattern
374                    .replace_all(&output, pattern.replacement.as_str())
375                    .to_string();
376                changes.push(RedactionChange {
377                    kind: RedactionKind::CustomPattern,
378                    original: pattern.name.clone(),
379                    redacted: pattern.replacement.clone(),
380                });
381            }
382        }
383
384        if anonymize_project
385            && self.config.anonymize_project_names
386            && let Some(redacted) =
387                anonymize_last_segment(&output, |name| self.map_project_name(name))
388            && redacted != output
389        {
390            changes.push(RedactionChange {
391                kind: RedactionKind::ProjectName,
392                original: output.clone(),
393                redacted: redacted.clone(),
394            });
395            output = redacted;
396        }
397
398        RedactedString { output, changes }
399    }
400
401    fn map_project_name(&self, name: &str) -> String {
402        let mut map = self
403            .project_map
404            .lock()
405            .unwrap_or_else(|poisoned| poisoned.into_inner());
406        if let Some(existing) = map.get(name) {
407            return existing.clone();
408        }
409
410        let next = self.project_counter.fetch_add(1, Ordering::Relaxed) + 1;
411        let anonymized = format!("project-{}", next);
412        map.insert(name.to_string(), anonymized.clone());
413        anonymized
414    }
415}
416
417pub fn swarm_evidence_redaction_config() -> RedactionConfig {
418    let mut config = RedactionConfig {
419        anonymize_project_names: true,
420        redact_hostnames: true,
421        ..Default::default()
422    };
423    config.custom_patterns.push(CustomPattern {
424        name: "absolute_path_with_spaces".to_string(),
425        pattern: Regex::new(
426            r#"(?i)(?:/home/|/Users/|[A-Z]:\\Users\\|/data/projects/)[^"'<>;,)#\r\n]+"#,
427        )
428        .expect("swarm absolute path redaction regex must compile"),
429        replacement: "[REDACTED_PATH]".to_string(),
430        enabled: true,
431    });
432    config.custom_patterns.push(CustomPattern {
433        name: "absolute_path".to_string(),
434        pattern: Regex::new(
435            r#"(?i)(?:/home/|/Users/|[A-Z]:\\Users\\|/data/projects/)[^\s"'<>;,)#]+"#,
436        )
437        .expect("swarm absolute path redaction regex must compile"),
438        replacement: "[REDACTED_PATH]".to_string(),
439        enabled: true,
440    });
441    config.custom_patterns.push(CustomPattern {
442        name: "secret_env_assignment".to_string(),
443        pattern: Regex::new(
444            r#"(?i)\b(?:TOKEN|SECRET|KEY|PASSWORD|PASS|CREDENTIAL|AUTH|[A-Z_][A-Z0-9_]*(?:TOKEN|SECRET|KEY|PASSWORD|PASS|CREDENTIAL|AUTH)[A-Z0-9_]*)=(?:"(?:\\.|[^"\\\r\n])*"|'(?:\\.|[^'\\\r\n])*'|[^\s]+)"#,
445        )
446        .expect("swarm secret env redaction regex must compile"),
447        replacement: SWARM_SECRET_ENV_ASSIGNMENT_REDACTED.to_string(),
448        enabled: true,
449    });
450    config.custom_patterns.push(CustomPattern {
451        name: "bearer_secret".to_string(),
452        pattern: Regex::new(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]{8,}")
453            .expect("swarm bearer redaction regex must compile"),
454        replacement: format!("Bearer {SWARM_SECRET_LITERAL_REDACTED}"),
455        enabled: true,
456    });
457    config.custom_patterns.push(CustomPattern {
458        name: "api_key_literal".to_string(),
459        pattern: Regex::new(
460            r"(?i)\b(?:sk-(?:ant-)?[A-Za-z0-9_-]{8,}|gh[pousr]_[A-Za-z0-9_]{8,}|github_pat_[A-Za-z0-9_]{8,}|(?:AKIA|ASIA)[A-Z0-9]{16})\b",
461        )
462        .expect("swarm API key literal redaction regex must compile"),
463        replacement: SWARM_SECRET_LITERAL_REDACTED.to_string(),
464        enabled: true,
465    });
466    config
467}
468
469pub fn redact_swarm_text(input: &str) -> String {
470    let engine = RedactionEngine::new(swarm_evidence_redaction_config());
471    engine.redact_text(input).output
472}
473
474pub fn redact_swarm_json_value(value: &Value) -> Value {
475    let engine = RedactionEngine::new(swarm_evidence_redaction_config());
476    redact_swarm_json_value_with_engine(&engine, value)
477}
478
479fn redact_swarm_json_value_with_engine(engine: &RedactionEngine, value: &Value) -> Value {
480    match value {
481        Value::String(text) => Value::String(engine.redact_text(text).output),
482        Value::Array(items) => Value::Array(
483            items
484                .iter()
485                .map(|item| redact_swarm_json_value_with_engine(engine, item))
486                .collect(),
487        ),
488        Value::Object(object) => Value::Object(
489            object
490                .iter()
491                .map(|(key, value)| {
492                    (
493                        engine.redact_text(key).output,
494                        redact_swarm_json_value_with_engine(engine, value),
495                    )
496                })
497                .collect::<Map<_, _>>(),
498        ),
499        Value::Null | Value::Bool(_) | Value::Number(_) => value.clone(),
500    }
501}
502
/// Case-insensitive matcher for email-like tokens: a local part, `@`, a
/// dotted domain, and a top-level label of at least two letters.
/// Compiled lazily on first use.
static EMAIL_RE: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
    Regex::new(r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b")
        .expect("email redaction regex must compile")
});
507
/// Case-insensitive matcher for the authority part of `http`, `https`, `ssh`,
/// `ws` and `wss` URLs.
///
/// Named groups: `scheme`, optional `userinfo` (text before `@`), `host`
/// (a bracketed address or a name/IPv4-style host), and optional `port`
/// (including its leading `:`). The redaction code reads `scheme`,
/// `userinfo` and `port`; the `host` group is matched but discarded.
/// Compiled lazily on first use.
static URL_HOST_RE: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
    Regex::new(
        r"(?i)\b(?P<scheme>https?|ssh|wss?)://(?:(?P<userinfo>[^\s/@]+)@)?(?P<host>\[[0-9A-F:.]+\]|[A-Z0-9](?:[A-Z0-9.-]{0,253}[A-Z0-9])?)(?P<port>:\d+)?",
    )
    .expect("URL hostname redaction regex must compile")
});
514
515impl RedactionReport {
516    pub fn new(max_samples: usize) -> Self {
517        Self {
518            max_samples,
519            ..Default::default()
520        }
521    }
522
523    pub fn record(
524        &mut self,
525        location: &str,
526        before: &str,
527        after: &str,
528        changes: &[RedactionChange],
529    ) {
530        if changes.is_empty() {
531            return;
532        }
533
534        self.total_redactions += changes.len();
535        for change in changes {
536            *self.by_kind.entry(change.kind).or_insert(0) += 1;
537        }
538
539        if self.samples.len() < self.max_samples {
540            let mut kinds = Vec::new();
541            for change in changes {
542                if !kinds.contains(&change.kind) {
543                    kinds.push(change.kind);
544                }
545            }
546            self.samples.push(RedactionSample {
547                location: location.to_string(),
548                before: truncate_for_report(before, 140),
549                after: truncate_for_report(after, 140),
550                kinds,
551            });
552        }
553    }
554}
555
/// Shortens `input` to at most `max` characters for display in a report.
///
/// Text that already fits is returned unchanged. Longer text keeps its first
/// `max - 1` characters followed by `…`; with `max == 0` the result is empty.
/// Lengths are counted in `char`s, so multi-byte text is never split.
fn truncate_for_report(input: &str, max: usize) -> String {
    // `nth(max)` yields a character only when more than `max` are present.
    if input.chars().nth(max).is_none() {
        return input.to_owned();
    }

    match max.checked_sub(1) {
        // No room even for the ellipsis.
        None => String::new(),
        Some(kept) => {
            let mut shortened: String = input.chars().take(kept).collect();
            shortened.push('…');
            shortened
        }
    }
}
565
566fn build_username_patterns(
567    redact_usernames: bool,
568    username_map: &HashMap<String, String>,
569    home_dir: Option<&PathBuf>,
570) -> Vec<(Regex, String)> {
571    if !redact_usernames {
572        return Vec::new();
573    }
574
575    let mut patterns = Vec::new();
576
577    for (from, to) in username_map {
578        if let Some(pattern) = build_username_pattern(from, to) {
579            patterns.push(pattern);
580        }
581    }
582
583    if let Some(home) = home_dir
584        && let Some(username) = home.file_name().and_then(|s| s.to_str())
585        && let Some(pattern) = build_username_pattern(username, "user")
586    {
587        patterns.push(pattern);
588    }
589
590    patterns
591}
592
593fn build_username_pattern(username: &str, replacement: &str) -> Option<(Regex, String)> {
594    if username.is_empty() {
595        return None;
596    }
597    let escaped = regex::escape(username);
598    let pattern = format!(
599        r"(?P<prefix>/Users/|/home/|\\Users\\){}(?P<suffix>[/\\])",
600        escaped
601    );
602    let regex = Regex::new(&pattern).ok()?;
603    Some((regex, replacement.to_string()))
604}
605
/// Replaces the final segment of `path` with `map_name(segment)`.
///
/// Both `/` and `\` count as separators. Trailing separators are ignored when
/// locating the final segment and are kept in the result, so
/// `/work/acme/` becomes `/work/<mapped>/` rather than being left untouched.
///
/// Returns `None` — without calling `map_name` — when there is no separator
/// in front of a non-empty final segment (e.g. `"name"`, `"name/"`, `"/"`, `""`).
fn anonymize_last_segment<F>(path: &str, map_name: F) -> Option<String>
where
    F: FnOnce(&str) -> String,
{
    fn is_separator(c: char) -> bool {
        c == '/' || c == '\\'
    }

    // Set trailing separators aside so the segment before them is the one
    // that gets anonymized.
    let trimmed = path.trim_end_matches(is_separator);
    let trailing = &path[trimmed.len()..];

    let sep_idx = trimmed.rfind(is_separator)?;
    // Both separators are one byte long, so `sep_idx + 1` is a char boundary.
    let (head, last) = trimmed.split_at(sep_idx + 1);
    if last.is_empty() {
        return None;
    }

    let replacement = map_name(last);
    Some(format!("{head}{replacement}{trailing}"))
}
618
/// Rewrites every occurrence of `home_str` in `input` to `~`, provided the
/// occurrence ends the input or is followed by `/` or `\`.
///
/// The boundary check keeps sibling directories such as `/home/alice2`
/// intact when the home directory is `/home/alice`.
///
/// Returns `None` when `home_str` is empty or nothing was replaced.
fn replace_home_path_prefixes(input: &str, home_str: &str) -> Option<String> {
    if home_str.is_empty() {
        return None;
    }

    // Keep only occurrences that stop at a path-segment boundary.
    let boundary_hits = input.match_indices(home_str).filter(|(start, hit)| {
        let tail = &input[start + hit.len()..];
        matches!(tail.chars().next(), None | Some('/' | '\\'))
    });

    let mut rebuilt = String::with_capacity(input.len());
    let mut copied_to = 0usize;
    for (start, hit) in boundary_hits {
        rebuilt.push_str(&input[copied_to..start]);
        rebuilt.push('~');
        copied_to = start + hit.len();
    }

    // `home_str` is non-empty, so any replacement moves `copied_to` past zero.
    if copied_to == 0 {
        return None;
    }

    rebuilt.push_str(&input[copied_to..]);
    Some(rebuilt)
}
648
/// Finds the right-most path separator in `path`.
///
/// Returns the separator character (`/` or `\`) together with its byte
/// offset, or `None` when the path contains neither.
fn find_last_separator(path: &str) -> Option<(char, usize)> {
    // Walk backwards so the first hit is the last separator of either kind.
    path.char_indices()
        .rev()
        .find(|&(_, ch)| matches!(ch, '/' | '\\'))
        .map(|(offset, ch)| (ch, offset))
}
666
667#[cfg(test)]
668mod tests {
669    use super::*;
670
671    fn engine_with_context(home: &str) -> RedactionEngine {
672        let config = RedactionConfig::default();
673        let home_dir = PathBuf::from(home);
674        let home_str = Some(home.to_string());
675        let username_patterns = build_username_patterns(
676            config.redact_usernames,
677            &config.username_map,
678            Some(&home_dir),
679        );
680
681        RedactionEngine {
682            config,
683            home_str,
684            username_patterns,
685            project_map: Mutex::new(HashMap::new()),
686            project_counter: AtomicUsize::new(0),
687        }
688    }
689
690    #[test]
691    fn test_home_path_redaction() {
692        let engine = engine_with_context("/home/alice");
693        let result = engine.redact_text("/home/alice/projects/cass/src/main.rs");
694        assert!(result.output.contains("~/projects"));
695    }
696
697    #[test]
698    fn test_home_path_redaction_respects_segment_boundaries() {
699        let engine = engine_with_context("/home/alice");
700        let input = "/home/alice2/projects/cass/src/main.rs";
701        let result = engine.redact_text(input);
702        assert_eq!(result.output, input);
703        assert!(result.changes.is_empty());
704    }
705
706    #[test]
707    fn test_username_redaction_in_paths() {
708        let mut engine = engine_with_context("/home/alice");
709        engine.config.redact_home_paths = false;
710        let result = engine.redact_text("Error in /home/alice/projects/app.rs");
711        assert!(result.output.contains("/home/user/"));
712    }
713
714    #[test]
715    fn test_custom_pattern_redaction() {
716        let mut config = RedactionConfig::default();
717        config.custom_patterns.push(CustomPattern {
718            name: "codename".to_string(),
719            pattern: Regex::new(r"Project\s+Falcon").unwrap(),
720            replacement: "Project X".to_string(),
721            enabled: true,
722        });
723        let engine = RedactionEngine::new(config);
724        let result = engine.redact_text("Working on Project Falcon");
725        assert_eq!(result.output, "Working on Project X");
726    }
727
728    #[test]
729    fn test_project_anonymization() {
730        let config = RedactionConfig {
731            anonymize_project_names: true,
732            ..Default::default()
733        };
734        let engine = RedactionEngine::new(config);
735
736        let result1 = engine.redact_workspace("/home/alice/project-alpha");
737        let result2 = engine.redact_workspace("/home/alice/project-alpha");
738        assert!(result1.output.contains("project-1"));
739        assert!(result2.output.contains("project-1"));
740    }
741
742    #[test]
743    fn test_email_redaction_enabled() {
744        let engine = engine_with_context("/home/alice");
745        let result = engine.redact_text("Contact me at alice@example.com for details");
746        assert!(!result.output.contains("alice@example.com"));
747        assert!(result.output.contains("[EMAIL_REDACTED]"));
748        assert!(
749            result
750                .changes
751                .iter()
752                .any(|change| change.kind == RedactionKind::Email)
753        );
754    }
755
756    #[test]
757    fn test_email_redaction_disabled() {
758        let config = RedactionConfig {
759            redact_emails: false,
760            ..Default::default()
761        };
762        let engine = RedactionEngine::new(config);
763        let result = engine.redact_text("Email bob@example.com");
764        assert!(result.output.contains("bob@example.com"));
765    }
766
767    #[test]
768    fn test_hostname_redaction_in_urls() {
769        let config = RedactionConfig {
770            redact_hostnames: true,
771            redact_emails: false,
772            ..Default::default()
773        };
774        let engine = RedactionEngine::new(config);
775        let result = engine.redact_text("Fetch https://internal.example.corp:8443/api now");
776        assert!(result.output.contains("https://[HOST_REDACTED]:8443/api"));
777        assert!(
778            result
779                .changes
780                .iter()
781                .any(|change| change.kind == RedactionKind::Hostname)
782        );
783    }
784
785    #[test]
786    fn test_hostname_redaction_redacts_url_userinfo() {
787        let config = RedactionConfig {
788            redact_hostnames: true,
789            redact_emails: false,
790            ..Default::default()
791        };
792        let engine = RedactionEngine::new(config);
793
794        let token_result = engine.redact_text("Fetch https://token@internal.example.corp/api");
795        assert_eq!(
796            token_result.output,
797            "Fetch https://[USERINFO_REDACTED]@[HOST_REDACTED]/api"
798        );
799        assert!(!token_result.output.contains("token"));
800
801        let password_result =
802            engine.redact_text("Clone ssh://alice:secret@git.internal.example.corp:2222/repo");
803        assert_eq!(
804            password_result.output,
805            "Clone ssh://[USERINFO_REDACTED]@[HOST_REDACTED]:2222/repo"
806        );
807        assert!(!password_result.output.contains("alice:secret"));
808    }
809
810    #[test]
811    fn test_hostname_redaction_covers_single_label_and_ip_hosts() -> Result<(), &'static str> {
812        let config = RedactionConfig {
813            redact_hostnames: true,
814            redact_emails: false,
815            ..Default::default()
816        };
817        let engine = RedactionEngine::new(config);
818
819        for (input, expected) in [
820            (
821                "Open http://trj:8000/status",
822                "Open http://[HOST_REDACTED]:8000/status",
823            ),
824            (
825                "Open http://localhost:8000/status",
826                "Open http://[HOST_REDACTED]:8000/status",
827            ),
828            (
829                "Fetch https://192.168.1.124:8443/api",
830                "Fetch https://[HOST_REDACTED]:8443/api",
831            ),
832            (
833                "Fetch http://[::1]:8000/health",
834                "Fetch http://[HOST_REDACTED]:8000/health",
835            ),
836        ] {
837            let result = engine.redact_text(input);
838
839            if !result.output.as_str().eq(expected) {
840                return Err("hostname redaction output mismatch");
841            }
842            if !result
843                .changes
844                .iter()
845                .any(|change| change.kind == RedactionKind::Hostname)
846            {
847                return Err("hostname redaction change missing");
848            }
849        }
850
851        Ok(())
852    }
853
854    #[test]
855    fn test_hostname_redaction_preserves_non_url_paths() {
856        let config = RedactionConfig {
857            redact_hostnames: true,
858            redact_home_paths: false,
859            redact_usernames: false,
860            ..Default::default()
861        };
862        let engine = RedactionEngine::new(config);
863        let input = "/home/alice/project/main.rs";
864        let result = engine.redact_text(input);
865        assert_eq!(result.output, input);
866    }
867
868    #[test]
869    fn swarm_evidence_redactor_scrubs_paths_secrets_and_omits_mail_by_default() {
870        let mut redactor = SwarmEvidenceRedactor::strict_default();
871
872        let path = redactor.redact_sensitive_path("/home/alice/private-client/src/lib.rs");
873        assert_eq!(path, "[REDACTED_PATH]");
874
875        let command = redactor.redact_command_argument(
876            "rch exec -- env TOKEN=SECRET_VALUE CARGO_TARGET_DIR=/home/alice/build cargo test",
877        );
878        assert!(!command.contains("SECRET_VALUE"));
879        assert!(!command.contains("/home/alice"));
880        assert!(!command.contains("TOKEN="));
881        assert!(command.contains(SWARM_SECRET_ENV_ASSIGNMENT_REDACTED));
882        assert!(command.contains("CARGO_TARGET_DIR=[REDACTED_PATH]"));
883
884        let env_value = redactor.redact_environment_value("sk-live-secret");
885        assert_eq!(env_value, SWARM_ENV_VALUE_REDACTED);
886
887        let snippet = redactor.redact_mail_body_snippet(
888            "Please inspect /Users/alice/acme and email alice@example.com",
889        );
890        assert_eq!(snippet, SWARM_MAIL_BODY_OMITTED);
891
892        let evidence_ref = redactor
893            .redact_evidence_reference("pack:///data/projects/private-client/session.jsonl#L44");
894        assert_eq!(evidence_ref, "pack://[REDACTED_PATH]#L44");
895        assert!(!evidence_ref.contains("/data/projects/private-client"));
896
897        let report = redactor.report();
898        assert_eq!(report.redaction_policy, SWARM_REDACTION_POLICY);
899        assert!(!report.raw_session_content_included);
900        assert!(!report.mail_body_snippets_included);
901        assert!(report.redaction_applied);
902        assert!(report.sensitive_paths_scrubbed >= 1);
903        assert!(report.command_arguments_scrubbed >= 2);
904        assert_eq!(report.env_values_scrubbed, 1);
905        assert_eq!(report.mailbox_snippets_omitted, 1);
906        assert!(report.evidence_references_scrubbed >= 1);
907    }
908
909    #[test]
910    fn swarm_evidence_mail_snippet_opt_in_still_redacts_content() {
911        let mut redactor = SwarmEvidenceRedactor::new(SwarmEvidenceRedactionConfig {
912            include_mail_body_snippets: true,
913            include_raw_session_content: false,
914        });
915
916        let snippet =
917            redactor.redact_mail_body_snippet("Contact alice@example.com about /home/alice/secret");
918
919        assert!(redactor.report().mail_body_snippets_included);
920        assert!(snippet.contains("[EMAIL_REDACTED]"));
921        assert!(snippet.contains("[REDACTED_PATH]"));
922        assert!(!snippet.contains("alice@example.com"));
923        assert!(!snippet.contains("/home/alice"));
924    }
925
926    #[test]
927    fn swarm_redaction_scrubs_absolute_paths_with_spaces() {
928        for path in [
929            "/home/alice/Secret Project",
930            "/Users/alice/Secret Project",
931            "C:\\Users\\alice\\Secret Project",
932            "/data/projects/Secret Project",
933        ] {
934            let redacted = redact_swarm_text(&format!("Blocked on {path}"));
935
936            assert_eq!(redacted, "Blocked on [REDACTED_PATH]");
937            assert!(!redacted.contains(path));
938            assert!(!redacted.contains("Secret Project"));
939        }
940    }
941
942    #[test]
943    fn swarm_json_redaction_scrubs_object_keys_and_values() {
944        let input = serde_json::json!({
945            "/home/alice/private-client/src/lib.rs": {
946                "TOKEN=SECRET_VALUE": "pack:///data/projects/private-client/session.jsonl#L44",
947                "owner": "alice@example.com"
948            }
949        });
950
951        let output = redact_swarm_json_value(&input);
952        let serialized = output.to_string();
953
954        assert!(!serialized.contains("/home/alice"));
955        assert!(!serialized.contains("/data/projects/private-client"));
956        assert!(!serialized.contains("SECRET_VALUE"));
957        assert!(!serialized.contains("TOKEN="));
958        assert!(!serialized.contains("alice@example.com"));
959        assert!(serialized.contains("[REDACTED_PATH]"));
960        assert!(serialized.contains("[SECRET_ENV_REDACTED]"));
961        assert!(serialized.contains("pack://[REDACTED_PATH]#L44"));
962    }
963
964    #[test]
965    fn swarm_redaction_scrubs_quoted_secret_env_values() {
966        for (command, leaked_fragments) in [
967            (
968                r#"rch exec -- env TOKEN="super secret value" cargo test"#,
969                &["TOKEN=", "super secret value"][..],
970            ),
971            (
972                "rch exec -- env PASSWORD='correct horse battery staple' cargo test",
973                &["PASSWORD=", "correct horse battery staple"][..],
974            ),
975            (
976                r#"API_TOKEN="secret \"quoted\" value" cargo check"#,
977                &["API_TOKEN=", "secret", "quoted"][..],
978            ),
979        ] {
980            let redacted = redact_swarm_text(command);
981
982            assert!(
983                redacted.contains(SWARM_SECRET_ENV_ASSIGNMENT_REDACTED),
984                "secret assignment should be replaced in {redacted:?}"
985            );
986            for fragment in leaked_fragments {
987                assert!(
988                    !redacted.contains(fragment),
989                    "redacted command leaked {fragment:?}: {redacted:?}"
990                );
991            }
992        }
993    }
994
995    #[test]
996    fn swarm_redaction_scrubs_api_key_literals_in_command_args() {
997        for (input, leaked_fragments) in [
998            (
999                "cass pack --api-key sk-live-secret --json",
1000                &["sk-live-secret"][..],
1001            ),
1002            (
1003                "curl -H 'Authorization: Bearer ghp_1234567890abcdef' https://api.example.com",
1004                &["ghp_1234567890abcdef"][..],
1005            ),
1006            (
1007                "AWS_ACCESS_KEY_ID ASIA1234567890ABCDEF",
1008                &["ASIA1234567890ABCDEF"][..],
1009            ),
1010        ] {
1011            let redacted = redact_swarm_text(input);
1012
1013            assert!(
1014                redacted.contains(SWARM_SECRET_LITERAL_REDACTED),
1015                "API key literal should be replaced in {redacted:?}"
1016            );
1017            for fragment in leaked_fragments {
1018                assert!(
1019                    !redacted.contains(fragment),
1020                    "redacted command leaked {fragment:?}: {redacted:?}"
1021                );
1022            }
1023        }
1024
1025        let redacted_json = redact_swarm_json_value(&serde_json::json!({
1026            "command": "cass pack --api-key sk-live-secret --json"
1027        }));
1028        let serialized = redacted_json.to_string();
1029
1030        assert!(!serialized.contains("sk-live-secret"));
1031        assert!(serialized.contains(SWARM_SECRET_LITERAL_REDACTED));
1032    }
1033
1034    #[test]
1035    fn test_report_records_changes() {
1036        let engine = engine_with_context("/home/alice");
1037        let result = engine.redact_text("/home/alice/projects/app.rs");
1038        let mut report = RedactionReport::new(2);
1039
1040        report.record(
1041            "message.content",
1042            "/home/alice/projects/app.rs",
1043            &result.output,
1044            &result.changes,
1045        );
1046
1047        assert!(report.total_redactions > 0);
1048        assert!(!report.samples.is_empty());
1049    }
1050}