Skip to main content

coding_agent_search/pages/
secret_scan.rs

1use anyhow::{Context, Result, bail};
2use console::{Term, style};
3use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt, params_from_iter};
4use frankensqlite::params;
5use indicatif::{ProgressBar, ProgressStyle};
6use once_cell::sync::Lazy;
7use regex::Regex;
8use serde::Serialize;
9use std::collections::{HashMap, HashSet};
10use std::io::Write;
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13use std::sync::atomic::{AtomicBool, Ordering};
14use std::time::Duration;
15
/// Default minimum Shannon entropy (bits per byte) a base64-like token must
/// reach before it is reported.
16const DEFAULT_ENTROPY_THRESHOLD: f64 = 4.0;
/// Default minimum token length considered by the base64 entropy check.
17const DEFAULT_ENTROPY_MIN_LEN: usize = 20;
/// Default size, in bytes, of the context window kept around a match
/// (half before the match, half after).
18const DEFAULT_CONTEXT_BYTES: usize = 120;
/// Default number of findings after which a scan stops and is marked truncated.
19const DEFAULT_MAX_FINDINGS: usize = 500;
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
22#[serde(rename_all = "snake_case")]
23pub enum SecretSeverity {
24    Critical,
25    High,
26    Medium,
27    Low,
28}
29
30impl SecretSeverity {
31    fn rank(self) -> u8 {
32        match self {
33            SecretSeverity::Critical => 0,
34            SecretSeverity::High => 1,
35            SecretSeverity::Medium => 2,
36            SecretSeverity::Low => 3,
37        }
38    }
39
40    pub fn label(self) -> &'static str {
41        match self {
42            SecretSeverity::Critical => "critical",
43            SecretSeverity::High => "high",
44            SecretSeverity::Medium => "medium",
45            SecretSeverity::Low => "low",
46        }
47    }
48
49    fn styled(self, text: &str) -> String {
50        match self {
51            SecretSeverity::Critical => style(text).red().bold().to_string(),
52            SecretSeverity::High => style(text).red().to_string(),
53            SecretSeverity::Medium => style(text).yellow().to_string(),
54            SecretSeverity::Low => style(text).blue().to_string(),
55        }
56    }
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
60#[serde(rename_all = "snake_case")]
61pub enum SecretLocation {
62    ConversationTitle,
63    ConversationMetadata,
64    MessageContent,
65    MessageMetadata,
66    MessageSnippet,
67}
68
69impl SecretLocation {
70    fn label(&self) -> &'static str {
71        match self {
72            SecretLocation::ConversationTitle => "conversation.title",
73            SecretLocation::ConversationMetadata => "conversation.metadata",
74            SecretLocation::MessageContent => "message.content",
75            SecretLocation::MessageMetadata => "message.metadata",
76            SecretLocation::MessageSnippet => "message.snippet",
77        }
78    }
79}
80
/// One potential secret found during a scan.
///
/// The raw matched text is never stored: only its redacted form and a
/// context string in which the match has been replaced by that redacted form.
81#[derive(Debug, Clone, Serialize)]
82pub struct SecretFinding {
    /// Severity assigned by the rule that produced the finding.
83    pub severity: SecretSeverity,
    /// Rule identifier: a built-in pattern id, `denylist`, or one of the
    /// `high_entropy_*` kinds.
84    pub kind: String,
    /// Source of the regex that matched, or `entropy` for entropy-based rules.
85    pub pattern: String,
    /// Redacted rendering of the matched text.
86    pub match_redacted: String,
    /// Text surrounding the match, with the match itself replaced by
    /// `match_redacted`.
87    pub context: String,
    /// Stored field the match was found in.
88    pub location: SecretLocation,
    /// Agent slug of the owning conversation.
89    pub agent: Option<String>,
    /// Workspace path of the owning conversation, if it has one.
90    pub workspace: Option<String>,
    /// Source path recorded on the owning conversation.
91    pub source_path: Option<String>,
    /// Row id of the owning conversation.
92    pub conversation_id: Option<i64>,
    /// Row id of the message; `None` for conversation-level findings.
93    pub message_id: Option<i64>,
    /// Index of the message (`messages.idx`); `None` for conversation-level findings.
94    pub message_idx: Option<i64>,
95}
96
/// Aggregate counts for a scan.
97#[derive(Debug, Clone, Serialize)]
98pub struct SecretScanSummary {
    /// Number of findings in the report.
99    pub total: usize,
    /// Finding count per severity; severities with no findings have no entry.
100    pub by_severity: HashMap<SecretSeverity, usize>,
    /// `true` when at least one finding is `Critical`.
101    pub has_critical: bool,
    /// `true` when the scan stopped early because the maximum number of
    /// findings was reached.
102    pub truncated: bool,
103}
104
/// Result of a scan: summary counts plus the individual findings.
105#[derive(Debug, Clone, Serialize)]
106pub struct SecretScanReport {
    /// Aggregate counts derived from `findings`.
107    pub summary: SecretScanSummary,
    /// Findings as produced by `scan_database`, which orders them from most
    /// to least severe and then by `kind`.
108    pub findings: Vec<SecretFinding>,
109}
110
/// Restricts which conversations a scan looks at.
///
/// Every field is optional; `None` means "no restriction", so
/// `SecretScanFilters::default()` scans everything.
#[derive(Debug, Clone, Default)]
pub struct SecretScanFilters {
    /// Agent slugs to include. `Some(vec![])` matches nothing.
    pub agents: Option<Vec<String>>,
    /// Workspace paths to include. `Some(vec![])` matches nothing.
    pub workspaces: Option<Vec<PathBuf>>,
    /// Inclusive lower bound compared against the conversation's `started_at`.
    pub since_ts: Option<i64>,
    /// Inclusive upper bound compared against the conversation's `started_at`.
    pub until_ts: Option<i64>,
}
118
119#[derive(Debug, Clone)]
120pub struct SecretScanConfig {
121    pub allowlist: Vec<Regex>,
122    pub denylist: Vec<Regex>,
123    pub allowlist_raw: Vec<String>,
124    pub denylist_raw: Vec<String>,
125    pub entropy_threshold: f64,
126    pub entropy_min_len: usize,
127    pub context_bytes: usize,
128    pub max_findings: usize,
129}
130
131impl SecretScanConfig {
132    pub fn from_inputs(allowlist: &[String], denylist: &[String]) -> Result<Self> {
133        Self::from_inputs_with_env(allowlist, denylist, true)
134    }
135
136    pub fn from_inputs_with_env(
137        allowlist: &[String],
138        denylist: &[String],
139        use_env: bool,
140    ) -> Result<Self> {
141        let allowlist_raw = if allowlist.is_empty() && use_env {
142            parse_env_regex_list("CASS_SECRETS_ALLOWLIST")?
143        } else {
144            allowlist.to_vec()
145        };
146        let denylist_raw = if denylist.is_empty() && use_env {
147            parse_env_regex_list("CASS_SECRETS_DENYLIST")?
148        } else {
149            denylist.to_vec()
150        };
151
152        Ok(Self {
153            allowlist: compile_regexes(&allowlist_raw, "allowlist")?,
154            denylist: compile_regexes(&denylist_raw, "denylist")?,
155            allowlist_raw,
156            denylist_raw,
157            entropy_threshold: DEFAULT_ENTROPY_THRESHOLD,
158            entropy_min_len: DEFAULT_ENTROPY_MIN_LEN,
159            context_bytes: DEFAULT_CONTEXT_BYTES,
160            max_findings: DEFAULT_MAX_FINDINGS,
161        })
162    }
163}
164
/// A built-in detection rule.
165struct SecretPattern {
    /// Rule identifier, reported as the finding's `kind`.
166    id: &'static str,
    /// Severity given to every match of this rule.
167    severity: SecretSeverity,
    /// Compiled regex that recognizes the secret.
168    regex: Regex,
169}
170
171static BUILTIN_PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
172    vec![
173        SecretPattern {
174            id: "aws_access_key_id",
175            severity: SecretSeverity::High,
176            regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").expect("aws access key regex"),
177        },
178        SecretPattern {
179            id: "aws_secret_key",
180            severity: SecretSeverity::Critical,
181            regex: Regex::new(
182                r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
183            )
184                .expect("aws secret regex"),
185        },
186        SecretPattern {
187            id: "github_pat",
188            severity: SecretSeverity::High,
189            regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{36}\b").expect("github pat regex"),
190        },
191        SecretPattern {
192            id: "openai_key",
193            severity: SecretSeverity::High,
194            // Note: this also matches Anthropic keys (sk-ant-...) — the anthropic_key
195            // pattern below is more specific and checked separately. Dedup by position
196            // in the caller prevents double-reporting.
197            regex: Regex::new(r"\bsk-[A-Za-z0-9]{20,}\b").expect("openai key regex"),
198        },
199        SecretPattern {
200            id: "anthropic_key",
201            severity: SecretSeverity::High,
202            regex: Regex::new(r"\bsk-ant-[A-Za-z0-9]{20,}\b").expect("anthropic key regex"),
203        },
204        SecretPattern {
205            id: "jwt",
206            severity: SecretSeverity::Medium,
207            regex: Regex::new(r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b")
208                .expect("jwt regex"),
209        },
210        SecretPattern {
211            id: "private_key",
212            severity: SecretSeverity::Critical,
213            regex: Regex::new(
214                r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |PGP |ENCRYPTED )?PRIVATE KEY-----",
215            )
216            .expect("private key regex"),
217        },
218        SecretPattern {
219            id: "database_url",
220            severity: SecretSeverity::Medium,
221            regex: Regex::new(r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]+")
222                .expect("db url regex"),
223        },
224        SecretPattern {
225            id: "generic_api_key",
226            severity: SecretSeverity::Low,
227            regex: Regex::new(
228                r#"(?i)(api[_-]?key|token|secret|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-]{8,}['"]?"#,
229            )
230            .expect("generic api key regex"),
231        },
232    ]
233});
234
/// Candidate tokens for the base64 entropy check: runs of 20 or more
/// characters from the base64 / base64url alphabets (including `=`).
235static ENTROPY_BASE64_RE: Lazy<Regex> =
236    Lazy::new(|| Regex::new(r"[A-Za-z0-9+/=_-]{20,}").expect("entropy base64 regex"));
/// Candidate tokens for the hex entropy check: word-bounded runs of 32 or
/// more hexadecimal digits.
237static ENTROPY_HEX_RE: Lazy<Regex> =
238    Lazy::new(|| Regex::new(r"\b[A-Fa-f0-9]{32,}\b").expect("entropy hex regex"));
239
/// Provenance of the text currently being scanned; copied onto every finding
/// produced from that text.
240#[derive(Debug, Clone)]
241struct ScanContext {
    /// Agent slug of the owning conversation.
242    agent: Option<String>,
    /// Workspace path of the owning conversation.
243    workspace: Option<String>,
    /// Source path of the owning conversation.
244    source_path: Option<String>,
    /// Row id of the owning conversation.
245    conversation_id: Option<i64>,
    /// Row id of the message; `None` while scanning conversation-level fields.
246    message_id: Option<i64>,
    /// Index of the message; `None` while scanning conversation-level fields.
247    message_idx: Option<i64>,
248}
249
/// A match that is about to be recorded, borrowed from the text being scanned.
250struct FindingCandidate<'a> {
    /// Severity to record.
251    severity: SecretSeverity,
    /// Rule identifier to record as the finding's `kind`.
252    kind: &'a str,
    /// Regex source (or `entropy`) to record as the finding's `pattern`.
253    pattern: &'a str,
    /// Full text the match was found in.
254    text: &'a str,
    /// Byte offset in `text` where the match starts.
255    start: usize,
    /// Byte offset in `text` just past the end of the match.
256    end: usize,
    /// Stored field `text` came from.
257    location: SecretLocation,
    /// Provenance copied onto the finding.
258    ctx: &'a ScanContext,
259}
260
261pub fn scan_database<P: AsRef<Path>>(
262    db_path: P,
263    filters: &SecretScanFilters,
264    config: &SecretScanConfig,
265    running: Option<Arc<AtomicBool>>,
266    progress: Option<&ProgressBar>,
267) -> Result<SecretScanReport> {
268    let conn = super::open_existing_sqlite_db(db_path.as_ref())
269        .context("Failed to open database for secret scan")?;
270
271    let mut findings: Vec<SecretFinding> = Vec::new();
272    let mut seen: HashSet<String> = HashSet::new();
273    let mut truncated = false;
274
275    // LEFT JOIN + COALESCE on agents so secret scanning also covers legacy
276    // conversations with NULL agent_id — dropping them would hide credential
277    // leaks rather than exposing them.
278    let (conv_where, conv_params) = build_where_clause(filters)?;
279    let conv_sql = format!(
280        "SELECT c.id, c.title, c.metadata_json, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n         FROM conversations c\n         LEFT JOIN agents a ON c.agent_id = a.id\n         LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
281        conv_where
282    );
283    let conv_param_values = params_from_iter(conv_params);
284    let conv_rows = conn.query_with_params(&conv_sql, &conv_param_values)?;
285
286    for row in &conv_rows {
287        if running
288            .as_ref()
289            .is_some_and(|flag| !flag.load(Ordering::Relaxed))
290        {
291            break;
292        }
293        let conv_id: i64 = row.get_typed(0)?;
294        let title: Option<String> = row.get_typed(1)?;
295        let metadata_json: Option<String> = row.get_typed(2)?;
296        let source_path: String = row.get_typed(3)?;
297        let agent_slug: String = row.get_typed(4)?;
298        let workspace_path: Option<String> = row.get_typed(5)?;
299
300        let ctx = ScanContext {
301            agent: Some(agent_slug),
302            workspace: workspace_path,
303            source_path: Some(source_path),
304            conversation_id: Some(conv_id),
305            message_id: None,
306            message_idx: None,
307        };
308
309        if let Some(title_text) = title {
310            scan_text(
311                &title_text,
312                SecretLocation::ConversationTitle,
313                &ctx,
314                config,
315                &mut findings,
316                &mut seen,
317                &mut truncated,
318            );
319        }
320        if let Some(meta) = metadata_json {
321            scan_text(
322                &meta,
323                SecretLocation::ConversationMetadata,
324                &ctx,
325                config,
326                &mut findings,
327                &mut seen,
328                &mut truncated,
329            );
330        }
331
332        if truncated {
333            break;
334        }
335
336        if let Some(pb) = progress {
337            pb.inc(1);
338        }
339    }
340
341    if !truncated {
342        let (msg_where, msg_params) = build_where_clause(filters)?;
343        let msg_sql = format!(
344            "SELECT m.id, m.idx, m.content, m.extra_json, c.id, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n             FROM messages m\n             JOIN conversations c ON m.conversation_id = c.id\n             LEFT JOIN agents a ON c.agent_id = a.id\n             LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
345            msg_where
346        );
347        let msg_param_values = params_from_iter(msg_params);
348        let msg_rows = conn.query_with_params(&msg_sql, &msg_param_values)?;
349
350        for row in &msg_rows {
351            if running
352                .as_ref()
353                .is_some_and(|flag| !flag.load(Ordering::Relaxed))
354            {
355                break;
356            }
357            let msg_id: i64 = row.get_typed(0)?;
358            let msg_idx: i64 = row.get_typed(1)?;
359            let content: String = row.get_typed(2)?;
360            let extra_json: Option<String> = row.get_typed(3)?;
361            let conv_id: i64 = row.get_typed(4)?;
362            let source_path: String = row.get_typed(5)?;
363            let agent_slug: String = row.get_typed(6)?;
364            let workspace_path: Option<String> = row.get_typed(7)?;
365
366            let ctx = ScanContext {
367                agent: Some(agent_slug),
368                workspace: workspace_path,
369                source_path: Some(source_path),
370                conversation_id: Some(conv_id),
371                message_id: Some(msg_id),
372                message_idx: Some(msg_idx),
373            };
374
375            scan_text(
376                &content,
377                SecretLocation::MessageContent,
378                &ctx,
379                config,
380                &mut findings,
381                &mut seen,
382                &mut truncated,
383            );
384            if let Some(extra) = extra_json {
385                scan_text(
386                    &extra,
387                    SecretLocation::MessageMetadata,
388                    &ctx,
389                    config,
390                    &mut findings,
391                    &mut seen,
392                    &mut truncated,
393                );
394            }
395
396            if truncated {
397                break;
398            }
399
400            if let Some(pb) = progress {
401                pb.inc(1);
402            }
403        }
404    }
405
406    if !truncated && table_exists(&conn, "snippets") {
407        let (snip_where, snip_params) = build_where_clause(filters)?;
408        let snip_sql = format!(
409            "SELECT s.snippet_text, m.id, m.idx, c.id, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n             FROM snippets s\n             JOIN messages m ON s.message_id = m.id\n             JOIN conversations c ON m.conversation_id = c.id\n             LEFT JOIN agents a ON c.agent_id = a.id\n             LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
410            snip_where
411        );
412        let snip_param_values = params_from_iter(snip_params);
413        let snip_rows = conn.query_with_params(&snip_sql, &snip_param_values)?;
414
415        for row in &snip_rows {
416            if running
417                .as_ref()
418                .is_some_and(|flag| !flag.load(Ordering::Relaxed))
419            {
420                break;
421            }
422            let snippet_text: String = row.get_typed(0)?;
423            let msg_id: i64 = row.get_typed(1)?;
424            let msg_idx: i64 = row.get_typed(2)?;
425            let conv_id: i64 = row.get_typed(3)?;
426            let source_path: String = row.get_typed(4)?;
427            let agent_slug: String = row.get_typed(5)?;
428            let workspace_path: Option<String> = row.get_typed(6)?;
429
430            let ctx = ScanContext {
431                agent: Some(agent_slug),
432                workspace: workspace_path,
433                source_path: Some(source_path),
434                conversation_id: Some(conv_id),
435                message_id: Some(msg_id),
436                message_idx: Some(msg_idx),
437            };
438
439            scan_text(
440                &snippet_text,
441                SecretLocation::MessageSnippet,
442                &ctx,
443                config,
444                &mut findings,
445                &mut seen,
446                &mut truncated,
447            );
448
449            if truncated {
450                break;
451            }
452
453            if let Some(pb) = progress {
454                pb.inc(1);
455            }
456        }
457    }
458
459    findings.sort_by(|a, b| {
460        a.severity
461            .rank()
462            .cmp(&b.severity.rank())
463            .then_with(|| a.kind.cmp(&b.kind))
464    });
465
466    let mut by_severity: HashMap<SecretSeverity, usize> = HashMap::new();
467    for finding in &findings {
468        *by_severity.entry(finding.severity).or_insert(0) += 1;
469    }
470
471    let has_critical = by_severity
472        .get(&SecretSeverity::Critical)
473        .copied()
474        .unwrap_or(0)
475        > 0;
476
477    Ok(SecretScanReport {
478        summary: SecretScanSummary {
479            total: findings.len(),
480            by_severity,
481            has_critical,
482            truncated,
483        },
484        findings,
485    })
486}
487
488fn table_exists(conn: &frankensqlite::Connection, table_name: &str) -> bool {
489    if !table_name
490        .chars()
491        .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
492    {
493        return false;
494    }
495
496    let pragma = format!("PRAGMA table_info({table_name})");
497    conn.query_map_collect(&pragma, params![], |row| row.get_typed::<String>(1))
498        .map(|columns| !columns.is_empty())
499        .unwrap_or(false)
500}
501
502pub fn print_human_report(
503    term: &mut Term,
504    report: &SecretScanReport,
505    max_examples: usize,
506) -> Result<()> {
507    let total = report.summary.total;
508    if total == 0 {
509        writeln!(term, "  {} No secrets detected", style("✓").green())?;
510        return Ok(());
511    }
512
513    writeln!(
514        term,
515        "  {} {} potential secret(s) detected",
516        style("⚠").yellow(),
517        total
518    )?;
519
520    let mut severities = vec![
521        SecretSeverity::Critical,
522        SecretSeverity::High,
523        SecretSeverity::Medium,
524        SecretSeverity::Low,
525    ];
526
527    severities.sort_by_key(|s| s.rank());
528
529    for severity in severities {
530        let count = report
531            .summary
532            .by_severity
533            .get(&severity)
534            .copied()
535            .unwrap_or(0);
536        if count == 0 {
537            continue;
538        }
539        let label = severity.styled(severity.label());
540        writeln!(term, "  {}: {}", label, count)?;
541
542        for finding in report
543            .findings
544            .iter()
545            .filter(|f| f.severity == severity)
546            .take(max_examples)
547        {
548            writeln!(
549                term,
550                "    - {} in {} ({})",
551                finding.kind,
552                finding.location.label(),
553                finding.match_redacted
554            )?;
555            if !finding.context.is_empty() {
556                writeln!(term, "      {}", style(&finding.context).dim())?;
557            }
558        }
559        if count > max_examples {
560            writeln!(term, "      {}", style("…additional findings hidden").dim())?;
561        }
562    }
563
564    if report.summary.truncated {
565        writeln!(
566            term,
567            "  {} Results truncated (max findings reached)",
568            style("⚠").yellow()
569        )?;
570    }
571
572    Ok(())
573}
574
575pub fn print_cli_report(report: &SecretScanReport, json: bool) -> Result<()> {
576    if json {
577        let payload = serde_json::to_string_pretty(report)?;
578        println!("{payload}");
579        return Ok(());
580    }
581
582    let mut term = Term::stdout();
583    print_human_report(&mut term, report, 3)
584}
585
586pub fn run_secret_scan_cli<P: AsRef<Path>>(
587    db_path: P,
588    filters: &SecretScanFilters,
589    config: &SecretScanConfig,
590    json: bool,
591    fail_on_secrets: bool,
592) -> Result<()> {
593    let progress = ProgressBar::new_spinner();
594    progress.set_style(
595        ProgressStyle::with_template("{spinner} {msg}")
596            .unwrap()
597            .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
598    );
599    progress.set_message("Scanning for secrets...");
600    progress.enable_steady_tick(Duration::from_millis(120));
601
602    let report = scan_database(db_path, filters, config, None, Some(&progress))?;
603    progress.finish_and_clear();
604
605    print_cli_report(&report, json)?;
606
607    if fail_on_secrets && report.summary.total > 0 {
608        bail!("Secrets detected ({} finding(s))", report.summary.total);
609    }
610
611    Ok(())
612}
613
614pub fn wizard_secret_scan<P: AsRef<Path>>(
615    db_path: P,
616    filters: &SecretScanFilters,
617    config: &SecretScanConfig,
618) -> Result<SecretScanReport> {
619    let progress = ProgressBar::new_spinner();
620    progress.set_style(
621        ProgressStyle::with_template("{spinner} {msg}")
622            .unwrap()
623            .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
624    );
625    progress.set_message("Scanning for secrets...");
626    progress.enable_steady_tick(Duration::from_millis(120));
627
628    let report = scan_database(db_path, filters, config, None, Some(&progress))?;
629    progress.finish_and_clear();
630    Ok(report)
631}
632
633fn scan_text(
634    text: &str,
635    location: SecretLocation,
636    ctx: &ScanContext,
637    config: &SecretScanConfig,
638    findings: &mut Vec<SecretFinding>,
639    seen: &mut HashSet<String>,
640    truncated: &mut bool,
641) {
642    if *truncated || text.is_empty() {
643        return;
644    }
645
646    // Denylist first (always critical)
647    for deny in &config.denylist {
648        for mat in deny.find_iter(text) {
649            if findings.len() >= config.max_findings {
650                *truncated = true;
651                return;
652            }
653            push_finding(
654                findings,
655                seen,
656                FindingCandidate {
657                    severity: SecretSeverity::Critical,
658                    kind: "denylist",
659                    pattern: deny.as_str(),
660                    text,
661                    start: mat.start(),
662                    end: mat.end(),
663                    location: location.clone(),
664                    ctx,
665                },
666                config,
667            );
668        }
669    }
670
671    // Built-in patterns
672    for pattern in BUILTIN_PATTERNS.iter() {
673        for mat in pattern.regex.find_iter(text) {
674            if findings.len() >= config.max_findings {
675                *truncated = true;
676                return;
677            }
678            let matched = &text[mat.start()..mat.end()];
679            if is_allowlisted(matched, config) {
680                continue;
681            }
682            push_finding(
683                findings,
684                seen,
685                FindingCandidate {
686                    severity: pattern.severity,
687                    kind: pattern.id,
688                    pattern: pattern.regex.as_str(),
689                    text,
690                    start: mat.start(),
691                    end: mat.end(),
692                    location: location.clone(),
693                    ctx,
694                },
695                config,
696            );
697        }
698    }
699
700    // Entropy-based detection
701    for mat in ENTROPY_BASE64_RE.find_iter(text) {
702        if findings.len() >= config.max_findings {
703            *truncated = true;
704            return;
705        }
706        let candidate = &text[mat.start()..mat.end()];
707        if candidate.len() < config.entropy_min_len {
708            continue;
709        }
710        if is_allowlisted(candidate, config) {
711            continue;
712        }
713        // Heuristic: Pure alphabetic strings are likely code identifiers (CamelCase), not secrets.
714        // Secrets usually have digits or symbols.
715        if candidate.chars().all(|c| c.is_ascii_alphabetic()) {
716            continue;
717        }
718
719        let entropy = shannon_entropy(candidate);
720        if entropy >= config.entropy_threshold {
721            push_finding(
722                findings,
723                seen,
724                FindingCandidate {
725                    severity: SecretSeverity::Medium,
726                    kind: "high_entropy_base64",
727                    pattern: "entropy",
728                    text,
729                    start: mat.start(),
730                    end: mat.end(),
731                    location: location.clone(),
732                    ctx,
733                },
734                config,
735            );
736        }
737    }
738
739    for mat in ENTROPY_HEX_RE.find_iter(text) {
740        if findings.len() >= config.max_findings {
741            *truncated = true;
742            return;
743        }
744        let candidate = &text[mat.start()..mat.end()];
745        if candidate.len() < 32 {
746            continue;
747        }
748        if is_allowlisted(candidate, config) {
749            continue;
750        }
751        let entropy = shannon_entropy(candidate);
752        if entropy >= 3.0 {
753            push_finding(
754                findings,
755                seen,
756                FindingCandidate {
757                    severity: SecretSeverity::Low,
758                    kind: "high_entropy_hex",
759                    pattern: "entropy",
760                    text,
761                    start: mat.start(),
762                    end: mat.end(),
763                    location: location.clone(),
764                    ctx,
765                },
766                config,
767            );
768        }
769    }
770}
771
772fn push_finding(
773    findings: &mut Vec<SecretFinding>,
774    seen: &mut HashSet<String>,
775    candidate: FindingCandidate<'_>,
776    config: &SecretScanConfig,
777) {
778    let match_text = &candidate.text[candidate.start..candidate.end];
779    let match_redacted = redact_token(match_text);
780    let context = redact_context(
781        candidate.text,
782        candidate.start,
783        candidate.end,
784        config.context_bytes,
785        &match_redacted,
786    );
787
788    let key = format!(
789        "{}:{}:{}:{}:{}",
790        candidate.ctx.conversation_id.unwrap_or_default(),
791        candidate.ctx.message_id.unwrap_or_default(),
792        candidate.location.label(),
793        candidate.kind,
794        match_redacted
795    );
796
797    if !seen.insert(key) {
798        return;
799    }
800
801    findings.push(SecretFinding {
802        severity: candidate.severity,
803        kind: candidate.kind.to_string(),
804        pattern: candidate.pattern.to_string(),
805        match_redacted,
806        context,
807        location: candidate.location,
808        agent: candidate.ctx.agent.clone(),
809        workspace: candidate.ctx.workspace.clone(),
810        source_path: candidate.ctx.source_path.clone(),
811        conversation_id: candidate.ctx.conversation_id,
812        message_id: candidate.ctx.message_id,
813        message_idx: candidate.ctx.message_idx,
814    });
815}
816
/// Produces a display-safe stand-in for a matched secret.
///
/// Tokens of eight characters or fewer become `[redacted]`. Longer tokens
/// keep their first two and last two characters plus the character count,
/// e.g. `AK…OP (len 20)`. Lengths are counted in `char`s, not bytes.
fn redact_token(token: &str) -> String {
    let len = token.chars().count();
    if len <= 8 {
        return "[redacted]".to_string();
    }

    let prefix: String = token.chars().take(2).collect();
    // `len > 8` here, so skipping `len - 2` characters leaves exactly the last two.
    let suffix: String = token.chars().skip(len - 2).collect();
    format!("{}…{} (len {})", prefix, suffix, len)
}
834
/// Returns the text around a match with the match itself replaced.
///
/// `start..end` is the byte range of the match in `text`. Up to `window / 2`
/// bytes are kept on each side (widened outward to the nearest character
/// boundary) and the match is substituted with `replacement`.
///
/// An empty string is returned when `text` is empty, the range is empty or
/// inverted, or `start` lies at or past the end of `text`. An `end` past the
/// end of `text` is clamped. Offsets that fall inside a multi-byte character
/// are widened to enclose that character, so the whole character is redacted
/// rather than sliced.
fn redact_context(
    text: &str,
    start: usize,
    end: usize,
    window: usize,
    replacement: &str,
) -> String {
    if text.is_empty() || start >= end || start >= text.len() {
        return String::new();
    }

    // Widen the match outward so slicing can never split a character.
    let match_start = adjust_to_char_boundary(text, start, false);
    let match_end = adjust_to_char_boundary(text, end, true);

    let half = window / 2;
    let ctx_start = adjust_to_char_boundary(text, match_start.saturating_sub(half), false);
    let ctx_end = adjust_to_char_boundary(text, match_end.saturating_add(half), true);

    let prefix = &text[ctx_start..match_start];
    let suffix = &text[match_end..ctx_end];

    let mut snippet = String::with_capacity(prefix.len() + replacement.len() + suffix.len());
    snippet.push_str(prefix);
    snippet.push_str(replacement);
    snippet.push_str(suffix);
    snippet
}

/// Moves `idx` to the nearest character boundary of `text`.
///
/// Indices at or past the end map to `text.len()`. Otherwise the search goes
/// towards the end of the string when `forward` is `true`, and towards the
/// start when it is `false`.
fn adjust_to_char_boundary(text: &str, idx: usize, forward: bool) -> usize {
    if idx >= text.len() {
        return text.len();
    }
    if forward {
        (idx..text.len())
            .find(|&i| text.is_char_boundary(i))
            .unwrap_or(text.len())
    } else {
        (0..=idx)
            .rev()
            .find(|&i| text.is_char_boundary(i))
            .unwrap_or(0)
    }
}
891
/// Shannon entropy of `token` in bits per byte, computed over its UTF-8
/// bytes. Returns `0.0` for an empty token.
fn shannon_entropy(token: &str) -> f64 {
    if token.is_empty() {
        return 0.0;
    }

    let mut histogram = [0usize; 256];
    for &byte in token.as_bytes() {
        histogram[usize::from(byte)] += 1;
    }

    let total = token.len() as f64;
    histogram
        .iter()
        .filter(|&&count| count != 0)
        .fold(0.0, |entropy, &count| {
            let p = count as f64 / total;
            entropy - p * p.log2()
        })
}
912
913fn is_allowlisted(matched: &str, config: &SecretScanConfig) -> bool {
914    for allow in &config.allowlist {
915        if allow.is_match(matched) {
916            return true;
917        }
918    }
919    false
920}
921
922fn build_where_clause(filters: &SecretScanFilters) -> Result<(String, Vec<ParamValue>)> {
923    let mut conditions: Vec<String> = Vec::new();
924    let mut params: Vec<ParamValue> = Vec::new();
925
926    if let Some(agents) = filters.agents.as_ref() {
927        if agents.is_empty() {
928            conditions.push("1=0".to_string());
929        } else {
930            let placeholders: Vec<&str> = agents.iter().map(|_| "?").collect();
931            conditions.push(format!("a.slug IN ({})", placeholders.join(", ")));
932            for agent in agents {
933                params.push(ParamValue::from(agent.as_str()));
934            }
935        }
936    }
937
938    if let Some(workspaces) = filters.workspaces.as_ref() {
939        if workspaces.is_empty() {
940            conditions.push("1=0".to_string());
941        } else {
942            let placeholders: Vec<&str> = workspaces.iter().map(|_| "?").collect();
943            conditions.push(format!("w.path IN ({})", placeholders.join(", ")));
944            for ws in workspaces {
945                params.push(ParamValue::from(ws.to_string_lossy().to_string()));
946            }
947        }
948    }
949
950    if let Some(since) = filters.since_ts {
951        conditions.push("c.started_at >= ?".to_string());
952        params.push(ParamValue::from(since));
953    }
954
955    if let Some(until) = filters.until_ts {
956        conditions.push("c.started_at <= ?".to_string());
957        params.push(ParamValue::from(until));
958    }
959
960    let where_clause = if conditions.is_empty() {
961        String::new()
962    } else {
963        format!(" WHERE {}", conditions.join(" AND "))
964    };
965
966    Ok((where_clause, params))
967}
968
969fn parse_env_regex_list(var: &str) -> Result<Vec<String>> {
970    let value = match dotenvy::var(var) {
971        Ok(v) => v,
972        Err(_) => return Ok(Vec::new()),
973    };
974    let items = value
975        .split(',')
976        .map(|s| s.trim().to_string())
977        .filter(|s| !s.is_empty())
978        .collect::<Vec<_>>();
979    Ok(items)
980}
981
982fn compile_regexes(patterns: &[String], label: &str) -> Result<Vec<Regex>> {
983    let mut compiled = Vec::new();
984    for pat in patterns {
985        let regex = Regex::new(pat).with_context(|| format!("Invalid {} regex: {}", label, pat))?;
986        compiled.push(regex);
987    }
988    Ok(compiled)
989}
990
// Unit tests for the secret scanner: entropy scoring, redaction helpers,
// allowlist/denylist handling, SQL filter construction, and `scan_text`.
#[cfg(test)]
mod tests {
    use super::*;

    // =========================================================================
    // Shared fixtures
    // =========================================================================

    /// Config built from empty allow/deny lists with the env flag off.
    fn default_config() -> SecretScanConfig {
        SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap()
    }

    /// Scan context with every field unset.
    fn empty_ctx() -> ScanContext {
        ScanContext {
            agent: None,
            workspace: None,
            source_path: None,
            conversation_id: None,
            message_id: None,
            message_idx: None,
        }
    }

    /// Scan context for message 1 (index 0) of conversation 1, with no agent.
    fn message_ctx() -> ScanContext {
        ScanContext {
            conversation_id: Some(1),
            message_id: Some(1),
            message_idx: Some(0),
            ..empty_ctx()
        }
    }

    // =========================================================================
    // Shannon entropy tests
    // =========================================================================

    #[test]
    fn shannon_entropy_empty_string_returns_zero() {
        assert_eq!(shannon_entropy(""), 0.0);
    }

    #[test]
    fn shannon_entropy_single_repeated_char_returns_zero() {
        assert_eq!(shannon_entropy("aaaaaaaaaa"), 0.0);
    }

    #[test]
    fn shannon_entropy_two_equal_chars_returns_one() {
        let e = shannon_entropy("ab");
        assert!((e - 1.0).abs() < 0.001, "expected ~1.0, got {}", e);
    }

    #[test]
    fn shannon_entropy_high_entropy_base64() {
        // A string with many distinct chars should have high entropy
        let token = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        let e = shannon_entropy(token);
        assert!(e > 4.0, "expected entropy > 4.0, got {}", e);
    }

    #[test]
    fn shannon_entropy_hex_string() {
        let hex = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4";
        let e = shannon_entropy(hex);
        assert!(e > 3.0, "expected entropy > 3.0 for hex, got {}", e);
    }

    // =========================================================================
    // Redact token tests
    // =========================================================================

    #[test]
    fn redact_token_short_returns_redacted() {
        assert_eq!(redact_token("abcd"), "[redacted]");
        assert_eq!(redact_token("12345678"), "[redacted]");
    }

    #[test]
    fn redact_token_long_shows_prefix_suffix_len() {
        let result = redact_token("sk-abcdefghijklmnop");
        assert!(
            result.starts_with("sk"),
            "should start with first 2 chars: {}",
            result
        );
        assert!(
            result.contains("op"),
            "should contain last 2 chars: {}",
            result
        );
        assert!(result.contains("len 19"), "should show length: {}", result);
    }

    #[test]
    fn redact_token_nine_chars_shows_format() {
        let result = redact_token("123456789");
        assert!(result.starts_with("12"), "{}", result);
        assert!(result.contains("89"), "{}", result);
        assert!(result.contains("len 9"), "{}", result);
    }

    // =========================================================================
    // Redact context tests
    // =========================================================================

    #[test]
    fn redact_context_empty_text_returns_empty() {
        assert_eq!(redact_context("", 0, 0, 120, "[REDACTED]"), "");
    }

    #[test]
    fn redact_context_replaces_match_with_replacement() {
        let secret = "sk-ABCDEFGHIJ";
        let text = "The key is sk-ABCDEFGHIJ and more";
        // Derive the byte span from the text so it covers exactly the secret
        // (a hand-counted end offset previously overshot by one byte).
        let start = text.find(secret).expect("secret is present in the text");
        let end = start + secret.len();
        let result = redact_context(text, start, end, 120, "[REDACTED]");
        assert!(result.contains("[REDACTED]"), "result: {}", result);
        assert!(
            !result.contains(secret),
            "secret should be removed: {}",
            result
        );
    }

    #[test]
    fn redact_context_match_at_start() {
        let text = "sk-SECRET rest of the text";
        let result = redact_context(text, 0, 9, 120, "[R]");
        assert!(result.starts_with("[R]"), "result: {}", result);
    }

    #[test]
    fn redact_context_match_at_end() {
        let text = "prefix sk-SECRET";
        let result = redact_context(text, 7, 16, 120, "[R]");
        assert!(result.ends_with("[R]"), "result: {}", result);
    }

    #[test]
    fn redact_context_start_beyond_text_returns_empty() {
        assert_eq!(redact_context("short", 10, 15, 120, "[R]"), "");
    }

    // =========================================================================
    // Allowlist tests
    // =========================================================================

    #[test]
    fn is_allowlisted_returns_true_for_matching_pattern() {
        let config =
            SecretScanConfig::from_inputs_with_env(&["sk-test.*".to_string()], &[], false).unwrap();
        assert!(is_allowlisted("sk-test1234567890abcdef", &config));
    }

    #[test]
    fn is_allowlisted_returns_false_when_no_match() {
        let config =
            SecretScanConfig::from_inputs_with_env(&["sk-test.*".to_string()], &[], false).unwrap();
        assert!(!is_allowlisted("sk-prod1234567890abcdef", &config));
    }

    #[test]
    fn is_allowlisted_empty_list_returns_false() {
        let config = default_config();
        assert!(!is_allowlisted("anything", &config));
    }

    // =========================================================================
    // Adjust to char boundary tests
    // =========================================================================

    #[test]
    fn adjust_to_char_boundary_ascii() {
        let text = "hello";
        assert_eq!(adjust_to_char_boundary(text, 3, true), 3);
        assert_eq!(adjust_to_char_boundary(text, 3, false), 3);
    }

    #[test]
    fn adjust_to_char_boundary_multibyte_forward() {
        let text = "héllo"; // 'é' is 2 bytes (0xC3 0xA9)
        // Index 2 is in the middle of 'é', so the result must land on a boundary
        let idx = adjust_to_char_boundary(text, 2, true);
        assert!(
            text.is_char_boundary(idx),
            "idx {} not a char boundary",
            idx
        );
    }

    #[test]
    fn adjust_to_char_boundary_multibyte_backward() {
        let text = "héllo";
        let idx = adjust_to_char_boundary(text, 2, false);
        assert!(
            text.is_char_boundary(idx),
            "idx {} not a char boundary",
            idx
        );
    }

    #[test]
    fn adjust_to_char_boundary_beyond_len() {
        let text = "abc";
        assert_eq!(adjust_to_char_boundary(text, 100, true), 3);
    }

    // =========================================================================
    // Config construction tests
    // =========================================================================

    #[test]
    fn config_from_inputs_with_valid_patterns() {
        let config = SecretScanConfig::from_inputs_with_env(
            &["allowed_.*".to_string()],
            &["denied_.*".to_string()],
            false,
        )
        .unwrap();
        assert_eq!(config.allowlist.len(), 1);
        assert_eq!(config.denylist.len(), 1);
        assert_eq!(config.entropy_threshold, DEFAULT_ENTROPY_THRESHOLD);
    }

    #[test]
    fn config_from_inputs_with_invalid_regex_returns_error() {
        let result = SecretScanConfig::from_inputs_with_env(&["[invalid".to_string()], &[], false);
        assert!(result.is_err(), "invalid regex should return error");
    }

    #[test]
    fn config_from_inputs_empty_lists() {
        let config = default_config();
        assert!(config.allowlist.is_empty());
        assert!(config.denylist.is_empty());
        assert_eq!(config.max_findings, DEFAULT_MAX_FINDINGS);
    }

    // =========================================================================
    // Built-in pattern tests (regexes exercised directly, no database involved)
    // =========================================================================

    #[test]
    fn builtin_patterns_aws_access_key_detected() {
        let text = "Found key AKIAIOSFODNN7EXAMPLE in config";
        let pattern = &BUILTIN_PATTERNS[0]; // aws_access_key_id
        assert!(
            pattern.regex.is_match(text),
            "should detect AWS access key ID"
        );
    }

    #[test]
    fn builtin_patterns_github_pat_detected() {
        let text = "token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
        let pattern = &BUILTIN_PATTERNS[2]; // github_pat
        assert!(pattern.regex.is_match(text), "should detect GitHub PAT");
    }

    #[test]
    fn builtin_patterns_anthropic_key_detected() {
        let text = "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh";
        let pattern = &BUILTIN_PATTERNS[4]; // anthropic_key
        assert!(pattern.regex.is_match(text), "should detect Anthropic key");
    }

    #[test]
    fn builtin_patterns_jwt_detected() {
        let text = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123";
        let pattern = &BUILTIN_PATTERNS[5]; // jwt
        assert!(pattern.regex.is_match(text), "should detect JWT");
    }

    #[test]
    fn builtin_patterns_private_key_detected() {
        let text = "-----BEGIN RSA PRIVATE KEY-----\nMIIE...";
        let pattern = &BUILTIN_PATTERNS[6]; // private_key
        assert!(pattern.regex.is_match(text), "should detect private key");
    }

    #[test]
    fn builtin_patterns_database_url_detected() {
        let text = "database_url=postgres://user:pass@host:5432/db";
        let pattern = &BUILTIN_PATTERNS[7]; // database_url
        assert!(pattern.regex.is_match(text), "should detect database URL");
    }

    #[test]
    fn builtin_patterns_generic_api_key_detected() {
        let text = "api_key=abcdefgh12345678";
        let pattern = &BUILTIN_PATTERNS[8]; // generic_api_key
        assert!(
            pattern.regex.is_match(text),
            "should detect generic API key"
        );
    }

    #[test]
    fn builtin_patterns_safe_text_not_detected() {
        let safe_text = "This is a normal message about Rust programming.";
        for pattern in BUILTIN_PATTERNS.iter() {
            assert!(
                !pattern.regex.is_match(safe_text),
                "pattern {} should not match safe text",
                pattern.id,
            );
        }
    }

    // =========================================================================
    // Severity ranking tests
    // =========================================================================

    #[test]
    fn severity_rank_ordering() {
        // Lower rank means more severe.
        assert!(SecretSeverity::Critical.rank() < SecretSeverity::High.rank());
        assert!(SecretSeverity::High.rank() < SecretSeverity::Medium.rank());
        assert!(SecretSeverity::Medium.rank() < SecretSeverity::Low.rank());
    }

    #[test]
    fn severity_label_values() {
        assert_eq!(SecretSeverity::Critical.label(), "critical");
        assert_eq!(SecretSeverity::High.label(), "high");
        assert_eq!(SecretSeverity::Medium.label(), "medium");
        assert_eq!(SecretSeverity::Low.label(), "low");
    }

    // =========================================================================
    // SecretLocation label tests
    // =========================================================================

    #[test]
    fn location_labels() {
        assert_eq!(
            SecretLocation::ConversationTitle.label(),
            "conversation.title"
        );
        assert_eq!(
            SecretLocation::ConversationMetadata.label(),
            "conversation.metadata"
        );
        assert_eq!(SecretLocation::MessageContent.label(), "message.content");
        assert_eq!(SecretLocation::MessageMetadata.label(), "message.metadata");
    }

    // =========================================================================
    // Build where clause tests
    // =========================================================================

    #[test]
    fn build_where_clause_empty_filters() {
        let filters = SecretScanFilters {
            agents: None,
            workspaces: None,
            since_ts: None,
            until_ts: None,
        };
        let (clause, params) = build_where_clause(&filters).unwrap();
        assert!(clause.is_empty(), "empty filters should give empty clause");
        assert!(params.is_empty());
    }

    #[test]
    fn build_where_clause_with_agent_filter() {
        let filters = SecretScanFilters {
            agents: Some(vec!["claude".to_string(), "codex".to_string()]),
            workspaces: None,
            since_ts: None,
            until_ts: None,
        };
        let (clause, params) = build_where_clause(&filters).unwrap();
        assert!(clause.contains("a.slug IN"), "clause: {}", clause);
        assert_eq!(params.len(), 2);
    }

    #[test]
    fn build_where_clause_with_time_range() {
        let filters = SecretScanFilters {
            agents: None,
            workspaces: None,
            since_ts: Some(1000),
            until_ts: Some(2000),
        };
        let (clause, params) = build_where_clause(&filters).unwrap();
        assert!(clause.contains("c.started_at >="), "clause: {}", clause);
        assert!(clause.contains("c.started_at <="), "clause: {}", clause);
        assert_eq!(params.len(), 2);
    }

    #[test]
    fn build_where_clause_with_workspace_filter() {
        let filters = SecretScanFilters {
            agents: None,
            workspaces: Some(vec![PathBuf::from("/home/user/project")]),
            since_ts: None,
            until_ts: None,
        };
        let (clause, params) = build_where_clause(&filters).unwrap();
        assert!(clause.contains("w.path IN"), "clause: {}", clause);
        assert_eq!(params.len(), 1);
    }

    #[test]
    fn build_where_clause_empty_agent_list_matches_nothing() {
        let filters = SecretScanFilters {
            agents: Some(vec![]),
            workspaces: None,
            since_ts: None,
            until_ts: None,
        };
        let (clause, _) = build_where_clause(&filters).unwrap();
        assert!(
            clause.contains("1=0"),
            "empty agent list should match nothing: {}",
            clause
        );
    }

    #[test]
    fn build_where_clause_empty_workspace_list_matches_nothing() {
        let filters = SecretScanFilters {
            agents: None,
            workspaces: Some(vec![]),
            since_ts: None,
            until_ts: None,
        };
        let (clause, _) = build_where_clause(&filters).unwrap();
        assert!(
            clause.contains("1=0"),
            "empty workspace list should match nothing: {}",
            clause
        );
    }

    // =========================================================================
    // Entropy regex tests
    // =========================================================================

    #[test]
    fn entropy_base64_regex_matches_long_strings() {
        assert!(ENTROPY_BASE64_RE.is_match("ABCDEFGHIJKLMNOPQRSTuv"));
        assert!(!ENTROPY_BASE64_RE.is_match("short"));
    }

    #[test]
    fn entropy_hex_regex_matches_32_plus_chars() {
        assert!(ENTROPY_HEX_RE.is_match("a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4"));
        assert!(!ENTROPY_HEX_RE.is_match("a1b2c3d4"));
    }

    // =========================================================================
    // scan_text edge case tests — malformed input robustness (br-ig84)
    // =========================================================================

    #[test]
    fn scan_text_empty_text_no_findings() {
        let config = default_config();
        let ctx = empty_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        scan_text(
            "",
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );
        assert!(findings.is_empty());
        assert!(!truncated);
    }

    #[test]
    fn scan_text_already_truncated_skips() {
        let config = default_config();
        let ctx = empty_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = true; // pre-set

        scan_text(
            "sk-test1234567890abcdefghijklmnopqr",
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );
        assert!(findings.is_empty(), "should skip when already truncated");
    }

    #[test]
    fn scan_text_denylist_always_critical() {
        let config =
            SecretScanConfig::from_inputs_with_env(&[], &["FORBIDDEN_TOKEN_.*".to_string()], false)
                .unwrap();
        let ctx = ScanContext {
            agent: Some("test".to_string()),
            ..message_ctx()
        };
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        scan_text(
            "prefix FORBIDDEN_TOKEN_abc suffix",
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].severity, SecretSeverity::Critical);
        assert_eq!(findings[0].kind, "denylist");
    }

    #[test]
    fn scan_text_allowlist_suppresses_builtin_match() {
        let config =
            SecretScanConfig::from_inputs_with_env(&["sk-test.*".to_string()], &[], false).unwrap();
        let ctx = message_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        scan_text(
            "sk-testABCDEFGHIJKLMNOPQRSTUVWXYZ12345",
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );

        // The openai_key pattern should match but be suppressed by allowlist
        assert!(
            !findings.iter().any(|f| f.kind == "openai_key"),
            "allowlisted key should be suppressed"
        );
    }

    #[test]
    fn scan_text_deduplicates_findings() {
        let config = default_config();
        let ctx = message_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        // Scan same text twice — same context, so duplicates should be skipped
        let text = "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZ123456789";
        scan_text(
            text,
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );
        let count_after_first = findings.len();

        scan_text(
            text,
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );
        assert_eq!(
            findings.len(),
            count_after_first,
            "duplicate findings should be skipped"
        );
    }

    #[test]
    fn scan_text_max_findings_truncates() {
        // Use longer tokens (>8 chars) so each gets a unique redacted form for dedup
        let mut config =
            SecretScanConfig::from_inputs_with_env(&[], &["LONG_SECRET_\\d+".to_string()], false)
                .unwrap();
        config.max_findings = 3;

        let ctx = message_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        // Each match is >8 chars so redact_token produces unique output per token
        let text =
            "LONG_SECRET_001 LONG_SECRET_002 LONG_SECRET_003 LONG_SECRET_004 LONG_SECRET_005";
        scan_text(
            text,
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );

        assert!(
            findings.len() <= 3,
            "should cap at max_findings: {}",
            findings.len()
        );
        assert!(truncated, "should set truncated flag");
    }

    #[test]
    fn scan_text_pure_alphabetic_base64_skipped() {
        // Pure alphabetic strings (CamelCase identifiers) should NOT trigger entropy detection
        let config = default_config();
        let ctx = message_ctx();
        let mut findings = Vec::new();
        let mut seen = HashSet::new();
        let mut truncated = false;

        // This is a pure alphabetic string — should be skipped by the heuristic
        let text = "SecretScanConfigFromInputsWithEnvTest";
        scan_text(
            text,
            SecretLocation::MessageContent,
            &ctx,
            &config,
            &mut findings,
            &mut seen,
            &mut truncated,
        );

        assert!(
            !findings.iter().any(|f| f.kind == "high_entropy_base64"),
            "pure alphabetic strings should not trigger entropy detection"
        );
    }
}