1use anyhow::{Context, Result, bail};
2use console::{Term, style};
3use frankensqlite::compat::{ConnectionExt, ParamValue, RowExt, params_from_iter};
4use frankensqlite::params;
5use indicatif::{ProgressBar, ProgressStyle};
6use once_cell::sync::Lazy;
7use regex::Regex;
8use serde::Serialize;
9use std::collections::{HashMap, HashSet};
10use std::io::Write;
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13use std::sync::atomic::{AtomicBool, Ordering};
14use std::time::Duration;
15
/// Default minimum Shannon entropy (bits per byte) a base64-like token must reach to be
/// reported by the entropy check.
const DEFAULT_ENTROPY_THRESHOLD: f64 = 4.0;
/// Default minimum byte length a base64-like token needs before its entropy is evaluated.
const DEFAULT_ENTROPY_MIN_LEN: usize = 20;
/// Default context window, in bytes, around a match; half is taken before it and half after.
const DEFAULT_CONTEXT_BYTES: usize = 120;
/// Default cap on collected findings; once reached, further matches mark the scan truncated.
const DEFAULT_MAX_FINDINGS: usize = 500;
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)]
22#[serde(rename_all = "snake_case")]
23pub enum SecretSeverity {
24 Critical,
25 High,
26 Medium,
27 Low,
28}
29
30impl SecretSeverity {
31 fn rank(self) -> u8 {
32 match self {
33 SecretSeverity::Critical => 0,
34 SecretSeverity::High => 1,
35 SecretSeverity::Medium => 2,
36 SecretSeverity::Low => 3,
37 }
38 }
39
40 pub fn label(self) -> &'static str {
41 match self {
42 SecretSeverity::Critical => "critical",
43 SecretSeverity::High => "high",
44 SecretSeverity::Medium => "medium",
45 SecretSeverity::Low => "low",
46 }
47 }
48
49 fn styled(self, text: &str) -> String {
50 match self {
51 SecretSeverity::Critical => style(text).red().bold().to_string(),
52 SecretSeverity::High => style(text).red().to_string(),
53 SecretSeverity::Medium => style(text).yellow().to_string(),
54 SecretSeverity::Low => style(text).blue().to_string(),
55 }
56 }
57}
58
59#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize)]
60#[serde(rename_all = "snake_case")]
61pub enum SecretLocation {
62 ConversationTitle,
63 ConversationMetadata,
64 MessageContent,
65 MessageMetadata,
66 MessageSnippet,
67}
68
69impl SecretLocation {
70 fn label(&self) -> &'static str {
71 match self {
72 SecretLocation::ConversationTitle => "conversation.title",
73 SecretLocation::ConversationMetadata => "conversation.metadata",
74 SecretLocation::MessageContent => "message.content",
75 SecretLocation::MessageMetadata => "message.metadata",
76 SecretLocation::MessageSnippet => "message.snippet",
77 }
78 }
79}
80
/// One reported secret, with the raw match already redacted.
#[derive(Debug, Clone, Serialize)]
pub struct SecretFinding {
    /// Severity assigned by the rule that fired.
    pub severity: SecretSeverity,
    /// Rule identifier: a built-in pattern id, `"denylist"`, or one of the entropy kinds.
    pub kind: String,
    /// Source of the regex that matched, or `"entropy"` for entropy-based findings.
    pub pattern: String,
    /// Redacted rendering of the matched text (never the raw match).
    pub match_redacted: String,
    /// Text surrounding the match, with the match itself replaced by `match_redacted`.
    pub context: String,
    /// Field in which the match was found.
    pub location: SecretLocation,
    /// Agent slug of the owning conversation, when known.
    pub agent: Option<String>,
    /// Workspace path of the owning conversation, when known.
    pub workspace: Option<String>,
    /// Source path recorded for the owning conversation.
    pub source_path: Option<String>,
    /// Database id of the owning conversation.
    pub conversation_id: Option<i64>,
    /// Database id of the message; `None` for conversation-level findings.
    pub message_id: Option<i64>,
    /// Value of the message's `idx` column; `None` for conversation-level findings.
    pub message_idx: Option<i64>,
}
96
/// Aggregate counts describing a finished scan.
#[derive(Debug, Clone, Serialize)]
pub struct SecretScanSummary {
    /// Number of findings in the report.
    pub total: usize,
    /// Number of findings per severity; severities with no findings have no entry.
    pub by_severity: HashMap<SecretSeverity, usize>,
    /// `true` when at least one finding is `Critical`.
    pub has_critical: bool,
    /// `true` when the scan stopped early because the findings cap was reached.
    pub truncated: bool,
}
104
/// Full result of a secret scan: summary counts plus the individual findings.
#[derive(Debug, Clone, Serialize)]
pub struct SecretScanReport {
    /// Aggregate counts computed from `findings`.
    pub summary: SecretScanSummary,
    /// Findings sorted by severity rank, then by kind.
    pub findings: Vec<SecretFinding>,
}
110
/// Restricts which conversations (and their messages/snippets) are scanned.
///
/// `None` means "no restriction" for that field; all active restrictions are ANDed together.
#[derive(Debug, Clone)]
pub struct SecretScanFilters {
    /// Agent slugs to include; `Some(vec![])` matches nothing.
    pub agents: Option<Vec<String>>,
    /// Workspace paths to include (compared as strings); `Some(vec![])` matches nothing.
    pub workspaces: Option<Vec<PathBuf>>,
    /// Inclusive lower bound on the conversation's `started_at` column.
    pub since_ts: Option<i64>,
    /// Inclusive upper bound on the conversation's `started_at` column.
    pub until_ts: Option<i64>,
}
118
119#[derive(Debug, Clone)]
120pub struct SecretScanConfig {
121 pub allowlist: Vec<Regex>,
122 pub denylist: Vec<Regex>,
123 pub allowlist_raw: Vec<String>,
124 pub denylist_raw: Vec<String>,
125 pub entropy_threshold: f64,
126 pub entropy_min_len: usize,
127 pub context_bytes: usize,
128 pub max_findings: usize,
129}
130
131impl SecretScanConfig {
132 pub fn from_inputs(allowlist: &[String], denylist: &[String]) -> Result<Self> {
133 Self::from_inputs_with_env(allowlist, denylist, true)
134 }
135
136 pub fn from_inputs_with_env(
137 allowlist: &[String],
138 denylist: &[String],
139 use_env: bool,
140 ) -> Result<Self> {
141 let allowlist_raw = if allowlist.is_empty() && use_env {
142 parse_env_regex_list("CASS_SECRETS_ALLOWLIST")?
143 } else {
144 allowlist.to_vec()
145 };
146 let denylist_raw = if denylist.is_empty() && use_env {
147 parse_env_regex_list("CASS_SECRETS_DENYLIST")?
148 } else {
149 denylist.to_vec()
150 };
151
152 Ok(Self {
153 allowlist: compile_regexes(&allowlist_raw, "allowlist")?,
154 denylist: compile_regexes(&denylist_raw, "denylist")?,
155 allowlist_raw,
156 denylist_raw,
157 entropy_threshold: DEFAULT_ENTROPY_THRESHOLD,
158 entropy_min_len: DEFAULT_ENTROPY_MIN_LEN,
159 context_bytes: DEFAULT_CONTEXT_BYTES,
160 max_findings: DEFAULT_MAX_FINDINGS,
161 })
162 }
163}
164
/// A built-in detection rule: a regex plus the id and severity reported when it matches.
struct SecretPattern {
    /// Stable identifier, reported as the finding's `kind`.
    id: &'static str,
    /// Severity assigned to every match of this rule.
    severity: SecretSeverity,
    /// Compiled expression used to find candidate secrets.
    regex: Regex,
}
170
171static BUILTIN_PATTERNS: Lazy<Vec<SecretPattern>> = Lazy::new(|| {
172 vec![
173 SecretPattern {
174 id: "aws_access_key_id",
175 severity: SecretSeverity::High,
176 regex: Regex::new(r"\bAKIA[0-9A-Z]{16}\b").expect("aws access key regex"),
177 },
178 SecretPattern {
179 id: "aws_secret_key",
180 severity: SecretSeverity::Critical,
181 regex: Regex::new(
182 r#"(?i)aws(.{0,20})?(secret|access)?[_-]?key\s*[:=]\s*['"]?[A-Za-z0-9/+=]{40}['"]?"#,
183 )
184 .expect("aws secret regex"),
185 },
186 SecretPattern {
187 id: "github_pat",
188 severity: SecretSeverity::High,
189 regex: Regex::new(r"\bgh[pousr]_[A-Za-z0-9]{36}\b").expect("github pat regex"),
190 },
191 SecretPattern {
192 id: "openai_key",
193 severity: SecretSeverity::High,
194 regex: Regex::new(r"\bsk-[A-Za-z0-9]{20,}\b").expect("openai key regex"),
198 },
199 SecretPattern {
200 id: "anthropic_key",
201 severity: SecretSeverity::High,
202 regex: Regex::new(r"\bsk-ant-[A-Za-z0-9]{20,}\b").expect("anthropic key regex"),
203 },
204 SecretPattern {
205 id: "jwt",
206 severity: SecretSeverity::Medium,
207 regex: Regex::new(r"\beyJ[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\.[A-Za-z0-9_\-]+\b")
208 .expect("jwt regex"),
209 },
210 SecretPattern {
211 id: "private_key",
212 severity: SecretSeverity::Critical,
213 regex: Regex::new(
214 r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |PGP |ENCRYPTED )?PRIVATE KEY-----",
215 )
216 .expect("private key regex"),
217 },
218 SecretPattern {
219 id: "database_url",
220 severity: SecretSeverity::Medium,
221 regex: Regex::new(r"(?i)\b(postgres|postgresql|mysql|mongodb|redis)://[^\s]+")
222 .expect("db url regex"),
223 },
224 SecretPattern {
225 id: "generic_api_key",
226 severity: SecretSeverity::Low,
227 regex: Regex::new(
228 r#"(?i)(api[_-]?key|token|secret|password|passwd)\s*[:=]\s*['"]?[A-Za-z0-9_\-]{8,}['"]?"#,
229 )
230 .expect("generic api key regex"),
231 },
232 ]
233});
234
/// Candidate tokens for the base64-style entropy check: runs of 20 or more characters drawn
/// from `A-Z`, `a-z`, `0-9`, `+`, `/`, `=`, `_` and `-`.
static ENTROPY_BASE64_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[A-Za-z0-9+/=_-]{20,}").expect("entropy base64 regex"));
/// Candidate tokens for the hex entropy check: word-bounded runs of 32 or more hex digits.
static ENTROPY_HEX_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\b[A-Fa-f0-9]{32,}\b").expect("entropy hex regex"));
239
/// Provenance of the text currently being scanned; copied into every finding it produces.
#[derive(Debug, Clone)]
struct ScanContext {
    /// Agent slug of the owning conversation.
    agent: Option<String>,
    /// Workspace path of the owning conversation.
    workspace: Option<String>,
    /// Source path recorded for the owning conversation.
    source_path: Option<String>,
    /// Database id of the owning conversation.
    conversation_id: Option<i64>,
    /// Database id of the message, or `None` when scanning conversation-level fields.
    message_id: Option<i64>,
    /// Value of the message's `idx` column, or `None` for conversation-level fields.
    message_idx: Option<i64>,
}
249
/// A raw match handed to `push_finding`, which redacts, deduplicates and records it.
struct FindingCandidate<'a> {
    /// Severity to report for this match.
    severity: SecretSeverity,
    /// Rule identifier reported as the finding's `kind`.
    kind: &'a str,
    /// Regex source (or `"entropy"`) reported as the finding's `pattern`.
    pattern: &'a str,
    /// Full text that was scanned; `start..end` index into it.
    text: &'a str,
    /// Byte offset in `text` where the match begins.
    start: usize,
    /// Byte offset in `text` just past the end of the match.
    end: usize,
    /// Field the text came from.
    location: SecretLocation,
    /// Provenance of the scanned text.
    ctx: &'a ScanContext,
}
260
261pub fn scan_database<P: AsRef<Path>>(
262 db_path: P,
263 filters: &SecretScanFilters,
264 config: &SecretScanConfig,
265 running: Option<Arc<AtomicBool>>,
266 progress: Option<&ProgressBar>,
267) -> Result<SecretScanReport> {
268 let conn = super::open_existing_sqlite_db(db_path.as_ref())
269 .context("Failed to open database for secret scan")?;
270
271 let mut findings: Vec<SecretFinding> = Vec::new();
272 let mut seen: HashSet<String> = HashSet::new();
273 let mut truncated = false;
274
275 let (conv_where, conv_params) = build_where_clause(filters)?;
279 let conv_sql = format!(
280 "SELECT c.id, c.title, c.metadata_json, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n FROM conversations c\n LEFT JOIN agents a ON c.agent_id = a.id\n LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
281 conv_where
282 );
283 let conv_param_values = params_from_iter(conv_params);
284 let conv_rows = conn.query_with_params(&conv_sql, &conv_param_values)?;
285
286 for row in &conv_rows {
287 if running
288 .as_ref()
289 .is_some_and(|flag| !flag.load(Ordering::Relaxed))
290 {
291 break;
292 }
293 let conv_id: i64 = row.get_typed(0)?;
294 let title: Option<String> = row.get_typed(1)?;
295 let metadata_json: Option<String> = row.get_typed(2)?;
296 let source_path: String = row.get_typed(3)?;
297 let agent_slug: String = row.get_typed(4)?;
298 let workspace_path: Option<String> = row.get_typed(5)?;
299
300 let ctx = ScanContext {
301 agent: Some(agent_slug),
302 workspace: workspace_path,
303 source_path: Some(source_path),
304 conversation_id: Some(conv_id),
305 message_id: None,
306 message_idx: None,
307 };
308
309 if let Some(title_text) = title {
310 scan_text(
311 &title_text,
312 SecretLocation::ConversationTitle,
313 &ctx,
314 config,
315 &mut findings,
316 &mut seen,
317 &mut truncated,
318 );
319 }
320 if let Some(meta) = metadata_json {
321 scan_text(
322 &meta,
323 SecretLocation::ConversationMetadata,
324 &ctx,
325 config,
326 &mut findings,
327 &mut seen,
328 &mut truncated,
329 );
330 }
331
332 if truncated {
333 break;
334 }
335
336 if let Some(pb) = progress {
337 pb.inc(1);
338 }
339 }
340
341 if !truncated {
342 let (msg_where, msg_params) = build_where_clause(filters)?;
343 let msg_sql = format!(
344 "SELECT m.id, m.idx, m.content, m.extra_json, c.id, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n FROM messages m\n JOIN conversations c ON m.conversation_id = c.id\n LEFT JOIN agents a ON c.agent_id = a.id\n LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
345 msg_where
346 );
347 let msg_param_values = params_from_iter(msg_params);
348 let msg_rows = conn.query_with_params(&msg_sql, &msg_param_values)?;
349
350 for row in &msg_rows {
351 if running
352 .as_ref()
353 .is_some_and(|flag| !flag.load(Ordering::Relaxed))
354 {
355 break;
356 }
357 let msg_id: i64 = row.get_typed(0)?;
358 let msg_idx: i64 = row.get_typed(1)?;
359 let content: String = row.get_typed(2)?;
360 let extra_json: Option<String> = row.get_typed(3)?;
361 let conv_id: i64 = row.get_typed(4)?;
362 let source_path: String = row.get_typed(5)?;
363 let agent_slug: String = row.get_typed(6)?;
364 let workspace_path: Option<String> = row.get_typed(7)?;
365
366 let ctx = ScanContext {
367 agent: Some(agent_slug),
368 workspace: workspace_path,
369 source_path: Some(source_path),
370 conversation_id: Some(conv_id),
371 message_id: Some(msg_id),
372 message_idx: Some(msg_idx),
373 };
374
375 scan_text(
376 &content,
377 SecretLocation::MessageContent,
378 &ctx,
379 config,
380 &mut findings,
381 &mut seen,
382 &mut truncated,
383 );
384 if let Some(extra) = extra_json {
385 scan_text(
386 &extra,
387 SecretLocation::MessageMetadata,
388 &ctx,
389 config,
390 &mut findings,
391 &mut seen,
392 &mut truncated,
393 );
394 }
395
396 if truncated {
397 break;
398 }
399
400 if let Some(pb) = progress {
401 pb.inc(1);
402 }
403 }
404 }
405
406 if !truncated && table_exists(&conn, "snippets") {
407 let (snip_where, snip_params) = build_where_clause(filters)?;
408 let snip_sql = format!(
409 "SELECT s.snippet_text, m.id, m.idx, c.id, c.source_path, COALESCE(a.slug, 'unknown'), w.path\n FROM snippets s\n JOIN messages m ON s.message_id = m.id\n JOIN conversations c ON m.conversation_id = c.id\n LEFT JOIN agents a ON c.agent_id = a.id\n LEFT JOIN workspaces w ON c.workspace_id = w.id{}",
410 snip_where
411 );
412 let snip_param_values = params_from_iter(snip_params);
413 let snip_rows = conn.query_with_params(&snip_sql, &snip_param_values)?;
414
415 for row in &snip_rows {
416 if running
417 .as_ref()
418 .is_some_and(|flag| !flag.load(Ordering::Relaxed))
419 {
420 break;
421 }
422 let snippet_text: String = row.get_typed(0)?;
423 let msg_id: i64 = row.get_typed(1)?;
424 let msg_idx: i64 = row.get_typed(2)?;
425 let conv_id: i64 = row.get_typed(3)?;
426 let source_path: String = row.get_typed(4)?;
427 let agent_slug: String = row.get_typed(5)?;
428 let workspace_path: Option<String> = row.get_typed(6)?;
429
430 let ctx = ScanContext {
431 agent: Some(agent_slug),
432 workspace: workspace_path,
433 source_path: Some(source_path),
434 conversation_id: Some(conv_id),
435 message_id: Some(msg_id),
436 message_idx: Some(msg_idx),
437 };
438
439 scan_text(
440 &snippet_text,
441 SecretLocation::MessageSnippet,
442 &ctx,
443 config,
444 &mut findings,
445 &mut seen,
446 &mut truncated,
447 );
448
449 if truncated {
450 break;
451 }
452
453 if let Some(pb) = progress {
454 pb.inc(1);
455 }
456 }
457 }
458
459 findings.sort_by(|a, b| {
460 a.severity
461 .rank()
462 .cmp(&b.severity.rank())
463 .then_with(|| a.kind.cmp(&b.kind))
464 });
465
466 let mut by_severity: HashMap<SecretSeverity, usize> = HashMap::new();
467 for finding in &findings {
468 *by_severity.entry(finding.severity).or_insert(0) += 1;
469 }
470
471 let has_critical = by_severity
472 .get(&SecretSeverity::Critical)
473 .copied()
474 .unwrap_or(0)
475 > 0;
476
477 Ok(SecretScanReport {
478 summary: SecretScanSummary {
479 total: findings.len(),
480 by_severity,
481 has_critical,
482 truncated,
483 },
484 findings,
485 })
486}
487
488fn table_exists(conn: &frankensqlite::Connection, table_name: &str) -> bool {
489 if !table_name
490 .chars()
491 .all(|ch| ch.is_ascii_alphanumeric() || ch == '_')
492 {
493 return false;
494 }
495
496 let pragma = format!("PRAGMA table_info({table_name})");
497 conn.query_map_collect(&pragma, params![], |row| row.get_typed::<String>(1))
498 .map(|columns| !columns.is_empty())
499 .unwrap_or(false)
500}
501
502pub fn print_human_report(
503 term: &mut Term,
504 report: &SecretScanReport,
505 max_examples: usize,
506) -> Result<()> {
507 let total = report.summary.total;
508 if total == 0 {
509 writeln!(term, " {} No secrets detected", style("✓").green())?;
510 return Ok(());
511 }
512
513 writeln!(
514 term,
515 " {} {} potential secret(s) detected",
516 style("⚠").yellow(),
517 total
518 )?;
519
520 let mut severities = vec![
521 SecretSeverity::Critical,
522 SecretSeverity::High,
523 SecretSeverity::Medium,
524 SecretSeverity::Low,
525 ];
526
527 severities.sort_by_key(|s| s.rank());
528
529 for severity in severities {
530 let count = report
531 .summary
532 .by_severity
533 .get(&severity)
534 .copied()
535 .unwrap_or(0);
536 if count == 0 {
537 continue;
538 }
539 let label = severity.styled(severity.label());
540 writeln!(term, " {}: {}", label, count)?;
541
542 for finding in report
543 .findings
544 .iter()
545 .filter(|f| f.severity == severity)
546 .take(max_examples)
547 {
548 writeln!(
549 term,
550 " - {} in {} ({})",
551 finding.kind,
552 finding.location.label(),
553 finding.match_redacted
554 )?;
555 if !finding.context.is_empty() {
556 writeln!(term, " {}", style(&finding.context).dim())?;
557 }
558 }
559 if count > max_examples {
560 writeln!(term, " {}", style("…additional findings hidden").dim())?;
561 }
562 }
563
564 if report.summary.truncated {
565 writeln!(
566 term,
567 " {} Results truncated (max findings reached)",
568 style("⚠").yellow()
569 )?;
570 }
571
572 Ok(())
573}
574
575pub fn print_cli_report(report: &SecretScanReport, json: bool) -> Result<()> {
576 if json {
577 let payload = serde_json::to_string_pretty(report)?;
578 println!("{payload}");
579 return Ok(());
580 }
581
582 let mut term = Term::stdout();
583 print_human_report(&mut term, report, 3)
584}
585
586pub fn run_secret_scan_cli<P: AsRef<Path>>(
587 db_path: P,
588 filters: &SecretScanFilters,
589 config: &SecretScanConfig,
590 json: bool,
591 fail_on_secrets: bool,
592) -> Result<()> {
593 let progress = ProgressBar::new_spinner();
594 progress.set_style(
595 ProgressStyle::with_template("{spinner} {msg}")
596 .unwrap()
597 .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
598 );
599 progress.set_message("Scanning for secrets...");
600 progress.enable_steady_tick(Duration::from_millis(120));
601
602 let report = scan_database(db_path, filters, config, None, Some(&progress))?;
603 progress.finish_and_clear();
604
605 print_cli_report(&report, json)?;
606
607 if fail_on_secrets && report.summary.total > 0 {
608 bail!("Secrets detected ({} finding(s))", report.summary.total);
609 }
610
611 Ok(())
612}
613
614pub fn wizard_secret_scan<P: AsRef<Path>>(
615 db_path: P,
616 filters: &SecretScanFilters,
617 config: &SecretScanConfig,
618) -> Result<SecretScanReport> {
619 let progress = ProgressBar::new_spinner();
620 progress.set_style(
621 ProgressStyle::with_template("{spinner} {msg}")
622 .unwrap()
623 .tick_strings(&["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]),
624 );
625 progress.set_message("Scanning for secrets...");
626 progress.enable_steady_tick(Duration::from_millis(120));
627
628 let report = scan_database(db_path, filters, config, None, Some(&progress))?;
629 progress.finish_and_clear();
630 Ok(report)
631}
632
633fn scan_text(
634 text: &str,
635 location: SecretLocation,
636 ctx: &ScanContext,
637 config: &SecretScanConfig,
638 findings: &mut Vec<SecretFinding>,
639 seen: &mut HashSet<String>,
640 truncated: &mut bool,
641) {
642 if *truncated || text.is_empty() {
643 return;
644 }
645
646 for deny in &config.denylist {
648 for mat in deny.find_iter(text) {
649 if findings.len() >= config.max_findings {
650 *truncated = true;
651 return;
652 }
653 push_finding(
654 findings,
655 seen,
656 FindingCandidate {
657 severity: SecretSeverity::Critical,
658 kind: "denylist",
659 pattern: deny.as_str(),
660 text,
661 start: mat.start(),
662 end: mat.end(),
663 location: location.clone(),
664 ctx,
665 },
666 config,
667 );
668 }
669 }
670
671 for pattern in BUILTIN_PATTERNS.iter() {
673 for mat in pattern.regex.find_iter(text) {
674 if findings.len() >= config.max_findings {
675 *truncated = true;
676 return;
677 }
678 let matched = &text[mat.start()..mat.end()];
679 if is_allowlisted(matched, config) {
680 continue;
681 }
682 push_finding(
683 findings,
684 seen,
685 FindingCandidate {
686 severity: pattern.severity,
687 kind: pattern.id,
688 pattern: pattern.regex.as_str(),
689 text,
690 start: mat.start(),
691 end: mat.end(),
692 location: location.clone(),
693 ctx,
694 },
695 config,
696 );
697 }
698 }
699
700 for mat in ENTROPY_BASE64_RE.find_iter(text) {
702 if findings.len() >= config.max_findings {
703 *truncated = true;
704 return;
705 }
706 let candidate = &text[mat.start()..mat.end()];
707 if candidate.len() < config.entropy_min_len {
708 continue;
709 }
710 if is_allowlisted(candidate, config) {
711 continue;
712 }
713 if candidate.chars().all(|c| c.is_ascii_alphabetic()) {
716 continue;
717 }
718
719 let entropy = shannon_entropy(candidate);
720 if entropy >= config.entropy_threshold {
721 push_finding(
722 findings,
723 seen,
724 FindingCandidate {
725 severity: SecretSeverity::Medium,
726 kind: "high_entropy_base64",
727 pattern: "entropy",
728 text,
729 start: mat.start(),
730 end: mat.end(),
731 location: location.clone(),
732 ctx,
733 },
734 config,
735 );
736 }
737 }
738
739 for mat in ENTROPY_HEX_RE.find_iter(text) {
740 if findings.len() >= config.max_findings {
741 *truncated = true;
742 return;
743 }
744 let candidate = &text[mat.start()..mat.end()];
745 if candidate.len() < 32 {
746 continue;
747 }
748 if is_allowlisted(candidate, config) {
749 continue;
750 }
751 let entropy = shannon_entropy(candidate);
752 if entropy >= 3.0 {
753 push_finding(
754 findings,
755 seen,
756 FindingCandidate {
757 severity: SecretSeverity::Low,
758 kind: "high_entropy_hex",
759 pattern: "entropy",
760 text,
761 start: mat.start(),
762 end: mat.end(),
763 location: location.clone(),
764 ctx,
765 },
766 config,
767 );
768 }
769 }
770}
771
772fn push_finding(
773 findings: &mut Vec<SecretFinding>,
774 seen: &mut HashSet<String>,
775 candidate: FindingCandidate<'_>,
776 config: &SecretScanConfig,
777) {
778 let match_text = &candidate.text[candidate.start..candidate.end];
779 let match_redacted = redact_token(match_text);
780 let context = redact_context(
781 candidate.text,
782 candidate.start,
783 candidate.end,
784 config.context_bytes,
785 &match_redacted,
786 );
787
788 let key = format!(
789 "{}:{}:{}:{}:{}",
790 candidate.ctx.conversation_id.unwrap_or_default(),
791 candidate.ctx.message_id.unwrap_or_default(),
792 candidate.location.label(),
793 candidate.kind,
794 match_redacted
795 );
796
797 if !seen.insert(key) {
798 return;
799 }
800
801 findings.push(SecretFinding {
802 severity: candidate.severity,
803 kind: candidate.kind.to_string(),
804 pattern: candidate.pattern.to_string(),
805 match_redacted,
806 context,
807 location: candidate.location,
808 agent: candidate.ctx.agent.clone(),
809 workspace: candidate.ctx.workspace.clone(),
810 source_path: candidate.ctx.source_path.clone(),
811 conversation_id: candidate.ctx.conversation_id,
812 message_id: candidate.ctx.message_id,
813 message_idx: candidate.ctx.message_idx,
814 });
815}
816
/// Produces a display-safe stand-in for a matched secret.
///
/// Tokens of eight characters or fewer become `"[redacted]"`. Longer tokens keep their
/// first two and last two characters plus the total character count, e.g.
/// `"sk…op (len 19)"`. Lengths are counted in `char`s, not bytes.
fn redact_token(token: &str) -> String {
    let chars: Vec<char> = token.chars().collect();
    let len = chars.len();
    if len <= 8 {
        return "[redacted]".to_string();
    }

    let head: String = chars[..2].iter().collect();
    let tail: String = chars[len - 2..].iter().collect();
    format!("{}…{} (len {})", head, tail, len)
}
834
835fn redact_context(
836 text: &str,
837 start: usize,
838 end: usize,
839 window: usize,
840 replacement: &str,
841) -> String {
842 if text.is_empty() || start >= end || start >= text.len() {
843 return String::new();
844 }
845
846 let ctx_start = start.saturating_sub(window / 2);
847 let ctx_end = (end + window / 2).min(text.len());
848 let ctx_start = adjust_to_char_boundary(text, ctx_start, false);
849 let ctx_end = adjust_to_char_boundary(text, ctx_end, true);
850
851 if ctx_start >= ctx_end {
852 return String::new();
853 }
854
855 let safe_start = start.min(text.len());
856 let safe_end = end.min(text.len());
857
858 let prefix = &text[ctx_start..safe_start];
859 let suffix = &text[safe_end..ctx_end];
860
861 let mut snippet = String::new();
862 snippet.push_str(prefix);
863 snippet.push_str(replacement);
864 snippet.push_str(suffix);
865 snippet
866}
867
/// Moves byte index `idx` to the nearest UTF-8 character boundary in `text`.
///
/// With `forward` set the search walks toward the end of the string, otherwise toward the
/// start. An index already on a boundary is returned unchanged, and any index at or past
/// the end of the string yields `text.len()`.
fn adjust_to_char_boundary(text: &str, idx: usize, forward: bool) -> usize {
    let len = text.len();
    if idx >= len {
        return len;
    }

    // Both searches begin at `idx` itself, so an aligned index is found immediately.
    if forward {
        (idx..len)
            .find(|&pos| text.is_char_boundary(pos))
            .unwrap_or(len)
    } else {
        (0..=idx)
            .rev()
            .find(|&pos| text.is_char_boundary(pos))
            .unwrap_or(0)
    }
}
891
/// Computes the Shannon entropy of `token`, in bits per byte, over its byte distribution.
///
/// Returns `0.0` for an empty string.
fn shannon_entropy(token: &str) -> f64 {
    if token.is_empty() {
        return 0.0;
    }
    let total = token.len() as f64;

    let mut histogram = [0usize; 256];
    for &byte in token.as_bytes() {
        histogram[usize::from(byte)] += 1;
    }

    // Accumulate -p * log2(p) over the byte values that actually occur, in byte order.
    histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .fold(0.0, |entropy, &occurrences| {
            let p = occurrences as f64 / total;
            entropy - p * p.log2()
        })
}
912
913fn is_allowlisted(matched: &str, config: &SecretScanConfig) -> bool {
914 for allow in &config.allowlist {
915 if allow.is_match(matched) {
916 return true;
917 }
918 }
919 false
920}
921
922fn build_where_clause(filters: &SecretScanFilters) -> Result<(String, Vec<ParamValue>)> {
923 let mut conditions: Vec<String> = Vec::new();
924 let mut params: Vec<ParamValue> = Vec::new();
925
926 if let Some(agents) = filters.agents.as_ref() {
927 if agents.is_empty() {
928 conditions.push("1=0".to_string());
929 } else {
930 let placeholders: Vec<&str> = agents.iter().map(|_| "?").collect();
931 conditions.push(format!("a.slug IN ({})", placeholders.join(", ")));
932 for agent in agents {
933 params.push(ParamValue::from(agent.as_str()));
934 }
935 }
936 }
937
938 if let Some(workspaces) = filters.workspaces.as_ref() {
939 if workspaces.is_empty() {
940 conditions.push("1=0".to_string());
941 } else {
942 let placeholders: Vec<&str> = workspaces.iter().map(|_| "?").collect();
943 conditions.push(format!("w.path IN ({})", placeholders.join(", ")));
944 for ws in workspaces {
945 params.push(ParamValue::from(ws.to_string_lossy().to_string()));
946 }
947 }
948 }
949
950 if let Some(since) = filters.since_ts {
951 conditions.push("c.started_at >= ?".to_string());
952 params.push(ParamValue::from(since));
953 }
954
955 if let Some(until) = filters.until_ts {
956 conditions.push("c.started_at <= ?".to_string());
957 params.push(ParamValue::from(until));
958 }
959
960 let where_clause = if conditions.is_empty() {
961 String::new()
962 } else {
963 format!(" WHERE {}", conditions.join(" AND "))
964 };
965
966 Ok((where_clause, params))
967}
968
969fn parse_env_regex_list(var: &str) -> Result<Vec<String>> {
970 let value = match dotenvy::var(var) {
971 Ok(v) => v,
972 Err(_) => return Ok(Vec::new()),
973 };
974 let items = value
975 .split(',')
976 .map(|s| s.trim().to_string())
977 .filter(|s| !s.is_empty())
978 .collect::<Vec<_>>();
979 Ok(items)
980}
981
982fn compile_regexes(patterns: &[String], label: &str) -> Result<Vec<Regex>> {
983 let mut compiled = Vec::new();
984 for pat in patterns {
985 let regex = Regex::new(pat).with_context(|| format!("Invalid {} regex: {}", label, pat))?;
986 compiled.push(regex);
987 }
988 Ok(compiled)
989}
990
991#[cfg(test)]
992mod tests {
993 use super::*;
994
995 #[test]
1000 fn shannon_entropy_empty_string_returns_zero() {
1001 assert_eq!(shannon_entropy(""), 0.0);
1002 }
1003
1004 #[test]
1005 fn shannon_entropy_single_repeated_char_returns_zero() {
1006 assert_eq!(shannon_entropy("aaaaaaaaaa"), 0.0);
1007 }
1008
1009 #[test]
1010 fn shannon_entropy_two_equal_chars_returns_one() {
1011 let e = shannon_entropy("ab");
1012 assert!((e - 1.0).abs() < 0.001, "expected ~1.0, got {}", e);
1013 }
1014
1015 #[test]
1016 fn shannon_entropy_high_entropy_base64() {
1017 let token = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1019 let e = shannon_entropy(token);
1020 assert!(e > 4.0, "expected entropy > 4.0, got {}", e);
1021 }
1022
1023 #[test]
1024 fn shannon_entropy_hex_string() {
1025 let hex = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4";
1026 let e = shannon_entropy(hex);
1027 assert!(e > 3.0, "expected entropy > 3.0 for hex, got {}", e);
1028 }
1029
1030 #[test]
1035 fn redact_token_short_returns_redacted() {
1036 assert_eq!(redact_token("abcd"), "[redacted]");
1037 assert_eq!(redact_token("12345678"), "[redacted]");
1038 }
1039
1040 #[test]
1041 fn redact_token_long_shows_prefix_suffix_len() {
1042 let result = redact_token("sk-abcdefghijklmnop");
1043 assert!(
1044 result.starts_with("sk"),
1045 "should start with first 2 chars: {}",
1046 result
1047 );
1048 assert!(
1049 result.contains("op"),
1050 "should end with last 2 chars: {}",
1051 result
1052 );
1053 assert!(result.contains("len 19"), "should show length: {}", result);
1054 }
1055
1056 #[test]
1057 fn redact_token_nine_chars_shows_format() {
1058 let result = redact_token("123456789");
1059 assert!(result.starts_with("12"), "{}", result);
1060 assert!(result.contains("89"), "{}", result);
1061 assert!(result.contains("len 9"), "{}", result);
1062 }
1063
1064 #[test]
1069 fn redact_context_empty_text_returns_empty() {
1070 assert_eq!(redact_context("", 0, 0, 120, "[REDACTED]"), "");
1071 }
1072
1073 #[test]
1074 fn redact_context_replaces_match_with_replacement() {
1075 let text = "The key is sk-ABCDEFGHIJ and more";
1076 let start = 11;
1077 let end = 25;
1078 let result = redact_context(text, start, end, 120, "[REDACTED]");
1079 assert!(result.contains("[REDACTED]"), "result: {}", result);
1080 assert!(
1081 !result.contains("sk-ABCDEFGHIJ"),
1082 "secret should be removed: {}",
1083 result
1084 );
1085 }
1086
1087 #[test]
1088 fn redact_context_match_at_start() {
1089 let text = "sk-SECRET rest of the text";
1090 let result = redact_context(text, 0, 9, 120, "[R]");
1091 assert!(result.starts_with("[R]"), "result: {}", result);
1092 }
1093
1094 #[test]
1095 fn redact_context_match_at_end() {
1096 let text = "prefix sk-SECRET";
1097 let result = redact_context(text, 7, 16, 120, "[R]");
1098 assert!(result.ends_with("[R]"), "result: {}", result);
1099 }
1100
1101 #[test]
1102 fn redact_context_start_beyond_text_returns_empty() {
1103 assert_eq!(redact_context("short", 10, 15, 120, "[R]"), "");
1104 }
1105
1106 #[test]
1111 fn is_allowlisted_returns_true_for_matching_pattern() {
1112 let config =
1113 SecretScanConfig::from_inputs_with_env(&["sk-test.*".to_string()], &[], false).unwrap();
1114 assert!(is_allowlisted("sk-test1234567890abcdef", &config));
1115 }
1116
1117 #[test]
1118 fn is_allowlisted_returns_false_when_no_match() {
1119 let config =
1120 SecretScanConfig::from_inputs_with_env(&["sk-test.*".to_string()], &[], false).unwrap();
1121 assert!(!is_allowlisted("sk-prod1234567890abcdef", &config));
1122 }
1123
1124 #[test]
1125 fn is_allowlisted_empty_list_returns_false() {
1126 let config = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();
1127 assert!(!is_allowlisted("anything", &config));
1128 }
1129
1130 #[test]
1135 fn adjust_to_char_boundary_ascii() {
1136 let text = "hello";
1137 assert_eq!(adjust_to_char_boundary(text, 3, true), 3);
1138 assert_eq!(adjust_to_char_boundary(text, 3, false), 3);
1139 }
1140
1141 #[test]
1142 fn adjust_to_char_boundary_multibyte_forward() {
1143 let text = "héllo"; let idx = adjust_to_char_boundary(text, 2, true);
1146 assert!(
1147 text.is_char_boundary(idx),
1148 "idx {} not a char boundary",
1149 idx
1150 );
1151 }
1152
1153 #[test]
1154 fn adjust_to_char_boundary_multibyte_backward() {
1155 let text = "héllo";
1156 let idx = adjust_to_char_boundary(text, 2, false);
1157 assert!(
1158 text.is_char_boundary(idx),
1159 "idx {} not a char boundary",
1160 idx
1161 );
1162 }
1163
1164 #[test]
1165 fn adjust_to_char_boundary_beyond_len() {
1166 let text = "abc";
1167 assert_eq!(adjust_to_char_boundary(text, 100, true), 3);
1168 }
1169
1170 #[test]
1175 fn config_from_inputs_with_valid_patterns() {
1176 let config = SecretScanConfig::from_inputs_with_env(
1177 &["allowed_.*".to_string()],
1178 &["denied_.*".to_string()],
1179 false,
1180 )
1181 .unwrap();
1182 assert_eq!(config.allowlist.len(), 1);
1183 assert_eq!(config.denylist.len(), 1);
1184 assert_eq!(config.entropy_threshold, DEFAULT_ENTROPY_THRESHOLD);
1185 }
1186
1187 #[test]
1188 fn config_from_inputs_with_invalid_regex_returns_error() {
1189 let result = SecretScanConfig::from_inputs_with_env(&["[invalid".to_string()], &[], false);
1190 assert!(result.is_err(), "invalid regex should return error");
1191 }
1192
1193 #[test]
1194 fn config_from_inputs_empty_lists() {
1195 let config = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();
1196 assert!(config.allowlist.is_empty());
1197 assert!(config.denylist.is_empty());
1198 assert_eq!(config.max_findings, DEFAULT_MAX_FINDINGS);
1199 }
1200
1201 #[test]
1206 fn builtin_patterns_aws_access_key_detected() {
1207 let text = "Found key AKIAIOSFODNN7EXAMPLE in config";
1208 let pattern = &BUILTIN_PATTERNS[0]; assert!(
1210 pattern.regex.is_match(text),
1211 "should detect AWS access key ID"
1212 );
1213 }
1214
1215 #[test]
1216 fn builtin_patterns_github_pat_detected() {
1217 let text = "token ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij";
1218 let pattern = &BUILTIN_PATTERNS[2]; assert!(pattern.regex.is_match(text), "should detect GitHub PAT");
1220 }
1221
1222 #[test]
1223 fn builtin_patterns_anthropic_key_detected() {
1224 let text = "sk-ant-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefgh";
1225 let pattern = &BUILTIN_PATTERNS[4]; assert!(pattern.regex.is_match(text), "should detect Anthropic key");
1227 }
1228
/// The built-in pattern at index 5 must match a sample three-segment JWT.
///
/// NOTE: the lookup is positional, so this test is coupled to the ordering of
/// `BUILTIN_PATTERNS`.
#[test]
fn builtin_patterns_jwt_detected() {
    let sample = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.abc123";
    let matched = BUILTIN_PATTERNS[5].regex.is_match(sample);
    assert!(matched, "should detect JWT");
}
1235
/// The built-in pattern at index 6 must match a PEM `BEGIN RSA PRIVATE KEY` header.
///
/// NOTE: the lookup is positional, so this test is coupled to the ordering of
/// `BUILTIN_PATTERNS`.
#[test]
fn builtin_patterns_private_key_detected() {
    let sample = "-----BEGIN RSA PRIVATE KEY-----\nMIIE...";
    let matched = BUILTIN_PATTERNS[6].regex.is_match(sample);
    assert!(matched, "should detect private key");
}
1242
/// The built-in pattern at index 7 must match a `database_url=postgres://…`
/// assignment that embeds credentials.
///
/// NOTE: the lookup is positional, so this test is coupled to the ordering of
/// `BUILTIN_PATTERNS`.
#[test]
fn builtin_patterns_database_url_detected() {
    let sample = "database_url=postgres://user:pass@host:5432/db";
    let matched = BUILTIN_PATTERNS[7].regex.is_match(sample);
    assert!(matched, "should detect database URL");
}
1249
/// The built-in pattern at index 8 must match a generic `api_key=…` assignment.
///
/// NOTE: the lookup is positional, so this test is coupled to the ordering of
/// `BUILTIN_PATTERNS`.
#[test]
fn builtin_patterns_generic_api_key_detected() {
    let sample = "api_key=abcdefgh12345678";
    let matched = BUILTIN_PATTERNS[8].regex.is_match(sample);
    assert!(matched, "should detect generic API key");
}
1259
/// No built-in pattern may match an ordinary sentence containing no secrets.
#[test]
fn builtin_patterns_safe_text_not_detected() {
    let harmless = "This is a normal message about Rust programming.";
    BUILTIN_PATTERNS.iter().for_each(|candidate| {
        let matched = candidate.regex.is_match(harmless);
        assert!(
            !matched,
            "pattern {} should not match safe text",
            candidate.id,
        );
    });
}
1271
/// `rank()` must increase strictly from `Critical` through `High` and
/// `Medium` to `Low`.
#[test]
fn severity_rank_ordering() {
    let critical = SecretSeverity::Critical.rank();
    let high = SecretSeverity::High.rank();
    let medium = SecretSeverity::Medium.rank();
    let low = SecretSeverity::Low.rank();

    assert!(critical < high);
    assert!(high < medium);
    assert!(medium < low);
}
1282
/// Each severity's `label()` must be its lowercase name.
#[test]
fn severity_label_values() {
    let expectations = [
        (SecretSeverity::Critical, "critical"),
        (SecretSeverity::High, "high"),
        (SecretSeverity::Medium, "medium"),
        (SecretSeverity::Low, "low"),
    ];

    for &(severity, expected) in expectations.iter() {
        assert_eq!(severity.label(), expected);
    }
}
1290
/// Checks the dotted `label()` strings for the conversation and message
/// title/content/metadata locations.
///
/// NOTE: `SecretLocation::MessageSnippet` is not covered by this test.
#[test]
fn location_labels() {
    let expectations = [
        (SecretLocation::ConversationTitle, "conversation.title"),
        (SecretLocation::ConversationMetadata, "conversation.metadata"),
        (SecretLocation::MessageContent, "message.content"),
        (SecretLocation::MessageMetadata, "message.metadata"),
    ];

    for (location, expected) in expectations.iter() {
        assert_eq!(location.label(), *expected);
    }
}
1308
/// With every filter unset, `build_where_clause` must return an empty clause
/// and no bound parameters.
#[test]
fn build_where_clause_empty_filters() {
    let unfiltered = SecretScanFilters {
        agents: None,
        workspaces: None,
        since_ts: None,
        until_ts: None,
    };

    let (sql, bound) = build_where_clause(&unfiltered).unwrap();

    assert!(sql.is_empty(), "empty filters should give empty clause");
    assert!(bound.is_empty());
}
1325
/// Two agent slugs must produce an `a.slug IN` predicate with two bound
/// parameters.
#[test]
fn build_where_clause_with_agent_filter() {
    let slugs = vec!["claude".to_string(), "codex".to_string()];
    let by_agent = SecretScanFilters {
        agents: Some(slugs),
        workspaces: None,
        since_ts: None,
        until_ts: None,
    };

    let (sql, bound) = build_where_clause(&by_agent).unwrap();

    assert!(sql.contains("a.slug IN"), "clause: {}", sql);
    assert_eq!(bound.len(), 2);
}
1338
/// A since/until pair must produce lower- and upper-bound predicates on
/// `c.started_at` with two bound parameters.
#[test]
fn build_where_clause_with_time_range() {
    let by_time = SecretScanFilters {
        agents: None,
        workspaces: None,
        since_ts: Some(1000),
        until_ts: Some(2000),
    };

    let (sql, bound) = build_where_clause(&by_time).unwrap();

    let has_lower_bound = sql.contains("c.started_at >=");
    assert!(has_lower_bound, "clause: {}", sql);
    let has_upper_bound = sql.contains("c.started_at <=");
    assert!(has_upper_bound, "clause: {}", sql);
    assert_eq!(bound.len(), 2);
}
1352
/// A single workspace path must produce a `w.path IN` predicate with one
/// bound parameter.
#[test]
fn build_where_clause_with_workspace_filter() {
    let paths = vec![PathBuf::from("/home/user/project")];
    let by_workspace = SecretScanFilters {
        agents: None,
        workspaces: Some(paths),
        since_ts: None,
        until_ts: None,
    };

    let (sql, bound) = build_where_clause(&by_workspace).unwrap();

    assert!(sql.contains("w.path IN"), "clause: {}", sql);
    assert_eq!(bound.len(), 1);
}
1365
/// `Some(vec![])` for agents (as opposed to `None`) must yield a clause
/// containing the always-false predicate `1=0`.
#[test]
fn build_where_clause_empty_agent_list_matches_nothing() {
    let no_agents = SecretScanFilters {
        agents: Some(Vec::new()),
        workspaces: None,
        since_ts: None,
        until_ts: None,
    };

    let (sql, _) = build_where_clause(&no_agents).unwrap();

    let always_false = sql.contains("1=0");
    assert!(
        always_false,
        "empty agent list should match nothing: {}",
        sql
    );
}
1381
/// `Some(vec![])` for workspaces (as opposed to `None`) must yield a clause
/// containing the always-false predicate `1=0`.
#[test]
fn build_where_clause_empty_workspace_list_matches_nothing() {
    let no_workspaces = SecretScanFilters {
        agents: None,
        workspaces: Some(Vec::new()),
        since_ts: None,
        until_ts: None,
    };

    let (sql, _) = build_where_clause(&no_workspaces).unwrap();

    let always_false = sql.contains("1=0");
    assert!(
        always_false,
        "empty workspace list should match nothing: {}",
        sql
    );
}
1397
/// `ENTROPY_BASE64_RE` must match a 22-character alphabetic run and reject a
/// 5-character word.
#[test]
fn entropy_base64_regex_matches_long_strings() {
    let long_run = "ABCDEFGHIJKLMNOPQRSTuv";
    let short_word = "short";

    assert!(ENTROPY_BASE64_RE.is_match(long_run));
    assert!(!ENTROPY_BASE64_RE.is_match(short_word));
}
1407
/// `ENTROPY_HEX_RE` must match a 32-character hex string and reject an
/// 8-character one.
#[test]
fn entropy_hex_regex_matches_32_plus_chars() {
    let hex_32 = "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4";
    let hex_8 = "a1b2c3d4";

    assert!(ENTROPY_HEX_RE.is_match(hex_32));
    assert!(!ENTROPY_HEX_RE.is_match(hex_8));
}
1413
/// Scanning an empty string must record no findings and leave the
/// `truncated` flag unset.
#[test]
fn scan_text_empty_text_no_findings() {
    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    let mut hit_limit = false;

    let scan_ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: None,
        message_id: None,
        message_idx: None,
    };
    let cfg = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();

    scan_text(
        "",
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    assert!(hits.is_empty());
    assert!(!hit_limit);
}
1445
/// When the `truncated` flag is already set on entry, `scan_text` must add no
/// findings even for text shaped like an `sk-` key.
#[test]
fn scan_text_already_truncated_skips() {
    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    // Start in the "limit already reached" state.
    let mut hit_limit = true;

    let scan_ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: None,
        message_id: None,
        message_idx: None,
    };
    let cfg = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();

    scan_text(
        "sk-test1234567890abcdefghijklmnopqr",
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    assert!(hits.is_empty(), "should skip when already truncated");
}
1472
/// A denylist regex match must yield exactly one finding, with `Critical`
/// severity and kind `"denylist"`.
#[test]
fn scan_text_denylist_always_critical() {
    let deny = ["FORBIDDEN_TOKEN_.*".to_string()];
    let cfg = SecretScanConfig::from_inputs_with_env(&[], &deny, false).unwrap();

    let scan_ctx = ScanContext {
        agent: Some("test".to_string()),
        workspace: None,
        source_path: None,
        conversation_id: Some(1),
        message_id: Some(1),
        message_idx: Some(0),
    };

    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    let mut hit_limit = false;

    scan_text(
        "prefix FORBIDDEN_TOKEN_abc suffix",
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.severity, SecretSeverity::Critical);
    assert_eq!(only_hit.kind, "denylist");
}
1504
/// With `sk-test.*` allowlisted, scanning an `sk-test…` token must not produce
/// any finding of kind `"openai_key"`.
#[test]
fn scan_text_allowlist_suppresses_builtin_match() {
    let allow = ["sk-test.*".to_string()];
    let cfg = SecretScanConfig::from_inputs_with_env(&allow, &[], false).unwrap();

    let scan_ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: Some(1),
        message_id: Some(1),
        message_idx: Some(0),
    };

    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    let mut hit_limit = false;

    scan_text(
        "sk-testABCDEFGHIJKLMNOPQRSTUVWXYZ12345",
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    let no_openai_hit = hits.iter().all(|hit| hit.kind != "openai_key");
    assert!(no_openai_hit, "allowlisted key should be suppressed");
}
1537
/// Scanning the same text twice, reusing the same `findings` and `seen`
/// collections, must not increase the number of findings on the second pass.
///
/// The first pass is also required to produce at least one finding;
/// otherwise the equality check would pass trivially (0 == 0) without
/// exercising deduplication at all.
#[test]
fn scan_text_deduplicates_findings() {
    let config = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();
    let ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: Some(1),
        message_id: Some(1),
        message_idx: Some(0),
    };
    let mut findings = Vec::new();
    let mut seen = HashSet::new();
    let mut truncated = false;

    let text = "sk-ABCDEFGHIJKLMNOPQRSTUVWXYZ123456789";

    // First pass: establishes the baseline set of findings.
    scan_text(
        text,
        SecretLocation::MessageContent,
        &ctx,
        &config,
        &mut findings,
        &mut seen,
        &mut truncated,
    );
    let count_after_first = findings.len();
    // Guard against a vacuous pass: with no baseline findings the comparison
    // below proves nothing about deduplication.
    assert!(
        count_after_first > 0,
        "first scan should produce at least one finding"
    );

    // Second pass over identical input with the same `seen` set.
    scan_text(
        text,
        SecretLocation::MessageContent,
        &ctx,
        &config,
        &mut findings,
        &mut seen,
        &mut truncated,
    );
    assert_eq!(
        findings.len(),
        count_after_first,
        "duplicate findings should be skipped"
    );
}
1581
/// With `max_findings` lowered to 3 and five denylist matches in the text,
/// at most three findings may be recorded and `truncated` must be set.
#[test]
fn scan_text_max_findings_truncates() {
    let deny = ["LONG_SECRET_\\d+".to_string()];
    let mut cfg = SecretScanConfig::from_inputs_with_env(&[], &deny, false).unwrap();
    cfg.max_findings = 3;

    let scan_ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: Some(1),
        message_id: Some(1),
        message_idx: Some(0),
    };

    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    let mut hit_limit = false;

    // Five distinct matches, two more than the configured cap.
    let haystack =
        "LONG_SECRET_001 LONG_SECRET_002 LONG_SECRET_003 LONG_SECRET_004 LONG_SECRET_005";
    scan_text(
        haystack,
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    let recorded = hits.len();
    assert!(recorded <= 3, "should cap at max_findings: {}", recorded);
    assert!(hit_limit, "should set truncated flag");
}
1622
/// A long, purely alphabetic identifier must not be reported as a
/// `"high_entropy_base64"` finding.
#[test]
fn scan_text_pure_alphabetic_base64_skipped() {
    let cfg = SecretScanConfig::from_inputs_with_env(&[], &[], false).unwrap();

    let scan_ctx = ScanContext {
        agent: None,
        workspace: None,
        source_path: None,
        conversation_id: Some(1),
        message_id: Some(1),
        message_idx: Some(0),
    };

    let mut hits = Vec::new();
    let mut dedupe = HashSet::new();
    let mut hit_limit = false;

    // Letters only: no digits or symbols anywhere in the token.
    let identifier = "SecretScanConfigFromInputsWithEnvTest";
    scan_text(
        identifier,
        SecretLocation::MessageContent,
        &scan_ctx,
        &cfg,
        &mut hits,
        &mut dedupe,
        &mut hit_limit,
    );

    let no_entropy_hit = hits.iter().all(|hit| hit.kind != "high_entropy_base64");
    assert!(
        no_entropy_hit,
        "pure alphabetic strings should not trigger entropy detection"
    );
}
1656}