1use std::collections::{HashMap, HashSet};
2use std::fmt;
3use std::sync::LazyLock;
4
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
8#[serde(rename_all = "kebab-case")]
9pub enum SensitiveTier {
10 ConfirmedSecret,
11 SensitiveArtifact,
12 Suspicious,
13}
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
16#[serde(rename_all = "lowercase")]
17pub enum SensitiveSeverity {
18 Block,
19 Warn,
20}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
23#[serde(rename_all = "kebab-case")]
24pub enum SensitiveEnforcement {
25 #[default]
26 Warn,
27 BlockHigh,
28 BlockAll,
29 StrictHigh,
30 StrictAll,
31}
32
33#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
34#[serde(rename_all = "kebab-case")]
35pub struct SensitiveAllowlistEntry {
36 #[serde(default)]
37 pub path_regex: Option<String>,
38 #[serde(default)]
39 pub rule: Option<String>,
40 #[serde(default)]
41 pub value_regex: Option<String>,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct SensitiveFinding {
46 pub category: &'static str,
47 pub rule: &'static str,
48 pub file_path: String,
49 pub line_number: Option<usize>,
50 pub preview: String,
51 pub tier: SensitiveTier,
52 pub severity: SensitiveSeverity,
53}
54
55#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
56pub struct SensitiveReport {
57 pub findings: Vec<SensitiveFinding>,
58 pub enforcement: SensitiveEnforcement,
59 pub warning_count: usize,
60 pub blocking_count: usize,
61 pub has_findings: bool,
62 pub has_blocking_findings: bool,
63}
64
65impl SensitiveReport {
66 pub fn from_findings(findings: Vec<SensitiveFinding>) -> Self {
67 Self::from_findings_with_enforcement(findings, SensitiveEnforcement::Warn)
68 }
69
70 pub fn from_findings_with_enforcement(
71 findings: Vec<SensitiveFinding>,
72 enforcement: SensitiveEnforcement,
73 ) -> Self {
74 let mut warning_count = 0;
75 let mut blocking_count = 0;
76
77 for finding in &findings {
78 if is_blocking_finding(finding, enforcement) {
79 blocking_count += 1;
80 } else {
81 warning_count += 1;
82 }
83 }
84
85 Self {
86 has_findings: !findings.is_empty(),
87 has_blocking_findings: blocking_count > 0,
88 findings,
89 enforcement,
90 warning_count,
91 blocking_count,
92 }
93 }
94
95 pub fn has_findings(&self) -> bool {
96 self.has_findings
97 }
98
99 pub fn has_blocking_findings(&self) -> bool {
100 self.has_blocking_findings
101 }
102
103 pub fn format_occ_commit_message(&self) -> String {
104 if self.has_blocking_findings {
105 let footer = if allows_sensitive_bypass(self.enforcement) {
106 "Sensitive content detected in diff. Use --allow-sensitive to skip this check."
107 } else {
108 "Sensitive content detected in diff. Strict sensitive mode is active; change the config to continue."
109 };
110 self.format_message(footer)
111 } else {
112 self.format_message("Sensitive findings are warnings only.")
113 }
114 }
115
116 pub fn format_git_hook_message(&self) -> String {
117 if self.has_blocking_findings {
118 let footer = if allows_sensitive_bypass(self.enforcement) {
119 "Commit blocked by OpenCodeCommit.\nBypass only OCC for this command with: OCC_ALLOW_SENSITIVE=1 git commit ..."
120 } else {
121 "Commit blocked by OpenCodeCommit.\nStrict sensitive mode is active; change the config to continue."
122 };
123 self.format_message(footer)
124 } else {
125 self.format_message("OpenCodeCommit warning: sensitive findings detected.")
126 }
127 }
128
129 fn format_message(&self, footer: &str) -> String {
130 if self.findings.is_empty() {
131 return footer.to_owned();
132 }
133
134 let mut lines = vec!["Sensitive findings:".to_owned()];
135 for finding in &self.findings {
136 let location = match finding.line_number {
137 Some(line) => format!("{}:{}", finding.file_path, line),
138 None => finding.file_path.clone(),
139 };
140 let action = if is_blocking_finding(finding, self.enforcement) {
141 "BLOCK"
142 } else {
143 "WARN"
144 };
145 lines.push(format!(
146 "- {} {} [{:?} / {}] {}",
147 action, location, finding.tier, finding.rule, finding.preview
148 ));
149 }
150 lines.push(footer.to_owned());
151 lines.join("\n")
152 }
153}
154
155impl fmt::Display for SensitiveReport {
156 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157 write!(f, "{}", self.format_occ_commit_message())
158 }
159}
160
/// One file section of a git diff, as seen by `parse_diff_file_entries`.
#[derive(Debug, Clone)]
struct DiffFileEntry {
    /// Normalized path taken from the `b/` side of the `diff --git` header.
    path: String,
    /// `true` when the section carries a deletion marker.
    deleted: bool,
}
166
/// Path-derived facts about a file, computed once by
/// `classify_path_from_normalized` and reused by the scanners.
#[derive(Debug, Clone)]
struct PathContext {
    /// Path with forward slashes, original casing preserved.
    normalized_path: String,
    /// Lowercased copy of `normalized_path` used for matching.
    lower_path: String,
    /// Added lines of this file are not content-scanned.
    skip_content: bool,
    /// Path looks like tests, docs, examples, fixtures or similar.
    low_confidence: bool,
    /// Path looks like an env template (for example `.env.example`).
    env_template: bool,
    /// Path looks like a real env file and is not a template.
    env_file: bool,
    /// Path is a Docker client config (`.docker/config.json`, `.dockercfg`).
    docker_config: bool,
    /// Path is an `.npmrc` file.
    npmrc: bool,
    /// Path is a kubeconfig (`kubeconfig`, `.kube/config`).
    kube_config: bool,
}
179
180#[derive(Debug, Clone)]
181struct ProviderRule {
182 pattern: regex::Regex,
183 category: &'static str,
184 rule: &'static str,
185 tier: SensitiveTier,
186 severity: SensitiveSeverity,
187}
188
189#[derive(Debug, Clone)]
190struct LineCandidate {
191 category: &'static str,
192 rule: &'static str,
193 file_path: String,
194 line_number: Option<usize>,
195 preview: String,
196 raw_value: Option<String>,
197 tier: SensitiveTier,
198 severity: SensitiveSeverity,
199}
200
201static DIFF_FILE_RE: LazyLock<regex::Regex> =
202 LazyLock::new(|| regex::Regex::new(r"^diff --git a/.+ b/(.+)$").unwrap());
203
204static DIFF_HUNK_RE: LazyLock<regex::Regex> =
205 LazyLock::new(|| regex::Regex::new(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@").unwrap());
206
207static COMMENT_ONLY_RE: LazyLock<regex::Regex> =
208 LazyLock::new(|| regex::Regex::new(r"^\s*(?:#|//|/\*|\*|--|%|rem\b|')").unwrap());
209
210static IPV4_RE: LazyLock<regex::Regex> =
211 LazyLock::new(|| regex::Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").unwrap());
212
213static PRIVATE_KEY_HEADER_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
214 regex::Regex::new(r"-----BEGIN (?:(?:RSA|DSA|EC|OPENSSH|PGP) )?PRIVATE KEY(?: BLOCK)?-----")
215 .unwrap()
216});
217
218static ENCRYPTED_PRIVATE_KEY_RE: LazyLock<regex::Regex> =
219 LazyLock::new(|| regex::Regex::new(r"-----BEGIN ENCRYPTED PRIVATE KEY-----").unwrap());
220
221static CONNECTION_STRING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
222 regex::Regex::new(
223 r#"\b((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|rediss|amqp|amqps|mssql|sqlserver)://)([^/\s:@]+):([^@\s]+)@([^\s'"]+)"#,
224 )
225 .unwrap()
226});
227
228static BEARER_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
229 regex::Regex::new(
230 r#"(?i)\b(?:authorization|bearer)\b\s*[:=]\s*['"]?bearer\s+([A-Za-z0-9._~+/\-]{20,})"#,
231 )
232 .unwrap()
233});
234
235static JWT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
236 regex::Regex::new(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_.+/=-]{10,}\b")
237 .unwrap()
238});
239
240static DOCKER_AUTH_RE: LazyLock<regex::Regex> =
241 LazyLock::new(|| regex::Regex::new(r#""auth"\s*:\s*"([^"]+)""#).unwrap());
242
243static KUBECONFIG_AUTH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
244 regex::Regex::new(r#"(?:^|\b)(token|client-key-data)\b\s*:\s*("?[^"\s]+"?)"#).unwrap()
245});
246
247static NPM_LITERAL_AUTH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
248 regex::Regex::new(
249 r#"(?i)(?::|^)_(?:authToken|auth|password)\s*=\s*([^\s#]+)|//[^\s]+:_authToken\s*=\s*([^\s#]+)"#,
250 )
251 .unwrap()
252});
253
254static GENERIC_ASSIGNMENT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
255 regex::Regex::new(
256 "(?i)\\b([A-Za-z0-9_.-]{0,40}(?:password|passwd|pwd|secret|token|api[_-]?key|apikey|auth[_-]?token|access[_-]?token|private[_-]?key|client[_-]?secret|credentials?|database[_-]?url|db[_-]?password|webhook[_-]?secret|signing[_-]?key|encryption[_-]?key)[A-Za-z0-9_.-]{0,20})\\b[\"']?\\s*[:=]\\s*(\"[^\"]*\"|'[^']*'|[^\\s,#;]+)",
257 )
258 .unwrap()
259});
260
261static TEMPLATE_ENV_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
262 regex::Regex::new(
263 r"(?:^|/)(?:\.env\.(?:example|sample|template|defaults|schema|spec|test|ci)|[^/]*\.(?:example|sample|template)\.env)$",
264 )
265 .unwrap()
266});
267
268static REAL_ENV_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
269 regex::Regex::new(r"(?:^|/)\.env(?:\.[^/]+)?$|(?:^|/)\.envrc$|(?:^|/)\.direnv/").unwrap()
270});
271
272static LOW_CONFIDENCE_PATH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
273 regex::Regex::new(
274 r"(?:^|/)(?:test|tests|__tests__|spec|__spec__|docs|documentation|example|examples|sample|samples|fixture|fixtures|__fixtures__|testdata|test-data|mock|mocks|__mocks__|stubs?)(?:/|$)",
275 )
276 .unwrap()
277});
278
279static LOW_CONFIDENCE_EXT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
280 regex::Regex::new(r"\.(?:md|rst|adoc|txt|d\.ts|schema\.json|schema\.ya?ml)$").unwrap()
281});
282
283static SKIP_CONTENT_PATH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
284 regex::Regex::new(
285 r"(?i)(?:^|/)(?:vendor|node_modules|third_party|\.git)(?:/|$)|(?:^|/)(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|Gemfile\.lock|Cargo\.lock|poetry\.lock|composer\.lock|go\.sum|Pipfile\.lock)$|\.(?:png|jpe?g|gif|bmp|ico|svg|tiff|webp|mp[34]|avi|mov|wav|flac|ogg|woff2?|eot|otf|ttf|exe|dll|so|dylib|bin|o|a|class|pyc|pyo|wasm|zip|tar|gz|bz2|xz|rar|7z|jar|war|ear)$",
286 )
287 .unwrap()
288});
289
290static PROVIDER_RULES: LazyLock<Vec<ProviderRule>> = LazyLock::new(|| {
291 [
292 (
293 r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}",
294 "token",
295 "github-fine-grained-token",
296 SensitiveTier::ConfirmedSecret,
297 SensitiveSeverity::Block,
298 ),
299 (
300 r"gh[pousr]_[A-Za-z0-9]{36,76}",
301 "token",
302 "github-token",
303 SensitiveTier::ConfirmedSecret,
304 SensitiveSeverity::Block,
305 ),
306 (
307 r"(?:AKIA|ASIA)[A-Z0-9]{16}",
308 "token",
309 "aws-access-key",
310 SensitiveTier::ConfirmedSecret,
311 SensitiveSeverity::Block,
312 ),
313 (
314 r"gl(?:pat|dt|ptt|rt)-[0-9A-Za-z_-]{20,}",
315 "token",
316 "gitlab-token",
317 SensitiveTier::ConfirmedSecret,
318 SensitiveSeverity::Block,
319 ),
320 (
321 r"xoxb-[0-9]+-[0-9A-Za-z]+-[A-Za-z0-9]+",
322 "token",
323 "slack-bot-token",
324 SensitiveTier::ConfirmedSecret,
325 SensitiveSeverity::Block,
326 ),
327 (
328 r"(?i)xoxp-[0-9]+-[0-9]+-[0-9]+-[a-f0-9]+",
329 "token",
330 "slack-user-token",
331 SensitiveTier::ConfirmedSecret,
332 SensitiveSeverity::Block,
333 ),
334 (
335 r"xapp-1-[A-Z0-9]+-[0-9]+-[A-Za-z0-9]+",
336 "token",
337 "slack-app-token",
338 SensitiveTier::ConfirmedSecret,
339 SensitiveSeverity::Block,
340 ),
341 (
342 r"https://hooks\.slack\.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+",
343 "webhook",
344 "slack-webhook",
345 SensitiveTier::ConfirmedSecret,
346 SensitiveSeverity::Block,
347 ),
348 (
349 r"sk_live_[0-9A-Za-z]{24,}",
350 "token",
351 "stripe-live-secret-key",
352 SensitiveTier::ConfirmedSecret,
353 SensitiveSeverity::Block,
354 ),
355 (
356 r"rk_live_[0-9A-Za-z]{24,}",
357 "token",
358 "stripe-live-restricted-key",
359 SensitiveTier::ConfirmedSecret,
360 SensitiveSeverity::Block,
361 ),
362 (
363 r"sk_test_[0-9A-Za-z]{24,}",
364 "token",
365 "stripe-test-secret-key",
366 SensitiveTier::Suspicious,
367 SensitiveSeverity::Warn,
368 ),
369 (
370 r"rk_test_[0-9A-Za-z]{24,}",
371 "token",
372 "stripe-test-restricted-key",
373 SensitiveTier::Suspicious,
374 SensitiveSeverity::Warn,
375 ),
376 (
377 r"SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}",
378 "token",
379 "sendgrid-api-key",
380 SensitiveTier::ConfirmedSecret,
381 SensitiveSeverity::Block,
382 ),
383 (
384 r"sk-proj-[A-Za-z0-9_-]{20,}",
385 "token",
386 "openai-project-key",
387 SensitiveTier::ConfirmedSecret,
388 SensitiveSeverity::Block,
389 ),
390 (
391 r"sk-svcacct-[A-Za-z0-9_-]{20,}",
392 "token",
393 "openai-service-account-key",
394 SensitiveTier::ConfirmedSecret,
395 SensitiveSeverity::Block,
396 ),
397 (
398 r"\bsk-[A-Za-z0-9]{32,}\b",
399 "token",
400 "openai-legacy-key",
401 SensitiveTier::ConfirmedSecret,
402 SensitiveSeverity::Block,
403 ),
404 (
405 r"sk-ant-(?:api03|admin01)-[A-Za-z0-9_-]{80,}",
406 "token",
407 "anthropic-key",
408 SensitiveTier::ConfirmedSecret,
409 SensitiveSeverity::Block,
410 ),
411 (
412 r"AIza[0-9A-Za-z_-]{35}",
413 "token",
414 "gcp-api-key",
415 SensitiveTier::ConfirmedSecret,
416 SensitiveSeverity::Block,
417 ),
418 (
419 r"GOCSPX-[A-Za-z0-9_-]{28}",
420 "token",
421 "gcp-oauth-secret",
422 SensitiveTier::ConfirmedSecret,
423 SensitiveSeverity::Block,
424 ),
425 (
426 r"npm_[A-Za-z0-9]{36}",
427 "token",
428 "npm-token",
429 SensitiveTier::ConfirmedSecret,
430 SensitiveSeverity::Block,
431 ),
432 (
433 r"pypi-[A-Za-z0-9_-]{50,}",
434 "token",
435 "pypi-token",
436 SensitiveTier::ConfirmedSecret,
437 SensitiveSeverity::Block,
438 ),
439 (
440 r"dckr_pat_[A-Za-z0-9_-]{20,}",
441 "token",
442 "docker-token",
443 SensitiveTier::ConfirmedSecret,
444 SensitiveSeverity::Block,
445 ),
446 (
447 r"sntrys_[A-Za-z0-9+/=_-]{20,}",
448 "token",
449 "sentry-token",
450 SensitiveTier::ConfirmedSecret,
451 SensitiveSeverity::Block,
452 ),
453 (
454 r"(?i)key-[0-9a-f]{32}",
455 "token",
456 "mailgun-key",
457 SensitiveTier::ConfirmedSecret,
458 SensitiveSeverity::Block,
459 ),
460 (
461 r"hvs\.[A-Za-z0-9_-]{24,}",
462 "token",
463 "vault-token",
464 SensitiveTier::ConfirmedSecret,
465 SensitiveSeverity::Block,
466 ),
467 (
468 r"https://discord(?:app)?\.com/api/webhooks/[0-9]+/[A-Za-z0-9_-]+",
469 "webhook",
470 "discord-webhook",
471 SensitiveTier::ConfirmedSecret,
472 SensitiveSeverity::Block,
473 ),
474 (
475 "(?i)https://[a-z0-9.-]+\\.webhook\\.office\\.com/[^\\s'\"`]+",
476 "webhook",
477 "teams-webhook",
478 SensitiveTier::ConfirmedSecret,
479 SensitiveSeverity::Block,
480 ),
481 (
482 r"AGE-SECRET-KEY-1[qpzry9x8gf2tvdw0s3jn54khce6mua7l]{58}",
483 "key",
484 "age-secret-key",
485 SensitiveTier::ConfirmedSecret,
486 SensitiveSeverity::Block,
487 ),
488 ]
489 .into_iter()
490 .map(|(pattern, category, rule, tier, severity)| ProviderRule {
491 pattern: regex::Regex::new(pattern).unwrap(),
492 category,
493 rule,
494 tier,
495 severity,
496 })
497 .collect()
498});
499
500pub fn allows_sensitive_bypass(enforcement: SensitiveEnforcement) -> bool {
501 matches!(
502 enforcement,
503 SensitiveEnforcement::Warn
504 | SensitiveEnforcement::BlockHigh
505 | SensitiveEnforcement::BlockAll
506 )
507}
508
509pub fn is_blocking_finding(finding: &SensitiveFinding, enforcement: SensitiveEnforcement) -> bool {
510 match enforcement {
511 SensitiveEnforcement::Warn => false,
512 SensitiveEnforcement::BlockHigh | SensitiveEnforcement::StrictHigh => {
513 finding.severity == SensitiveSeverity::Block
514 }
515 SensitiveEnforcement::BlockAll | SensitiveEnforcement::StrictAll => true,
516 }
517}
518
519pub fn scan_diff_for_sensitive_content(diff: &str, changed_files: &[String]) -> SensitiveReport {
520 scan_diff_for_sensitive_content_with_options(
521 diff,
522 changed_files,
523 SensitiveEnforcement::Warn,
524 &[],
525 )
526}
527
528pub fn scan_diff_for_sensitive_content_with_options(
529 diff: &str,
530 changed_files: &[String],
531 enforcement: SensitiveEnforcement,
532 allowlist: &[SensitiveAllowlistEntry],
533) -> SensitiveReport {
534 let deletion_state: HashMap<String, bool> = parse_diff_file_entries(diff)
535 .into_iter()
536 .map(|entry| (entry.path, entry.deleted))
537 .collect();
538
539 let mut findings = Vec::new();
540 for file in changed_files {
541 let info = classify_path(file);
542 if deletion_state
543 .get(info.normalized_path.as_str())
544 .copied()
545 .unwrap_or(false)
546 {
547 continue;
548 }
549
550 findings.extend(scan_file_path(&info, allowlist));
551 }
552
553 let fallback_file = changed_files
554 .first()
555 .filter(|_| changed_files.len() == 1)
556 .map(|file| normalize_path(file));
557
558 let mut current_file = fallback_file;
559 let mut current_info = current_file.as_deref().map(classify_path_from_normalized);
560 let mut current_line: Option<usize> = None;
561
562 for line in diff.lines() {
563 if let Some(captures) = DIFF_FILE_RE.captures(line) {
564 current_file = Some(normalize_path(&captures[1]));
565 current_info = current_file.as_deref().map(classify_path_from_normalized);
566 current_line = None;
567 continue;
568 }
569
570 if let Some(captures) = DIFF_HUNK_RE.captures(line) {
571 current_line = captures[1].parse::<usize>().ok();
572 continue;
573 }
574
575 if line.starts_with("+++") {
576 continue;
577 }
578
579 if let Some(added_line) = line.strip_prefix('+') {
580 let file_path = current_file.clone().unwrap_or_else(|| "unknown".to_owned());
581 let info = current_info
582 .clone()
583 .unwrap_or_else(|| classify_path_from_normalized(file_path.as_str()));
584 if !info.skip_content {
585 findings.extend(scan_added_line(
586 &file_path,
587 &info,
588 added_line,
589 current_line,
590 allowlist,
591 ));
592 }
593
594 if let Some(line_no) = current_line.as_mut() {
595 *line_no += 1;
596 }
597 continue;
598 }
599
600 if line.starts_with(' ')
601 && let Some(line_no) = current_line.as_mut()
602 {
603 *line_no += 1;
604 }
605 }
606
607 SensitiveReport::from_findings_with_enforcement(dedupe_findings(findings), enforcement)
608}
609
610fn parse_diff_file_entries(diff: &str) -> Vec<DiffFileEntry> {
611 let mut entries = Vec::new();
612 let mut current: Option<DiffFileEntry> = None;
613
614 for line in diff.lines() {
615 if let Some(captures) = DIFF_FILE_RE.captures(line) {
616 if let Some(entry) = current.take() {
617 entries.push(entry);
618 }
619 current = Some(DiffFileEntry {
620 path: normalize_path(&captures[1]),
621 deleted: false,
622 });
623 continue;
624 }
625
626 if (line == "deleted file mode 100644"
627 || line == "deleted file mode 100755"
628 || line == "+++ /dev/null")
629 && let Some(entry) = current.as_mut()
630 {
631 entry.deleted = true;
632 }
633 }
634
635 if let Some(entry) = current {
636 entries.push(entry);
637 }
638
639 entries
640}
641
/// Returns `file_path` with every backslash replaced by a forward slash.
fn normalize_path(file_path: &str) -> String {
    file_path
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
645
646fn classify_path(file_path: &str) -> PathContext {
647 classify_path_from_normalized(&normalize_path(file_path))
648}
649
650fn classify_path_from_normalized(file_path: &str) -> PathContext {
651 let normalized_path = file_path.to_owned();
652 let lower_path = normalized_path.to_lowercase();
653 let env_template = TEMPLATE_ENV_RE.is_match(lower_path.as_str());
654 let env_file = REAL_ENV_RE.is_match(lower_path.as_str()) && !env_template;
655
656 PathContext {
657 normalized_path,
658 lower_path: lower_path.clone(),
659 skip_content: SKIP_CONTENT_PATH_RE.is_match(lower_path.as_str()),
660 low_confidence: LOW_CONFIDENCE_PATH_RE.is_match(lower_path.as_str())
661 || LOW_CONFIDENCE_EXT_RE.is_match(lower_path.as_str()),
662 env_template,
663 env_file,
664 docker_config: lower_path.ends_with("/.docker/config.json")
665 || lower_path == ".docker/config.json"
666 || lower_path.ends_with("/.dockercfg")
667 || lower_path == ".dockercfg",
668 npmrc: lower_path.ends_with("/.npmrc") || lower_path == ".npmrc",
669 kube_config: lower_path.ends_with("/kubeconfig")
670 || lower_path == "kubeconfig"
671 || lower_path.ends_with("/.kube/config")
672 || lower_path == ".kube/config",
673 }
674}
675
676fn scan_file_path(
677 info: &PathContext,
678 allowlist: &[SensitiveAllowlistEntry],
679) -> Vec<SensitiveFinding> {
680 let mut findings = Vec::new();
681
682 let mut push = |category: &'static str,
683 rule: &'static str,
684 tier: SensitiveTier,
685 severity: SensitiveSeverity| {
686 push_candidate(
687 &mut findings,
688 allowlist,
689 LineCandidate {
690 category,
691 rule,
692 file_path: info.normalized_path.clone(),
693 line_number: None,
694 preview: info.normalized_path.clone(),
695 raw_value: None,
696 tier,
697 severity,
698 },
699 );
700 };
701
702 if info.env_file {
703 push(
704 "artifact",
705 "env-file",
706 SensitiveTier::SensitiveArtifact,
707 SensitiveSeverity::Block,
708 );
709 } else if info.lower_path.ends_with("/.netrc")
710 || info.lower_path == ".netrc"
711 || info.lower_path.ends_with("/.git-credentials")
712 || info.lower_path == ".git-credentials"
713 {
714 push(
715 "artifact",
716 "credential-store-file",
717 SensitiveTier::SensitiveArtifact,
718 SensitiveSeverity::Block,
719 );
720 } else if info.docker_config {
721 push(
722 "artifact",
723 "docker-config-file",
724 SensitiveTier::Suspicious,
725 SensitiveSeverity::Warn,
726 );
727 } else if info.npmrc {
728 push(
729 "artifact",
730 "npmrc-file",
731 SensitiveTier::Suspicious,
732 SensitiveSeverity::Warn,
733 );
734 } else if info.lower_path.ends_with("/.pypirc")
735 || info.lower_path == ".pypirc"
736 || info.lower_path.ends_with("/.gem/credentials")
737 || info.lower_path == ".gem/credentials"
738 || regex::Regex::new(r"(?:^|/)\.cargo/credentials(?:\.toml)?$")
739 .unwrap()
740 .is_match(info.lower_path.as_str())
741 {
742 push(
743 "artifact",
744 "package-manager-credential-file",
745 SensitiveTier::SensitiveArtifact,
746 SensitiveSeverity::Block,
747 );
748 } else if regex::Regex::new(r"terraform\.tfstate(?:\.backup)?$")
749 .unwrap()
750 .is_match(info.lower_path.as_str())
751 || info.lower_path.contains("/.terraform/")
752 {
753 push(
754 "artifact",
755 "terraform-state-file",
756 SensitiveTier::SensitiveArtifact,
757 SensitiveSeverity::Block,
758 );
759 } else if info.lower_path.ends_with(".tfvars") || info.lower_path.ends_with(".auto.tfvars") {
760 push(
761 "artifact",
762 "terraform-vars-file",
763 SensitiveTier::Suspicious,
764 SensitiveSeverity::Warn,
765 );
766 } else if info.kube_config {
767 push(
768 "artifact",
769 "kubeconfig-file",
770 SensitiveTier::SensitiveArtifact,
771 SensitiveSeverity::Block,
772 );
773 } else if regex::Regex::new(r"(?:^|/)credentials\.json$")
774 .unwrap()
775 .is_match(info.lower_path.as_str())
776 || regex::Regex::new(r"(?:^|/)service[-_]?account.*\.json$")
777 .unwrap()
778 .is_match(info.lower_path.as_str())
779 {
780 push(
781 "artifact",
782 "service-account-file",
783 SensitiveTier::SensitiveArtifact,
784 SensitiveSeverity::Block,
785 );
786 } else if regex::Regex::new(r"(?:^|/)id_(?:rsa|ed25519|ecdsa|dsa)$")
787 .unwrap()
788 .is_match(info.lower_path.as_str())
789 || regex::Regex::new(r"(?:^|/)\.ssh/")
790 .unwrap()
791 .is_match(info.lower_path.as_str())
792 {
793 push(
794 "artifact",
795 "ssh-private-key-file",
796 SensitiveTier::SensitiveArtifact,
797 SensitiveSeverity::Block,
798 );
799 } else if info.lower_path.ends_with(".pem") {
800 push(
801 "artifact",
802 "pem-file",
803 SensitiveTier::Suspicious,
804 SensitiveSeverity::Warn,
805 );
806 } else if regex::Regex::new(r"\.(?:p12|pfx|keystore|jks|pepk|ppk|key)$")
807 .unwrap()
808 .is_match(info.lower_path.as_str())
809 || info.lower_path.ends_with("/key.properties")
810 || info.lower_path == "key.properties"
811 {
812 push(
813 "artifact",
814 "key-material-file",
815 SensitiveTier::SensitiveArtifact,
816 SensitiveSeverity::Block,
817 );
818 } else if info.lower_path.ends_with(".har") {
819 push(
820 "artifact",
821 "http-archive-file",
822 SensitiveTier::SensitiveArtifact,
823 SensitiveSeverity::Block,
824 );
825 } else if regex::Regex::new(r"\.(?:hprof|core|dmp|mdmp|pcap|pcapng)$")
826 .unwrap()
827 .is_match(info.lower_path.as_str())
828 || regex::Regex::new(r"core\.\d+$")
829 .unwrap()
830 .is_match(info.lower_path.as_str())
831 {
832 push(
833 "artifact",
834 "dump-file",
835 SensitiveTier::SensitiveArtifact,
836 SensitiveSeverity::Block,
837 );
838 } else if info.lower_path.ends_with(".mobileprovision") {
839 push(
840 "artifact",
841 "mobileprovision-file",
842 SensitiveTier::Suspicious,
843 SensitiveSeverity::Warn,
844 );
845 } else if regex::Regex::new(r"\.(?:sqlite|sqlite3|db|sql)$")
846 .unwrap()
847 .is_match(info.lower_path.as_str())
848 {
849 push(
850 "artifact",
851 "database-artifact-file",
852 SensitiveTier::Suspicious,
853 SensitiveSeverity::Warn,
854 );
855 } else if info.lower_path.ends_with(".map") {
856 push(
857 "artifact",
858 "source-map-file",
859 SensitiveTier::Suspicious,
860 SensitiveSeverity::Warn,
861 );
862 } else if info.lower_path.ends_with("/.htpasswd") || info.lower_path == ".htpasswd" {
863 push(
864 "artifact",
865 "auth-file",
866 SensitiveTier::SensitiveArtifact,
867 SensitiveSeverity::Block,
868 );
869 }
870
871 findings
872}
873
874fn scan_added_line(
875 file_path: &str,
876 info: &PathContext,
877 line: &str,
878 line_number: Option<usize>,
879 allowlist: &[SensitiveAllowlistEntry],
880) -> Vec<SensitiveFinding> {
881 let provider_matched = has_provider_match(line);
882 let structural_matched = has_structural_match(info, line);
883 let providers = scan_provider_line(file_path, line, line_number, allowlist);
884 let structural = scan_structural_line(file_path, info, line, line_number, allowlist);
885 if provider_matched || structural_matched {
886 return dedupe_findings([providers, structural].concat());
887 }
888
889 if COMMENT_ONLY_RE.is_match(line) {
890 return vec![];
891 }
892
893 let generic = scan_generic_assignments(file_path, info, line, line_number, allowlist);
894 let network = scan_ip_line(file_path, line, line_number, allowlist);
895 dedupe_findings([generic, network].concat())
896}
897
898fn scan_provider_line(
899 file_path: &str,
900 line: &str,
901 line_number: Option<usize>,
902 allowlist: &[SensitiveAllowlistEntry],
903) -> Vec<SensitiveFinding> {
904 let mut findings = Vec::new();
905
906 for rule in PROVIDER_RULES.iter() {
907 for matched in rule.pattern.find_iter(line) {
908 let value = matched.as_str();
909 if is_placeholder_value(value) {
910 continue;
911 }
912
913 push_candidate(
914 &mut findings,
915 allowlist,
916 LineCandidate {
917 category: rule.category,
918 rule: rule.rule,
919 file_path: file_path.to_owned(),
920 line_number,
921 preview: format_line_preview(line),
922 raw_value: Some(value.to_owned()),
923 tier: rule.tier,
924 severity: rule.severity,
925 },
926 );
927 }
928 }
929
930 dedupe_findings(findings)
931}
932
933fn has_provider_match(line: &str) -> bool {
934 PROVIDER_RULES.iter().any(|rule| {
935 rule.pattern
936 .find_iter(line)
937 .any(|matched| !is_placeholder_value(matched.as_str()))
938 })
939}
940
941fn scan_structural_line(
942 file_path: &str,
943 info: &PathContext,
944 line: &str,
945 line_number: Option<usize>,
946 allowlist: &[SensitiveAllowlistEntry],
947) -> Vec<SensitiveFinding> {
948 let mut findings = Vec::new();
949
950 if PRIVATE_KEY_HEADER_RE.is_match(line) {
951 push_candidate(
952 &mut findings,
953 allowlist,
954 LineCandidate {
955 category: "key",
956 rule: "private-key-block",
957 file_path: file_path.to_owned(),
958 line_number,
959 preview: format_line_preview(line),
960 raw_value: Some(line.trim().to_owned()),
961 tier: SensitiveTier::ConfirmedSecret,
962 severity: SensitiveSeverity::Block,
963 },
964 );
965 } else if ENCRYPTED_PRIVATE_KEY_RE.is_match(line) {
966 push_candidate(
967 &mut findings,
968 allowlist,
969 LineCandidate {
970 category: "key",
971 rule: "encrypted-private-key-block",
972 file_path: file_path.to_owned(),
973 line_number,
974 preview: format_line_preview(line),
975 raw_value: Some(line.trim().to_owned()),
976 tier: SensitiveTier::Suspicious,
977 severity: SensitiveSeverity::Warn,
978 },
979 );
980 }
981
982 for captures in CONNECTION_STRING_RE.captures_iter(line) {
983 let password = clean_value(captures.get(3).map(|m| m.as_str()).unwrap_or_default());
984 let host = captures.get(4).map(|m| m.as_str()).unwrap_or_default();
985 if is_placeholder_value(password.as_str()) {
986 continue;
987 }
988
989 let severity = if is_local_host(host) {
990 SensitiveSeverity::Warn
991 } else {
992 SensitiveSeverity::Block
993 };
994 let tier = if severity == SensitiveSeverity::Block {
995 SensitiveTier::ConfirmedSecret
996 } else {
997 SensitiveTier::Suspicious
998 };
999
1000 push_candidate(
1001 &mut findings,
1002 allowlist,
1003 LineCandidate {
1004 category: "connection",
1005 rule: "credential-connection-string",
1006 file_path: file_path.to_owned(),
1007 line_number,
1008 preview: format_line_preview(line),
1009 raw_value: Some(password),
1010 tier,
1011 severity,
1012 },
1013 );
1014 }
1015
1016 for captures in BEARER_RE.captures_iter(line) {
1017 let token = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1018 if is_placeholder_value(token.as_str()) {
1019 continue;
1020 }
1021
1022 push_candidate(
1023 &mut findings,
1024 allowlist,
1025 LineCandidate {
1026 category: "token",
1027 rule: "bearer-token",
1028 file_path: file_path.to_owned(),
1029 line_number,
1030 preview: format_line_preview(line),
1031 raw_value: Some(token),
1032 tier: SensitiveTier::ConfirmedSecret,
1033 severity: SensitiveSeverity::Block,
1034 },
1035 );
1036 }
1037
1038 for matched in JWT_RE.find_iter(line) {
1039 let token = matched.as_str();
1040 if is_placeholder_value(token) {
1041 continue;
1042 }
1043
1044 push_candidate(
1045 &mut findings,
1046 allowlist,
1047 LineCandidate {
1048 category: "token",
1049 rule: "jwt-token",
1050 file_path: file_path.to_owned(),
1051 line_number,
1052 preview: format_line_preview(line),
1053 raw_value: Some(token.to_owned()),
1054 tier: SensitiveTier::Suspicious,
1055 severity: SensitiveSeverity::Warn,
1056 },
1057 );
1058 }
1059
1060 if info.docker_config {
1061 for captures in DOCKER_AUTH_RE.captures_iter(line) {
1062 let value = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1063 if is_placeholder_value(value.as_str()) {
1064 continue;
1065 }
1066
1067 push_candidate(
1068 &mut findings,
1069 allowlist,
1070 LineCandidate {
1071 category: "credential",
1072 rule: "docker-config-auth",
1073 file_path: file_path.to_owned(),
1074 line_number,
1075 preview: format_line_preview(line),
1076 raw_value: Some(value),
1077 tier: SensitiveTier::ConfirmedSecret,
1078 severity: SensitiveSeverity::Block,
1079 },
1080 );
1081 }
1082 }
1083
1084 if info.kube_config {
1085 for captures in KUBECONFIG_AUTH_RE.captures_iter(line) {
1086 let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1087 if is_placeholder_value(value.as_str()) {
1088 continue;
1089 }
1090
1091 push_candidate(
1092 &mut findings,
1093 allowlist,
1094 LineCandidate {
1095 category: "credential",
1096 rule: "kubeconfig-auth",
1097 file_path: file_path.to_owned(),
1098 line_number,
1099 preview: format_line_preview(line),
1100 raw_value: Some(value),
1101 tier: SensitiveTier::ConfirmedSecret,
1102 severity: SensitiveSeverity::Block,
1103 },
1104 );
1105 }
1106 }
1107
1108 if info.npmrc {
1109 for captures in NPM_LITERAL_AUTH_RE.captures_iter(line) {
1110 let value = clean_value(
1111 captures
1112 .get(1)
1113 .or_else(|| captures.get(2))
1114 .map(|m| m.as_str())
1115 .unwrap_or_default(),
1116 );
1117 if value.is_empty() || is_placeholder_value(value.as_str()) {
1118 continue;
1119 }
1120
1121 push_candidate(
1122 &mut findings,
1123 allowlist,
1124 LineCandidate {
1125 category: "credential",
1126 rule: "npm-auth",
1127 file_path: file_path.to_owned(),
1128 line_number,
1129 preview: format_line_preview(line),
1130 raw_value: Some(value),
1131 tier: SensitiveTier::ConfirmedSecret,
1132 severity: SensitiveSeverity::Block,
1133 },
1134 );
1135 }
1136 }
1137
1138 dedupe_findings(findings)
1139}
1140
1141fn has_structural_match(info: &PathContext, line: &str) -> bool {
1142 if PRIVATE_KEY_HEADER_RE.is_match(line) || ENCRYPTED_PRIVATE_KEY_RE.is_match(line) {
1143 return true;
1144 }
1145
1146 if CONNECTION_STRING_RE.captures_iter(line).any(|captures| {
1147 let password = clean_value(captures.get(3).map(|m| m.as_str()).unwrap_or_default());
1148 !is_placeholder_value(password.as_str())
1149 }) || BEARER_RE.captures_iter(line).any(|captures| {
1150 let token = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1151 !is_placeholder_value(token.as_str())
1152 }) || JWT_RE
1153 .find_iter(line)
1154 .any(|matched| !is_placeholder_value(matched.as_str()))
1155 {
1156 return true;
1157 }
1158
1159 if info.docker_config
1160 && DOCKER_AUTH_RE.captures_iter(line).any(|captures| {
1161 let value = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1162 !is_placeholder_value(value.as_str())
1163 })
1164 {
1165 return true;
1166 }
1167
1168 if info.kube_config
1169 && KUBECONFIG_AUTH_RE.captures_iter(line).any(|captures| {
1170 let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1171 !is_placeholder_value(value.as_str())
1172 })
1173 {
1174 return true;
1175 }
1176
1177 info.npmrc
1178 && NPM_LITERAL_AUTH_RE.captures_iter(line).any(|captures| {
1179 let value = clean_value(
1180 captures
1181 .get(1)
1182 .or_else(|| captures.get(2))
1183 .map(|m| m.as_str())
1184 .unwrap_or_default(),
1185 );
1186 !value.is_empty() && !is_placeholder_value(value.as_str())
1187 })
1188}
1189
1190fn scan_generic_assignments(
1191 file_path: &str,
1192 info: &PathContext,
1193 line: &str,
1194 line_number: Option<usize>,
1195 allowlist: &[SensitiveAllowlistEntry],
1196) -> Vec<SensitiveFinding> {
1197 let mut findings = Vec::new();
1198
1199 for captures in GENERIC_ASSIGNMENT_RE.captures_iter(line) {
1200 let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1201 if value.is_empty()
1202 || is_placeholder_value(value.as_str())
1203 || is_reference_value(value.as_str())
1204 || !passes_generic_secret_heuristics(value.as_str())
1205 {
1206 continue;
1207 }
1208
1209 let _downgraded = info.low_confidence || info.env_template;
1210 push_candidate(
1211 &mut findings,
1212 allowlist,
1213 LineCandidate {
1214 category: "credential",
1215 rule: "generic-secret-assignment",
1216 file_path: file_path.to_owned(),
1217 line_number,
1218 preview: format_line_preview(line),
1219 raw_value: Some(value),
1220 tier: SensitiveTier::Suspicious,
1221 severity: SensitiveSeverity::Warn,
1222 },
1223 );
1224 }
1225
1226 dedupe_findings(findings)
1227}
1228
1229fn scan_ip_line(
1230 file_path: &str,
1231 line: &str,
1232 line_number: Option<usize>,
1233 allowlist: &[SensitiveAllowlistEntry],
1234) -> Vec<SensitiveFinding> {
1235 let mut findings = Vec::new();
1236
1237 for matched in IPV4_RE.find_iter(line) {
1238 let ip = matched.as_str();
1239 let Some(parsed) = parse_ipv4(ip) else {
1240 continue;
1241 };
1242 if !is_public_ipv4(parsed) {
1243 continue;
1244 }
1245
1246 push_candidate(
1247 &mut findings,
1248 allowlist,
1249 LineCandidate {
1250 category: "network",
1251 rule: "public-ipv4",
1252 file_path: file_path.to_owned(),
1253 line_number,
1254 preview: format_line_preview(line),
1255 raw_value: Some(ip.to_owned()),
1256 tier: SensitiveTier::Suspicious,
1257 severity: SensitiveSeverity::Warn,
1258 },
1259 );
1260 }
1261
1262 dedupe_findings(findings)
1263}
1264
1265fn push_candidate(
1266 findings: &mut Vec<SensitiveFinding>,
1267 allowlist: &[SensitiveAllowlistEntry],
1268 candidate: LineCandidate,
1269) {
1270 if matches_allowlist(&candidate, allowlist) {
1271 return;
1272 }
1273
1274 findings.push(SensitiveFinding {
1275 category: candidate.category,
1276 rule: candidate.rule,
1277 file_path: candidate.file_path,
1278 line_number: candidate.line_number,
1279 preview: candidate.preview,
1280 tier: candidate.tier,
1281 severity: candidate.severity,
1282 });
1283}
1284
1285fn matches_allowlist(candidate: &LineCandidate, allowlist: &[SensitiveAllowlistEntry]) -> bool {
1286 allowlist.iter().any(|entry| {
1287 let path_ok = entry
1288 .path_regex
1289 .as_deref()
1290 .map(|pattern| {
1291 regex::Regex::new(pattern)
1292 .unwrap()
1293 .is_match(&candidate.file_path)
1294 })
1295 .unwrap_or(true);
1296 let rule_ok = entry
1297 .rule
1298 .as_deref()
1299 .map(|rule| rule == candidate.rule)
1300 .unwrap_or(true);
1301 let value_target = candidate.raw_value.as_deref().unwrap_or(&candidate.preview);
1302 let value_ok = entry
1303 .value_regex
1304 .as_deref()
1305 .map(|pattern| regex::Regex::new(pattern).unwrap().is_match(value_target))
1306 .unwrap_or(true);
1307 path_ok && rule_ok && value_ok
1308 })
1309}
1310
1311fn dedupe_findings(findings: Vec<SensitiveFinding>) -> Vec<SensitiveFinding> {
1312 let mut seen = HashSet::new();
1313 findings
1314 .into_iter()
1315 .filter(|finding| {
1316 let key = format!(
1317 "{}::{}::{}::{}",
1318 finding.rule,
1319 finding.file_path,
1320 finding.line_number.unwrap_or_default(),
1321 finding.preview
1322 );
1323 seen.insert(key)
1324 })
1325 .collect()
1326}
1327
/// Normalises a captured value: trims surrounding whitespace, then strips
/// leading quote characters (`"`, `'`, `` ` ``) and trailing quote characters,
/// semicolons and commas.
///
/// Whitespace that becomes exposed once the quotes are removed is kept.
fn clean_value(value: &str) -> String {
    const LEADING: [char; 3] = ['"', '\'', '`'];
    const TRAILING: [char; 5] = ['"', '\'', '`', ';', ','];

    let trimmed = value.trim();
    let without_prefix = trimmed.trim_start_matches(LEADING);
    let without_suffix = without_prefix.trim_end_matches(TRAILING);
    String::from(without_suffix)
}
1335
/// Builds the preview text stored on a finding: the trimmed line, shortened to
/// at most 160 bytes.
///
/// Lines longer than 160 bytes are cut to 157 bytes and suffixed with `...`.
/// When byte 157 falls inside a multi-byte UTF-8 character the cut moves back to
/// the previous character boundary, so non-ASCII lines can never cause a panic
/// (`String::truncate` panics on a non-boundary index).
fn format_line_preview(line: impl Into<String>) -> String {
    const MAX_BYTES: usize = 160;
    const ELLIPSIS: &str = "...";

    let line = line.into();
    let mut preview = line.trim().to_owned();

    if preview.len() > MAX_BYTES {
        let mut cut = MAX_BYTES - ELLIPSIS.len();
        // Index 0 is always a boundary, so this loop terminates.
        while !preview.is_char_boundary(cut) {
            cut -= 1;
        }
        preview.truncate(cut);
        preview.push_str(ELLIPSIS);
    }

    preview
}
1344
1345fn is_placeholder_value(value: &str) -> bool {
1346 let trimmed = clean_value(value);
1347 let lower = trimmed.to_lowercase();
1348
1349 if trimmed.is_empty() || trimmed.len() < 8 {
1350 return true;
1351 }
1352 if is_reference_value(trimmed.as_str()) {
1353 return true;
1354 }
1355
1356 let exact_placeholders = [
1357 "example",
1358 "sample",
1359 "demo",
1360 "test",
1361 "dummy",
1362 "fake",
1363 "placeholder",
1364 "mock",
1365 "fixme",
1366 "todo",
1367 "temp",
1368 "tmp",
1369 "none",
1370 "null",
1371 "undefined",
1372 "empty",
1373 "default",
1374 "redacted",
1375 "removed",
1376 "censored",
1377 "changeme",
1378 "replace_me",
1379 "password",
1380 "qwerty",
1381 "letmein",
1382 "123456",
1383 "000000",
1384 "111111",
1385 "user:pass",
1386 "username:password",
1387 ];
1388 if exact_placeholders.contains(&lower.as_str()) {
1389 return true;
1390 }
1391
1392 if regex::Regex::new(
1393 r"(?i)your[_-]?(?:api[_-]?key|token|secret|password|key)[_-]?here|(?:replace|change|insert|fill|update|put|add)[_-]?(?:me|your)",
1394 )
1395 .unwrap()
1396 .is_match(trimmed.as_str())
1397 {
1398 return true;
1399 }
1400
1401 if regex::Regex::new(r"(?i)^(?:x{4,}|\*{4,}|0{6,}|1{6,}|#{4,}|\.{4,})$")
1402 .unwrap()
1403 .is_match(trimmed.as_str())
1404 {
1405 return true;
1406 }
1407
1408 trimmed.contains("...")
1409}
1410
1411fn is_reference_value(value: &str) -> bool {
1412 regex::Regex::new(
1413 r#"(?ix)
1414 ^\$\{.+\}$|
1415 ^\$\(.+\)$|
1416 ^%[A-Z_][A-Z0-9_]*%$|
1417 ^\{\{.+\}\}$|
1418 ^<[A-Za-z0-9_-]+>$|
1419 ^\$[A-Z_][A-Z0-9_]*$|
1420 \bprocess\.env\.|
1421 \bos\.environ\[|
1422 \bos\.getenv\(|
1423 \bSystem\.getenv\(|
1424 \bENV\[|
1425 \$ENV\{|
1426 \benv\(['"][A-Za-z0-9_]+['"]\)
1427 "#,
1428 )
1429 .unwrap()
1430 .is_match(value)
1431 || value.contains("${")
1432 || value.contains("{{")
1433 || value.contains("$(")
1434}
1435
1436fn passes_generic_secret_heuristics(value: &str) -> bool {
1437 if value.len() < 8 {
1438 return false;
1439 }
1440 if value.chars().filter(|ch| ch.is_ascii_digit()).count() < 2 {
1441 return false;
1442 }
1443
1444 let unique_chars = value.chars().collect::<HashSet<_>>().len();
1445 if unique_chars < 6 {
1446 return false;
1447 }
1448
1449 let hex_like = regex::Regex::new(r"^[0-9a-f]+$").unwrap().is_match(value);
1450 let entropy = shannon_entropy(value);
1451 if hex_like {
1452 entropy >= 3.0
1453 } else {
1454 entropy >= 3.0
1455 }
1456}
1457
/// Computes the Shannon entropy of `value` in bits per character.
///
/// Frequencies are taken over Unicode scalar values and divided by the number
/// of characters (not bytes), so the probabilities sum to 1 for non-ASCII input
/// as well. The empty string has entropy `0.0`.
fn shannon_entropy(value: &str) -> f64 {
    let mut counts: HashMap<char, usize> = HashMap::new();
    let mut total_chars = 0_usize;
    for ch in value.chars() {
        *counts.entry(ch).or_default() += 1;
        total_chars += 1;
    }

    let total = total_chars as f64;
    counts
        .values()
        .map(|&count| {
            let p = count as f64 / total;
            -p * p.log2()
        })
        .sum()
}
1473
/// Parses a dotted-quad string into its four octets.
///
/// Returns `None` unless `value` has exactly four `.`-separated parts and each
/// part parses as a `u8`.
fn parse_ipv4(value: &str) -> Option<[u8; 4]> {
    let mut parts = value.split('.');
    let mut octets = [0_u8; 4];

    for slot in &mut octets {
        *slot = parts.next()?.parse::<u8>().ok()?;
    }

    // Anything left over means there were more than four parts.
    parts.next().is_none().then_some(octets)
}
1488
/// Returns `true` when `ip` falls outside every range this scanner treats as
/// non-public.
///
/// Excluded ranges: `0.0.0.0/8`, `10.0.0.0/8`, `127.0.0.0/8`,
/// `169.254.0.0/16`, `172.16.0.0/12`, `192.168.0.0/16`, and the three /24
/// blocks `192.0.2.0`, `198.51.100.0` and `203.0.113.0`.
fn is_public_ipv4(ip: [u8; 4]) -> bool {
    let non_public = matches!(
        ip,
        [0 | 10 | 127, ..]
            | [169, 254, ..]
            | [172, 16..=31, ..]
            | [192, 168, ..]
            | [192, 0, 2, _]
            | [198, 51, 100, _]
            | [203, 0, 113, _]
    );
    !non_public
}
1505
1506fn is_local_host(host: &str) -> bool {
1507 let value = host
1508 .to_lowercase()
1509 .split([':', '/'])
1510 .next()
1511 .unwrap_or_default()
1512 .to_owned();
1513
1514 if matches!(
1515 value.as_str(),
1516 "localhost" | "127.0.0.1" | "0.0.0.0" | "::1"
1517 ) || value.ends_with(".local")
1518 || value.ends_with(".internal")
1519 || value.ends_with(".example")
1520 || value.ends_with(".test")
1521 {
1522 return true;
1523 }
1524
1525 parse_ipv4(value.as_str())
1526 .map(|ip| !is_public_ipv4(ip))
1527 .unwrap_or(false)
1528}
1529
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// One expected finding as written in the scenario fixture (camelCase keys).
    /// Tier and severity are plain strings and are compared through
    /// `tier_name` / `severity_name`.
    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct ScenarioFinding {
        category: String,
        rule: String,
        file_path: String,
        line_number: Option<usize>,
        preview: String,
        tier: String,
        severity: String,
    }

    /// A fixture scenario: a diff, the files it touches, and the findings the
    /// detector is expected to produce for it, in order.
    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct Scenario {
        name: String,
        diff: String,
        changed_files: Vec<String>,
        expected_findings: Vec<ScenarioFinding>,
    }

    /// Reads the scenario fixture located two directories above the crate
    /// manifest. Panics when the file is missing or is not valid JSON.
    fn load_shared_scenarios() -> Vec<Scenario> {
        let fixture_path = format!(
            "{}/../../test-fixtures/sensitive-scenarios.json",
            env!("CARGO_MANIFEST_DIR")
        );
        let raw = std::fs::read_to_string(fixture_path).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// Fixture spelling of a tier.
    fn tier_name(tier: SensitiveTier) -> &'static str {
        match tier {
            SensitiveTier::ConfirmedSecret => "confirmed-secret",
            SensitiveTier::SensitiveArtifact => "sensitive-artifact",
            SensitiveTier::Suspicious => "suspicious",
        }
    }

    /// Fixture spelling of a severity.
    fn severity_name(severity: SensitiveSeverity) -> &'static str {
        match severity {
            SensitiveSeverity::Block => "block",
            SensitiveSeverity::Warn => "warn",
        }
    }

    /// Report holding a single suspicious hard-coded password finding at
    /// `line_number`, evaluated under `StrictAll` enforcement.
    fn strict_all_password_report(line_number: usize) -> SensitiveReport {
        let finding = SensitiveFinding {
            category: "credential",
            rule: "generic-secret-assignment",
            file_path: "src/auth.ts".to_owned(),
            line_number: Some(line_number),
            preview: r#"const PASSWORD = "Alpha9981Zeta""#.to_owned(),
            tier: SensitiveTier::Suspicious,
            severity: SensitiveSeverity::Warn,
        };
        SensitiveReport::from_findings_with_enforcement(
            vec![finding],
            SensitiveEnforcement::StrictAll,
        )
    }

    #[test]
    fn shared_scenarios_match_rust_detector() {
        for scenario in load_shared_scenarios() {
            let name = scenario.name.as_str();
            let report = scan_diff_for_sensitive_content(&scenario.diff, &scenario.changed_files);
            let actual = &report.findings;
            let wanted = &scenario.expected_findings;

            assert_eq!(
                actual.len(),
                wanted.len(),
                "scenario {} finding count mismatch",
                name
            );

            // Findings are compared pairwise, so their order matters too.
            for (got, want) in actual.iter().zip(wanted) {
                assert_eq!(got.category, want.category, "{}", name);
                assert_eq!(got.rule, want.rule, "{}", name);
                assert_eq!(got.file_path, want.file_path, "{}", name);
                assert_eq!(got.line_number, want.line_number, "{}", name);
                assert_eq!(got.preview, want.preview, "{}", name);
                assert_eq!(tier_name(got.tier), want.tier, "{}", name);
                assert_eq!(severity_name(got.severity), want.severity, "{}", name);
            }
        }
    }

    #[test]
    fn allowlist_matches_path_rule_and_value() {
        let diff = "\
diff --git a/.env.example b/.env.example
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1 @@
+OPENAI_API_KEY=sk-proj-abcdefghijklmnopqrstuvwxyz1234567890
";
        let changed_files = [".env.example".to_owned()];
        let allowlist = [SensitiveAllowlistEntry {
            path_regex: Some(r"\.env\.example$".to_owned()),
            rule: Some("openai-project-key".to_owned()),
            value_regex: Some(r"^sk-proj-".to_owned()),
        }];

        let report = scan_diff_for_sensitive_content_with_options(
            diff,
            &changed_files,
            SensitiveEnforcement::Warn,
            &allowlist,
        );
        assert!(!report.has_findings());
    }

    #[test]
    fn allowlist_matches_path_only_artifact_findings() {
        let changed_files = [".env".to_owned()];
        let allowlist = [SensitiveAllowlistEntry {
            path_regex: Some(r"\.env$".to_owned()),
            rule: Some("env-file".to_owned()),
            value_regex: None,
        }];

        let report = scan_diff_for_sensitive_content_with_options(
            "diff",
            &changed_files,
            SensitiveEnforcement::Warn,
            &allowlist,
        );
        assert!(!report.has_findings());
    }

    #[test]
    fn warn_mode_never_marks_findings_blocking() {
        let env_file_finding = SensitiveFinding {
            category: "artifact",
            rule: "env-file",
            file_path: ".env".to_owned(),
            line_number: None,
            preview: ".env".to_owned(),
            tier: SensitiveTier::SensitiveArtifact,
            severity: SensitiveSeverity::Block,
        };
        let report = SensitiveReport::from_findings_with_enforcement(
            vec![env_file_finding],
            SensitiveEnforcement::Warn,
        );

        assert_eq!(report.blocking_count, 0);
        assert_eq!(report.warning_count, 1);
    }

    #[test]
    fn strict_all_blocks_warnings_and_disables_bypass() {
        let report = strict_all_password_report(1);
        assert!(report.has_blocking_findings());
        assert!(!allows_sensitive_bypass(report.enforcement));
    }

    #[test]
    fn formats_git_hook_message_with_strict_footer() {
        let report = strict_all_password_report(18);
        let message = report.format_git_hook_message();
        assert!(message.contains("Strict sensitive mode is active"));
    }
}