Skip to main content

opencodecommit/
sensitive.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt;
3use std::sync::LazyLock;
4
5use serde::{Deserialize, Serialize};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
8#[serde(rename_all = "kebab-case")]
9pub enum SensitiveTier {
10    ConfirmedSecret,
11    SensitiveArtifact,
12    Suspicious,
13}
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
16#[serde(rename_all = "lowercase")]
17pub enum SensitiveSeverity {
18    Block,
19    Warn,
20}
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
23#[serde(rename_all = "kebab-case")]
24pub enum SensitiveEnforcement {
25    #[default]
26    Warn,
27    BlockHigh,
28    BlockAll,
29    StrictHigh,
30    StrictAll,
31}
32
33#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
34#[serde(rename_all = "kebab-case")]
35pub struct SensitiveAllowlistEntry {
36    #[serde(default)]
37    pub path_regex: Option<String>,
38    #[serde(default)]
39    pub rule: Option<String>,
40    #[serde(default)]
41    pub value_regex: Option<String>,
42}
43
44#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
45pub struct SensitiveFinding {
46    pub category: &'static str,
47    pub rule: &'static str,
48    pub file_path: String,
49    pub line_number: Option<usize>,
50    pub preview: String,
51    pub tier: SensitiveTier,
52    pub severity: SensitiveSeverity,
53}
54
55#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
56pub struct SensitiveReport {
57    pub findings: Vec<SensitiveFinding>,
58    pub enforcement: SensitiveEnforcement,
59    pub warning_count: usize,
60    pub blocking_count: usize,
61    pub has_findings: bool,
62    pub has_blocking_findings: bool,
63}
64
65impl SensitiveReport {
66    pub fn from_findings(findings: Vec<SensitiveFinding>) -> Self {
67        Self::from_findings_with_enforcement(findings, SensitiveEnforcement::Warn)
68    }
69
70    pub fn from_findings_with_enforcement(
71        findings: Vec<SensitiveFinding>,
72        enforcement: SensitiveEnforcement,
73    ) -> Self {
74        let mut warning_count = 0;
75        let mut blocking_count = 0;
76
77        for finding in &findings {
78            if is_blocking_finding(finding, enforcement) {
79                blocking_count += 1;
80            } else {
81                warning_count += 1;
82            }
83        }
84
85        Self {
86            has_findings: !findings.is_empty(),
87            has_blocking_findings: blocking_count > 0,
88            findings,
89            enforcement,
90            warning_count,
91            blocking_count,
92        }
93    }
94
95    pub fn has_findings(&self) -> bool {
96        self.has_findings
97    }
98
99    pub fn has_blocking_findings(&self) -> bool {
100        self.has_blocking_findings
101    }
102
103    pub fn format_occ_commit_message(&self) -> String {
104        if self.has_blocking_findings {
105            let footer = if allows_sensitive_bypass(self.enforcement) {
106                "Sensitive content detected in diff. Use --allow-sensitive to skip this check."
107            } else {
108                "Sensitive content detected in diff. Strict sensitive mode is active; change the config to continue."
109            };
110            self.format_message(footer)
111        } else {
112            self.format_message("Sensitive findings are warnings only.")
113        }
114    }
115
116    pub fn format_git_hook_message(&self) -> String {
117        if self.has_blocking_findings {
118            let footer = if allows_sensitive_bypass(self.enforcement) {
119                "Commit blocked by OpenCodeCommit.\nBypass only OCC for this command with: OCC_ALLOW_SENSITIVE=1 git commit ..."
120            } else {
121                "Commit blocked by OpenCodeCommit.\nStrict sensitive mode is active; change the config to continue."
122            };
123            self.format_message(footer)
124        } else {
125            self.format_message("OpenCodeCommit warning: sensitive findings detected.")
126        }
127    }
128
129    fn format_message(&self, footer: &str) -> String {
130        if self.findings.is_empty() {
131            return footer.to_owned();
132        }
133
134        let mut lines = vec!["Sensitive findings:".to_owned()];
135        for finding in &self.findings {
136            let location = match finding.line_number {
137                Some(line) => format!("{}:{}", finding.file_path, line),
138                None => finding.file_path.clone(),
139            };
140            let action = if is_blocking_finding(finding, self.enforcement) {
141                "BLOCK"
142            } else {
143                "WARN"
144            };
145            lines.push(format!(
146                "- {} {} [{:?} / {}] {}",
147                action, location, finding.tier, finding.rule, finding.preview
148            ));
149        }
150        lines.push(footer.to_owned());
151        lines.join("\n")
152    }
153}
154
155impl fmt::Display for SensitiveReport {
156    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
157        write!(f, "{}", self.format_occ_commit_message())
158    }
159}
160
/// One file section found in a git diff.
#[derive(Debug, Clone)]
struct DiffFileEntry {
    /// Normalized `b/` path taken from the `diff --git` header.
    path: String,
    /// Whether the diff marks this file as deleted.
    deleted: bool,
}
166
/// Path-derived classification flags computed once per file.
#[derive(Debug, Clone)]
struct PathContext {
    /// The path as given to the classifier (forward slashes when it came
    /// through `classify_path`).
    normalized_path: String,
    /// Lowercased copy of `normalized_path`; all path checks run on this.
    lower_path: String,
    /// Matched `SKIP_CONTENT_PATH_RE`: added lines of this file are not scanned.
    skip_content: bool,
    /// Matched `LOW_CONFIDENCE_PATH_RE` or `LOW_CONFIDENCE_EXT_RE`.
    low_confidence: bool,
    /// Matched `TEMPLATE_ENV_RE` (e.g. `.env.example`).
    env_template: bool,
    /// Matched `REAL_ENV_RE` and is not an env template.
    env_file: bool,
    /// Path is `.docker/config.json` or `.dockercfg` (at any depth).
    docker_config: bool,
    /// Path is `.npmrc` (at any depth).
    npmrc: bool,
    /// Path is `kubeconfig` or `.kube/config` (at any depth).
    kube_config: bool,
}
179
180#[derive(Debug, Clone)]
181struct ProviderRule {
182    pattern: regex::Regex,
183    category: &'static str,
184    rule: &'static str,
185    tier: SensitiveTier,
186    severity: SensitiveSeverity,
187}
188
189#[derive(Debug, Clone)]
190struct LineCandidate {
191    category: &'static str,
192    rule: &'static str,
193    file_path: String,
194    line_number: Option<usize>,
195    preview: String,
196    raw_value: Option<String>,
197    tier: SensitiveTier,
198    severity: SensitiveSeverity,
199}
200
201static DIFF_FILE_RE: LazyLock<regex::Regex> =
202    LazyLock::new(|| regex::Regex::new(r"^diff --git a/.+ b/(.+)$").unwrap());
203
204static DIFF_HUNK_RE: LazyLock<regex::Regex> =
205    LazyLock::new(|| regex::Regex::new(r"^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@").unwrap());
206
207static COMMENT_ONLY_RE: LazyLock<regex::Regex> =
208    LazyLock::new(|| regex::Regex::new(r"^\s*(?:#|//|/\*|\*|--|%|rem\b|')").unwrap());
209
210static IPV4_RE: LazyLock<regex::Regex> =
211    LazyLock::new(|| regex::Regex::new(r"\b(?:\d{1,3}\.){3}\d{1,3}\b").unwrap());
212
213static PRIVATE_KEY_HEADER_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
214    regex::Regex::new(r"-----BEGIN (?:(?:RSA|DSA|EC|OPENSSH|PGP) )?PRIVATE KEY(?: BLOCK)?-----")
215        .unwrap()
216});
217
218static ENCRYPTED_PRIVATE_KEY_RE: LazyLock<regex::Regex> =
219    LazyLock::new(|| regex::Regex::new(r"-----BEGIN ENCRYPTED PRIVATE KEY-----").unwrap());
220
221static CONNECTION_STRING_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
222    regex::Regex::new(
223        r#"\b((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|rediss|amqp|amqps|mssql|sqlserver)://)([^/\s:@]+):([^@\s]+)@([^\s'"]+)"#,
224    )
225    .unwrap()
226});
227
228static BEARER_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
229    regex::Regex::new(
230        r#"(?i)\b(?:authorization|bearer)\b\s*[:=]\s*['"]?bearer\s+([A-Za-z0-9._~+/\-]{20,})"#,
231    )
232    .unwrap()
233});
234
235static JWT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
236    regex::Regex::new(r"\beyJ[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_-]{10,}\.[A-Za-z0-9_.+/=-]{10,}\b")
237        .unwrap()
238});
239
240static DOCKER_AUTH_RE: LazyLock<regex::Regex> =
241    LazyLock::new(|| regex::Regex::new(r#""auth"\s*:\s*"([^"]+)""#).unwrap());
242
243static KUBECONFIG_AUTH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
244    regex::Regex::new(r#"(?:^|\b)(token|client-key-data)\b\s*:\s*("?[^"\s]+"?)"#).unwrap()
245});
246
247static NPM_LITERAL_AUTH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
248    regex::Regex::new(
249        r#"(?i)(?::|^)_(?:authToken|auth|password)\s*=\s*([^\s#]+)|//[^\s]+:_authToken\s*=\s*([^\s#]+)"#,
250    )
251    .unwrap()
252});
253
254static GENERIC_ASSIGNMENT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
255    regex::Regex::new(
256        "(?i)\\b([A-Za-z0-9_.-]{0,40}(?:password|passwd|pwd|secret|token|api[_-]?key|apikey|auth[_-]?token|access[_-]?token|private[_-]?key|client[_-]?secret|credentials?|database[_-]?url|db[_-]?password|webhook[_-]?secret|signing[_-]?key|encryption[_-]?key)[A-Za-z0-9_.-]{0,20})\\b[\"']?\\s*[:=]\\s*(\"[^\"]*\"|'[^']*'|[^\\s,#;]+)",
257    )
258    .unwrap()
259});
260
261static TEMPLATE_ENV_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
262    regex::Regex::new(
263        r"(?:^|/)(?:\.env\.(?:example|sample|template|defaults|schema|spec|test|ci)|[^/]*\.(?:example|sample|template)\.env)$",
264    )
265    .unwrap()
266});
267
268static REAL_ENV_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
269    regex::Regex::new(r"(?:^|/)\.env(?:\.[^/]+)?$|(?:^|/)\.envrc$|(?:^|/)\.direnv/").unwrap()
270});
271
272static LOW_CONFIDENCE_PATH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
273    regex::Regex::new(
274        r"(?:^|/)(?:test|tests|__tests__|spec|__spec__|docs|documentation|example|examples|sample|samples|fixture|fixtures|__fixtures__|testdata|test-data|mock|mocks|__mocks__|stubs?)(?:/|$)",
275    )
276    .unwrap()
277});
278
279static LOW_CONFIDENCE_EXT_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
280    regex::Regex::new(r"\.(?:md|rst|adoc|txt|d\.ts|schema\.json|schema\.ya?ml)$").unwrap()
281});
282
283static SKIP_CONTENT_PATH_RE: LazyLock<regex::Regex> = LazyLock::new(|| {
284    regex::Regex::new(
285        r"(?i)(?:^|/)(?:vendor|node_modules|third_party|\.git)(?:/|$)|(?:^|/)(?:package-lock\.json|yarn\.lock|pnpm-lock\.yaml|Gemfile\.lock|Cargo\.lock|poetry\.lock|composer\.lock|go\.sum|Pipfile\.lock)$|\.(?:png|jpe?g|gif|bmp|ico|svg|tiff|webp|mp[34]|avi|mov|wav|flac|ogg|woff2?|eot|otf|ttf|exe|dll|so|dylib|bin|o|a|class|pyc|pyo|wasm|zip|tar|gz|bz2|xz|rar|7z|jar|war|ear)$",
286    )
287    .unwrap()
288});
289
290static PROVIDER_RULES: LazyLock<Vec<ProviderRule>> = LazyLock::new(|| {
291    [
292        (
293            r"github_pat_[A-Za-z0-9]{22}_[A-Za-z0-9]{59}",
294            "token",
295            "github-fine-grained-token",
296            SensitiveTier::ConfirmedSecret,
297            SensitiveSeverity::Block,
298        ),
299        (
300            r"gh[pousr]_[A-Za-z0-9]{36,76}",
301            "token",
302            "github-token",
303            SensitiveTier::ConfirmedSecret,
304            SensitiveSeverity::Block,
305        ),
306        (
307            r"(?:AKIA|ASIA)[A-Z0-9]{16}",
308            "token",
309            "aws-access-key",
310            SensitiveTier::ConfirmedSecret,
311            SensitiveSeverity::Block,
312        ),
313        (
314            r"gl(?:pat|dt|ptt|rt)-[0-9A-Za-z_-]{20,}",
315            "token",
316            "gitlab-token",
317            SensitiveTier::ConfirmedSecret,
318            SensitiveSeverity::Block,
319        ),
320        (
321            r"xoxb-[0-9]+-[0-9A-Za-z]+-[A-Za-z0-9]+",
322            "token",
323            "slack-bot-token",
324            SensitiveTier::ConfirmedSecret,
325            SensitiveSeverity::Block,
326        ),
327        (
328            r"(?i)xoxp-[0-9]+-[0-9]+-[0-9]+-[a-f0-9]+",
329            "token",
330            "slack-user-token",
331            SensitiveTier::ConfirmedSecret,
332            SensitiveSeverity::Block,
333        ),
334        (
335            r"xapp-1-[A-Z0-9]+-[0-9]+-[A-Za-z0-9]+",
336            "token",
337            "slack-app-token",
338            SensitiveTier::ConfirmedSecret,
339            SensitiveSeverity::Block,
340        ),
341        (
342            r"https://hooks\.slack\.com/services/T[a-zA-Z0-9_]+/B[a-zA-Z0-9_]+/[a-zA-Z0-9_]+",
343            "webhook",
344            "slack-webhook",
345            SensitiveTier::ConfirmedSecret,
346            SensitiveSeverity::Block,
347        ),
348        (
349            r"sk_live_[0-9A-Za-z]{24,}",
350            "token",
351            "stripe-live-secret-key",
352            SensitiveTier::ConfirmedSecret,
353            SensitiveSeverity::Block,
354        ),
355        (
356            r"rk_live_[0-9A-Za-z]{24,}",
357            "token",
358            "stripe-live-restricted-key",
359            SensitiveTier::ConfirmedSecret,
360            SensitiveSeverity::Block,
361        ),
362        (
363            r"sk_test_[0-9A-Za-z]{24,}",
364            "token",
365            "stripe-test-secret-key",
366            SensitiveTier::Suspicious,
367            SensitiveSeverity::Warn,
368        ),
369        (
370            r"rk_test_[0-9A-Za-z]{24,}",
371            "token",
372            "stripe-test-restricted-key",
373            SensitiveTier::Suspicious,
374            SensitiveSeverity::Warn,
375        ),
376        (
377            r"SG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}",
378            "token",
379            "sendgrid-api-key",
380            SensitiveTier::ConfirmedSecret,
381            SensitiveSeverity::Block,
382        ),
383        (
384            r"sk-proj-[A-Za-z0-9_-]{20,}",
385            "token",
386            "openai-project-key",
387            SensitiveTier::ConfirmedSecret,
388            SensitiveSeverity::Block,
389        ),
390        (
391            r"sk-svcacct-[A-Za-z0-9_-]{20,}",
392            "token",
393            "openai-service-account-key",
394            SensitiveTier::ConfirmedSecret,
395            SensitiveSeverity::Block,
396        ),
397        (
398            r"\bsk-[A-Za-z0-9]{32,}\b",
399            "token",
400            "openai-legacy-key",
401            SensitiveTier::ConfirmedSecret,
402            SensitiveSeverity::Block,
403        ),
404        (
405            r"sk-ant-(?:api03|admin01)-[A-Za-z0-9_-]{80,}",
406            "token",
407            "anthropic-key",
408            SensitiveTier::ConfirmedSecret,
409            SensitiveSeverity::Block,
410        ),
411        (
412            r"AIza[0-9A-Za-z_-]{35}",
413            "token",
414            "gcp-api-key",
415            SensitiveTier::ConfirmedSecret,
416            SensitiveSeverity::Block,
417        ),
418        (
419            r"GOCSPX-[A-Za-z0-9_-]{28}",
420            "token",
421            "gcp-oauth-secret",
422            SensitiveTier::ConfirmedSecret,
423            SensitiveSeverity::Block,
424        ),
425        (
426            r"npm_[A-Za-z0-9]{36}",
427            "token",
428            "npm-token",
429            SensitiveTier::ConfirmedSecret,
430            SensitiveSeverity::Block,
431        ),
432        (
433            r"pypi-[A-Za-z0-9_-]{50,}",
434            "token",
435            "pypi-token",
436            SensitiveTier::ConfirmedSecret,
437            SensitiveSeverity::Block,
438        ),
439        (
440            r"dckr_pat_[A-Za-z0-9_-]{20,}",
441            "token",
442            "docker-token",
443            SensitiveTier::ConfirmedSecret,
444            SensitiveSeverity::Block,
445        ),
446        (
447            r"sntrys_[A-Za-z0-9+/=_-]{20,}",
448            "token",
449            "sentry-token",
450            SensitiveTier::ConfirmedSecret,
451            SensitiveSeverity::Block,
452        ),
453        (
454            r"(?i)key-[0-9a-f]{32}",
455            "token",
456            "mailgun-key",
457            SensitiveTier::ConfirmedSecret,
458            SensitiveSeverity::Block,
459        ),
460        (
461            r"hvs\.[A-Za-z0-9_-]{24,}",
462            "token",
463            "vault-token",
464            SensitiveTier::ConfirmedSecret,
465            SensitiveSeverity::Block,
466        ),
467        (
468            r"https://discord(?:app)?\.com/api/webhooks/[0-9]+/[A-Za-z0-9_-]+",
469            "webhook",
470            "discord-webhook",
471            SensitiveTier::ConfirmedSecret,
472            SensitiveSeverity::Block,
473        ),
474        (
475            "(?i)https://[a-z0-9.-]+\\.webhook\\.office\\.com/[^\\s'\"`]+",
476            "webhook",
477            "teams-webhook",
478            SensitiveTier::ConfirmedSecret,
479            SensitiveSeverity::Block,
480        ),
481        (
482            r"AGE-SECRET-KEY-1[qpzry9x8gf2tvdw0s3jn54khce6mua7l]{58}",
483            "key",
484            "age-secret-key",
485            SensitiveTier::ConfirmedSecret,
486            SensitiveSeverity::Block,
487        ),
488    ]
489    .into_iter()
490    .map(|(pattern, category, rule, tier, severity)| ProviderRule {
491        pattern: regex::Regex::new(pattern).unwrap(),
492        category,
493        rule,
494        tier,
495        severity,
496    })
497    .collect()
498});
499
500pub fn allows_sensitive_bypass(enforcement: SensitiveEnforcement) -> bool {
501    matches!(
502        enforcement,
503        SensitiveEnforcement::Warn
504            | SensitiveEnforcement::BlockHigh
505            | SensitiveEnforcement::BlockAll
506    )
507}
508
509pub fn is_blocking_finding(finding: &SensitiveFinding, enforcement: SensitiveEnforcement) -> bool {
510    match enforcement {
511        SensitiveEnforcement::Warn => false,
512        SensitiveEnforcement::BlockHigh | SensitiveEnforcement::StrictHigh => {
513            finding.severity == SensitiveSeverity::Block
514        }
515        SensitiveEnforcement::BlockAll | SensitiveEnforcement::StrictAll => true,
516    }
517}
518
519pub fn scan_diff_for_sensitive_content(diff: &str, changed_files: &[String]) -> SensitiveReport {
520    scan_diff_for_sensitive_content_with_options(
521        diff,
522        changed_files,
523        SensitiveEnforcement::Warn,
524        &[],
525    )
526}
527
528pub fn scan_diff_for_sensitive_content_with_options(
529    diff: &str,
530    changed_files: &[String],
531    enforcement: SensitiveEnforcement,
532    allowlist: &[SensitiveAllowlistEntry],
533) -> SensitiveReport {
534    let deletion_state: HashMap<String, bool> = parse_diff_file_entries(diff)
535        .into_iter()
536        .map(|entry| (entry.path, entry.deleted))
537        .collect();
538
539    let mut findings = Vec::new();
540    for file in changed_files {
541        let info = classify_path(file);
542        if deletion_state
543            .get(info.normalized_path.as_str())
544            .copied()
545            .unwrap_or(false)
546        {
547            continue;
548        }
549
550        findings.extend(scan_file_path(&info, allowlist));
551    }
552
553    let fallback_file = changed_files
554        .first()
555        .filter(|_| changed_files.len() == 1)
556        .map(|file| normalize_path(file));
557
558    let mut current_file = fallback_file;
559    let mut current_info = current_file.as_deref().map(classify_path_from_normalized);
560    let mut current_line: Option<usize> = None;
561
562    for line in diff.lines() {
563        if let Some(captures) = DIFF_FILE_RE.captures(line) {
564            current_file = Some(normalize_path(&captures[1]));
565            current_info = current_file.as_deref().map(classify_path_from_normalized);
566            current_line = None;
567            continue;
568        }
569
570        if let Some(captures) = DIFF_HUNK_RE.captures(line) {
571            current_line = captures[1].parse::<usize>().ok();
572            continue;
573        }
574
575        if line.starts_with("+++") {
576            continue;
577        }
578
579        if let Some(added_line) = line.strip_prefix('+') {
580            let file_path = current_file.clone().unwrap_or_else(|| "unknown".to_owned());
581            let info = current_info
582                .clone()
583                .unwrap_or_else(|| classify_path_from_normalized(file_path.as_str()));
584            if !info.skip_content {
585                findings.extend(scan_added_line(
586                    &file_path,
587                    &info,
588                    added_line,
589                    current_line,
590                    allowlist,
591                ));
592            }
593
594            if let Some(line_no) = current_line.as_mut() {
595                *line_no += 1;
596            }
597            continue;
598        }
599
600        if line.starts_with(' ')
601            && let Some(line_no) = current_line.as_mut()
602        {
603            *line_no += 1;
604        }
605    }
606
607    SensitiveReport::from_findings_with_enforcement(dedupe_findings(findings), enforcement)
608}
609
610fn parse_diff_file_entries(diff: &str) -> Vec<DiffFileEntry> {
611    let mut entries = Vec::new();
612    let mut current: Option<DiffFileEntry> = None;
613
614    for line in diff.lines() {
615        if let Some(captures) = DIFF_FILE_RE.captures(line) {
616            if let Some(entry) = current.take() {
617                entries.push(entry);
618            }
619            current = Some(DiffFileEntry {
620                path: normalize_path(&captures[1]),
621                deleted: false,
622            });
623            continue;
624        }
625
626        if (line == "deleted file mode 100644"
627            || line == "deleted file mode 100755"
628            || line == "+++ /dev/null")
629            && let Some(entry) = current.as_mut()
630        {
631            entry.deleted = true;
632        }
633    }
634
635    if let Some(entry) = current {
636        entries.push(entry);
637    }
638
639    entries
640}
641
/// Returns `file_path` with every backslash replaced by a forward slash.
fn normalize_path(file_path: &str) -> String {
    file_path.split('\\').collect::<Vec<_>>().join("/")
}
645
646fn classify_path(file_path: &str) -> PathContext {
647    classify_path_from_normalized(&normalize_path(file_path))
648}
649
650fn classify_path_from_normalized(file_path: &str) -> PathContext {
651    let normalized_path = file_path.to_owned();
652    let lower_path = normalized_path.to_lowercase();
653    let env_template = TEMPLATE_ENV_RE.is_match(lower_path.as_str());
654    let env_file = REAL_ENV_RE.is_match(lower_path.as_str()) && !env_template;
655
656    PathContext {
657        normalized_path,
658        lower_path: lower_path.clone(),
659        skip_content: SKIP_CONTENT_PATH_RE.is_match(lower_path.as_str()),
660        low_confidence: LOW_CONFIDENCE_PATH_RE.is_match(lower_path.as_str())
661            || LOW_CONFIDENCE_EXT_RE.is_match(lower_path.as_str()),
662        env_template,
663        env_file,
664        docker_config: lower_path.ends_with("/.docker/config.json")
665            || lower_path == ".docker/config.json"
666            || lower_path.ends_with("/.dockercfg")
667            || lower_path == ".dockercfg",
668        npmrc: lower_path.ends_with("/.npmrc") || lower_path == ".npmrc",
669        kube_config: lower_path.ends_with("/kubeconfig")
670            || lower_path == "kubeconfig"
671            || lower_path.ends_with("/.kube/config")
672            || lower_path == ".kube/config",
673    }
674}
675
676fn scan_file_path(
677    info: &PathContext,
678    allowlist: &[SensitiveAllowlistEntry],
679) -> Vec<SensitiveFinding> {
680    let mut findings = Vec::new();
681
682    let mut push = |category: &'static str,
683                    rule: &'static str,
684                    tier: SensitiveTier,
685                    severity: SensitiveSeverity| {
686        push_candidate(
687            &mut findings,
688            allowlist,
689            LineCandidate {
690                category,
691                rule,
692                file_path: info.normalized_path.clone(),
693                line_number: None,
694                preview: info.normalized_path.clone(),
695                raw_value: None,
696                tier,
697                severity,
698            },
699        );
700    };
701
702    if info.env_file {
703        push(
704            "artifact",
705            "env-file",
706            SensitiveTier::SensitiveArtifact,
707            SensitiveSeverity::Block,
708        );
709    } else if info.lower_path.ends_with("/.netrc")
710        || info.lower_path == ".netrc"
711        || info.lower_path.ends_with("/.git-credentials")
712        || info.lower_path == ".git-credentials"
713    {
714        push(
715            "artifact",
716            "credential-store-file",
717            SensitiveTier::SensitiveArtifact,
718            SensitiveSeverity::Block,
719        );
720    } else if info.docker_config {
721        push(
722            "artifact",
723            "docker-config-file",
724            SensitiveTier::Suspicious,
725            SensitiveSeverity::Warn,
726        );
727    } else if info.npmrc {
728        push(
729            "artifact",
730            "npmrc-file",
731            SensitiveTier::Suspicious,
732            SensitiveSeverity::Warn,
733        );
734    } else if info.lower_path.ends_with("/.pypirc")
735        || info.lower_path == ".pypirc"
736        || info.lower_path.ends_with("/.gem/credentials")
737        || info.lower_path == ".gem/credentials"
738        || regex::Regex::new(r"(?:^|/)\.cargo/credentials(?:\.toml)?$")
739            .unwrap()
740            .is_match(info.lower_path.as_str())
741    {
742        push(
743            "artifact",
744            "package-manager-credential-file",
745            SensitiveTier::SensitiveArtifact,
746            SensitiveSeverity::Block,
747        );
748    } else if regex::Regex::new(r"terraform\.tfstate(?:\.backup)?$")
749        .unwrap()
750        .is_match(info.lower_path.as_str())
751        || info.lower_path.contains("/.terraform/")
752    {
753        push(
754            "artifact",
755            "terraform-state-file",
756            SensitiveTier::SensitiveArtifact,
757            SensitiveSeverity::Block,
758        );
759    } else if info.lower_path.ends_with(".tfvars") || info.lower_path.ends_with(".auto.tfvars") {
760        push(
761            "artifact",
762            "terraform-vars-file",
763            SensitiveTier::Suspicious,
764            SensitiveSeverity::Warn,
765        );
766    } else if info.kube_config {
767        push(
768            "artifact",
769            "kubeconfig-file",
770            SensitiveTier::SensitiveArtifact,
771            SensitiveSeverity::Block,
772        );
773    } else if regex::Regex::new(r"(?:^|/)credentials\.json$")
774        .unwrap()
775        .is_match(info.lower_path.as_str())
776        || regex::Regex::new(r"(?:^|/)service[-_]?account.*\.json$")
777            .unwrap()
778            .is_match(info.lower_path.as_str())
779    {
780        push(
781            "artifact",
782            "service-account-file",
783            SensitiveTier::SensitiveArtifact,
784            SensitiveSeverity::Block,
785        );
786    } else if regex::Regex::new(r"(?:^|/)id_(?:rsa|ed25519|ecdsa|dsa)$")
787        .unwrap()
788        .is_match(info.lower_path.as_str())
789        || regex::Regex::new(r"(?:^|/)\.ssh/")
790            .unwrap()
791            .is_match(info.lower_path.as_str())
792    {
793        push(
794            "artifact",
795            "ssh-private-key-file",
796            SensitiveTier::SensitiveArtifact,
797            SensitiveSeverity::Block,
798        );
799    } else if info.lower_path.ends_with(".pem") {
800        push(
801            "artifact",
802            "pem-file",
803            SensitiveTier::Suspicious,
804            SensitiveSeverity::Warn,
805        );
806    } else if regex::Regex::new(r"\.(?:p12|pfx|keystore|jks|pepk|ppk|key)$")
807        .unwrap()
808        .is_match(info.lower_path.as_str())
809        || info.lower_path.ends_with("/key.properties")
810        || info.lower_path == "key.properties"
811    {
812        push(
813            "artifact",
814            "key-material-file",
815            SensitiveTier::SensitiveArtifact,
816            SensitiveSeverity::Block,
817        );
818    } else if info.lower_path.ends_with(".har") {
819        push(
820            "artifact",
821            "http-archive-file",
822            SensitiveTier::SensitiveArtifact,
823            SensitiveSeverity::Block,
824        );
825    } else if regex::Regex::new(r"\.(?:hprof|core|dmp|mdmp|pcap|pcapng)$")
826        .unwrap()
827        .is_match(info.lower_path.as_str())
828        || regex::Regex::new(r"core\.\d+$")
829            .unwrap()
830            .is_match(info.lower_path.as_str())
831    {
832        push(
833            "artifact",
834            "dump-file",
835            SensitiveTier::SensitiveArtifact,
836            SensitiveSeverity::Block,
837        );
838    } else if info.lower_path.ends_with(".mobileprovision") {
839        push(
840            "artifact",
841            "mobileprovision-file",
842            SensitiveTier::Suspicious,
843            SensitiveSeverity::Warn,
844        );
845    } else if regex::Regex::new(r"\.(?:sqlite|sqlite3|db|sql)$")
846        .unwrap()
847        .is_match(info.lower_path.as_str())
848    {
849        push(
850            "artifact",
851            "database-artifact-file",
852            SensitiveTier::Suspicious,
853            SensitiveSeverity::Warn,
854        );
855    } else if info.lower_path.ends_with(".map") {
856        push(
857            "artifact",
858            "source-map-file",
859            SensitiveTier::Suspicious,
860            SensitiveSeverity::Warn,
861        );
862    } else if info.lower_path.ends_with("/.htpasswd") || info.lower_path == ".htpasswd" {
863        push(
864            "artifact",
865            "auth-file",
866            SensitiveTier::SensitiveArtifact,
867            SensitiveSeverity::Block,
868        );
869    }
870
871    findings
872}
873
874fn scan_added_line(
875    file_path: &str,
876    info: &PathContext,
877    line: &str,
878    line_number: Option<usize>,
879    allowlist: &[SensitiveAllowlistEntry],
880) -> Vec<SensitiveFinding> {
881    let provider_matched = has_provider_match(line);
882    let structural_matched = has_structural_match(info, line);
883    let providers = scan_provider_line(file_path, line, line_number, allowlist);
884    let structural = scan_structural_line(file_path, info, line, line_number, allowlist);
885    if provider_matched || structural_matched {
886        return dedupe_findings([providers, structural].concat());
887    }
888
889    if COMMENT_ONLY_RE.is_match(line) {
890        return vec![];
891    }
892
893    let generic = scan_generic_assignments(file_path, info, line, line_number, allowlist);
894    let network = scan_ip_line(file_path, line, line_number, allowlist);
895    dedupe_findings([generic, network].concat())
896}
897
898fn scan_provider_line(
899    file_path: &str,
900    line: &str,
901    line_number: Option<usize>,
902    allowlist: &[SensitiveAllowlistEntry],
903) -> Vec<SensitiveFinding> {
904    let mut findings = Vec::new();
905
906    for rule in PROVIDER_RULES.iter() {
907        for matched in rule.pattern.find_iter(line) {
908            let value = matched.as_str();
909            if is_placeholder_value(value) {
910                continue;
911            }
912
913            push_candidate(
914                &mut findings,
915                allowlist,
916                LineCandidate {
917                    category: rule.category,
918                    rule: rule.rule,
919                    file_path: file_path.to_owned(),
920                    line_number,
921                    preview: format_line_preview(line),
922                    raw_value: Some(value.to_owned()),
923                    tier: rule.tier,
924                    severity: rule.severity,
925                },
926            );
927        }
928    }
929
930    dedupe_findings(findings)
931}
932
933fn has_provider_match(line: &str) -> bool {
934    PROVIDER_RULES.iter().any(|rule| {
935        rule.pattern
936            .find_iter(line)
937            .any(|matched| !is_placeholder_value(matched.as_str()))
938    })
939}
940
941fn scan_structural_line(
942    file_path: &str,
943    info: &PathContext,
944    line: &str,
945    line_number: Option<usize>,
946    allowlist: &[SensitiveAllowlistEntry],
947) -> Vec<SensitiveFinding> {
948    let mut findings = Vec::new();
949
950    if PRIVATE_KEY_HEADER_RE.is_match(line) {
951        push_candidate(
952            &mut findings,
953            allowlist,
954            LineCandidate {
955                category: "key",
956                rule: "private-key-block",
957                file_path: file_path.to_owned(),
958                line_number,
959                preview: format_line_preview(line),
960                raw_value: Some(line.trim().to_owned()),
961                tier: SensitiveTier::ConfirmedSecret,
962                severity: SensitiveSeverity::Block,
963            },
964        );
965    } else if ENCRYPTED_PRIVATE_KEY_RE.is_match(line) {
966        push_candidate(
967            &mut findings,
968            allowlist,
969            LineCandidate {
970                category: "key",
971                rule: "encrypted-private-key-block",
972                file_path: file_path.to_owned(),
973                line_number,
974                preview: format_line_preview(line),
975                raw_value: Some(line.trim().to_owned()),
976                tier: SensitiveTier::Suspicious,
977                severity: SensitiveSeverity::Warn,
978            },
979        );
980    }
981
982    for captures in CONNECTION_STRING_RE.captures_iter(line) {
983        let password = clean_value(captures.get(3).map(|m| m.as_str()).unwrap_or_default());
984        let host = captures.get(4).map(|m| m.as_str()).unwrap_or_default();
985        if is_placeholder_value(password.as_str()) {
986            continue;
987        }
988
989        let severity = if is_local_host(host) {
990            SensitiveSeverity::Warn
991        } else {
992            SensitiveSeverity::Block
993        };
994        let tier = if severity == SensitiveSeverity::Block {
995            SensitiveTier::ConfirmedSecret
996        } else {
997            SensitiveTier::Suspicious
998        };
999
1000        push_candidate(
1001            &mut findings,
1002            allowlist,
1003            LineCandidate {
1004                category: "connection",
1005                rule: "credential-connection-string",
1006                file_path: file_path.to_owned(),
1007                line_number,
1008                preview: format_line_preview(line),
1009                raw_value: Some(password),
1010                tier,
1011                severity,
1012            },
1013        );
1014    }
1015
1016    for captures in BEARER_RE.captures_iter(line) {
1017        let token = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1018        if is_placeholder_value(token.as_str()) {
1019            continue;
1020        }
1021
1022        push_candidate(
1023            &mut findings,
1024            allowlist,
1025            LineCandidate {
1026                category: "token",
1027                rule: "bearer-token",
1028                file_path: file_path.to_owned(),
1029                line_number,
1030                preview: format_line_preview(line),
1031                raw_value: Some(token),
1032                tier: SensitiveTier::ConfirmedSecret,
1033                severity: SensitiveSeverity::Block,
1034            },
1035        );
1036    }
1037
1038    for matched in JWT_RE.find_iter(line) {
1039        let token = matched.as_str();
1040        if is_placeholder_value(token) {
1041            continue;
1042        }
1043
1044        push_candidate(
1045            &mut findings,
1046            allowlist,
1047            LineCandidate {
1048                category: "token",
1049                rule: "jwt-token",
1050                file_path: file_path.to_owned(),
1051                line_number,
1052                preview: format_line_preview(line),
1053                raw_value: Some(token.to_owned()),
1054                tier: SensitiveTier::Suspicious,
1055                severity: SensitiveSeverity::Warn,
1056            },
1057        );
1058    }
1059
1060    if info.docker_config {
1061        for captures in DOCKER_AUTH_RE.captures_iter(line) {
1062            let value = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1063            if is_placeholder_value(value.as_str()) {
1064                continue;
1065            }
1066
1067            push_candidate(
1068                &mut findings,
1069                allowlist,
1070                LineCandidate {
1071                    category: "credential",
1072                    rule: "docker-config-auth",
1073                    file_path: file_path.to_owned(),
1074                    line_number,
1075                    preview: format_line_preview(line),
1076                    raw_value: Some(value),
1077                    tier: SensitiveTier::ConfirmedSecret,
1078                    severity: SensitiveSeverity::Block,
1079                },
1080            );
1081        }
1082    }
1083
1084    if info.kube_config {
1085        for captures in KUBECONFIG_AUTH_RE.captures_iter(line) {
1086            let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1087            if is_placeholder_value(value.as_str()) {
1088                continue;
1089            }
1090
1091            push_candidate(
1092                &mut findings,
1093                allowlist,
1094                LineCandidate {
1095                    category: "credential",
1096                    rule: "kubeconfig-auth",
1097                    file_path: file_path.to_owned(),
1098                    line_number,
1099                    preview: format_line_preview(line),
1100                    raw_value: Some(value),
1101                    tier: SensitiveTier::ConfirmedSecret,
1102                    severity: SensitiveSeverity::Block,
1103                },
1104            );
1105        }
1106    }
1107
1108    if info.npmrc {
1109        for captures in NPM_LITERAL_AUTH_RE.captures_iter(line) {
1110            let value = clean_value(
1111                captures
1112                    .get(1)
1113                    .or_else(|| captures.get(2))
1114                    .map(|m| m.as_str())
1115                    .unwrap_or_default(),
1116            );
1117            if value.is_empty() || is_placeholder_value(value.as_str()) {
1118                continue;
1119            }
1120
1121            push_candidate(
1122                &mut findings,
1123                allowlist,
1124                LineCandidate {
1125                    category: "credential",
1126                    rule: "npm-auth",
1127                    file_path: file_path.to_owned(),
1128                    line_number,
1129                    preview: format_line_preview(line),
1130                    raw_value: Some(value),
1131                    tier: SensitiveTier::ConfirmedSecret,
1132                    severity: SensitiveSeverity::Block,
1133                },
1134            );
1135        }
1136    }
1137
1138    dedupe_findings(findings)
1139}
1140
1141fn has_structural_match(info: &PathContext, line: &str) -> bool {
1142    if PRIVATE_KEY_HEADER_RE.is_match(line) || ENCRYPTED_PRIVATE_KEY_RE.is_match(line) {
1143        return true;
1144    }
1145
1146    if CONNECTION_STRING_RE.captures_iter(line).any(|captures| {
1147        let password = clean_value(captures.get(3).map(|m| m.as_str()).unwrap_or_default());
1148        !is_placeholder_value(password.as_str())
1149    }) || BEARER_RE.captures_iter(line).any(|captures| {
1150        let token = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1151        !is_placeholder_value(token.as_str())
1152    }) || JWT_RE
1153        .find_iter(line)
1154        .any(|matched| !is_placeholder_value(matched.as_str()))
1155    {
1156        return true;
1157    }
1158
1159    if info.docker_config
1160        && DOCKER_AUTH_RE.captures_iter(line).any(|captures| {
1161            let value = clean_value(captures.get(1).map(|m| m.as_str()).unwrap_or_default());
1162            !is_placeholder_value(value.as_str())
1163        })
1164    {
1165        return true;
1166    }
1167
1168    if info.kube_config
1169        && KUBECONFIG_AUTH_RE.captures_iter(line).any(|captures| {
1170            let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1171            !is_placeholder_value(value.as_str())
1172        })
1173    {
1174        return true;
1175    }
1176
1177    info.npmrc
1178        && NPM_LITERAL_AUTH_RE.captures_iter(line).any(|captures| {
1179            let value = clean_value(
1180                captures
1181                    .get(1)
1182                    .or_else(|| captures.get(2))
1183                    .map(|m| m.as_str())
1184                    .unwrap_or_default(),
1185            );
1186            !value.is_empty() && !is_placeholder_value(value.as_str())
1187        })
1188}
1189
1190fn scan_generic_assignments(
1191    file_path: &str,
1192    info: &PathContext,
1193    line: &str,
1194    line_number: Option<usize>,
1195    allowlist: &[SensitiveAllowlistEntry],
1196) -> Vec<SensitiveFinding> {
1197    let mut findings = Vec::new();
1198
1199    for captures in GENERIC_ASSIGNMENT_RE.captures_iter(line) {
1200        let value = clean_value(captures.get(2).map(|m| m.as_str()).unwrap_or_default());
1201        if value.is_empty()
1202            || is_placeholder_value(value.as_str())
1203            || is_reference_value(value.as_str())
1204            || !passes_generic_secret_heuristics(value.as_str())
1205        {
1206            continue;
1207        }
1208
1209        let _downgraded = info.low_confidence || info.env_template;
1210        push_candidate(
1211            &mut findings,
1212            allowlist,
1213            LineCandidate {
1214                category: "credential",
1215                rule: "generic-secret-assignment",
1216                file_path: file_path.to_owned(),
1217                line_number,
1218                preview: format_line_preview(line),
1219                raw_value: Some(value),
1220                tier: SensitiveTier::Suspicious,
1221                severity: SensitiveSeverity::Warn,
1222            },
1223        );
1224    }
1225
1226    dedupe_findings(findings)
1227}
1228
1229fn scan_ip_line(
1230    file_path: &str,
1231    line: &str,
1232    line_number: Option<usize>,
1233    allowlist: &[SensitiveAllowlistEntry],
1234) -> Vec<SensitiveFinding> {
1235    let mut findings = Vec::new();
1236
1237    for matched in IPV4_RE.find_iter(line) {
1238        let ip = matched.as_str();
1239        let Some(parsed) = parse_ipv4(ip) else {
1240            continue;
1241        };
1242        if !is_public_ipv4(parsed) {
1243            continue;
1244        }
1245
1246        push_candidate(
1247            &mut findings,
1248            allowlist,
1249            LineCandidate {
1250                category: "network",
1251                rule: "public-ipv4",
1252                file_path: file_path.to_owned(),
1253                line_number,
1254                preview: format_line_preview(line),
1255                raw_value: Some(ip.to_owned()),
1256                tier: SensitiveTier::Suspicious,
1257                severity: SensitiveSeverity::Warn,
1258            },
1259        );
1260    }
1261
1262    dedupe_findings(findings)
1263}
1264
1265fn push_candidate(
1266    findings: &mut Vec<SensitiveFinding>,
1267    allowlist: &[SensitiveAllowlistEntry],
1268    candidate: LineCandidate,
1269) {
1270    if matches_allowlist(&candidate, allowlist) {
1271        return;
1272    }
1273
1274    findings.push(SensitiveFinding {
1275        category: candidate.category,
1276        rule: candidate.rule,
1277        file_path: candidate.file_path,
1278        line_number: candidate.line_number,
1279        preview: candidate.preview,
1280        tier: candidate.tier,
1281        severity: candidate.severity,
1282    });
1283}
1284
1285fn matches_allowlist(candidate: &LineCandidate, allowlist: &[SensitiveAllowlistEntry]) -> bool {
1286    allowlist.iter().any(|entry| {
1287        let path_ok = entry
1288            .path_regex
1289            .as_deref()
1290            .map(|pattern| {
1291                regex::Regex::new(pattern)
1292                    .unwrap()
1293                    .is_match(&candidate.file_path)
1294            })
1295            .unwrap_or(true);
1296        let rule_ok = entry
1297            .rule
1298            .as_deref()
1299            .map(|rule| rule == candidate.rule)
1300            .unwrap_or(true);
1301        let value_target = candidate.raw_value.as_deref().unwrap_or(&candidate.preview);
1302        let value_ok = entry
1303            .value_regex
1304            .as_deref()
1305            .map(|pattern| regex::Regex::new(pattern).unwrap().is_match(value_target))
1306            .unwrap_or(true);
1307        path_ok && rule_ok && value_ok
1308    })
1309}
1310
1311fn dedupe_findings(findings: Vec<SensitiveFinding>) -> Vec<SensitiveFinding> {
1312    let mut seen = HashSet::new();
1313    findings
1314        .into_iter()
1315        .filter(|finding| {
1316            let key = format!(
1317                "{}::{}::{}::{}",
1318                finding.rule,
1319                finding.file_path,
1320                finding.line_number.unwrap_or_default(),
1321                finding.preview
1322            );
1323            seen.insert(key)
1324        })
1325        .collect()
1326}
1327
/// Strips decoration from a captured value.
///
/// Surrounding whitespace is removed first, then any run of leading quote
/// characters (`"`, `'`, `` ` ``), then any run of trailing quote characters,
/// semicolons or commas. Whitespace exposed by the quote stripping is left
/// in place.
fn clean_value(value: &str) -> String {
    const LEADING: &[char] = &['"', '\'', '`'];
    const TRAILING: &[char] = &['"', '\'', '`', ';', ','];

    let unpadded = value.trim();
    let without_opening = unpadded.trim_start_matches(LEADING);
    let bare = without_opening.trim_end_matches(TRAILING);
    bare.to_owned()
}
1335
/// Builds the preview text shown for a finding.
///
/// The line is trimmed; if the result is longer than 160 bytes it is cut and
/// suffixed with `...` so that the preview is at most 160 bytes long.
///
/// The cut point is moved back to the nearest character boundary, because
/// `String::truncate` panics when asked to cut inside a multi-byte character.
fn format_line_preview(line: impl Into<String>) -> String {
    const MAX_LEN: usize = 160;
    const ELLIPSIS: &str = "...";

    let line = line.into();
    let mut preview = line.trim().to_owned();

    if preview.len() > MAX_LEN {
        let mut cut = MAX_LEN - ELLIPSIS.len();
        // Index 0 is always a boundary, so this loop terminates.
        while !preview.is_char_boundary(cut) {
            cut -= 1;
        }
        preview.truncate(cut);
        preview.push_str(ELLIPSIS);
    }

    preview
}
1344
1345fn is_placeholder_value(value: &str) -> bool {
1346    let trimmed = clean_value(value);
1347    let lower = trimmed.to_lowercase();
1348
1349    if trimmed.is_empty() || trimmed.len() < 8 {
1350        return true;
1351    }
1352    if is_reference_value(trimmed.as_str()) {
1353        return true;
1354    }
1355
1356    let exact_placeholders = [
1357        "example",
1358        "sample",
1359        "demo",
1360        "test",
1361        "dummy",
1362        "fake",
1363        "placeholder",
1364        "mock",
1365        "fixme",
1366        "todo",
1367        "temp",
1368        "tmp",
1369        "none",
1370        "null",
1371        "undefined",
1372        "empty",
1373        "default",
1374        "redacted",
1375        "removed",
1376        "censored",
1377        "changeme",
1378        "replace_me",
1379        "password",
1380        "qwerty",
1381        "letmein",
1382        "123456",
1383        "000000",
1384        "111111",
1385        "user:pass",
1386        "username:password",
1387    ];
1388    if exact_placeholders.contains(&lower.as_str()) {
1389        return true;
1390    }
1391
1392    if regex::Regex::new(
1393        r"(?i)your[_-]?(?:api[_-]?key|token|secret|password|key)[_-]?here|(?:replace|change|insert|fill|update|put|add)[_-]?(?:me|your)",
1394    )
1395    .unwrap()
1396    .is_match(trimmed.as_str())
1397    {
1398        return true;
1399    }
1400
1401    if regex::Regex::new(r"(?i)^(?:x{4,}|\*{4,}|0{6,}|1{6,}|#{4,}|\.{4,})$")
1402        .unwrap()
1403        .is_match(trimmed.as_str())
1404    {
1405        return true;
1406    }
1407
1408    trimmed.contains("...")
1409}
1410
1411fn is_reference_value(value: &str) -> bool {
1412    regex::Regex::new(
1413        r#"(?ix)
1414        ^\$\{.+\}$|
1415        ^\$\(.+\)$|
1416        ^%[A-Z_][A-Z0-9_]*%$|
1417        ^\{\{.+\}\}$|
1418        ^<[A-Za-z0-9_-]+>$|
1419        ^\$[A-Z_][A-Z0-9_]*$|
1420        \bprocess\.env\.|
1421        \bos\.environ\[|
1422        \bos\.getenv\(|
1423        \bSystem\.getenv\(|
1424        \bENV\[|
1425        \$ENV\{|
1426        \benv\(['"][A-Za-z0-9_]+['"]\)
1427        "#,
1428    )
1429    .unwrap()
1430    .is_match(value)
1431        || value.contains("${")
1432        || value.contains("{{")
1433        || value.contains("$(")
1434}
1435
1436fn passes_generic_secret_heuristics(value: &str) -> bool {
1437    if value.len() < 8 {
1438        return false;
1439    }
1440    if value.chars().filter(|ch| ch.is_ascii_digit()).count() < 2 {
1441        return false;
1442    }
1443
1444    let unique_chars = value.chars().collect::<HashSet<_>>().len();
1445    if unique_chars < 6 {
1446        return false;
1447    }
1448
1449    let hex_like = regex::Regex::new(r"^[0-9a-f]+$").unwrap().is_match(value);
1450    let entropy = shannon_entropy(value);
1451    if hex_like {
1452        entropy >= 3.0
1453    } else {
1454        entropy >= 3.0
1455    }
1456}
1457
/// Computes the Shannon entropy of `value` in bits per character.
///
/// Frequencies are counted per `char` and divided by the number of
/// characters (not bytes), so the probabilities sum to 1 for non-ASCII text
/// as well. An empty string has an entropy of `0.0`.
fn shannon_entropy(value: &str) -> f64 {
    let mut counts: HashMap<char, usize> = HashMap::new();
    let mut total = 0_usize;
    for ch in value.chars() {
        *counts.entry(ch).or_default() += 1;
        total += 1;
    }

    if total == 0 {
        return 0.0;
    }

    let total = total as f64;
    counts
        .values()
        .map(|&count| {
            let p = count as f64 / total;
            -p * p.log2()
        })
        .sum()
}
1473
/// Parses a dotted-quad IPv4 address into its four octets.
///
/// Returns `None` unless `value` consists of exactly four `.`-separated
/// parts, each made up solely of ASCII digits and fitting in a `u8`. Leading
/// zeros are accepted.
fn parse_ipv4(value: &str) -> Option<[u8; 4]> {
    let mut octets = [0_u8; 4];
    let mut parts = value.split('.');

    for slot in &mut octets {
        let part = parts.next()?;
        // `u8::from_str` tolerates a leading `+`, which is not valid in an
        // address, so require digits only. Empty parts fail the parse below.
        if !part.bytes().all(|byte| byte.is_ascii_digit()) {
            return None;
        }
        *slot = part.parse().ok()?;
    }

    // Anything left over means there were more than four parts.
    parts.next().is_none().then_some(octets)
}
1488
/// Classifies an IPv4 address as public.
///
/// An address is *not* public when it falls in one of these ranges:
/// `0.0.0.0/8`, `10.0.0.0/8`, `127.0.0.0/8`, `169.254.0.0/16`,
/// `172.16.0.0/12`, `192.168.0.0/16`, or the documentation blocks
/// `192.0.2.0/24`, `198.51.100.0/24` and `203.0.113.0/24`. Every other
/// address is reported as public.
fn is_public_ipv4(ip: [u8; 4]) -> bool {
    let non_public = matches!(
        ip,
        [0 | 10 | 127, ..]
            | [169, 254, ..]
            | [172, 16..=31, ..]
            | [192, 168, ..]
            | [192, 0, 2, _]
            | [198, 51, 100, _]
            | [203, 0, 113, _]
    );
    !non_public
}
1505
1506fn is_local_host(host: &str) -> bool {
1507    let value = host
1508        .to_lowercase()
1509        .split([':', '/'])
1510        .next()
1511        .unwrap_or_default()
1512        .to_owned();
1513
1514    if matches!(
1515        value.as_str(),
1516        "localhost" | "127.0.0.1" | "0.0.0.0" | "::1"
1517    ) || value.ends_with(".local")
1518        || value.ends_with(".internal")
1519        || value.ends_with(".example")
1520        || value.ends_with(".test")
1521    {
1522        return true;
1523    }
1524
1525    parse_ipv4(value.as_str())
1526        .map(|ip| !is_public_ipv4(ip))
1527        .unwrap_or(false)
1528}
1529
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// One expected finding as stored (camelCase keys) in the shared fixture
    /// file. Tier and severity are kept as strings and compared by name.
    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct ScenarioFinding {
        category: String,
        rule: String,
        file_path: String,
        line_number: Option<usize>,
        preview: String,
        tier: String,
        severity: String,
    }

    /// A fixture scenario: a diff, the files it touches, and the findings the
    /// detector is expected to produce, in order.
    #[derive(Debug, Deserialize)]
    #[serde(rename_all = "camelCase")]
    struct Scenario {
        name: String,
        diff: String,
        changed_files: Vec<String>,
        expected_findings: Vec<ScenarioFinding>,
    }

    /// Reads the scenario fixtures located relative to the crate manifest
    /// directory. Panics if the file is missing or malformed.
    fn load_shared_scenarios() -> Vec<Scenario> {
        let manifest_dir = env!("CARGO_MANIFEST_DIR");
        let fixture_path = format!(
            "{}/../../test-fixtures/sensitive-scenarios.json",
            manifest_dir
        );
        let raw = std::fs::read_to_string(fixture_path).unwrap();
        serde_json::from_str(&raw).unwrap()
    }

    /// Fixture spelling of a tier.
    fn tier_name(tier: SensitiveTier) -> &'static str {
        match tier {
            SensitiveTier::ConfirmedSecret => "confirmed-secret",
            SensitiveTier::SensitiveArtifact => "sensitive-artifact",
            SensitiveTier::Suspicious => "suspicious",
        }
    }

    /// Fixture spelling of a severity.
    fn severity_name(severity: SensitiveSeverity) -> &'static str {
        match severity {
            SensitiveSeverity::Block => "block",
            SensitiveSeverity::Warn => "warn",
        }
    }

    /// A warn-level generic assignment finding in `src/auth.ts` at the given
    /// line, shared by the strict-mode tests.
    fn password_assignment_finding(line_number: usize) -> SensitiveFinding {
        SensitiveFinding {
            category: "credential",
            rule: "generic-secret-assignment",
            file_path: "src/auth.ts".to_owned(),
            line_number: Some(line_number),
            preview: r#"const PASSWORD = "Alpha9981Zeta""#.to_owned(),
            tier: SensitiveTier::Suspicious,
            severity: SensitiveSeverity::Warn,
        }
    }

    #[test]
    fn shared_scenarios_match_rust_detector() {
        for scenario in load_shared_scenarios() {
            let name = &scenario.name;
            let report = scan_diff_for_sensitive_content(&scenario.diff, &scenario.changed_files);

            assert_eq!(
                report.findings.len(),
                scenario.expected_findings.len(),
                "scenario {} finding count mismatch",
                name
            );

            // Findings are compared position by position, field by field.
            for (actual, expected) in report.findings.iter().zip(&scenario.expected_findings) {
                assert_eq!(actual.category, expected.category, "{}", name);
                assert_eq!(actual.rule, expected.rule, "{}", name);
                assert_eq!(actual.file_path, expected.file_path, "{}", name);
                assert_eq!(actual.line_number, expected.line_number, "{}", name);
                assert_eq!(actual.preview, expected.preview, "{}", name);
                assert_eq!(tier_name(actual.tier), expected.tier, "{}", name);
                assert_eq!(
                    severity_name(actual.severity),
                    expected.severity,
                    "{}",
                    name
                );
            }
        }
    }

    #[test]
    fn allowlist_matches_path_rule_and_value() {
        let diff = "\
diff --git a/.env.example b/.env.example
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1 @@
+OPENAI_API_KEY=sk-proj-abcdefghijklmnopqrstuvwxyz1234567890
";
        let changed_files = [".env.example".to_owned()];
        let allowlist = [SensitiveAllowlistEntry {
            path_regex: Some(r"\.env\.example$".to_owned()),
            rule: Some("openai-project-key".to_owned()),
            value_regex: Some(r"^sk-proj-".to_owned()),
        }];

        let report = scan_diff_for_sensitive_content_with_options(
            diff,
            &changed_files,
            SensitiveEnforcement::Warn,
            &allowlist,
        );
        assert!(!report.has_findings());
    }

    #[test]
    fn allowlist_matches_path_only_artifact_findings() {
        let changed_files = [".env".to_owned()];
        let allowlist = [SensitiveAllowlistEntry {
            path_regex: Some(r"\.env$".to_owned()),
            rule: Some("env-file".to_owned()),
            value_regex: None,
        }];

        let report = scan_diff_for_sensitive_content_with_options(
            "diff",
            &changed_files,
            SensitiveEnforcement::Warn,
            &allowlist,
        );
        assert!(!report.has_findings());
    }

    #[test]
    fn warn_mode_never_marks_findings_blocking() {
        let env_file = SensitiveFinding {
            category: "artifact",
            rule: "env-file",
            file_path: ".env".to_owned(),
            line_number: None,
            preview: ".env".to_owned(),
            tier: SensitiveTier::SensitiveArtifact,
            severity: SensitiveSeverity::Block,
        };

        let report = SensitiveReport::from_findings_with_enforcement(
            vec![env_file],
            SensitiveEnforcement::Warn,
        );
        assert_eq!(report.blocking_count, 0);
        assert_eq!(report.warning_count, 1);
    }

    #[test]
    fn strict_all_blocks_warnings_and_disables_bypass() {
        let report = SensitiveReport::from_findings_with_enforcement(
            vec![password_assignment_finding(1)],
            SensitiveEnforcement::StrictAll,
        );
        assert!(report.has_blocking_findings());
        assert!(!allows_sensitive_bypass(report.enforcement));
    }

    #[test]
    fn formats_git_hook_message_with_strict_footer() {
        let report = SensitiveReport::from_findings_with_enforcement(
            vec![password_assignment_finding(18)],
            SensitiveEnforcement::StrictAll,
        );

        let message = report.format_git_hook_message();
        assert!(message.contains("Strict sensitive mode is active"));
    }
}