Skip to main content

cc_audit/rules/
heuristics.rs

//! File and variable name heuristics for reducing false positives.
//!
//! This module provides heuristics to identify test files, dummy credentials,
//! and other patterns that are likely to be false positives in security scans.
5
6use regex::Regex;
7use std::sync::LazyLock;
8
9/// Known dummy API key patterns that should be excluded from secret detection.
10pub static DUMMY_KEY_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
11    vec![
12        // AWS example keys from official documentation
13        Regex::new(r"AKIAIOSFODNN7EXAMPLE").unwrap(),
14        Regex::new(r"ASIAIOSFODNN7EXAMPLE").unwrap(),
15        Regex::new(r"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY").unwrap(),
16        // Stripe test keys
17        Regex::new(r"^sk_test_[A-Za-z0-9]{24}$").unwrap(),
18        Regex::new(r"^pk_test_[A-Za-z0-9]{24}$").unwrap(),
19        Regex::new(r"^rk_test_[A-Za-z0-9]{24}$").unwrap(),
20        // OpenAI dummy keys (placeholder patterns)
21        Regex::new(r"sk-[xX]{32,}").unwrap(),
22        Regex::new(r"sk-proj-[xX]{32,}").unwrap(),
23        // Anthropic dummy keys
24        Regex::new(r"sk-ant-[xX]{32,}").unwrap(),
25        Regex::new(r"sk-ant-api\d{2}-[xX]{32,}").unwrap(),
26        // Generic placeholder patterns
27        Regex::new(r"YOUR_API_KEY(?:_HERE)?").unwrap(),
28        Regex::new(r"INSERT_API_KEY").unwrap(),
29        Regex::new(r"<API_KEY>").unwrap(),
30        Regex::new(r"REPLACE_WITH_YOUR_KEY").unwrap(),
31        Regex::new(r"PUT_YOUR_KEY_HERE").unwrap(),
32        // All X's or zeros (common placeholders)
33        Regex::new(r"^[xX]{16,}$").unwrap(),
34        Regex::new(r"^[0]{16,}$").unwrap(),
35        // Common test/example literals
36        Regex::new(r"(?i)test[_-]?key").unwrap(),
37        Regex::new(r"(?i)example[_-]?key").unwrap(),
38        Regex::new(r"(?i)dummy[_-]?key").unwrap(),
39        Regex::new(r"(?i)fake[_-]?key").unwrap(),
40        Regex::new(r"(?i)sample[_-]?key").unwrap(),
41    ]
42});
43
44/// File name heuristics for identifying test/example files.
45pub struct FileHeuristics;
46
47impl FileHeuristics {
48    /// Patterns indicating a file is likely a test file.
49    const TEST_FILE_PATTERNS: &'static [&'static str] = &[
50        "test",
51        "tests",
52        "spec",
53        "specs",
54        "__test__",
55        "__tests__",
56        "__spec__",
57        "__specs__",
58        "_test",
59        "_spec",
60        ".test.",
61        ".spec.",
62        "mock",
63        "mocks",
64        "__mock__",
65        "__mocks__",
66        "fake",
67        "fakes",
68        "dummy",
69        "dummies",
70        "example",
71        "examples",
72        "fixture",
73        "fixtures",
74        "sample",
75        "samples",
76        "stub",
77        "stubs",
78        "testdata",
79        "test_data",
80        "testcases",
81        "test_cases",
82    ];
83
84    /// Check if a file path indicates a test/example file.
85    ///
86    /// # Examples
87    ///
88    /// ```
89    /// use cc_audit::rules::heuristics::FileHeuristics;
90    ///
91    /// assert!(FileHeuristics::is_test_file("src/test_utils.rs"));
92    /// assert!(FileHeuristics::is_test_file("tests/integration.rs"));
93    /// assert!(FileHeuristics::is_test_file("__tests__/api.test.js"));
94    /// assert!(FileHeuristics::is_test_file("fixtures/sample_data.json"));
95    /// assert!(!FileHeuristics::is_test_file("src/main.rs"));
96    /// ```
97    pub fn is_test_file(file_path: &str) -> bool {
98        let lower = file_path.to_lowercase();
99
100        // Check for test-related patterns in the path
101        Self::TEST_FILE_PATTERNS
102            .iter()
103            .any(|pattern| lower.contains(pattern))
104    }
105
106    /// Check if a variable name indicates a dummy/example credential.
107    ///
108    /// # Examples
109    ///
110    /// ```
111    /// use cc_audit::rules::heuristics::FileHeuristics;
112    ///
113    /// assert!(FileHeuristics::is_dummy_variable("EXAMPLE_API_KEY"));
114    /// assert!(FileHeuristics::is_dummy_variable("TEST_SECRET"));
115    /// assert!(FileHeuristics::is_dummy_variable("DUMMY_TOKEN"));
116    /// assert!(FileHeuristics::is_dummy_variable("SAMPLE_KEY"));
117    /// assert!(FileHeuristics::is_dummy_variable("MOCK_PASSWORD"));
118    /// assert!(!FileHeuristics::is_dummy_variable("API_KEY"));
119    /// ```
120    pub fn is_dummy_variable(var_name: &str) -> bool {
121        let upper = var_name.to_uppercase();
122        let prefixes = [
123            "EXAMPLE_", "TEST_", "DUMMY_", "SAMPLE_", "MOCK_", "FAKE_", "STUB_",
124        ];
125
126        prefixes.iter().any(|prefix| upper.starts_with(prefix))
127    }
128
129    /// Check if a line contains a dummy variable name pattern.
130    pub fn contains_dummy_variable(line: &str) -> bool {
131        static DUMMY_VAR_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
132            Regex::new(r"\b(?:EXAMPLE|TEST|DUMMY|SAMPLE|MOCK|FAKE|STUB)_[A-Z_]+")
133                .expect("Invalid dummy var regex")
134        });
135
136        DUMMY_VAR_PATTERN.is_match(line)
137    }
138
139    /// Check if a value matches known dummy key patterns.
140    pub fn is_dummy_key_value(value: &str) -> bool {
141        DUMMY_KEY_PATTERNS
142            .iter()
143            .any(|pattern| pattern.is_match(value))
144    }
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every path is classified as a test/example file.
    fn assert_test_files(paths: &[&str]) {
        for path in paths {
            assert!(
                FileHeuristics::is_test_file(path),
                "expected a test file: {path}"
            );
        }
    }

    /// Asserts that every value is recognised as a dummy key.
    fn assert_dummy_values(values: &[&str]) {
        for value in values {
            assert!(
                FileHeuristics::is_dummy_key_value(value),
                "expected a dummy key value: {value}"
            );
        }
    }

    #[test]
    fn test_is_test_file_common_patterns() {
        // Test directories and files
        assert_test_files(&[
            "src/test_utils.rs",
            "tests/integration.rs",
            "__tests__/api.test.js",
            "src/__mocks__/db.js",
            "test/unit/auth_test.go",
            "spec/models/user_spec.rb",
        ]);
    }

    #[test]
    fn test_is_test_file_fixture_patterns() {
        assert_test_files(&[
            "fixtures/sample_data.json",
            "testdata/config.yaml",
            "test_data/credentials.txt",
            "examples/usage.py",
            "samples/demo.sh",
        ]);
    }

    #[test]
    fn test_is_test_file_negative() {
        let production_paths = [
            "src/main.rs",
            "lib/auth.py",
            "config/settings.yaml",
            "app/models/user.rb",
        ];
        for path in production_paths {
            assert!(
                !FileHeuristics::is_test_file(path),
                "unexpected test file: {path}"
            );
        }
    }

    #[test]
    fn test_is_dummy_variable() {
        let dummy_names = [
            "EXAMPLE_API_KEY",
            "TEST_SECRET",
            "DUMMY_TOKEN",
            "SAMPLE_KEY",
            "MOCK_PASSWORD",
            "FAKE_CREDENTIAL",
            "STUB_API_TOKEN",
        ];
        for name in dummy_names {
            assert!(
                FileHeuristics::is_dummy_variable(name),
                "expected a dummy variable: {name}"
            );
        }
    }

    #[test]
    fn test_is_dummy_variable_negative() {
        let real_names = [
            "API_KEY",
            "SECRET_TOKEN",
            "AWS_ACCESS_KEY_ID",
            "GITHUB_TOKEN",
        ];
        for name in real_names {
            assert!(
                !FileHeuristics::is_dummy_variable(name),
                "unexpected dummy variable: {name}"
            );
        }
    }

    #[test]
    fn test_contains_dummy_variable() {
        let lines = [
            "const key = EXAMPLE_API_KEY",
            "TEST_SECRET = 'abc123'",
            "export DUMMY_TOKEN=xxx",
        ];
        for line in lines {
            assert!(
                FileHeuristics::contains_dummy_variable(line),
                "expected a dummy variable in: {line}"
            );
        }
    }

    #[test]
    fn test_contains_dummy_variable_negative() {
        let lines = ["const key = API_KEY", "SECRET_TOKEN = 'real'"];
        for line in lines {
            assert!(
                !FileHeuristics::contains_dummy_variable(line),
                "unexpected dummy variable in: {line}"
            );
        }
    }

    #[test]
    fn test_is_dummy_key_value_aws() {
        assert_dummy_values(&[
            "AKIAIOSFODNN7EXAMPLE",
            "ASIAIOSFODNN7EXAMPLE",
            "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
        ]);
    }

    #[test]
    fn test_is_dummy_key_value_stripe() {
        // Only the presence of each Stripe test-key pattern is verified, by
        // inspecting the pattern source. Key-shaped sample values are left out
        // on purpose so GitHub's secret scanning is not triggered by this file.
        for prefix in ["sk_test_", "pk_test_", "rk_test_"] {
            let registered = DUMMY_KEY_PATTERNS
                .iter()
                .any(|pattern| pattern.as_str().contains(prefix));
            assert!(registered, "{prefix} pattern should exist");
        }
    }

    #[test]
    fn test_is_dummy_key_value_placeholder() {
        assert_dummy_values(&[
            "YOUR_API_KEY_HERE",
            "YOUR_API_KEY",
            "INSERT_API_KEY",
            "<API_KEY>",
            "REPLACE_WITH_YOUR_KEY",
        ]);
    }

    #[test]
    fn test_is_dummy_key_value_x_pattern() {
        assert_dummy_values(&[
            "xxxxxxxxxxxxxxxx",
            "XXXXXXXXXXXXXXXX",
            "0000000000000000",
            "sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        ]);
    }

    #[test]
    fn test_is_dummy_key_value_test_example() {
        assert_dummy_values(&[
            "test_key",
            "example_key",
            "dummy-key",
            "fake_key",
            "sample-key",
        ]);
    }

    #[test]
    fn test_is_dummy_key_value_negative() {
        // Real-looking keys should not match
        let real_looking = [
            // Not exact AWS example
            "AKIAI44QH8DHBEXAMPLE",
            // Live key pattern
            "sk_live_abcdefghij1234567890",
            // GitHub token format
            "ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
        ];
        for value in real_looking {
            assert!(
                !FileHeuristics::is_dummy_key_value(value),
                "unexpected dummy key value: {value}"
            );
        }
    }
}