Skip to main content

aptu_core/security/
patterns.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security pattern engine with regex-based vulnerability detection.
4//!
5//! Patterns are defined in `patterns.json` (embedded at compile time). See [`PatternDefinition`]
6//! for the field schema. After editing, run `cargo test -p aptu-core` to validate JSON structure,
7//! required fields, and regex compilation.
8
9use crate::security::types::{Finding, PatternDefinition};
10use regex::Regex;
11use std::sync::LazyLock;
12
13/// Embedded pattern database JSON.
14const PATTERNS_JSON: &str = include_str!("patterns.json");
15
16/// Compiled pattern engine (initialized once on first use).
17static PATTERN_ENGINE: LazyLock<PatternEngine> = LazyLock::new(|| {
18    PatternEngine::from_embedded_json()
19        .expect("Failed to load embedded security patterns - patterns.json is malformed")
20});
21
22/// Pattern engine for security scanning.
23#[derive(Debug)]
24pub struct PatternEngine {
25    patterns: Vec<CompiledPattern>,
26}
27
28/// A pattern with pre-compiled regex.
29#[derive(Debug)]
30struct CompiledPattern {
31    definition: PatternDefinition,
32    regex: Regex,
33}
34
35impl PatternEngine {
36    /// Creates a pattern engine from the embedded JSON patterns.
37    ///
38    /// # Errors
39    ///
40    /// Returns an error if the JSON is malformed or regex compilation fails.
41    pub fn from_embedded_json() -> anyhow::Result<Self> {
42        let definitions: Vec<PatternDefinition> = serde_json::from_str(PATTERNS_JSON)?;
43        let mut patterns = Vec::new();
44
45        for def in definitions {
46            let regex = Regex::new(&def.pattern)?;
47            patterns.push(CompiledPattern {
48                definition: def,
49                regex,
50            });
51        }
52
53        Ok(Self { patterns })
54    }
55
56    /// Gets the global pattern engine instance.
57    #[must_use]
58    pub fn global() -> &'static Self {
59        &PATTERN_ENGINE
60    }
61
62    /// Scans text content for security vulnerabilities.
63    ///
64    /// # Arguments
65    ///
66    /// * `content` - The text content to scan
67    /// * `file_path` - Path to the file being scanned (for filtering and reporting)
68    ///
69    /// # Returns
70    ///
71    /// A vector of security findings.
72    pub fn scan(&self, content: &str, file_path: &str) -> Vec<Finding> {
73        let mut findings = Vec::new();
74        let file_ext = std::path::Path::new(file_path)
75            .extension()
76            .and_then(|e| e.to_str())
77            .map(|e| format!(".{e}"));
78
79        for (line_num, line) in content.lines().enumerate() {
80            for compiled in &self.patterns {
81                // Skip if pattern has file extension filter and doesn't match
82                if !compiled.definition.file_extensions.is_empty() {
83                    if let Some(ext) = &file_ext {
84                        if !compiled.definition.file_extensions.contains(ext) {
85                            continue;
86                        }
87                    } else {
88                        continue;
89                    }
90                }
91
92                if let Some(mat) = compiled.regex.find(line) {
93                    tracing::debug!(
94                        pattern_id = %compiled.definition.id,
95                        file = %file_path,
96                        line = line_num + 1,
97                        "Security pattern matched"
98                    );
99
100                    findings.push(Finding {
101                        pattern_id: compiled.definition.id.clone(),
102                        description: compiled.definition.description.clone(),
103                        severity: compiled.definition.severity,
104                        confidence: compiled.definition.confidence,
105                        file_path: file_path.to_string(),
106                        line_number: line_num + 1,
107                        matched_text: mat.as_str().to_string(),
108                        cwe: compiled.definition.cwe.clone(),
109                    });
110                }
111            }
112        }
113
114        findings
115    }
116
117    /// Returns the number of loaded patterns.
118    #[must_use]
119    pub fn pattern_count(&self) -> usize {
120        self.patterns.len()
121    }
122
123    /// Returns cloned pattern definitions (for SARIF rule metadata injection).
124    #[must_use]
125    pub fn definitions(&self) -> Vec<PatternDefinition> {
126        self.patterns.iter().map(|c| c.definition.clone()).collect()
127    }
128}
129
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::types::{Confidence, Severity};

    /// Returns `true` if any finding was produced by the pattern with the given id.
    fn has_finding(findings: &[Finding], pattern_id: &str) -> bool {
        findings.iter().any(|f| f.pattern_id == pattern_id)
    }

    #[test]
    fn test_pattern_engine_loads() {
        let engine = PatternEngine::from_embedded_json().unwrap();
        assert!(
            engine.pattern_count() >= 22,
            "Should have at least 22 patterns"
        );
    }

    #[test]
    fn test_global_engine() {
        // The global instance is built from the same embedded JSON, so it must
        // expose exactly the patterns a fresh load does.
        let global = PatternEngine::global();
        let fresh = PatternEngine::from_embedded_json().unwrap();
        assert_eq!(
            global.pattern_count(),
            fresh.pattern_count(),
            "Global engine should contain every embedded pattern"
        );
    }

    #[test]
    fn test_hardcoded_api_key_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            let api_key = "sk-1234567890abcdefghijklmnopqrstuvwxyz";
            let secret_key = "secret_1234567890abcdefghij";
        "#;

        let findings = engine.scan(code, "test.rs");
        assert!(!findings.is_empty(), "Should detect hardcoded secrets");

        let finding = findings
            .iter()
            .find(|f| f.pattern_id == "hardcoded-api-key")
            .expect("Should detect API key");

        assert_eq!(finding.severity, Severity::Critical);
        assert_eq!(finding.confidence, Confidence::High);
        assert_eq!(finding.cwe, Some("CWE-798".to_string()));
    }

    #[test]
    fn test_sql_injection_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            query("SELECT * FROM users WHERE id = " + user_input);
            execute(format!("DELETE FROM {} WHERE id = {}", table, id));
        "#;

        let findings = engine.scan(code, "database.rs");
        assert!(!findings.is_empty(), "Should detect SQL injection patterns");

        assert!(
            has_finding(&findings, "sql-injection-concat"),
            "Should detect concatenation"
        );
        assert!(
            has_finding(&findings, "sql-injection-format"),
            "Should detect format string"
        );
    }

    #[test]
    fn test_path_traversal_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            open("../../etc/passwd");
            read("..\..\..\windows\system32\config\sam");
        "#;

        let findings = engine.scan(code, "file_handler.rs");
        assert!(!findings.is_empty(), "Should detect path traversal");

        // Look the finding up by id rather than by position so the test does
        // not depend on the order of patterns in patterns.json.
        let finding = findings
            .iter()
            .find(|f| f.pattern_id == "path-traversal")
            .expect("Should report the path-traversal pattern");
        assert_eq!(finding.severity, Severity::High);
    }

    #[test]
    fn test_weak_crypto_detection() {
        let engine = PatternEngine::global();
        let code = r"
            let hash = md5(password);
            let digest = SHA1(data);
        ";

        let findings = engine.scan(code, "crypto.rs");
        assert_eq!(findings.len(), 2, "Should detect both MD5 and SHA1");

        assert!(has_finding(&findings, "weak-crypto-md5"));
        assert!(has_finding(&findings, "weak-crypto-sha1"));
    }

    #[test]
    fn test_file_extension_filtering() {
        let engine = PatternEngine::global();
        let js_code = "element.innerHTML = userInput + '<div>';";

        // Should detect in .js file
        let js_findings = engine.scan(js_code, "app.js");
        assert!(!js_findings.is_empty(), "Should detect XSS in JS file");

        // Should NOT detect in .rs file (pattern has file extension filter)
        let rs_findings = engine.scan(js_code, "app.rs");
        assert!(
            rs_findings.is_empty(),
            "Should not detect XSS pattern in Rust file"
        );
    }

    #[test]
    fn test_no_false_positives_on_safe_code() {
        let engine = PatternEngine::global();
        let safe_code = r#"
            // Safe code examples
            let config = load_config();
            let result = query_with_params("SELECT * FROM users WHERE id = ?", &[id]);
            let hash = sha256(data);
            let random = OsRng.gen::<u64>();
        "#;

        let findings = engine.scan(safe_code, "safe.rs");
        assert!(
            findings.is_empty(),
            "Should not have false positives on safe code"
        );
    }

    #[test]
    fn test_ssrf_detection() {
        let engine = PatternEngine::global();

        // Test bare variable call
        let code_bare = r"
            let response = reqwest::get(user_url).await;
        ";
        let findings_bare = engine.scan(code_bare, "app.rs");
        assert!(
            has_finding(&findings_bare, "ssrf-http-request"),
            "Should detect SSRF pattern with bare variable URL"
        );

        // Test concatenation call
        let code_concat = r#"
            let response = reqwest::get(user_url + "/path").await;
        "#;
        let findings_concat = engine.scan(code_concat, "app.rs");
        assert!(
            has_finding(&findings_concat, "ssrf-http-request"),
            "Should detect SSRF pattern with concatenated variable URL"
        );
    }

    #[test]
    fn test_open_redirect_detection() {
        let engine = PatternEngine::global();
        let code = r"
            location.href = req.query.url;
        ";

        let findings = engine.scan(code, "app.js");
        assert!(
            has_finding(&findings, "open-redirect"),
            "Should detect open redirect pattern from user input"
        );
    }

    #[test]
    fn test_github_token_pattern() {
        let engine = PatternEngine::global();

        // Case 1: Short opaque ghs_ token (38 characters after the prefix)
        let code_short = r#"
            token = "ghs_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789AB"
        "#;
        let findings = engine.scan(code_short, "test.rs");
        assert!(
            has_finding(&findings, "leaked-github-token"),
            "Should detect short opaque ghs_ token"
        );

        // Case 2: Long JWT-format ghs_ token (three dot-separated segments,
        // 520 characters in total). Built programmatically so the length is
        // explicit and checkable instead of buried in a huge literal.
        let jwt_token = format!(
            "ghs_{}.{}.{}",
            "A".repeat(16),
            "B".repeat(478),
            "C".repeat(20)
        );
        assert_eq!(jwt_token.len(), 520);
        let code_jwt = format!("\n            token = \"{jwt_token}\"\n        ");
        let findings = engine.scan(&code_jwt, "test.rs");
        assert!(
            has_finding(&findings, "leaked-github-token"),
            "Should detect long JWT-format ghs_ token"
        );

        // Case 3: Wrong prefix (ghp_ and ghu_) should not match
        let code_wrong_prefix = r#"
            ghp_token = "ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789AB"
            ghu_token = "ghu_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789AB"
        "#;
        let findings = engine.scan(code_wrong_prefix, "test.rs");
        assert!(
            !has_finding(&findings, "leaked-github-token"),
            "Should not detect ghp_ or ghu_ prefixed tokens"
        );
    }

    #[test]
    fn test_all_patterns_have_remediation_and_authority_url() {
        let engine = PatternEngine::from_embedded_json().unwrap();
        for def in engine.definitions() {
            assert!(
                def.remediation.as_deref().is_some_and(|s| !s.is_empty()),
                "Pattern '{}' is missing a non-empty remediation",
                def.id
            );
            assert!(
                def.authority_url.as_deref().is_some_and(|s| !s.is_empty()),
                "Pattern '{}' is missing a non-empty authority_url",
                def.id
            );
        }
    }

    #[test]
    fn test_sarif_with_rules_includes_rule_metadata() {
        use crate::security::sarif::SarifReport;

        let engine = PatternEngine::from_embedded_json().unwrap();
        let patterns = engine.definitions();

        let finding = Finding {
            pattern_id: "hardcoded-api-key".to_string(),
            description: "Hardcoded API key detected".to_string(),
            severity: Severity::Critical,
            confidence: Confidence::High,
            file_path: "src/config.rs".to_string(),
            line_number: 1,
            matched_text: "api_key = \"sk-abc\"".to_string(),
            cwe: Some("CWE-798".to_string()),
        };

        let report = SarifReport::with_rules(vec![finding], &patterns);
        let json = serde_json::to_string(&report).unwrap();

        assert!(
            !report.runs[0].tool.driver.rules.is_empty(),
            "rules array must not be empty"
        );
        assert!(
            json.contains("hardcoded-api-key"),
            "JSON must contain rule id"
        );
        assert!(
            json.contains("helpUri") || json.contains("help_uri") || json.contains("cwe.mitre.org"),
            "JSON must contain authority URL"
        );
    }

    #[test]
    fn test_line_number_accuracy() {
        let engine = PatternEngine::global();
        let code = "line 1\nline 2\napi_key = \"sk-1234567890abcdefghijklmnopqrstuvwxyz\"\nline 4";

        let findings = engine.scan(code, "test.rs");
        assert_eq!(findings.len(), 1);
        assert_eq!(
            findings[0].line_number, 3,
            "Should report correct line number"
        );
    }
}