Skip to main content

aptu_core/security/
patterns.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Security pattern engine with regex-based vulnerability detection.
4
5use crate::security::types::{Finding, PatternDefinition};
6use regex::Regex;
7use std::sync::LazyLock;
8
/// Embedded pattern database JSON.
///
/// `include_str!` embeds the text of `patterns.json` (resolved relative to this
/// source file) at compile time. `PatternEngine::from_embedded_json` deserializes
/// this string into a list of `PatternDefinition`s.
const PATTERNS_JSON: &str = include_str!("patterns.json");
11
12/// Compiled pattern engine (initialized once on first use).
13static PATTERN_ENGINE: LazyLock<PatternEngine> = LazyLock::new(|| {
14    PatternEngine::from_embedded_json()
15        .expect("Failed to load embedded security patterns - patterns.json is malformed")
16});
17
/// Pattern engine for security scanning.
///
/// Owns the loaded pattern definitions together with their compiled regexes and
/// applies them line by line to text via `scan`.
#[derive(Debug)]
pub struct PatternEngine {
    /// Compiled patterns in load order; `scan` tries them in this order on each line.
    patterns: Vec<CompiledPattern>,
}
23
/// A pattern with pre-compiled regex.
///
/// Pairing the two lets the regex be compiled once at load time rather than on
/// every scan.
#[derive(Debug)]
struct CompiledPattern {
    /// The pattern's metadata as deserialized from the pattern database.
    definition: PatternDefinition,
    /// Regex compiled from `definition.pattern`.
    regex: Regex,
}
30
31impl PatternEngine {
32    /// Creates a pattern engine from the embedded JSON patterns.
33    ///
34    /// # Errors
35    ///
36    /// Returns an error if the JSON is malformed or regex compilation fails.
37    pub fn from_embedded_json() -> anyhow::Result<Self> {
38        let definitions: Vec<PatternDefinition> = serde_json::from_str(PATTERNS_JSON)?;
39        let mut patterns = Vec::new();
40
41        for def in definitions {
42            let regex = Regex::new(&def.pattern)?;
43            patterns.push(CompiledPattern {
44                definition: def,
45                regex,
46            });
47        }
48
49        Ok(Self { patterns })
50    }
51
52    /// Gets the global pattern engine instance.
53    #[must_use]
54    pub fn global() -> &'static Self {
55        &PATTERN_ENGINE
56    }
57
58    /// Scans text content for security vulnerabilities.
59    ///
60    /// # Arguments
61    ///
62    /// * `content` - The text content to scan
63    /// * `file_path` - Path to the file being scanned (for filtering and reporting)
64    ///
65    /// # Returns
66    ///
67    /// A vector of security findings.
68    pub fn scan(&self, content: &str, file_path: &str) -> Vec<Finding> {
69        let mut findings = Vec::new();
70        let file_ext = std::path::Path::new(file_path)
71            .extension()
72            .and_then(|e| e.to_str())
73            .map(|e| format!(".{e}"));
74
75        for (line_num, line) in content.lines().enumerate() {
76            for compiled in &self.patterns {
77                // Skip if pattern has file extension filter and doesn't match
78                if !compiled.definition.file_extensions.is_empty() {
79                    if let Some(ref ext) = file_ext {
80                        if !compiled.definition.file_extensions.contains(ext) {
81                            continue;
82                        }
83                    } else {
84                        continue;
85                    }
86                }
87
88                if let Some(mat) = compiled.regex.find(line) {
89                    tracing::debug!(
90                        pattern_id = %compiled.definition.id,
91                        file = %file_path,
92                        line = line_num + 1,
93                        "Security pattern matched"
94                    );
95
96                    findings.push(Finding {
97                        pattern_id: compiled.definition.id.clone(),
98                        description: compiled.definition.description.clone(),
99                        severity: compiled.definition.severity,
100                        confidence: compiled.definition.confidence,
101                        file_path: file_path.to_string(),
102                        line_number: line_num + 1,
103                        matched_text: mat.as_str().to_string(),
104                        cwe: compiled.definition.cwe.clone(),
105                    });
106                }
107            }
108        }
109
110        findings
111    }
112
113    /// Returns the number of loaded patterns.
114    #[must_use]
115    pub fn pattern_count(&self) -> usize {
116        self.patterns.len()
117    }
118
119    /// Returns cloned pattern definitions (for SARIF rule metadata injection).
120    #[must_use]
121    pub fn definitions(&self) -> Vec<PatternDefinition> {
122        self.patterns.iter().map(|c| c.definition.clone()).collect()
123    }
124}
125
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::types::{Confidence, Severity};

    /// The embedded database parses, compiles, and has the expected minimum size.
    #[test]
    fn test_pattern_engine_loads() {
        let engine = PatternEngine::from_embedded_json().unwrap();
        assert!(
            engine.pattern_count() >= 22,
            "Should have at least 22 patterns"
        );
    }

    /// The lazily-initialized global instance is usable.
    #[test]
    fn test_global_engine() {
        let engine = PatternEngine::global();
        assert!(engine.pattern_count() >= 10);
    }

    /// Hardcoded secrets are reported with the expected severity, confidence and CWE.
    #[test]
    fn test_hardcoded_api_key_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            let api_key = "sk-1234567890abcdefghijklmnopqrstuvwxyz";
            let secret_key = "secret_1234567890abcdefghij";
        "#;

        let findings = engine.scan(code, "test.rs");
        assert!(!findings.is_empty(), "Should detect hardcoded secrets");

        let finding = findings
            .iter()
            .find(|f| f.pattern_id == "hardcoded-api-key")
            .expect("Should detect API key");

        assert_eq!(finding.severity, Severity::Critical);
        assert_eq!(finding.confidence, Confidence::High);
        assert_eq!(finding.cwe, Some("CWE-798".to_string()));
    }

    /// Both string-concatenation and format-string SQL construction are flagged.
    #[test]
    fn test_sql_injection_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            query("SELECT * FROM users WHERE id = " + user_input);
            execute(format!("DELETE FROM {} WHERE id = {}", table, id));
        "#;

        let findings = engine.scan(code, "database.rs");
        assert!(!findings.is_empty(), "Should detect SQL injection patterns");

        assert!(
            findings
                .iter()
                .any(|f| f.pattern_id == "sql-injection-concat"),
            "Should detect concatenation"
        );
        assert!(
            findings
                .iter()
                .any(|f| f.pattern_id == "sql-injection-format"),
            "Should detect format string"
        );
    }

    /// Relative-path escapes are flagged as high-severity path traversal.
    #[test]
    fn test_path_traversal_detection() {
        let engine = PatternEngine::global();
        let code = r#"
            open("../../etc/passwd");
            read("..\..\..\windows\system32\config\sam");
        "#;

        let findings = engine.scan(code, "file_handler.rs");
        assert!(!findings.is_empty(), "Should detect path traversal");

        // Look the finding up by id rather than by position so the test does not
        // depend on the order of patterns in the database.
        let finding = findings
            .iter()
            .find(|f| f.pattern_id == "path-traversal")
            .expect("Should report a path-traversal finding");
        assert_eq!(finding.severity, Severity::High);
    }

    /// MD5 and SHA1 usage each produce exactly one finding.
    #[test]
    fn test_weak_crypto_detection() {
        let engine = PatternEngine::global();
        let code = r"
            let hash = md5(password);
            let digest = SHA1(data);
        ";

        let findings = engine.scan(code, "crypto.rs");
        assert_eq!(findings.len(), 2, "Should detect both MD5 and SHA1");

        assert!(findings.iter().any(|f| f.pattern_id == "weak-crypto-md5"));
        assert!(findings.iter().any(|f| f.pattern_id == "weak-crypto-sha1"));
    }

    /// Patterns restricted to certain file extensions are skipped for other files.
    #[test]
    fn test_file_extension_filtering() {
        let engine = PatternEngine::global();
        let js_code = "element.innerHTML = userInput + '<div>';";

        // Should detect in .js file
        let js_findings = engine.scan(js_code, "app.js");
        assert!(!js_findings.is_empty(), "Should detect XSS in JS file");

        // Should NOT detect in .rs file (pattern has file extension filter)
        let rs_findings = engine.scan(js_code, "app.rs");
        assert!(
            rs_findings.is_empty(),
            "Should not detect XSS pattern in Rust file"
        );
    }

    /// Parameterized queries, strong hashes and OS randomness produce no findings.
    #[test]
    fn test_no_false_positives_on_safe_code() {
        let engine = PatternEngine::global();
        let safe_code = r#"
            // Safe code examples
            let config = load_config();
            let result = query_with_params("SELECT * FROM users WHERE id = ?", &[id]);
            let hash = sha256(data);
            let random = OsRng.gen::<u64>();
        "#;

        let findings = engine.scan(safe_code, "safe.rs");
        assert!(
            findings.is_empty(),
            "Should not have false positives on safe code"
        );
    }

    /// HTTP requests built from a variable URL are flagged as SSRF.
    #[test]
    fn test_ssrf_detection() {
        let engine = PatternEngine::global();

        // Test bare variable call
        let code_bare = r"
            let response = reqwest::get(user_url).await;
        ";
        let findings_bare = engine.scan(code_bare, "app.rs");
        assert!(
            findings_bare
                .iter()
                .any(|f| f.pattern_id == "ssrf-http-request"),
            "Should detect SSRF pattern with bare variable URL"
        );

        // Test concatenation call
        let code_concat = r#"
            let response = reqwest::get(user_url + "/path").await;
        "#;
        let findings_concat = engine.scan(code_concat, "app.rs");
        assert!(
            findings_concat
                .iter()
                .any(|f| f.pattern_id == "ssrf-http-request"),
            "Should detect SSRF pattern with concatenated variable URL"
        );
    }

    /// Redirect targets taken from request input are flagged.
    #[test]
    fn test_open_redirect_detection() {
        let engine = PatternEngine::global();
        let code = r"
            location.href = req.query.url;
        ";

        let findings = engine.scan(code, "app.js");
        assert!(
            findings.iter().any(|f| f.pattern_id == "open-redirect"),
            "Should detect open redirect pattern from user input"
        );
    }

    /// Every pattern in the database carries remediation text and an authority URL.
    #[test]
    fn test_all_patterns_have_remediation_and_authority_url() {
        let engine = PatternEngine::from_embedded_json().unwrap();
        for def in engine.definitions() {
            assert!(
                def.remediation.as_deref().is_some_and(|s| !s.is_empty()),
                "Pattern '{}' is missing a non-empty remediation",
                def.id
            );
            assert!(
                def.authority_url.as_deref().is_some_and(|s| !s.is_empty()),
                "Pattern '{}' is missing a non-empty authority_url",
                def.id
            );
        }
    }

    /// A SARIF report built with pattern definitions includes rule metadata.
    #[test]
    fn test_sarif_with_rules_includes_rule_metadata() {
        use crate::security::sarif::SarifReport;

        let engine = PatternEngine::from_embedded_json().unwrap();
        let patterns = engine.definitions();

        let finding = Finding {
            pattern_id: "hardcoded-api-key".to_string(),
            description: "Hardcoded API key detected".to_string(),
            severity: Severity::Critical,
            confidence: Confidence::High,
            file_path: "src/config.rs".to_string(),
            line_number: 1,
            matched_text: "api_key = \"sk-abc\"".to_string(),
            cwe: Some("CWE-798".to_string()),
        };

        let report = SarifReport::with_rules(vec![finding], &patterns);
        let json = serde_json::to_string(&report).unwrap();

        assert!(
            !report.runs[0].tool.driver.rules.is_empty(),
            "rules array must not be empty"
        );
        assert!(
            json.contains("hardcoded-api-key"),
            "JSON must contain rule id"
        );
        assert!(
            json.contains("helpUri") || json.contains("help_uri") || json.contains("cwe.mitre.org"),
            "JSON must contain authority URL"
        );
    }

    /// Reported line numbers are 1-based and point at the matching line.
    #[test]
    fn test_line_number_accuracy() {
        let engine = PatternEngine::global();
        let code = "line 1\nline 2\napi_key = \"sk-1234567890abcdefghijklmnopqrstuvwxyz\"\nline 4";

        let findings = engine.scan(code, "test.rs");
        assert_eq!(findings.len(), 1);
        assert_eq!(
            findings[0].line_number, 3,
            "Should report correct line number"
        );
    }
}