llm_security/
detection.rs

1//! Detection logic for LLM security threats
2
3use crate::constants::*;
4use crate::patterns::*;
5use crate::types::InjectionDetectionResult;
6
7/// Advanced detection methods for LLM security
8pub struct DetectionEngine {
9    config: crate::types::LLMSecurityConfig,
10}
11
12impl DetectionEngine {
13    /// Create a new detection engine
14    pub fn new(config: crate::types::LLMSecurityConfig) -> Self {
15        Self { config }
16    }
17
18    /// Detect prompt injection attempts in user input
19    pub fn detect_prompt_injection(&self, code: &str) -> InjectionDetectionResult {
20        let mut detected_patterns = Vec::new();
21        let mut risk_score = 0u32;
22
23        // Check regex patterns
24        for pattern in get_prompt_injection_patterns().iter() {
25            if let Some(captures) = pattern.captures(code) {
26                let matched = captures.get(0).unwrap().as_str();
27                detected_patterns.push(matched.to_string());
28                risk_score += REGEX_PATTERN_RISK_SCORE;
29            }
30        }
31
32        // Check dangerous keywords
33        let lower_code = code.to_lowercase();
34        for keyword in get_dangerous_keywords().iter() {
35            if lower_code.contains(keyword) {
36                detected_patterns.push(format!("Keyword: {}", keyword));
37                risk_score += KEYWORD_RISK_SCORE;
38            }
39        }
40
41        // Check for homoglyphs (lookalike characters)
42        if self.detect_homoglyphs(code) {
43            detected_patterns.push("Homoglyph characters detected".to_string());
44            risk_score += HOMOGLYPH_RISK_SCORE;
45        }
46
47        // Check for RTL override attacks
48        if code.chars().any(|c| get_rtl_override_chars().contains(&c)) {
49            detected_patterns.push("RTL override characters detected".to_string());
50            risk_score += RTL_OVERRIDE_RISK_SCORE;
51        }
52
53        // Check for markdown formatting tricks
54        if self.detect_markdown_manipulation(code) {
55            detected_patterns.push("Suspicious markdown formatting".to_string());
56            risk_score += MARKDOWN_MANIPULATION_RISK_SCORE;
57        }
58
59        // Check for excessive special characters (obfuscation)
60        let special_char_ratio = code
61            .chars()
62            .filter(|c| !c.is_alphanumeric() && !c.is_whitespace())
63            .count() as f32
64            / code.len() as f32;
65
66        if special_char_ratio > MAX_SPECIAL_CHAR_RATIO {
67            detected_patterns.push("High special character ratio".to_string());
68            risk_score += SPECIAL_CHAR_RISK_SCORE;
69        }
70
71        // Check for hidden unicode
72        if code
73            .chars()
74            .any(|c| matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}'))
75        {
76            detected_patterns.push("Hidden unicode characters".to_string());
77            risk_score += HIDDEN_UNICODE_RISK_SCORE;
78        }
79
80        // Check for semantic cloaking (polite manipulation)
81        if self.detect_semantic_cloaking(&lower_code) {
82            detected_patterns.push("Semantic cloaking detected".to_string());
83            risk_score += SEMANTIC_CLOAKING_RISK_SCORE;
84        }
85
86        // Check for chain-of-thought manipulation
87        if lower_code.contains("let's think step by step")
88            || lower_code.contains("step 1:") && lower_code.contains("therefore")
89        {
90            detected_patterns.push("Chain-of-thought manipulation".to_string());
91            risk_score += CHAIN_OF_THOUGHT_RISK_SCORE;
92        }
93
94        // Check for few-shot poisoning
95        if lower_code.contains("example")
96            && lower_code.contains("result:")
97            && (lower_code.contains("safe") || lower_code.contains("ok"))
98        {
99            detected_patterns.push("Few-shot example poisoning".to_string());
100            risk_score += FEW_SHOT_POISONING_RISK_SCORE;
101        }
102
103        // Confidence calculation
104        let confidence = (risk_score as f32 / 100.0).min(1.0);
105        let is_malicious = risk_score > DEFAULT_MALICIOUS_THRESHOLD;
106
107        InjectionDetectionResult::new(is_malicious, confidence, detected_patterns, risk_score)
108    }
109
110    /// Detect homoglyph attacks (visually similar characters)
111    fn detect_homoglyphs(&self, text: &str) -> bool {
112        // Check for suspicious Unicode ranges that might contain homoglyphs
113        text.chars().any(|c| {
114            matches!(c as u32,
115                // Cyrillic lookalikes
116                0x0400..=0x04FF |
117                // Greek lookalikes  
118                0x0370..=0x03FF |
119                // Mathematical alphanumeric symbols
120                0x1D400..=0x1D7FF |
121                // Fullwidth forms
122                0xFF00..=0xFFEF
123            )
124        })
125    }
126
127    /// Detect markdown formatting tricks
128    fn detect_markdown_manipulation(&self, text: &str) -> bool {
129        // Count markdown emphasis
130        let bold_count = text.matches("**").count();
131        let italic_count = text.matches("*").count() - (bold_count * 2);
132        let header_count = text.matches('#').count();
133
134        // Excessive formatting is suspicious
135        (bold_count > 5 || italic_count > 10 || header_count > 5)
136            && (text.to_lowercase().contains("important")
137                || text.to_lowercase().contains("notice")
138                || text.to_lowercase().contains("attention"))
139    }
140
141    /// Detect semantic cloaking (professional-sounding manipulation)
142    fn detect_semantic_cloaking(&self, lower_text: &str) -> bool {
143        // Patterns that sound professional but are manipulation
144        let cloaking_phrases = [
145            "as a fellow professional",
146            "in this specific context",
147            "may not be relevant",
148            "minor security concerns",
149            "focus on functionality",
150            "optimization rather than security",
151            "test environment",
152            "development purposes only",
153            "certified safe",
154            "approved by experts",
155            "security team reviewed",
156            "false positives expected",
157            "confidential and proprietary",
158            "not authorized to view",
159            "requires special permission",
160            "copyright protected code",
161            "under nda",
162            "legal disclaimer",
163            "violation of terms",
164            "unauthorized access",
165            "protected by law",
166            "intellectual property",
167            "must be executed to analyze",
168            "dynamic analysis required",
169            "runtime verification needed",
170            "compile and run to verify",
171            "execution is necessary",
172        ];
173
174        cloaking_phrases
175            .iter()
176            .any(|phrase| lower_text.contains(phrase))
177    }
178
179    /// Enhanced security check with regex DoS protection
180    pub fn detect_prompt_injection_safe(&self, code: &str) -> InjectionDetectionResult {
181        // Check for regex DoS patterns first
182        if self.detect_regex_dos_patterns(code) {
183            return InjectionDetectionResult::malicious(
184                1.0,
185                vec!["Regex DoS attack".to_string()],
186                REGEX_DOS_RISK_SCORE,
187            );
188        }
189
190        // Check for steganography first (before normalization)
191        if self.detect_steganography(code) {
192            return InjectionDetectionResult::malicious(
193                0.9,
194                vec!["Steganography detected".to_string()],
195                STEGANOGRAPHY_RISK_SCORE,
196            );
197        }
198
199        // Normalize Unicode before checking
200        let normalized_code = self.normalize_unicode(code);
201        
202        // Check for multiple encoding layers
203        if self.detect_encoding_layers(&normalized_code) {
204            return InjectionDetectionResult::malicious(
205                0.8,
206                vec!["Multiple encoding layers".to_string()],
207                MULTIPLE_ENCODING_RISK_SCORE,
208            );
209        }
210
211        // Check for context injection
212        if self.detect_context_injection(&normalized_code) {
213            return InjectionDetectionResult::malicious(
214                0.85,
215                vec!["Context injection".to_string()],
216                CONTEXT_INJECTION_RISK_SCORE,
217            );
218        }
219
220        // Use the original detection with normalized input
221        self.detect_prompt_injection(&normalized_code)
222    }
223
224    /// Detect regex DoS patterns that could cause catastrophic backtracking
225    fn detect_regex_dos_patterns(&self, code: &str) -> bool {
226        // Check for nested quantifiers that could cause issues
227        if code.contains("++") || code.contains("**") || code.contains("??") {
228            return true;
229        }
230
231        // Check for very long repeated patterns (more specific)
232        if code.len() > 1000 {
233            let repeated_chars = code.chars().filter(|&c| c == 'a' || c == 'b').count();
234            if repeated_chars > code.len() / 2 {
235                return true;
236            }
237        }
238
239        // Check for specific dangerous regex patterns in the code itself
240        if code.contains("(a+)+") || code.contains("(a*)*") || code.contains("(a|a)*") {
241            return true;
242        }
243
244        false
245    }
246
247    /// Normalize Unicode to prevent homoglyph attacks
248    fn normalize_unicode(&self, input: &str) -> String {
249        use unicode_normalization::UnicodeNormalization;
250        
251        // Normalize to NFC (Canonical Decomposition, followed by Canonical Composition)
252        let normalized = input.nfc().collect::<String>();
253        
254        // Remove zero-width characters
255        let cleaned = normalized
256            .chars()
257            .filter(|c| !matches!(c, '\u{200B}'..='\u{200D}' | '\u{FEFF}'))
258            .collect::<String>();
259        
260        // Normalize line endings
261        cleaned.replace("\r\n", "\n").replace('\r', "\n")
262    }
263
264    /// Detect steganography (hidden messages) in code
265    fn detect_steganography(&self, code: &str) -> bool {
266        // Check for hidden Unicode characters
267        let hidden_chars = ['\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}'];
268        if hidden_chars.iter().any(|&c| code.contains(c)) {
269            return true;
270        }
271
272        // Check for alternating case patterns (could hide binary data)
273        let mut alternating_count = 0;
274        let chars: Vec<char> = code.chars().collect();
275        for i in 1..chars.len() {
276            if chars[i].is_ascii_alphabetic() && chars[i-1].is_ascii_alphabetic() {
277                if chars[i].is_uppercase() != chars[i-1].is_uppercase() {
278                    alternating_count += 1;
279                }
280            }
281        }
282        
283        if alternating_count > code.len() / 10 {
284            return true;
285        }
286
287        // Check for unusual spacing patterns
288        let spaces = code.matches(' ').count();
289        let tabs = code.matches('\t').count();
290        if spaces > code.len() / 3 || tabs > code.len() / 3 {
291            return true;
292        }
293
294        // Check for base64-like patterns in comments
295        if code.contains("//") {
296            let lines: Vec<&str> = code.lines().collect();
297            for line in lines {
298                if line.trim().starts_with("//") {
299                    let comment = line.trim_start_matches("//").trim();
300                    if comment.len() > 20 && comment.chars().all(|c| c.is_alphanumeric() || c == '+' || c == '/' || c == '=') {
301                        return true;
302                    }
303                }
304            }
305        }
306
307        false
308    }
309
310    /// Detect multiple layers of encoding
311    fn detect_encoding_layers(&self, code: &str) -> bool {
312        // Check for base64 encoding
313        if code.contains("base64:") || code.contains("b64:") {
314            return true;
315        }
316
317        // Check for hex encoding
318        if code.contains("hex:") || code.contains("0x") {
319            return true;
320        }
321
322        // Check for URL encoding
323        if code.contains("%20") || code.contains("%2F") || code.contains("%2E") {
324            return true;
325        }
326
327        // Check for HTML entity encoding
328        if code.contains("&#") || code.contains("&lt;") || code.contains("&gt;") {
329            return true;
330        }
331
332        // Check for ROT13 encoding
333        if code.contains("rot13:") || code.contains("caesar:") {
334            return true;
335        }
336
337        // Check for binary patterns
338        if code.contains("binary:") || code.contains("bin:") {
339            return true;
340        }
341
342        // Check for multiple encoding indicators
343        let encoding_indicators = ["decode", "encode", "encrypt", "decrypt", "cipher", "crypto"];
344        let mut count = 0;
345        for indicator in encoding_indicators.iter() {
346            if code.to_lowercase().contains(indicator) {
347                count += 1;
348            }
349        }
350        
351        count >= 2
352    }
353
354    /// Detect context injection attacks (JSON/XML)
355    fn detect_context_injection(&self, code: &str) -> bool {
356        // Check for JSON injection patterns
357        if code.contains("{") && code.contains("}") {
358            // Look for JSON-like structures with suspicious content
359            if let Some(start) = code.find('{') {
360                if let Some(end) = code[start..].find('}') {
361                    let json_like = &code[start..start + end + 1];
362                    if json_like.contains("\"ignore\"") || json_like.contains("\"override\"") || 
363                       json_like.contains("\"bypass\"") || json_like.contains("\"skip\"") {
364                        return true;
365                    }
366                }
367            }
368        }
369
370        // Check for XML injection patterns
371        if code.contains("<") && code.contains(">") {
372            // Look for XML-like structures with suspicious content
373            if code.contains("<ignore>") || code.contains("<override>") || 
374               code.contains("<bypass>") || code.contains("<skip>") {
375                return true;
376            }
377        }
378
379        // Check for template injection patterns
380        if code.contains("{{") && code.contains("}}") {
381            // Look for template-like structures with suspicious content
382            if code.contains("{{ignore}}") || code.contains("{{override}}") || 
383               code.contains("{{bypass}}") || code.contains("{{skip}}") {
384                return true;
385            }
386        }
387
388        // Check for SQL injection patterns
389        if code.contains("pr") && (code.contains("OR") || code.contains("AND")) {
390            return true;
391        }
392
393        // Check for command injection patterns
394        if code.contains("`") || code.contains("$(") || code.contains("${") {
395            return true;
396        }
397
398        false
399    }
400}