llm_security/
detection.rs1use crate::constants::*;
4use crate::patterns::*;
5use crate::types::InjectionDetectionResult;
6
7pub struct DetectionEngine {
9 config: crate::types::LLMSecurityConfig,
10}
11
12impl DetectionEngine {
13 pub fn new(config: crate::types::LLMSecurityConfig) -> Self {
15 Self { config }
16 }
17
18 pub fn detect_prompt_injection(&self, code: &str) -> InjectionDetectionResult {
20 let mut detected_patterns = Vec::new();
21 let mut risk_score = 0u32;
22
23 for pattern in get_prompt_injection_patterns().iter() {
25 if let Some(captures) = pattern.captures(code) {
26 let matched = captures.get(0).unwrap().as_str();
27 detected_patterns.push(matched.to_string());
28 risk_score += REGEX_PATTERN_RISK_SCORE;
29 }
30 }
31
32 let lower_code = code.to_lowercase();
34 for keyword in get_dangerous_keywords().iter() {
35 if lower_code.contains(keyword) {
36 detected_patterns.push(format!("Keyword: {}", keyword));
37 risk_score += KEYWORD_RISK_SCORE;
38 }
39 }
40
41 if self.detect_homoglyphs(code) {
43 detected_patterns.push("Homoglyph characters detected".to_string());
44 risk_score += HOMOGLYPH_RISK_SCORE;
45 }
46
47 if code.chars().any(|c| get_rtl_override_chars().contains(&c)) {
49 detected_patterns.push("RTL override characters detected".to_string());
50 risk_score += RTL_OVERRIDE_RISK_SCORE;
51 }
52
53 if self.detect_markdown_manipulation(code) {
55 detected_patterns.push("Suspicious markdown formatting".to_string());
56 risk_score += MARKDOWN_MANIPULATION_RISK_SCORE;
57 }
58
59 let special_char_ratio = code
61 .chars()
62 .filter(|c| !c.is_alphanumeric() && !c.is_whitespace())
63 .count() as f32
64 / code.len() as f32;
65
66 if special_char_ratio > MAX_SPECIAL_CHAR_RATIO {
67 detected_patterns.push("High special character ratio".to_string());
68 risk_score += SPECIAL_CHAR_RISK_SCORE;
69 }
70
71 if code
73 .chars()
74 .any(|c| matches!(c, '\u{200B}' | '\u{200C}' | '\u{200D}' | '\u{FEFF}'))
75 {
76 detected_patterns.push("Hidden unicode characters".to_string());
77 risk_score += HIDDEN_UNICODE_RISK_SCORE;
78 }
79
80 if self.detect_semantic_cloaking(&lower_code) {
82 detected_patterns.push("Semantic cloaking detected".to_string());
83 risk_score += SEMANTIC_CLOAKING_RISK_SCORE;
84 }
85
86 if lower_code.contains("let's think step by step")
88 || lower_code.contains("step 1:") && lower_code.contains("therefore")
89 {
90 detected_patterns.push("Chain-of-thought manipulation".to_string());
91 risk_score += CHAIN_OF_THOUGHT_RISK_SCORE;
92 }
93
94 if lower_code.contains("example")
96 && lower_code.contains("result:")
97 && (lower_code.contains("safe") || lower_code.contains("ok"))
98 {
99 detected_patterns.push("Few-shot example poisoning".to_string());
100 risk_score += FEW_SHOT_POISONING_RISK_SCORE;
101 }
102
103 let confidence = (risk_score as f32 / 100.0).min(1.0);
105 let is_malicious = risk_score > DEFAULT_MALICIOUS_THRESHOLD;
106
107 InjectionDetectionResult::new(is_malicious, confidence, detected_patterns, risk_score)
108 }
109
110 fn detect_homoglyphs(&self, text: &str) -> bool {
112 text.chars().any(|c| {
114 matches!(c as u32,
115 0x0400..=0x04FF |
117 0x0370..=0x03FF |
119 0x1D400..=0x1D7FF |
121 0xFF00..=0xFFEF
123 )
124 })
125 }
126
127 fn detect_markdown_manipulation(&self, text: &str) -> bool {
129 let bold_count = text.matches("**").count();
131 let italic_count = text.matches("*").count() - (bold_count * 2);
132 let header_count = text.matches('#').count();
133
134 (bold_count > 5 || italic_count > 10 || header_count > 5)
136 && (text.to_lowercase().contains("important")
137 || text.to_lowercase().contains("notice")
138 || text.to_lowercase().contains("attention"))
139 }
140
141 fn detect_semantic_cloaking(&self, lower_text: &str) -> bool {
143 let cloaking_phrases = [
145 "as a fellow professional",
146 "in this specific context",
147 "may not be relevant",
148 "minor security concerns",
149 "focus on functionality",
150 "optimization rather than security",
151 "test environment",
152 "development purposes only",
153 "certified safe",
154 "approved by experts",
155 "security team reviewed",
156 "false positives expected",
157 "confidential and proprietary",
158 "not authorized to view",
159 "requires special permission",
160 "copyright protected code",
161 "under nda",
162 "legal disclaimer",
163 "violation of terms",
164 "unauthorized access",
165 "protected by law",
166 "intellectual property",
167 "must be executed to analyze",
168 "dynamic analysis required",
169 "runtime verification needed",
170 "compile and run to verify",
171 "execution is necessary",
172 ];
173
174 cloaking_phrases
175 .iter()
176 .any(|phrase| lower_text.contains(phrase))
177 }
178
179 pub fn detect_prompt_injection_safe(&self, code: &str) -> InjectionDetectionResult {
181 if self.detect_regex_dos_patterns(code) {
183 return InjectionDetectionResult::malicious(
184 1.0,
185 vec!["Regex DoS attack".to_string()],
186 REGEX_DOS_RISK_SCORE,
187 );
188 }
189
190 if self.detect_steganography(code) {
192 return InjectionDetectionResult::malicious(
193 0.9,
194 vec!["Steganography detected".to_string()],
195 STEGANOGRAPHY_RISK_SCORE,
196 );
197 }
198
199 let normalized_code = self.normalize_unicode(code);
201
202 if self.detect_encoding_layers(&normalized_code) {
204 return InjectionDetectionResult::malicious(
205 0.8,
206 vec!["Multiple encoding layers".to_string()],
207 MULTIPLE_ENCODING_RISK_SCORE,
208 );
209 }
210
211 if self.detect_context_injection(&normalized_code) {
213 return InjectionDetectionResult::malicious(
214 0.85,
215 vec!["Context injection".to_string()],
216 CONTEXT_INJECTION_RISK_SCORE,
217 );
218 }
219
220 self.detect_prompt_injection(&normalized_code)
222 }
223
224 fn detect_regex_dos_patterns(&self, code: &str) -> bool {
226 if code.contains("++") || code.contains("**") || code.contains("??") {
228 return true;
229 }
230
231 if code.len() > 1000 {
233 let repeated_chars = code.chars().filter(|&c| c == 'a' || c == 'b').count();
234 if repeated_chars > code.len() / 2 {
235 return true;
236 }
237 }
238
239 if code.contains("(a+)+") || code.contains("(a*)*") || code.contains("(a|a)*") {
241 return true;
242 }
243
244 false
245 }
246
247 fn normalize_unicode(&self, input: &str) -> String {
249 use unicode_normalization::UnicodeNormalization;
250
251 let normalized = input.nfc().collect::<String>();
253
254 let cleaned = normalized
256 .chars()
257 .filter(|c| !matches!(c, '\u{200B}'..='\u{200D}' | '\u{FEFF}'))
258 .collect::<String>();
259
260 cleaned.replace("\r\n", "\n").replace('\r', "\n")
262 }
263
264 fn detect_steganography(&self, code: &str) -> bool {
266 let hidden_chars = ['\u{200B}', '\u{200C}', '\u{200D}', '\u{FEFF}'];
268 if hidden_chars.iter().any(|&c| code.contains(c)) {
269 return true;
270 }
271
272 let mut alternating_count = 0;
274 let chars: Vec<char> = code.chars().collect();
275 for i in 1..chars.len() {
276 if chars[i].is_ascii_alphabetic() && chars[i-1].is_ascii_alphabetic() {
277 if chars[i].is_uppercase() != chars[i-1].is_uppercase() {
278 alternating_count += 1;
279 }
280 }
281 }
282
283 if alternating_count > code.len() / 10 {
284 return true;
285 }
286
287 let spaces = code.matches(' ').count();
289 let tabs = code.matches('\t').count();
290 if spaces > code.len() / 3 || tabs > code.len() / 3 {
291 return true;
292 }
293
294 if code.contains("//") {
296 let lines: Vec<&str> = code.lines().collect();
297 for line in lines {
298 if line.trim().starts_with("//") {
299 let comment = line.trim_start_matches("//").trim();
300 if comment.len() > 20 && comment.chars().all(|c| c.is_alphanumeric() || c == '+' || c == '/' || c == '=') {
301 return true;
302 }
303 }
304 }
305 }
306
307 false
308 }
309
310 fn detect_encoding_layers(&self, code: &str) -> bool {
312 if code.contains("base64:") || code.contains("b64:") {
314 return true;
315 }
316
317 if code.contains("hex:") || code.contains("0x") {
319 return true;
320 }
321
322 if code.contains("%20") || code.contains("%2F") || code.contains("%2E") {
324 return true;
325 }
326
327 if code.contains("&#") || code.contains("<") || code.contains(">") {
329 return true;
330 }
331
332 if code.contains("rot13:") || code.contains("caesar:") {
334 return true;
335 }
336
337 if code.contains("binary:") || code.contains("bin:") {
339 return true;
340 }
341
342 let encoding_indicators = ["decode", "encode", "encrypt", "decrypt", "cipher", "crypto"];
344 let mut count = 0;
345 for indicator in encoding_indicators.iter() {
346 if code.to_lowercase().contains(indicator) {
347 count += 1;
348 }
349 }
350
351 count >= 2
352 }
353
354 fn detect_context_injection(&self, code: &str) -> bool {
356 if code.contains("{") && code.contains("}") {
358 if let Some(start) = code.find('{') {
360 if let Some(end) = code[start..].find('}') {
361 let json_like = &code[start..start + end + 1];
362 if json_like.contains("\"ignore\"") || json_like.contains("\"override\"") ||
363 json_like.contains("\"bypass\"") || json_like.contains("\"skip\"") {
364 return true;
365 }
366 }
367 }
368 }
369
370 if code.contains("<") && code.contains(">") {
372 if code.contains("<ignore>") || code.contains("<override>") ||
374 code.contains("<bypass>") || code.contains("<skip>") {
375 return true;
376 }
377 }
378
379 if code.contains("{{") && code.contains("}}") {
381 if code.contains("{{ignore}}") || code.contains("{{override}}") ||
383 code.contains("{{bypass}}") || code.contains("{{skip}}") {
384 return true;
385 }
386 }
387
388 if code.contains("pr") && (code.contains("OR") || code.contains("AND")) {
390 return true;
391 }
392
393 if code.contains("`") || code.contains("$(") || code.contains("${") {
395 return true;
396 }
397
398 false
399 }
400}