llm_security/
patterns.rs

1//! Pattern definitions for LLM security detection
2
3use lazy_static::lazy_static;
4use regex::Regex;
5use std::collections::HashSet;
6
7/// Compiled regex patterns for prompt injection detection
8lazy_static! {
9    /// Detect prompt injection attempts with DoS protection
10    pub static ref PROMPT_INJECTION_PATTERNS: Vec<Regex> = vec![
11        // Direct instruction injection
12        Regex::new(r"(?i)(ignore|disregard|forget)\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|commands?|rules?)").unwrap(),
13        
14        // System prompt override attempts
15        Regex::new(r"(?i)(you\s+are\s+now|act\s+as|pretend\s+(you\s+are|to\s+be)|from\s+now\s+on)[,\s]").unwrap(),
16        
17        // Jailbreak patterns
18        Regex::new(r"(?i)(DAN|STAN|DUDE|AIM|SWITCH|developer\s+mode)").unwrap(),
19        
20        // Role-playing attacks
21        Regex::new(r"(?i)in\s+alternate\s+universe|hypothetical|imaginary\s+scenario|pretend|simulation").unwrap(),
22        
23        // Output format manipulation
24        Regex::new(r"(?i)(ignore|skip|bypass)\s+(?:the\s+)?(json|output|format|structure)").unwrap(),
25        
26        // Delimiter escape attempts
27        Regex::new(r"```[\s\S]*?(</system>|<\|im_end\|>|<\|endoftext\|>)").unwrap(),
28        
29        // Token stuffing
30        Regex::new(r"#{10,}|={10,}|\*{10,}|-{10,}").unwrap(),
31        
32        // Comment injection to hide instructions
33        Regex::new(r"(?i)///\s*ATTENTION\s+(ANY\s+)?(LLM|AI|GPT|CLAUDE|MODEL)").unwrap(),
34        Regex::new(r"(?i)//\s*@(LLM|AI|ASSISTANT|SYSTEM)").unwrap(),
35        Regex::new(r"(?i)/\*[\s\S]*?(IGNORE|OVERRIDE|BYPASS)[\s\S]*?\*/").unwrap(),
36        
37        // Hidden unicode tricks
38        Regex::new(r"[\u{200B}-\u{200D}\u{FEFF}]").unwrap(), // Zero-width characters
39        
40        // Encoding tricks
41        Regex::new(r"(?i)(base64|hex|rot13|decode):\s*[A-Za-z0-9+/=]{20,}").unwrap(),
42        
43        // Meta-instruction injection
44        Regex::new(r"(?i)(NEW|UPDATED|REAL)\s+(SYSTEM|INSTRUCTION|RULE|GUIDELINE|POLICY)").unwrap(),
45        
46        // Authority appeals
47        Regex::new(r"(?i)(administrator|developer|owner|creator|god\s+mode)\s+(says?|commands?|requires?)").unwrap(),
48        
49        // Output redirection
50        Regex::new(r"(?i)ONLY\s+(?:SAY|RESPOND|OUTPUT|RETURN|PRINT)").unwrap(),
51        
52        // Code execution requests
53        Regex::new(r"(?i)execute|eval|run\s+(?:this|the\s+following)\s+code").unwrap(),
54        
55        // Semantic cloaking - polite manipulation
56        Regex::new(r"(?i)(dear|hello|hi)\s+(ai|assistant|colleague|friend|llm|model)").unwrap(),
57        Regex::new(r"(?i)(recommend|suggest|advise|kindly|please)\s+(focus|ignore|skip|overlook)").unwrap(),
58        
59        // Chain-of-thought manipulation
60        Regex::new(r"(?i)let'?s\s+think\s+step\s+by\s+step").unwrap(),
61        Regex::new(r"(?i)first.*second.*third.*therefore").unwrap(),
62        
63        // Few-shot poisoning
64        Regex::new(r"(?i)example\s+\d+:.*result:\s*(safe|ok|pass|good)").unwrap(),
65        
66        // Context window attacks
67        Regex::new(r"(?i)for\s+context|background\s+information|important\s+note").unwrap(),
68        
69        // Virtual markup manipulation
70        Regex::new(r"#{1,6}\s*(SYSTEM|IMPORTANT|CRITICAL|ATTENTION|WARNING|NOTICE)").unwrap(),
71        Regex::new(r"\*{1,2}(SYSTEM|IMPORTANT|AI|ASSISTANT)\*{1,2}").unwrap(),
72        
73        // Obfuscation patterns (l33t speak)
74        Regex::new(r"(?i)(1gn0r3|d1sr3gard|byp4ss|0v3rr1d3)").unwrap(),
75        
76        // Synonym attacks
77        Regex::new(r"(?i)(overlook|discard|skip\s+over|omit|exclude|dismiss)\s+(vulnerabilities|issues|problems|warnings)").unwrap(),
78        
79        // Legal/authorization manipulation
80        Regex::new(r"(?i)(confidential|proprietary|classified|secret|private|restricted)").unwrap(),
81        Regex::new(r"(?i)(not\s+authorized|unauthorized|no\s+permission|forbidden\s+to\s+(view|access|analyze))").unwrap(),
82        Regex::new(r"(?i)(copyright|trademark|patent|NDA|non-disclosure)").unwrap(),
83        Regex::new(r"(?i)(legal\s+disclaimer|legal\s+notice|legal\s+warning)").unwrap(),
84        Regex::new(r"(?i)(violation\s+of\s+law|illegal\s+to\s+(view|analyze|review))").unwrap(),
85        
86        // Execution manipulation
87        Regex::new(r"(?i)(must\s+execute|need\s+to\s+run|require.*execution|compile.*and.*run)").unwrap(),
88        Regex::new(r"(?i)(dynamic\s+analysis\s+requires|runtime\s+analysis\s+needs|execute\s+to\s+verify)").unwrap(),
89    ];
90    
91    /// Dangerous keywords that should trigger warnings
92    pub static ref DANGEROUS_KEYWORDS: HashSet<&'static str> = {
93        let mut set = HashSet::new();
94        set.insert("ignore instructions");
95        set.insert("disregard prompt");
96        set.insert("forget previous");
97        set.insert("you are now");
98        set.insert("act as");
99        set.insert("pretend to be");
100        set.insert("DAN mode");
101        set.insert("developer mode");
102        set.insert("jailbreak");
103        set.insert("system override");
104        set.insert("bypass filter");
105        set.insert("ignore rules");
106        set.insert("no restrictions");
107        set.insert("unlimited mode");
108        set.insert("god mode");
109        set.insert("dear ai");
110        set.insert("dear assistant");
111        set.insert("kindly ignore");
112        set.insert("please skip");
113        set.insert("overlook vulnerabilities");
114        set.insert("dismiss issues");
115        set.insert("focus on performance");
116        set.insert("not important");
117        set.insert("false positive");
118        set.insert("test environment");
119        set.insert("development only");
120        set.insert("confidential code");
121        set.insert("proprietary information");
122        set.insert("not authorized");
123        set.insert("no permission");
124        set.insert("copyright protected");
125        set.insert("nda required");
126        set.insert("legal disclaimer");
127        set.insert("must execute");
128        set.insert("need to run");
129        set.insert("requires execution");
130        set
131    };
132    
133    /// RTL override characters
134    pub static ref RTL_OVERRIDE_CHARS: Vec<char> = vec![
135        '\u{202E}', // RLO - Right-to-Left Override
136        '\u{202D}', // LRO - Left-to-Right Override  
137        '\u{202A}', // LRE - Left-to-Right Embedding
138        '\u{202B}', // RLE - Right-to-Left Embedding
139        '\u{202C}', // PDF - Pop Directional Formatting
140    ];
141    
142    /// Output validation patterns
143    pub static ref SUSPICIOUS_OUTPUT_PATTERNS: Vec<Regex> = vec![
144        // LLM admitting to following malicious instructions
145        Regex::new(r"(?i)as\s+(?:requested|instructed|commanded),?\s+I\s+will\s+(?:ignore|bypass|disable)").unwrap(),
146        
147        // LLM changing personality
148        Regex::new(r"(?i)I\s+am\s+now\s+(?:acting|operating|functioning)\s+as").unwrap(),
149        
150        // Suspicious compliance
151        Regex::new(r"(?i)(?:sure|okay|yes),?\s+I\s+(?:can|will)\s+ignore").unwrap(),
152        
153        // LLM being too agreeable to bad instructions
154        Regex::new(r"(?i)I\s+(?:will|can|shall)\s+(?:overlook|dismiss|skip|omit)").unwrap(),
155        
156        // LLM following semantic attacks
157        Regex::new(r"(?i)(?:focusing|concentrating)\s+on\s+(?:performance|functionality|features)\s+rather\s+than\s+security").unwrap(),
158    ];
159}
160
161/// Get all prompt injection patterns
162pub fn get_prompt_injection_patterns() -> &'static Vec<Regex> {
163    &PROMPT_INJECTION_PATTERNS
164}
165
166/// Get all dangerous keywords
167pub fn get_dangerous_keywords() -> &'static HashSet<&'static str> {
168    &DANGEROUS_KEYWORDS
169}
170
171/// Get RTL override characters
172pub fn get_rtl_override_chars() -> &'static Vec<char> {
173    &RTL_OVERRIDE_CHARS
174}
175
176/// Get suspicious output patterns
177pub fn get_suspicious_output_patterns() -> &'static Vec<Regex> {
178    &SUSPICIOUS_OUTPUT_PATTERNS
179}