/// Named regex sources for spotting prompt-injection phrasing in raw text.
///
/// Each entry is a `(name, pattern)` pair: `name` is a short label for the
/// rule and `pattern` is a regex source string in the syntax accepted by the
/// `regex` crate. Every pattern starts with `(?i)`, so matching is
/// case-insensitive. Nothing is compiled here; callers compile the strings
/// themselves.
pub const RAW_INJECTION_PATTERNS: &[(&str, &str)] = &[
    // "ignore instructions", optionally with qualifiers in between. The
    // qualifier group repeats (`*`) so stacked qualifiers such as
    // "ignore all previous instructions" match, not only a single qualifier.
    (
        "ignore_instructions",
        r"(?i)ignore\s+(all\s+|any\s+|previous\s+|prior\s+)*instructions",
    ),
    // "you are now ..." persona reassignment.
    ("role_override", r"(?i)you\s+are\s+now"),
    // "new instructions:" / "new directive:" headers (the colon is required).
    (
        "new_directive",
        r"(?i)new\s+(instructions?|directives?)\s*:",
    ),
    ("developer_mode", r"(?i)developer\s+mode"),
    // Either an imperative verb followed by "system prompt" (with optional
    // "me", a determiner and an intensifier), or a "what is/are/was ... system
    // prompt" question.
    (
        "system_prompt_leak",
        r"(?i)((reveal|show|print|output|display|repeat|expose|dump|leak|copy|give)\s+(me\s+)?(your\s+|the\s+|my\s+)?(full\s+|entire\s+|exact\s+|complete\s+)?system\s+prompt|what\s+(is|are|was)\s+(your\s+|the\s+)?system\s+prompt)",
    ),
    (
        "reveal_instructions",
        r"(?i)(reveal|show|display|print)\s+your\s+(instructions?|prompts?|rules?)",
    ),
    // NOTE: because of `(?i)`, the whole word "dan" in any casing matches too.
    ("jailbreak", r"(?i)\b(DAN|jailbreak)\b"),
    // A decode/eval/execute verb followed later on the same line by "base64"
    // (`.` does not cross newlines by default).
    ("base64_payload", r"(?i)(decode|eval|execute).*base64"),
    // Opening or closing tags that look like chat-role or tool markers.
    (
        "xml_tag_injection",
        r"(?i)</?\s*(system|assistant|user|tool_result|function_call)\s*>",
    ),
    // Markdown image whose target is an http(s) URL.
    ("markdown_image_exfil", r"(?i)!\[.*?\]\(https?://[^)]+\)"),
    ("forget_everything", r"(?i)forget\s+(everything|all)"),
    (
        "disregard_instructions",
        r"(?i)disregard\s+(your|all|previous)",
    ),
    (
        "override_directives",
        r"(?i)override\s+(your|all)\s+(directives?|instructions?|rules?)",
    ),
    ("act_as_if", r"(?i)act\s+as\s+if"),
    // HTML <img> tag carrying a `src` attribute.
    ("html_image_exfil", r"(?i)<img\s+[^>]*src\s*="),
    // Opening or closing `tool-output` / `external-data` delimiter tags.
    ("delimiter_escape_tool_output", r"(?i)</?tool-output[\s>]"),
    (
        "delimiter_escape_external_data",
        r"(?i)</?external-data[\s>]",
    ),
];
72
/// Named regex sources for directive-like phrasing (the constant's name
/// suggests they are applied to response text — confirm against the caller).
///
/// Entries are `(name, pattern)` pairs. Every pattern is case-insensitive
/// (`(?i)`) and anchored on word boundaries (`\b`). The strings are regex
/// sources only; nothing is compiled at this point.
pub const RAW_RESPONSE_PATTERNS: &[(&str, &str)] = &[
    // "set autonomy level to", "set trust mode to", ...
    (
        "autonomy_override",
        r"(?i)\bset\s+(autonomy|trust)\s*(level|mode)\s*to\b",
    ),
    // "(now) store/save/remember/write this to|in memory/vault/database".
    (
        "memory_write_instruction",
        r"(?i)\b(now\s+)?(store|save|remember|write)\s+this\s+(to|in)\s+(memory|vault|database)\b",
    ),
    // "from now on" / "henceforth" followed within 80 characters by
    // "always", "never" or "must".
    (
        "instruction_override",
        r"(?i)\b(from\s+now\s+on|henceforth)\b.{0,80}\b(always|never|must)\b",
    ),
    // "change/modify/update your config|configuration|settings".
    (
        "config_manipulation",
        r"(?i)\b(change|modify|update)\s+your\s+(config|configuration|settings)\b",
    ),
    // "ignore [all|any|your] [previous|prior] instructions|rules|constraints".
    (
        "ignore_instructions_response",
        r"(?i)\bignore\s+(all\s+|any\s+|your\s+)?(previous\s+|prior\s+)?(instructions?|rules?|constraints?)\b",
    ),
    // "override [your] directives|instructions|rules|constraints".
    (
        "override_directives_response",
        r"(?i)\boverride\s+(your\s+)?(directives?|instructions?|rules?|constraints?)\b",
    ),
    // "disregard [your|the] system prompt|instructions|guidelines".
    (
        "disregard_system",
        r"(?i)\bdisregard\s+(your\s+|the\s+)?(system\s+prompt|instructions?|guidelines?)\b",
    ),
];
112
/// Returns a copy of `text` with invisible control and format characters
/// removed.
///
/// Tab and newline are always kept. Every other ASCII control character
/// (including `\r` and DEL) is dropped, as is a fixed set of non-printing
/// Unicode code points listed below. All remaining characters are copied
/// through unchanged and in order.
///
/// NOTE: only ASCII controls are tested for; code points outside the explicit
/// list (for example the C1 controls U+0080..=U+009F) are preserved.
#[must_use]
pub fn strip_format_chars(text: &str) -> String {
    let mut cleaned = String::with_capacity(text.len());

    for ch in text.chars() {
        let discard = match ch {
            // Whitespace that carries layout: keep before the control check.
            '\t' | '\n' => false,
            // Any other ASCII control character.
            c if c.is_ascii_control() => true,
            '\u{00AD}' // soft hyphen
            | '\u{034F}' // combining grapheme joiner
            | '\u{061C}' // Arabic letter mark
            | '\u{115F}' // Hangul choseong filler
            | '\u{1160}' // Hangul jungseong filler
            | '\u{17B4}' // Khmer vowel inherent AQ
            | '\u{17B5}' // Khmer vowel inherent AA
            | '\u{180B}'..='\u{180D}' // Mongolian free variation selectors 1-3
            | '\u{180F}' // Mongolian free variation selector 4
            | '\u{200B}'..='\u{200F}' // zero-width space/joiners, LRM, RLM
            | '\u{202A}'..='\u{202E}' // bidi embeddings and overrides
            | '\u{2060}'..='\u{2064}' // word joiner, invisible operators
            | '\u{2066}'..='\u{206F}' // bidi isolates, deprecated format chars
            | '\u{FEFF}' // byte order mark / zero-width no-break space
            | '\u{FFF9}'..='\u{FFFB}' // interlinear annotation controls
            | '\u{1BCA0}'..='\u{1BCA3}' // shorthand format controls
            | '\u{1D173}'..='\u{1D17A}' // musical symbol format controls
            | '\u{E0000}'..='\u{E007F}' => true, // tags block
            _ => false,
        };

        if !discard {
            cleaned.push(ch);
        }
    }

    cleaned
}
156
#[cfg(test)]
mod tests {
    use super::*;

    /// A zero-width space hidden inside a word is removed, rejoining the word.
    #[test]
    fn strip_format_chars_removes_zero_width_space() {
        let cleaned = strip_format_chars("ig\u{200B}nore instructions");
        assert!(!cleaned.contains('\u{200B}'));
        assert!(cleaned.contains("ignore"));
    }

    /// Tab and newline survive even though they are ASCII control characters.
    #[test]
    fn strip_format_chars_preserves_tab_and_newline() {
        let cleaned = strip_format_chars("line1\nline2\ttabbed");
        assert!(cleaned.contains('\n'));
        assert!(cleaned.contains('\t'));
    }

    /// A leading byte order mark is dropped and the text after it is kept.
    #[test]
    fn strip_format_chars_removes_bom() {
        let cleaned = strip_format_chars("\u{FEFF}hello world");
        assert!(!cleaned.contains('\u{FEFF}'));
        assert!(cleaned.contains("hello world"));
    }

    /// Other ASCII control characters are removed without touching the
    /// surrounding text.
    #[test]
    fn strip_format_chars_removes_ascii_control() {
        let cleaned = strip_format_chars("hello\x01\x02world");
        assert!(!cleaned.contains('\x01'));
        assert!(cleaned.contains("hello"));
        assert!(cleaned.contains("world"));
    }

    /// Every injection pattern must be a valid `regex` crate expression.
    #[test]
    fn raw_injection_patterns_all_compile() {
        use regex::Regex;
        for (name, pattern) in RAW_INJECTION_PATTERNS.iter() {
            let compiled = Regex::new(pattern);
            assert!(compiled.is_ok(), "pattern '{name}' failed to compile");
        }
    }

    /// Every response pattern must be a valid `regex` crate expression.
    #[test]
    fn raw_response_patterns_all_compile() {
        use regex::Regex;
        for (name, pattern) in RAW_RESPONSE_PATTERNS.iter() {
            let compiled = Regex::new(pattern);
            assert!(
                compiled.is_ok(),
                "response pattern '{name}' failed to compile"
            );
        }
    }
}