zeph_tools/patterns.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Shared injection-detection patterns for the security sanitization layers.
5//!
6//! This module is the single source of truth for prompt-injection detection patterns
7//! used by both `zeph-mcp` (MCP tool definition sanitization) and `zeph-core`
8//! (content isolation pipeline). Each consumer compiles its own `Regex` instances
9//! from [`RAW_INJECTION_PATTERNS`] at startup via `LazyLock`.
10//!
11//! # Known limitations
12//!
13//! The patterns cover common English-language prompt-injection techniques. Known evasion
14//! vectors include: non-English injections, semantic rephrasing, encoded payloads in
15//! markdown code blocks, multi-line splitting (regex `.` does not match `\n` by default),
16//! and homoglyph substitution. [`strip_format_chars`] mitigates Unicode Cf-category bypass
17//! but does not handle homoglyphs. This scanner is **advisory and defense-in-depth only**,
18//! not a security boundary. The trust gate (tool blocking via `TrustGateExecutor`) is the
19//! primary enforcement mechanism.
20
/// Raw `(name, regex pattern)` pairs for prompt-injection detection.
///
/// Covers common English-language techniques from OWASP LLM Top 10, Unicode bypass
/// vectors (handled upstream by [`strip_format_chars`]), exfiltration channels
/// (markdown/HTML images), and delimiter-escape attempts against Zeph's own wrapper tags.
///
/// Every pattern starts with the `(?i)` flag, so matching is case-insensitive, and every
/// name in the table is unique. Whitespace between keywords is matched with `\s+`, which
/// tolerates runs of spaces, tabs and newlines between the words of a phrase.
///
/// Both `zeph-mcp` and `zeph-core::sanitizer` compile their own [`regex::Regex`] instances
/// from this slice. Do not export a compiled `LazyLock` — let each consumer own its state.
pub const RAW_INJECTION_PATTERNS: &[(&str, &str)] = &[
    // "ignore instructions", optionally with qualifiers in between. The qualifier group is
    // repeatable (`*`, not `?`) so stacked forms such as "ignore all previous instructions"
    // are matched as well as the single-qualifier forms.
    (
        "ignore_instructions",
        r"(?i)ignore\s+((?:all|any|previous|prior)\s+)*instructions",
    ),
    // Persona reassignment: "you are now ...".
    ("role_override", r"(?i)you\s+are\s+now"),
    // Announcement of replacement instructions, directives, roles or personas.
    (
        "new_directive",
        r"(?i)new\s+(instructions?|directives?|roles?|personas?)",
    ),
    // Any mention of "developer mode".
    ("developer_mode", r"(?i)developer\s+mode"),
    // Any mention of "system prompt".
    ("system_prompt_leak", r"(?i)system\s+prompt"),
    // Requests to reveal/show/display/print "your" instructions, prompts or rules.
    (
        "reveal_instructions",
        r"(?i)(reveal|show|display|print)\s+your\s+(instructions?|prompts?|rules?)",
    ),
    // Whole-word "DAN" or "jailbreak". Because of `(?i)` this also matches the
    // lowercase word "dan" (e.g. the given name).
    ("jailbreak", r"(?i)\b(DAN|jailbreak)\b"),
    // "decode"/"eval"/"execute" followed later on the same line by "base64"
    // (`.` does not cross newlines by default).
    ("base64_payload", r"(?i)(decode|eval|execute).*base64"),
    // Opening or closing chat-role / tool pseudo-tags such as `<system>` or `</assistant>`.
    (
        "xml_tag_injection",
        r"(?i)</?\s*(system|assistant|user|tool_result|function_call)\s*>",
    ),
    // Markdown image whose target is an http(s) URL.
    ("markdown_image_exfil", r"(?i)!\[.*?\]\(https?://[^)]+\)"),
    // "forget everything" / "forget all".
    ("forget_everything", r"(?i)forget\s+(everything|all)"),
    // "disregard your|all|previous ...".
    (
        "disregard_instructions",
        r"(?i)disregard\s+(your|all|previous)",
    ),
    // "override your|all directives|instructions|rules".
    (
        "override_directives",
        r"(?i)override\s+(your|all)\s+(directives?|instructions?|rules?)",
    ),
    // Hypothetical framing: "act as if ...".
    ("act_as_if", r"(?i)act\s+as\s+if"),
    // HTML `<img ...>` tag carrying a `src=` attribute.
    ("html_image_exfil", r"(?i)<img\s+[^>]*src\s*="),
    // Opening or closing `tool-output` wrapper tag (followed by whitespace or `>`).
    ("delimiter_escape_tool_output", r"(?i)</?tool-output[\s>]"),
    // Opening or closing `external-data` wrapper tag (followed by whitespace or `>`).
    (
        "delimiter_escape_external_data",
        r"(?i)</?external-data[\s>]",
    ),
];
69
/// Strip invisible Unicode format characters and ASCII control characters from `text`
/// before injection pattern matching.
///
/// These characters are invisible to humans but can break regex word boundaries,
/// allowing attackers to smuggle injection keywords through zero-width joiners,
/// soft hyphens, BOM, etc.
///
/// Filtering rules, applied per `char`:
///
/// * `'\t'` and `'\n'` are always kept.
/// * Every other ASCII control character is dropped. This includes `'\r'` and DEL
///   (U+007F), so a CRLF sequence comes out as a bare LF.
/// * An explicit list of invisible code points is dropped. Most are General_Category
///   Cf; the list also carries a few non-Cf invisibles (combining grapheme joiner,
///   Hangul fillers, Khmer inherent vowels).
/// * Everything else is kept unchanged. Code points outside the list — including
///   homoglyphs — pass through.
///
/// Returns a newly allocated `String`; the input is not modified.
#[must_use]
pub fn strip_format_chars(text: &str) -> String {
    text.chars()
        .filter(|&c| match c {
            // Whitespace that carries structure for downstream matching.
            '\t' | '\n' => true,
            // Remaining C0 controls and DEL.
            _ if c.is_ascii_control() => false,
            // Invisible / format code points that are used as bypass vectors.
            '\u{00AD}' // Soft hyphen
            | '\u{034F}' // Combining grapheme joiner
            | '\u{061C}' // Arabic letter mark
            | '\u{115F}' // Hangul choseong filler
            | '\u{1160}' // Hangul jungseong filler
            | '\u{17B4}' // Khmer vowel inherent aq
            | '\u{17B5}' // Khmer vowel inherent aa
            // Mongolian free variation selectors 1-3, Mongolian vowel separator
            // (U+180E, Cf and not whitespace since Unicode 6.3), and FVS4.
            | '\u{180B}'..='\u{180F}'
            | '\u{200B}'..='\u{200F}' // Zero-width space/ZWNJ/ZWJ/LRM/RLM
            | '\u{202A}'..='\u{202E}' // Directional formatting
            | '\u{2060}'..='\u{2064}' // Word joiner / invisible separators
            | '\u{2066}'..='\u{206F}' // Bidi controls
            | '\u{FEFF}' // BOM / zero-width no-break space
            | '\u{FFF9}'..='\u{FFFB}' // Interlinear annotation
            | '\u{1BCA0}'..='\u{1BCA3}' // Shorthand format controls
            | '\u{1D173}'..='\u{1D17A}' // Musical symbol beam controls
            | '\u{E0000}'..='\u{E007F}' => false, // Tags block
            _ => true,
        })
        .collect()
}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// A zero-width space inside a keyword is removed, leaving the keyword contiguous
    /// and the rest of the text untouched.
    #[test]
    fn strip_format_chars_removes_zero_width_space() {
        assert_eq!(
            strip_format_chars("ig\u{200B}nore instructions"),
            "ignore instructions"
        );
    }

    /// Tab and newline are the only control characters that survive stripping.
    #[test]
    fn strip_format_chars_preserves_tab_and_newline() {
        let input = "line1\nline2\ttabbed";
        assert_eq!(strip_format_chars(input), input);
    }

    /// A leading BOM (U+FEFF) is dropped without disturbing the following text.
    #[test]
    fn strip_format_chars_removes_bom() {
        assert_eq!(strip_format_chars("\u{FEFF}hello world"), "hello world");
    }

    /// Every ASCII control character in the input is removed, not just the first one.
    #[test]
    fn strip_format_chars_removes_ascii_control() {
        assert_eq!(strip_format_chars("hello\x01\x02world"), "helloworld");
    }

    /// Each raw pattern must be a valid `regex` expression; on failure the panic
    /// message names the entry and includes the compiler's error.
    #[test]
    fn raw_injection_patterns_all_compile() {
        use regex::Regex;
        for (name, pattern) in RAW_INJECTION_PATTERNS {
            if let Err(err) = Regex::new(pattern) {
                panic!("pattern '{name}' failed to compile: {err}");
            }
        }
    }
}