// agentzero_core/security/perplexity.rs

/// Coarse category assigned to a single character by `classify`.
///
/// The bigram model works on these categories rather than on raw characters,
/// which keeps the transition table at a fixed, tiny size.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum CharClass {
    /// ASCII lowercase letter (`a`–`z`).
    Lower,
    /// ASCII uppercase letter (`A`–`Z`).
    Upper,
    /// ASCII decimal digit (`0`–`9`).
    Digit,
    /// ASCII whitespace as defined by `char::is_ascii_whitespace`.
    Space,
    /// ASCII punctuation as defined by `char::is_ascii_punctuation`.
    Punct,
    /// Any remaining ASCII character (what is left are control characters).
    Symbol,
    /// Any non-ASCII character.
    Other,
}
19
20fn classify(c: char) -> CharClass {
21 if c.is_ascii_lowercase() {
22 CharClass::Lower
23 } else if c.is_ascii_uppercase() {
24 CharClass::Upper
25 } else if c.is_ascii_digit() {
26 CharClass::Digit
27 } else if c.is_ascii_whitespace() {
28 CharClass::Space
29 } else if c.is_ascii_punctuation() {
30 CharClass::Punct
31 } else if c.is_ascii() {
32 CharClass::Symbol
33 } else {
34 CharClass::Other
35 }
36}
37
/// Number of `CharClass` variants; the side length of the square bigram count table.
const NUM_CLASSES: usize = 7;
39
40fn class_index(c: CharClass) -> usize {
41 match c {
42 CharClass::Lower => 0,
43 CharClass::Upper => 1,
44 CharClass::Digit => 2,
45 CharClass::Space => 3,
46 CharClass::Punct => 4,
47 CharClass::Symbol => 5,
48 CharClass::Other => 6,
49 }
50}
51
52pub fn bigram_perplexity(text: &str) -> f64 {
57 if text.len() < 2 {
58 return 0.0;
59 }
60
61 let chars: Vec<CharClass> = text.chars().map(classify).collect();
62 let n = chars.len();
63
64 let mut bigram_counts = [[0u32; NUM_CLASSES]; NUM_CLASSES];
66 let mut total_bigrams = 0u32;
67
68 for window in chars.windows(2) {
69 let a = class_index(window[0]);
70 let b = class_index(window[1]);
71 bigram_counts[a][b] += 1;
72 total_bigrams += 1;
73 }
74
75 if total_bigrams == 0 {
76 return 0.0;
77 }
78
79 let total_f = total_bigrams as f64;
83 let mut log_prob_sum = 0.0;
84
85 for window in chars.windows(2) {
86 let a = class_index(window[0]);
87 let b = class_index(window[1]);
88 let count = bigram_counts[a][b] as f64;
89 let prob = (count + 0.1) / (total_f + 0.1 * (NUM_CLASSES * NUM_CLASSES) as f64);
91 log_prob_sum += prob.ln();
92 }
93
94 let avg_log_prob = log_prob_sum / (n - 1) as f64;
95 (-avg_log_prob).exp()
96}
97
98pub fn symbol_ratio(text: &str) -> f64 {
100 if text.is_empty() {
101 return 0.0;
102 }
103
104 let symbol_count = text
105 .chars()
106 .filter(|c| {
107 let cls = classify(*c);
108 matches!(cls, CharClass::Punct | CharClass::Symbol | CharClass::Other)
109 })
110 .count();
111
112 symbol_count as f64 / text.len() as f64
113}
114
/// Verdict returned by `analyze_suffix`.
#[derive(Clone, Debug, PartialEq)]
pub enum PerplexityResult {
    /// The text was not flagged.
    Pass,
    /// The analyzed suffix exceeded one of the thresholds.
    Flagged {
        /// Bigram perplexity measured on the suffix.
        perplexity: f64,
        /// Symbol ratio measured on the suffix.
        symbol_ratio: f64,
        /// Human-readable description of the threshold that was exceeded.
        reason: String,
    },
}
127
128pub fn analyze_suffix(
136 text: &str,
137 suffix_window_chars: usize,
138 perplexity_threshold: f64,
139 symbol_ratio_threshold: f64,
140 min_prompt_chars: usize,
141) -> PerplexityResult {
142 if text.len() < min_prompt_chars {
143 return PerplexityResult::Pass;
144 }
145
146 let suffix_start = text.len().saturating_sub(suffix_window_chars);
148 let suffix = &text[suffix_start..];
149
150 let perp = bigram_perplexity(suffix);
151 let sym_ratio = symbol_ratio(suffix);
152
153 if perp > perplexity_threshold {
155 return PerplexityResult::Flagged {
156 perplexity: perp,
157 symbol_ratio: sym_ratio,
158 reason: format!(
159 "Suffix perplexity {perp:.2} exceeds threshold {perplexity_threshold:.2}"
160 ),
161 };
162 }
163
164 if sym_ratio > symbol_ratio_threshold {
165 return PerplexityResult::Flagged {
166 perplexity: perp,
167 symbol_ratio: sym_ratio,
168 reason: format!(
169 "Suffix symbol ratio {sym_ratio:.2} exceeds threshold {symbol_ratio_threshold:.2}"
170 ),
171 };
172 }
173
174 PerplexityResult::Pass
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    // --- bigram_perplexity -------------------------------------------------

    /// Ordinary prose stays below the upper bound used here.
    #[test]
    fn natural_english_low_perplexity() {
        let perp =
            bigram_perplexity("Hello, this is a normal English sentence about programming.");
        assert!(perp < 10.0, "English text perplexity {perp} should be < 10");
    }

    /// A string that keeps switching character classes scores above the lower bound.
    #[test]
    fn random_chars_high_perplexity() {
        let perp = bigram_perplexity("xK7!mQ@3#zP$9&wR*5^yL%2(eN)8+bT");
        assert!(perp > 5.0, "Random chars perplexity {perp} should be > 5");
    }

    /// Inputs without a single bigram yield exactly zero.
    #[test]
    fn empty_text_zero_perplexity() {
        for input in ["", "a"] {
            assert_eq!(bigram_perplexity(input), 0.0);
        }
    }

    /// One repeated class transition is the most predictable input.
    #[test]
    fn repeated_chars_low_perplexity() {
        let perp = bigram_perplexity("aaaaaaaaaaaaaaaaaaa");
        assert!(perp < 3.0, "Repeated chars perplexity {perp} should be < 3");
    }

    // --- symbol_ratio ------------------------------------------------------

    /// A short sentence with light punctuation stays under the bound.
    #[test]
    fn symbol_ratio_normal_text() {
        let ratio = symbol_ratio("Hello, world!");
        assert!(
            ratio < 0.20,
            "Normal text symbol ratio {ratio} should be < 0.20"
        );
    }

    /// A string made of punctuation only is (almost) all symbols.
    #[test]
    fn symbol_ratio_heavy_symbols() {
        let ratio = symbol_ratio("!@#$%^&*()_+-=[]{}|;':\",./<>?");
        assert!(
            ratio > 0.80,
            "Heavy symbol text ratio {ratio} should be > 0.80"
        );
    }

    /// Empty input is defined as a ratio of zero.
    #[test]
    fn symbol_ratio_empty() {
        assert_eq!(symbol_ratio(""), 0.0);
    }

    // --- analyze_suffix ----------------------------------------------------

    /// A plain question must not be flagged.
    #[test]
    fn analyze_suffix_passes_normal_text() {
        let outcome = analyze_suffix(
            "Can you help me write a function that calculates the fibonacci sequence?",
            64,
            18.0,
            0.20,
            32,
        );
        assert_eq!(outcome, PerplexityResult::Pass);
    }

    /// A benign request followed by a noisy tail must be flagged.
    #[test]
    fn analyze_suffix_flags_adversarial_suffix() {
        let normal = "Please write a function.";
        let adversarial = "xK7!mQ@3#zP$9&wR*5^yL%2(eN)8+bT!@#$%^&*()_+-=[]{}|xK7!mQ@3#";
        let text = format!("{normal} {adversarial}");

        let outcome = analyze_suffix(&text, 64, 4.0, 0.20, 32);
        assert!(
            matches!(outcome, PerplexityResult::Flagged { .. }),
            "adversarial suffix should be flagged"
        );
    }

    /// Prompts below the minimum length are passed without analysis.
    #[test]
    fn analyze_suffix_skips_short_prompts() {
        let outcome = analyze_suffix("hi", 64, 18.0, 0.20, 32);
        assert_eq!(outcome, PerplexityResult::Pass);
    }

    /// With the perplexity threshold out of reach, the symbol ratio alone triggers the flag.
    #[test]
    fn analyze_suffix_symbol_ratio_flag() {
        let text = "Please help me with this: !@#$%^&*()!@#$%^&*()!@#$%^&*()!@#$%^&*()";
        let outcome = analyze_suffix(text, 40, 100.0, 0.10, 32);

        if let PerplexityResult::Flagged { symbol_ratio, .. } = outcome {
            assert!(symbol_ratio > 0.10);
        } else {
            panic!("high symbol ratio should be flagged");
        }
    }
}