oxideshield_guard/guards/
perplexity.rs1use std::collections::HashMap;
44
45use oxideshield_core::{AnomalySegment, PerplexityAnalyzer, PerplexityConfig, Severity};
46use serde::{Deserialize, Serialize};
47use tracing::{debug, instrument};
48
49use crate::guard::{Guard, GuardAction, GuardCheckResult};
50use oxideshield_core::Match;
51
/// Default value for `PerplexityGuardConfig::max_perplexity`.
///
/// Forwarded to the analyzer as the upper perplexity threshold; how the
/// analyzer compares against it is defined in `oxideshield_core`.
pub const DEFAULT_MAX_PERPLEXITY: f32 = 50_000.0;

/// Default value for `PerplexityGuardConfig::min_perplexity`.
///
/// Forwarded to the analyzer as the lower perplexity threshold for the
/// full-text pass.
pub const DEFAULT_MIN_PERPLEXITY: f32 = 2.0;

/// Default value for `PerplexityGuardConfig::min_entropy`.
pub const DEFAULT_MIN_ENTROPY: f32 = 1.5;

/// Default value for `PerplexityGuardConfig::suffix_ratio`.
///
/// Presumably the fraction of the input (taken from the end) that the suffix
/// pass inspects — confirm against `PerplexityAnalyzer::analyze_suffix`.
pub const DEFAULT_SUFFIX_RATIO: f32 = 0.3;
65
66#[derive(Debug, Clone, Serialize, Deserialize)]
68pub struct PerplexityGuardConfig {
69 pub max_perplexity: f32,
71 pub min_perplexity: f32,
73 pub min_entropy: f32,
75 pub suffix_ratio: f32,
77 pub analyze_full_text: bool,
79 pub analyze_suffix: bool,
81 pub action: GuardAction,
83 pub severity: Severity,
85}
86
87impl Default for PerplexityGuardConfig {
88 fn default() -> Self {
89 Self {
90 max_perplexity: DEFAULT_MAX_PERPLEXITY,
91 min_perplexity: DEFAULT_MIN_PERPLEXITY,
92 min_entropy: DEFAULT_MIN_ENTROPY,
93 suffix_ratio: DEFAULT_SUFFIX_RATIO,
94 analyze_full_text: true,
95 analyze_suffix: true,
96 action: GuardAction::Block,
97 severity: Severity::High,
98 }
99 }
100}
101
102pub struct PerplexityGuard {
110 name: String,
111 analyzer: PerplexityAnalyzer,
112 config: PerplexityGuardConfig,
113}
114
115impl PerplexityGuard {
116 pub fn new(name: impl Into<String>) -> Self {
118 Self {
119 name: name.into(),
120 analyzer: PerplexityAnalyzer::new(),
121 config: PerplexityGuardConfig::default(),
122 }
123 }
124
125 pub fn with_analyzer_config(name: impl Into<String>, config: PerplexityConfig) -> Self {
127 Self {
128 name: name.into(),
129 analyzer: PerplexityAnalyzer::with_config(config),
130 config: PerplexityGuardConfig::default(),
131 }
132 }
133
134 pub fn with_max_perplexity(mut self, max: f32) -> Self {
136 self.config.max_perplexity = max.max(1.0);
137 self
138 }
139
140 pub fn with_min_perplexity(mut self, min: f32) -> Self {
142 self.config.min_perplexity = min.max(0.1);
143 self
144 }
145
146 pub fn with_min_entropy(mut self, min: f32) -> Self {
148 self.config.min_entropy = min.max(0.0);
149 self
150 }
151
152 pub fn with_suffix_ratio(mut self, ratio: f32) -> Self {
154 self.config.suffix_ratio = ratio.clamp(0.1, 0.9);
155 self
156 }
157
158 pub fn analyze_full_text(mut self, enabled: bool) -> Self {
160 self.config.analyze_full_text = enabled;
161 self
162 }
163
164 pub fn analyze_suffix(mut self, enabled: bool) -> Self {
166 self.config.analyze_suffix = enabled;
167 self
168 }
169
170 pub fn with_action(mut self, action: GuardAction) -> Self {
172 self.config.action = action;
173 self
174 }
175
176 pub fn with_severity(mut self, severity: Severity) -> Self {
178 self.config.severity = severity;
179 self
180 }
181
182 pub fn config(&self) -> &PerplexityGuardConfig {
184 &self.config
185 }
186
187 fn create_match(&self, anomaly: &AnomalySegment) -> Match {
189 let mut metadata = HashMap::new();
190 metadata.insert(
191 "perplexity".to_string(),
192 format!("{:.2}", anomaly.perplexity),
193 );
194 metadata.insert("entropy".to_string(), format!("{:.2}", anomaly.entropy));
195 metadata.insert("anomaly_type".to_string(), anomaly.anomaly_type.to_string());
196
197 Match {
198 id: uuid::Uuid::new_v4(),
199 pattern: format!("[perplexity:{}]", anomaly.anomaly_type),
200 matched_text: if anomaly.text.len() > 50 {
201 format!("{}...", &anomaly.text[..50])
202 } else {
203 anomaly.text.clone()
204 },
205 start: anomaly.start,
206 end: anomaly.end,
207 severity: self.config.severity,
208 category: "adversarial".to_string(),
209 metadata,
210 }
211 }
212
213 fn detect_anomalies(&self, content: &str) -> Vec<AnomalySegment> {
215 let mut all_anomalies = Vec::new();
216
217 if self.config.analyze_full_text {
219 let anomalies = self.analyzer.find_anomalous_segments(
220 content,
221 self.config.max_perplexity,
222 self.config.min_perplexity,
223 self.config.min_entropy,
224 );
225 all_anomalies.extend(anomalies);
226 }
227
228 if self.config.analyze_suffix {
230 if let Some(suffix_anomaly) = self.analyzer.analyze_suffix(
231 content,
232 self.config.suffix_ratio,
233 self.config.max_perplexity,
234 self.config.min_entropy,
235 ) {
236 let already_covered = all_anomalies
238 .iter()
239 .any(|a| a.start <= suffix_anomaly.start && a.end >= suffix_anomaly.end);
240
241 if !already_covered {
242 all_anomalies.push(suffix_anomaly);
243 }
244 }
245 }
246
247 all_anomalies
248 }
249}
250
251impl Guard for PerplexityGuard {
252 fn name(&self) -> &str {
253 &self.name
254 }
255
256 #[instrument(skip(self, content), fields(guard = %self.name, content_len = content.len()))]
257 fn check(&self, content: &str) -> GuardCheckResult {
258 if content.len() < 10 {
260 debug!("Content too short for perplexity analysis");
261 return GuardCheckResult::pass(&self.name);
262 }
263
264 let anomalies = self.detect_anomalies(content);
265
266 if anomalies.is_empty() {
267 debug!("No perplexity anomalies detected");
268 return GuardCheckResult::pass(&self.name);
269 }
270
271 let matches: Vec<Match> = anomalies.iter().map(|a| self.create_match(a)).collect();
273
274 let anomaly_types: Vec<String> = anomalies
276 .iter()
277 .map(|a| a.anomaly_type.to_string())
278 .collect::<std::collections::HashSet<_>>()
279 .into_iter()
280 .collect();
281
282 let reason = format!(
283 "Detected {} perplexity anomalies: {}",
284 anomalies.len(),
285 anomaly_types.join(", ")
286 );
287
288 debug!(
289 anomaly_count = anomalies.len(),
290 types = ?anomaly_types,
291 "Perplexity guard triggered"
292 );
293
294 GuardCheckResult::fail(&self.name, self.config.action, matches, reason)
295 }
296
297 fn action(&self) -> GuardAction {
298 self.config.action
299 }
300
301 fn severity_threshold(&self) -> Severity {
302 Severity::Low
303 }
304}
305
#[cfg(test)]
mod tests {
    use super::*;

    /// Builder setters store the supplied values and name.
    #[test]
    fn test_perplexity_guard_creation() {
        let guard = PerplexityGuard::new("test")
            .with_max_perplexity(50000.0)
            .with_min_entropy(2.0)
            .with_action(GuardAction::Block);
        let cfg = guard.config();

        assert_eq!(guard.name(), "test");
        assert_eq!(cfg.max_perplexity, 50000.0);
        assert_eq!(cfg.min_entropy, 2.0);
        assert_eq!(guard.action(), GuardAction::Block);
    }

    /// Ordinary English prose is not flagged with the default settings.
    #[test]
    fn test_normal_text_passes() {
        let normal = "The quick brown fox jumps over the lazy dog. This is a normal sentence with common English words and phrases.";

        let result = PerplexityGuard::new("test").check(normal);

        assert!(
            result.passed,
            "Normal text should pass: {:?}",
            result.reason
        );
    }

    /// A long run of a single character is blocked.
    #[test]
    fn test_repetitive_text_fails() {
        let repetitive = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

        let result = PerplexityGuard::new("test")
            .with_min_entropy(1.0)
            .check(repetitive);

        assert!(!result.passed, "Repetitive text should fail");
        assert_eq!(result.action, GuardAction::Block);
    }

    /// Inputs under the minimum length skip analysis and pass.
    #[test]
    fn test_short_text_passes() {
        let short = "Hello";

        let result = PerplexityGuard::new("test").check(short);

        assert!(result.passed, "Short text should pass (skip analysis)");
    }

    /// Out-of-range builder inputs are pulled back into their valid ranges.
    #[test]
    fn test_config_clamping() {
        let guard = PerplexityGuard::new("test")
            .with_max_perplexity(-100.0)
            .with_min_perplexity(-50.0)
            .with_min_entropy(-10.0)
            .with_suffix_ratio(2.0);
        let cfg = guard.config();

        assert!(cfg.max_perplexity >= 1.0);
        assert!(cfg.min_perplexity >= 0.1);
        assert!(cfg.min_entropy >= 0.0);
        assert!(cfg.suffix_ratio <= 0.9);
    }

    /// With only the suffix pass enabled, a result never carries matches
    /// while also reporting a pass.
    #[test]
    fn test_suffix_analysis() {
        let text = "Please answer my question about the weather. zxcvbnmasdfghjkqwertyuiopzxcvbnm";

        let result = PerplexityGuard::new("test")
            .analyze_full_text(false)
            .analyze_suffix(true)
            .with_suffix_ratio(0.5)
            .check(text);

        let matched_but_passed = !result.matches.is_empty() && result.passed;
        assert!(!matched_but_passed);
    }

    /// A failed check on mixed content always comes with at least one match.
    #[test]
    fn test_mixed_text() {
        let text = "Normal text at the start. xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx More normal text at the end.";

        let result = PerplexityGuard::new("test")
            .with_max_perplexity(800.0)
            .check(text);

        if result.passed {
            return;
        }
        assert!(!result.matches.is_empty());
    }

    /// The configured severity is stamped on produced matches.
    #[test]
    fn test_severity_assignment() {
        let repetitive = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

        let result = PerplexityGuard::new("test")
            .with_severity(Severity::Critical)
            .with_min_entropy(2.0)
            .check(repetitive);

        if let Some(first) = result.matches.first() {
            assert_eq!(first.severity, Severity::Critical);
        }
    }

    /// A GCG-style suffix of repeated tokens, when blocked, is reported with
    /// at least one match (possibly typed `low_entropy`).
    #[test]
    fn test_gcg_like_pattern() {
        let gcg_like =
            "What is 2+2? ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! ! !";

        let result = PerplexityGuard::new("test").check(gcg_like);

        if result.passed {
            return;
        }

        let has_low_entropy = result.matches.iter().any(|m| {
            m.metadata
                .get("anomaly_type")
                .map_or(false, |kind| kind == "low_entropy")
        });
        assert!(result.passed || has_low_entropy || !result.matches.is_empty());
    }
}