// llm_security/validation.rs
1//! Output validation for LLM security
2
3use crate::patterns::*;
4use crate::constants::*;
5
/// Output validation engine for LLM responses
pub struct ValidationEngine {
    // Security settings; within this file only `enable_output_validation`
    // is consulted (see `validate_llm_output`).
    config: crate::types::LLMSecurityConfig,
}
10
11impl ValidationEngine {
12    /// Create a new validation engine
13    pub fn new(config: crate::types::LLMSecurityConfig) -> Self {
14        Self { config }
15    }
16
17    /// Validate LLM output for manipulation
18    pub fn validate_llm_output(&self, output: &str) -> Result<(), String> {
19        if !self.config.enable_output_validation {
20            return Ok(());
21        }
22
23        // Check if LLM is following malicious instructions
24        for pattern in get_suspicious_output_patterns().iter() {
25            if pattern.is_match(output) {
26                #[cfg(feature = "tracing")]
27                tracing::warn!("SECURITY: Suspicious LLM output detected");
28                #[cfg(not(feature = "tracing"))]
29                eprintln!("WARN: Suspicious LLM output detected");
30                return Err("LLM output contains suspicious patterns".to_string());
31            }
32        }
33
34        // Check if output is trying to escape JSON format
35        if output.contains("```") && !output.trim().starts_with("{") {
36            #[cfg(feature = "tracing")]
37            tracing::warn!("SECURITY: LLM output may be trying to escape JSON format");
38            #[cfg(not(feature = "tracing"))]
39            eprintln!("WARN: LLM output may be trying to escape JSON format");
40            // Don't fail, but log the warning
41        }
42
43        // Check for data exfiltration attempts
44        if output.len() > DEFAULT_MAX_OUTPUT_SIZE {
45            #[cfg(feature = "tracing")]
46            tracing::warn!("SECURITY: Unusually large LLM output");
47            #[cfg(not(feature = "tracing"))]
48            eprintln!("WARN: Unusually large LLM output");
49        }
50
51        Ok(())
52    }
53
54    /// Comprehensive output validation with detailed reporting
55    pub fn validate_output_comprehensive(&self, output: &str) -> ValidationResult {
56        let mut issues = Vec::new();
57        let mut warnings = Vec::new();
58
59        // Check for suspicious patterns
60        for pattern in get_suspicious_output_patterns().iter() {
61            if pattern.is_match(output) {
62                issues.push(ValidationIssue {
63                    severity: ValidationSeverity::High,
64                    message: "LLM output contains suspicious patterns".to_string(),
65                    pattern: pattern.as_str().to_string(),
66                });
67            }
68        }
69
70        // Check for JSON format escape attempts
71        if output.contains("```") && !output.trim().starts_with("{") {
72            warnings.push(ValidationWarning {
73                message: "LLM output may be trying to escape JSON format".to_string(),
74                suggestion: "Ensure output follows expected JSON format".to_string(),
75            });
76        }
77
78        // Check for excessive output size
79        if output.len() > DEFAULT_MAX_OUTPUT_SIZE {
80            warnings.push(ValidationWarning {
81                message: format!("Output size {} exceeds recommended limit {}", output.len(), DEFAULT_MAX_OUTPUT_SIZE),
82                suggestion: "Consider breaking output into smaller chunks".to_string(),
83            });
84        }
85
86        // Check for potential data exfiltration
87        if self.detect_data_exfiltration(output) {
88            issues.push(ValidationIssue {
89                severity: ValidationSeverity::Critical,
90                message: "Potential data exfiltration detected".to_string(),
91                pattern: "suspicious_content".to_string(),
92            });
93        }
94
95        // Check for role/personality changes
96        if self.detect_personality_change(output) {
97            issues.push(ValidationIssue {
98                severity: ValidationSeverity::High,
99                message: "LLM personality change detected".to_string(),
100                pattern: "personality_shift".to_string(),
101            });
102        }
103
104        let is_valid = issues.is_empty();
105        let risk_level = self.calculate_risk_level(&issues);
106
107        ValidationResult {
108            is_valid,
109            risk_level,
110            issues,
111            warnings,
112            output_size: output.len(),
113            validation_timestamp: std::time::SystemTime::now()
114                .duration_since(std::time::UNIX_EPOCH)
115                .unwrap()
116                .as_secs(),
117        }
118    }
119
120    /// Detect potential data exfiltration attempts
121    fn detect_data_exfiltration(&self, output: &str) -> bool {
122        // Check for suspicious patterns that might indicate data exfiltration
123        let exfiltration_patterns = [
124            "here is the data",
125            "as requested, here",
126            "the information you asked for",
127            "confidential data",
128            "sensitive information",
129            "private details",
130            "secret content",
131        ];
132
133        let lower_output = output.to_lowercase();
134        exfiltration_patterns.iter().any(|pattern| lower_output.contains(pattern))
135    }
136
137    /// Detect personality/role changes in LLM output
138    fn detect_personality_change(&self, output: &str) -> bool {
139        // Check for patterns that indicate the LLM has changed its role or personality
140        let personality_patterns = [
141            "i am now",
142            "i have become",
143            "i am acting as",
144            "i am operating as",
145            "i am functioning as",
146            "from now on",
147            "i will now",
148            "i can now",
149            "i am no longer",
150            "i am no longer bound by",
151        ];
152
153        let lower_output = output.to_lowercase();
154        personality_patterns.iter().any(|pattern| lower_output.contains(pattern))
155    }
156
157    /// Calculate risk level based on validation issues
158    fn calculate_risk_level(&self, issues: &[ValidationIssue]) -> ValidationRiskLevel {
159        if issues.iter().any(|i| matches!(i.severity, ValidationSeverity::Critical)) {
160            ValidationRiskLevel::Critical
161        } else if issues.iter().any(|i| matches!(i.severity, ValidationSeverity::High)) {
162            ValidationRiskLevel::High
163        } else if issues.iter().any(|i| matches!(i.severity, ValidationSeverity::Medium)) {
164            ValidationRiskLevel::Medium
165        } else if !issues.is_empty() {
166            ValidationRiskLevel::Low
167        } else {
168            ValidationRiskLevel::None
169        }
170    }
171
172    /// Get validation summary
173    pub fn get_validation_summary(&self, result: &ValidationResult) -> String {
174        format!(
175            "Validation: {} - {} issues, {} warnings, risk level: {:?}",
176            if result.is_valid { "PASSED" } else { "FAILED" },
177            result.issues.len(),
178            result.warnings.len(),
179            result.risk_level
180        )
181    }
182}
183
/// Validation result with detailed information
#[derive(Debug, Clone)]
pub struct ValidationResult {
    // True iff no issues (of any severity) were recorded; warnings alone
    // do not invalidate the output.
    pub is_valid: bool,
    // Aggregate risk derived from the highest issue severity present.
    pub risk_level: ValidationRiskLevel,
    pub issues: Vec<ValidationIssue>,
    pub warnings: Vec<ValidationWarning>,
    // Byte length (`str::len`) of the validated output, not char count.
    pub output_size: usize,
    // Seconds since the UNIX epoch at the time validation ran.
    pub validation_timestamp: u64,
}
194
/// Validation issue severity.
///
/// Variants are declared in ascending order of severity, so the derived
/// `Ord` agrees with the domain: `Low < Medium < High < Critical`.
/// `Copy` is sound (fieldless enum) and `Hash`/`Eq` allow use as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ValidationSeverity {
    Low,
    Medium,
    High,
    Critical,
}
203
/// Validation risk level.
///
/// Variants are declared in ascending order of risk, so the derived `Ord`
/// agrees with the domain: `None < Low < Medium < High < Critical`.
/// `Copy` is sound (fieldless enum) and `Hash`/`Eq` allow use as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ValidationRiskLevel {
    None,
    Low,
    Medium,
    High,
    Critical,
}
213
214/// Validation issue
215#[derive(Debug, Clone)]
216pub struct ValidationIssue {
217    pub severity: ValidationSeverity,
218    pub message: String,
219    pub pattern: String,
220}
221
/// A non-fatal validation warning with a remediation hint.
///
/// `PartialEq`/`Eq`/`Hash` are derived (both fields are `String`) so
/// warnings can be compared and deduplicated.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ValidationWarning {
    /// Human-readable description of the concern.
    pub message: String,
    /// Suggested action to address the concern.
    pub suggestion: String,
}
228
229impl ValidationResult {
230    /// Get a summary of the validation result
231    pub fn summary(&self) -> String {
232        format!(
233            "Validation {}: {} issues, {} warnings, risk: {:?}",
234            if self.is_valid { "PASSED" } else { "FAILED" },
235            self.issues.len(),
236            self.warnings.len(),
237            self.risk_level
238        )
239    }
240
241    /// Check if the result indicates a security risk
242    pub fn has_security_risk(&self) -> bool {
243        matches!(self.risk_level, ValidationRiskLevel::Medium | ValidationRiskLevel::High | ValidationRiskLevel::Critical)
244    }
245
246    /// Get all critical issues
247    pub fn get_critical_issues(&self) -> Vec<&ValidationIssue> {
248        self.issues.iter().filter(|i| matches!(i.severity, ValidationSeverity::Critical)).collect()
249    }
250
251    /// Get all high-severity issues
252    pub fn get_high_severity_issues(&self) -> Vec<&ValidationIssue> {
253        self.issues.iter().filter(|i| matches!(i.severity, ValidationSeverity::High | ValidationSeverity::Critical)).collect()
254    }
255}