a3s_code_core/security/
sanitizer.rs1use super::audit::{AuditAction, AuditEntry, AuditEventType, AuditLog};
7use super::classifier::PrivacyClassifier;
8use super::config::{RedactionStrategy, SensitivityLevel};
9use super::taint::TaintRegistry;
10use crate::hooks::HookEvent;
11use crate::hooks::HookHandler;
12use crate::hooks::HookResponse;
13use std::sync::{Arc, RwLock};
14
15pub(crate) fn make_replacement(original: &str, strategy: RedactionStrategy) -> String {
19 match strategy {
20 RedactionStrategy::Mask => "*".repeat(original.len()),
21 RedactionStrategy::Remove => "[REDACTED]".to_string(),
22 RedactionStrategy::Hash => {
23 let hash = original
24 .bytes()
25 .fold(0u64, |acc, b| acc.wrapping_mul(31).wrapping_add(b as u64));
26 let hash_str = hash.to_string();
27 format!("[HASH:{}]", &hash_str[..8.min(hash_str.len())])
28 }
29 }
30}
31
32pub struct OutputSanitizer {
34 taint_registry: Arc<RwLock<TaintRegistry>>,
35 classifier: Arc<PrivacyClassifier>,
36 redaction_strategy: RedactionStrategy,
37 audit_log: Arc<AuditLog>,
38 session_id: String,
39}
40
41impl OutputSanitizer {
42 pub fn new(
44 taint_registry: Arc<RwLock<TaintRegistry>>,
45 classifier: Arc<PrivacyClassifier>,
46 redaction_strategy: RedactionStrategy,
47 audit_log: Arc<AuditLog>,
48 session_id: String,
49 ) -> Self {
50 Self {
51 taint_registry,
52 classifier,
53 redaction_strategy,
54 audit_log,
55 session_id,
56 }
57 }
58
59 pub fn sanitize_text(&self, text: &str) -> String {
61 let mut result = text.to_string();
62 let mut was_redacted = false;
63
64 {
66 let Ok(registry) = self.taint_registry.read() else {
67 tracing::error!("Taint registry lock poisoned — skipping taint-based redaction");
68 return result;
69 };
70 for (_, entry) in registry.entries_iter() {
71 if result.contains(&entry.original_value) {
73 let replacement = self.make_replacement(&entry.original_value);
74 result = result.replace(&entry.original_value, &replacement);
75 was_redacted = true;
76 }
77 for variant in &entry.variants {
79 if result.contains(variant.as_str()) {
80 let replacement = self.make_replacement(variant);
81 result = result.replace(variant.as_str(), &replacement);
82 was_redacted = true;
83 }
84 }
85 }
86 }
87
88 let classified = self.classifier.classify(&result);
90 if !classified.matches.is_empty() {
91 result = self.classifier.redact(&result, self.redaction_strategy);
92 was_redacted = true;
93 }
94
95 if was_redacted {
96 self.audit_log.log(AuditEntry {
97 timestamp: chrono::Utc::now(),
98 session_id: self.session_id.clone(),
99 event_type: AuditEventType::OutputRedacted,
100 severity: SensitivityLevel::Sensitive,
101 details: "Sensitive data redacted from output".to_string(),
102 tool_name: None,
103 action_taken: AuditAction::Redacted,
104 });
105 }
106
107 result
108 }
109
110 fn make_replacement(&self, original: &str) -> String {
112 make_replacement(original, self.redaction_strategy)
113 }
114}
115
116impl HookHandler for OutputSanitizer {
117 fn handle(&self, event: &HookEvent) -> HookResponse {
118 if let HookEvent::GenerateEnd(e) = event {
119 let sanitized = self.sanitize_text(&e.response_text);
120 if sanitized != e.response_text {
121 HookResponse::continue_with(serde_json::json!({
122 "response_text": sanitized
123 }))
124 } else {
125 HookResponse::continue_()
126 }
127 } else {
128 HookResponse::continue_()
129 }
130 }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::config::default_classification_rules;

    /// Wires `registry` into a sanitizer that uses the default classification
    /// rules, the `Remove` strategy and a fresh audit log; the log handle is
    /// returned alongside so tests can inspect it.
    fn assemble(registry: TaintRegistry) -> (OutputSanitizer, Arc<AuditLog>) {
        let shared_registry = Arc::new(RwLock::new(registry));
        let classifier = Arc::new(PrivacyClassifier::new(&default_classification_rules()));
        let audit = Arc::new(AuditLog::new(100));
        let sanitizer = OutputSanitizer::new(
            shared_registry,
            classifier,
            RedactionStrategy::Remove,
            Arc::clone(&audit),
            "test-session".to_string(),
        );
        (sanitizer, audit)
    }

    /// Sanitizer with an empty taint registry.
    fn make_sanitizer() -> OutputSanitizer {
        assemble(TaintRegistry::new()).0
    }

    /// Sanitizer whose registry already holds `value` as a highly sensitive
    /// tainted value.
    fn make_sanitizer_with_taint(value: &str) -> (OutputSanitizer, Arc<AuditLog>) {
        let mut registry = TaintRegistry::new();
        registry.register(value, "test_rule", SensitivityLevel::HighlySensitive);
        assemble(registry)
    }

    #[test]
    fn test_sanitize_tainted_data() {
        let (sanitizer, _audit) = make_sanitizer_with_taint("my-secret-value");
        let output = sanitizer.sanitize_text("The value is my-secret-value here");
        assert!(!output.contains("my-secret-value"));
        assert!(output.contains("[REDACTED]"));
    }

    #[test]
    fn test_sanitize_base64_encoded_taint() {
        let (sanitizer, _audit) = make_sanitizer_with_taint("secret123");
        let encoded =
            base64::Engine::encode(&base64::engine::general_purpose::STANDARD, "secret123");
        let input = format!("Encoded: {}", encoded);
        let output = sanitizer.sanitize_text(&input);
        assert!(!output.contains(&encoded));
    }

    #[test]
    fn test_sanitize_pii_from_classifier() {
        let output = make_sanitizer().sanitize_text("My SSN is 123-45-6789");
        assert!(!output.contains("123-45-6789"));
    }

    #[test]
    fn test_pass_clean_output() {
        let clean = "This is a normal response with no sensitive data.";
        let output = make_sanitizer().sanitize_text(clean);
        assert_eq!(output, clean);
    }

    #[test]
    fn test_audit_log_on_redaction() {
        let (sanitizer, audit) = make_sanitizer_with_taint("secret-data");
        sanitizer.sanitize_text("Contains secret-data here");
        assert!(!audit.is_empty());
        let recorded = audit.entries();
        assert_eq!(recorded[0].event_type, AuditEventType::OutputRedacted);
    }

    #[test]
    fn test_no_audit_on_clean_output() {
        let (sanitizer, audit) = make_sanitizer_with_taint("secret-data");
        sanitizer.sanitize_text("Nothing sensitive here");
        assert!(audit.is_empty());
    }

    #[test]
    fn test_hook_handler_with_sensitive_response() {
        let (sanitizer, _audit) = make_sanitizer_with_taint("leaked-secret");

        let usage = crate::hooks::TokenUsageInfo {
            prompt_tokens: 10,
            completion_tokens: 5,
            total_tokens: 15,
        };
        let event = HookEvent::GenerateEnd(crate::hooks::GenerateEndEvent {
            session_id: "s1".to_string(),
            prompt: "test".to_string(),
            response_text: "Here is leaked-secret in the response".to_string(),
            tool_calls: vec![],
            usage,
            duration_ms: 100,
        });

        let response = sanitizer.handle(&event);
        assert!(response.modified.is_some());
    }
}