1use std::collections::{HashMap, HashSet};
33use std::sync::OnceLock;
34
35use regex::{Regex, RegexBuilder};
36use serde::{Deserialize, Serialize};
37use serde_json::Value;
38
39use chio_core::capability::Constraint;
40use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
41
42use crate::action::{extract_action, ToolAction};
43
44#[derive(Debug, thiserror::Error)]
46pub enum ContentReviewError {
47 #[error("invalid review pattern `{pattern}`: {source}")]
49 InvalidPattern {
50 pattern: String,
51 #[source]
52 source: regex::Error,
53 },
54
55 #[error("{0}")]
57 UnsafePattern(String),
58}
59
60#[derive(Clone, Debug, Default, Deserialize, Serialize)]
62#[serde(deny_unknown_fields)]
63pub struct ContentReviewRules {
64 #[serde(default = "default_true")]
66 pub detect_pii: bool,
67 #[serde(default = "default_true")]
69 pub detect_profanity: bool,
70 #[serde(default)]
72 pub banned_words: Vec<String>,
73 #[serde(default)]
75 pub extra_patterns: Vec<String>,
76 #[serde(default = "default_max_scan_bytes")]
79 pub max_scan_bytes: usize,
80}
81
/// Serde default helper for boolean fields that are on unless disabled.
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
85
/// Serde default helper for `max_scan_bytes`: 64 KiB.
fn default_max_scan_bytes() -> usize {
    const KIB: usize = 1024;
    64 * KIB
}
89
90#[derive(Clone, Debug, Deserialize, Serialize)]
92#[serde(deny_unknown_fields)]
93pub struct ContentReviewConfig {
94 #[serde(default = "default_true")]
96 pub enabled: bool,
97 #[serde(default = "default_rules")]
99 pub default_rules: ContentReviewRules,
100 #[serde(default)]
103 pub per_service: HashMap<String, ContentReviewRules>,
104}
105
106fn default_rules() -> ContentReviewRules {
107 ContentReviewRules {
108 detect_pii: true,
109 detect_profanity: true,
110 banned_words: Vec::new(),
111 extra_patterns: Vec::new(),
112 max_scan_bytes: default_max_scan_bytes(),
113 }
114}
115
116impl Default for ContentReviewConfig {
117 fn default() -> Self {
118 Self {
119 enabled: true,
120 default_rules: default_rules(),
121 per_service: HashMap::new(),
122 }
123 }
124}
125
126struct CompiledRules {
128 detect_pii: bool,
129 detect_profanity: bool,
130 banned_words: HashSet<String>,
131 extra_patterns: Vec<Regex>,
132 max_scan_bytes: usize,
133}
134
/// Maximum number of entries accepted in `extra_patterns`.
const MAX_EXTRA_PATTERNS: usize = 64;
/// Maximum length (as measured by `str::len`) of one extra pattern.
const MAX_EXTRA_PATTERN_LEN: usize = 512;
/// Maximum score from `review_pattern_complexity` for one extra pattern.
const MAX_EXTRA_PATTERN_COMPLEXITY: usize = 96;
/// Size limit handed to the regex builder for each extra pattern (1 MiB).
const EXTRA_PATTERN_REGEX_SIZE_LIMIT: usize = 1024 * 1024;
/// DFA size limit handed to the regex builder for each extra pattern (1 MiB).
const EXTRA_PATTERN_DFA_SIZE_LIMIT: usize = 1024 * 1024;
140
141impl CompiledRules {
142 fn compile(rules: &ContentReviewRules) -> Result<Self, ContentReviewError> {
143 if rules.extra_patterns.len() > MAX_EXTRA_PATTERNS {
144 return Err(ContentReviewError::UnsafePattern(format!(
145 "content_review.extra_patterns allows at most {MAX_EXTRA_PATTERNS} patterns"
146 )));
147 }
148 let mut extra_patterns = Vec::with_capacity(rules.extra_patterns.len());
149 for pat in &rules.extra_patterns {
150 let trimmed = pat.trim();
151 if trimmed.is_empty() {
152 return Err(ContentReviewError::UnsafePattern(
153 "content_review.extra_patterns cannot contain empty patterns".to_string(),
154 ));
155 }
156 if trimmed.len() > MAX_EXTRA_PATTERN_LEN {
157 return Err(ContentReviewError::UnsafePattern(format!(
158 "content_review.extra_patterns entries must be at most {MAX_EXTRA_PATTERN_LEN} characters"
159 )));
160 }
161 let complexity = review_pattern_complexity(trimmed);
162 if complexity > MAX_EXTRA_PATTERN_COMPLEXITY {
163 return Err(ContentReviewError::UnsafePattern(format!(
164 "content_review.extra_patterns entries must have complexity at most {MAX_EXTRA_PATTERN_COMPLEXITY}"
165 )));
166 }
167 let re = RegexBuilder::new(trimmed)
168 .size_limit(EXTRA_PATTERN_REGEX_SIZE_LIMIT)
169 .dfa_size_limit(EXTRA_PATTERN_DFA_SIZE_LIMIT)
170 .build()
171 .map_err(|e| ContentReviewError::InvalidPattern {
172 pattern: trimmed.to_string(),
173 source: e,
174 })?;
175 extra_patterns.push(re);
176 }
177 let banned_words = rules
178 .banned_words
179 .iter()
180 .map(|w| w.to_ascii_lowercase())
181 .collect();
182 Ok(Self {
183 detect_pii: rules.detect_pii,
184 detect_profanity: rules.detect_profanity,
185 banned_words,
186 extra_patterns,
187 max_scan_bytes: rules.max_scan_bytes.max(1),
188 })
189 }
190}
191
/// Scores a regex source string with a cheap syntactic heuristic.
///
/// Each unescaped `|`, `*`, `+` or `?` adds 4; each unescaped `{`, `[` or
/// `(` adds 2. A backslash contributes nothing and hides the character that
/// follows it. The sum saturates instead of overflowing.
fn review_pattern_complexity(pattern: &str) -> usize {
    let mut remaining = pattern.chars();
    let mut total: usize = 0;
    while let Some(current) = remaining.next() {
        let weight = match current {
            '\\' => {
                // Consume the escaped character (if any) so it is not scored.
                let _ = remaining.next();
                0
            }
            '|' | '*' | '+' | '?' => 4,
            '{' | '[' | '(' => 2,
            _ => 0,
        };
        total = total.saturating_add(weight);
    }
    total
}
209
210pub struct ContentReviewGuard {
213 enabled: bool,
214 default_rules: CompiledRules,
215 per_service: HashMap<String, CompiledRules>,
216}
217
218impl ContentReviewGuard {
219 pub fn new() -> Self {
221 match Self::with_config(ContentReviewConfig::default()) {
222 Ok(g) => g,
223 Err(_) => Self {
224 enabled: true,
225 default_rules: CompiledRules {
226 detect_pii: true,
227 detect_profanity: true,
228 banned_words: HashSet::new(),
229 extra_patterns: Vec::new(),
230 max_scan_bytes: default_max_scan_bytes(),
231 },
232 per_service: HashMap::new(),
233 },
234 }
235 }
236
237 pub fn with_config(config: ContentReviewConfig) -> Result<Self, ContentReviewError> {
241 let default_rules = CompiledRules::compile(&config.default_rules)?;
242 let mut per_service = HashMap::with_capacity(config.per_service.len());
243 for (service, rules) in &config.per_service {
244 per_service.insert(service.clone(), CompiledRules::compile(rules)?);
245 }
246 Ok(Self {
247 enabled: config.enabled,
248 default_rules,
249 per_service,
250 })
251 }
252
253 fn rules_for(&self, service: &str) -> &CompiledRules {
255 self.per_service.get(service).unwrap_or(&self.default_rules)
256 }
257}
258
259impl Default for ContentReviewGuard {
260 fn default() -> Self {
261 Self::new()
262 }
263}
264
265impl Guard for ContentReviewGuard {
266 fn name(&self) -> &str {
267 "content-review"
268 }
269
270 fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
271 if !self.enabled {
272 return Ok(Verdict::Allow);
273 }
274
275 let action = extract_action(&ctx.request.tool_name, &ctx.request.arguments);
276 let (service, endpoint) = match action {
277 ToolAction::ExternalApiCall { service, endpoint } => (service, endpoint),
278 _ => return Ok(Verdict::Allow),
279 };
280
281 if let Some(verdict) = evaluate_amount_threshold(ctx, &service)? {
285 return Ok(verdict);
286 }
287
288 let text = extract_outbound_text(&ctx.request.arguments);
290 let text = match text {
291 Some(t) if !t.is_empty() => t,
292 _ => return Ok(Verdict::Allow),
293 };
294
295 let rules = self.rules_for(&service);
296 let truncated = truncate_utf8(&text, rules.max_scan_bytes);
297
298 let mut categories: Vec<&'static str> = Vec::new();
300 if rules.detect_pii {
301 for (category, re) in builtin_pii_patterns() {
302 if re.is_match(truncated) {
303 categories.push(*category);
304 }
305 }
306 }
307
308 if rules.detect_profanity && contains_banned_word(truncated, &rules.banned_words) {
310 categories.push("profanity");
311 }
312
313 for re in &rules.extra_patterns {
315 if re.is_match(truncated) {
316 categories.push("custom");
317 }
318 }
319
320 if !categories.is_empty() {
321 tracing::warn!(
322 guard = "content-review",
323 service = %service,
324 endpoint = %endpoint,
325 detected_categories = ?categories,
326 "content-review denied outbound message"
327 );
328 return Ok(Verdict::Deny);
329 }
330
331 Ok(Verdict::Allow)
332 }
333}
334
335fn evaluate_amount_threshold(
341 ctx: &GuardContext,
342 service: &str,
343) -> Result<Option<Verdict>, KernelError> {
344 if !is_payment_service(service) {
345 return Ok(None);
346 }
347 let Some(grant) = ctx
348 .matched_grant_index
349 .and_then(|idx| ctx.scope.grants.get(idx))
350 else {
351 return Ok(None);
352 };
353
354 let threshold = grant.constraints.iter().find_map(|c| match c {
355 Constraint::RequireApprovalAbove { threshold_units } => Some(*threshold_units),
356 _ => None,
357 });
358 let Some(threshold) = threshold else {
359 return Ok(None);
360 };
361
362 let amount_units = extract_amount_units(ctx.request).or_else(|| {
363 ctx.request
364 .governed_intent
365 .as_ref()
366 .and_then(|intent| intent.max_amount.as_ref().map(|amt| amt.units))
367 });
368 let Some(units) = amount_units else {
369 return Ok(None);
371 };
372 if units >= threshold {
373 tracing::info!(
374 guard = "content-review",
375 service = %service,
376 units,
377 threshold,
378 "content-review requires human approval for monetary threshold"
379 );
380 return Ok(Some(Verdict::PendingApproval));
381 }
382 Ok(None)
383}
384
/// Reports whether `service` is one of the recognised payment providers.
/// The comparison is exact (case-sensitive).
fn is_payment_service(service: &str) -> bool {
    const PAYMENT_SERVICES: [&str; 6] = [
        "stripe",
        "paypal",
        "square",
        "braintree",
        "adyen",
        "plaid",
    ];
    PAYMENT_SERVICES.contains(&service)
}
392
393fn extract_amount_units(request: &chio_kernel::ToolCallRequest) -> Option<u64> {
397 let args = &request.arguments;
398 for key in ["amount_units", "amountUnits", "amount"] {
399 if let Some(v) = args.get(key) {
400 if let Some(u) = v.as_u64() {
401 return Some(u);
402 }
403 if let Some(f) = v.as_f64() {
404 if f >= 0.0 && f.is_finite() {
405 return Some(f as u64);
406 }
407 }
408 }
409 }
410 None
411}
412
413fn extract_outbound_text(arguments: &Value) -> Option<String> {
415 let mut chunks: Vec<String> = Vec::new();
416 for key in [
417 "text",
418 "body",
419 "message",
420 "content",
421 "subject",
422 "html",
423 "description",
424 "summary",
425 "note",
426 ] {
427 if let Some(v) = arguments.get(key).and_then(|v| v.as_str()) {
428 if !v.is_empty() {
429 chunks.push(v.to_string());
430 }
431 }
432 }
433 if let Some(arr) = arguments.get("blocks").and_then(|v| v.as_array()) {
435 for block in arr {
436 if let Some(text) = block
437 .get("text")
438 .and_then(|t| t.get("text"))
439 .and_then(|t| t.as_str())
440 {
441 chunks.push(text.to_string());
442 }
443 }
444 }
445 if chunks.is_empty() {
446 None
447 } else {
448 Some(chunks.join("\n"))
449 }
450}
451
/// Returns the longest prefix of `input` that is at most `max_bytes` bytes
/// long and ends on a character boundary.
fn truncate_utf8(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk back from the limit to the nearest boundary; index 0 always is one.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0);
    &input[..cut]
}
462
/// Reports whether the ASCII-lowercased `text` contains any non-empty entry
/// of `banned` as a substring. Entries are compared as given, so they are
/// expected to be lowercase already.
fn contains_banned_word(text: &str, banned: &HashSet<String>) -> bool {
    if banned.is_empty() {
        // Skip the lowercase copy when there is nothing to look for.
        return false;
    }
    let haystack = text.to_ascii_lowercase();
    banned
        .iter()
        .filter(|candidate| !candidate.is_empty())
        .any(|candidate| haystack.contains(candidate.as_str()))
}
478
479fn builtin_pii_patterns() -> &'static [(&'static str, Regex)] {
482 static PATS: OnceLock<Vec<(&'static str, Regex)>> = OnceLock::new();
483 PATS.get_or_init(|| {
484 let sources: &[(&'static str, &'static str)] = &[
485 ("email", r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"),
486 ("ssn", r"\b\d{3}-\d{2}-\d{4}\b"),
487 ("phone_us", r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b"),
488 ("credit_card", r"\b(?:\d[ -]*?){13,19}\b"),
489 ("ipv4", r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
490 ];
491 sources
492 .iter()
493 .filter_map(|(cat, src)| match Regex::new(src) {
494 Ok(re) => Some((*cat, re)),
495 Err(err) => {
496 tracing::error!(error = %err, source = %src, category = %cat, "content-review: pii regex failed");
497 None
498 }
499 })
500 .collect()
501 })
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// Text from plain fields and from `blocks` all ends up in the result.
    #[test]
    fn extract_outbound_text_joins_chunks() {
        let args = serde_json::json!({
            "subject": "hi",
            "body": "hello",
            "blocks": [{"text": {"text": "b1"}}]
        });
        let joined = extract_outbound_text(&args).unwrap();
        for expected in ["hi", "hello", "b1"] {
            assert!(joined.contains(expected));
        }
    }

    /// The built-in `email` detector matches a plain address.
    #[test]
    fn pii_patterns_detect_email() {
        let detected = builtin_pii_patterns()
            .iter()
            .filter(|(cat, _)| *cat == "email")
            .any(|(_, re)| re.is_match("user@example.com"));
        assert!(detected);
    }

    /// A limit that falls inside a multi-byte character backs off to the
    /// previous boundary.
    #[test]
    fn truncate_utf8_honors_boundaries() {
        let shortened = truncate_utf8("héllo", 2);
        assert_eq!(shortened, "h");
    }
}
535}