Skip to main content

chio_guards/
content_review.rs

//! ContentReviewGuard -- pre-invocation review of outbound content for
//! SaaS / communication / payment tool calls.
//!
//! Roadmap phase 11.1.  The guard inspects
//! [`ToolAction::ExternalApiCall`] requests to services like Slack,
//! SendGrid, Twilio, and Stripe, applying:
//!
//! 1. **PII detection** on message bodies / email text.  Detected
//!    categories are surfaced through tracing evidence.
//! 2. **Tone / profanity** filter (configurable wordlist).
//! 3. **Monetary approval gating** -- payment calls whose amount meets
//!    or exceeds the grant's [`Constraint::RequireApprovalAbove`]
//!    threshold yield [`Verdict::PendingApproval`] so the HITL flow in
//!    [`chio_kernel::approval`] can collect a human signoff.
//!
//! Unknown / non-external-API actions pass through with [`Verdict::Allow`].
//!
//! Evidence is emitted via `tracing::warn!` with a structured
//! `detected_categories` field so downstream log pipelines can extract
//! the reasons.
//!
//! # Fail-closed semantics
//!
//! - [`ContentReviewConfig::per_service`] lookups fall back to
//!   [`ContentReviewConfig::default_rules`];
//! - invalid user-supplied regex patterns cause
//!   [`ContentReviewGuard::with_config`] to return
//!   [`ContentReviewError::InvalidPattern`];
//! - messages that trip both PII and profanity return a single `Deny`
//!   outcome but log both categories as evidence.
31
32use std::collections::{HashMap, HashSet};
33use std::sync::OnceLock;
34
35use regex::{Regex, RegexBuilder};
36use serde::{Deserialize, Serialize};
37use serde_json::Value;
38
39use chio_core::capability::Constraint;
40use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
41
42use crate::action::{extract_action, ToolAction};
43
44/// Errors produced when building a [`ContentReviewGuard`].
45#[derive(Debug, thiserror::Error)]
46pub enum ContentReviewError {
47    /// A user-supplied regex pattern failed to compile.
48    #[error("invalid review pattern `{pattern}`: {source}")]
49    InvalidPattern {
50        pattern: String,
51        #[source]
52        source: regex::Error,
53    },
54
55    /// A user-supplied regex pattern exceeded policy-load safety limits.
56    #[error("{0}")]
57    UnsafePattern(String),
58}
59
60/// Per-service review rules.  Missing fields fall back to defaults.
61#[derive(Clone, Debug, Default, Deserialize, Serialize)]
62#[serde(deny_unknown_fields)]
63pub struct ContentReviewRules {
64    /// Enable PII detection on the message body.  Default `true`.
65    #[serde(default = "default_true")]
66    pub detect_pii: bool,
67    /// Enable the profanity filter.  Default `true`.
68    #[serde(default = "default_true")]
69    pub detect_profanity: bool,
70    /// Case-insensitive words that trigger a Deny.
71    #[serde(default)]
72    pub banned_words: Vec<String>,
73    /// Extra regex patterns whose match triggers a Deny.
74    #[serde(default)]
75    pub extra_patterns: Vec<String>,
76    /// Maximum bytes of outbound text to scan.  Longer inputs are
77    /// truncated at a UTF-8 boundary.
78    #[serde(default = "default_max_scan_bytes")]
79    pub max_scan_bytes: usize,
80}
81
82fn default_true() -> bool {
83    true
84}
85
86fn default_max_scan_bytes() -> usize {
87    64 * 1024
88}
89
90/// Full content-review configuration.
91#[derive(Clone, Debug, Deserialize, Serialize)]
92#[serde(deny_unknown_fields)]
93pub struct ContentReviewConfig {
94    /// Enable/disable the guard entirely.
95    #[serde(default = "default_true")]
96    pub enabled: bool,
97    /// Default rules applied when a service has no per-service entry.
98    #[serde(default = "default_rules")]
99    pub default_rules: ContentReviewRules,
100    /// Per-service overrides keyed by the service name produced by
101    /// [`crate::action::extract_action`] (e.g. `"slack"`, `"stripe"`).
102    #[serde(default)]
103    pub per_service: HashMap<String, ContentReviewRules>,
104}
105
106fn default_rules() -> ContentReviewRules {
107    ContentReviewRules {
108        detect_pii: true,
109        detect_profanity: true,
110        banned_words: Vec::new(),
111        extra_patterns: Vec::new(),
112        max_scan_bytes: default_max_scan_bytes(),
113    }
114}
115
116impl Default for ContentReviewConfig {
117    fn default() -> Self {
118        Self {
119            enabled: true,
120            default_rules: default_rules(),
121            per_service: HashMap::new(),
122        }
123    }
124}
125
126/// Compiled per-service rules (regex already built).
127struct CompiledRules {
128    detect_pii: bool,
129    detect_profanity: bool,
130    banned_words: HashSet<String>,
131    extra_patterns: Vec<Regex>,
132    max_scan_bytes: usize,
133}
134
/// Upper bound on the number of `extra_patterns` in one rule set.
const MAX_EXTRA_PATTERNS: usize = 64;
/// Upper bound on the byte length of a single trimmed extra pattern.
const MAX_EXTRA_PATTERN_LEN: usize = 512;
/// Upper bound on the score returned by `review_pattern_complexity`.
const MAX_EXTRA_PATTERN_COMPLEXITY: usize = 96;
/// Value passed to `RegexBuilder::size_limit` for extra patterns (1 MiB).
const EXTRA_PATTERN_REGEX_SIZE_LIMIT: usize = 1024 * 1024;
/// Value passed to `RegexBuilder::dfa_size_limit` for extra patterns (1 MiB).
const EXTRA_PATTERN_DFA_SIZE_LIMIT: usize = 1024 * 1024;
140
141impl CompiledRules {
142    fn compile(rules: &ContentReviewRules) -> Result<Self, ContentReviewError> {
143        if rules.extra_patterns.len() > MAX_EXTRA_PATTERNS {
144            return Err(ContentReviewError::UnsafePattern(format!(
145                "content_review.extra_patterns allows at most {MAX_EXTRA_PATTERNS} patterns"
146            )));
147        }
148        let mut extra_patterns = Vec::with_capacity(rules.extra_patterns.len());
149        for pat in &rules.extra_patterns {
150            let trimmed = pat.trim();
151            if trimmed.is_empty() {
152                return Err(ContentReviewError::UnsafePattern(
153                    "content_review.extra_patterns cannot contain empty patterns".to_string(),
154                ));
155            }
156            if trimmed.len() > MAX_EXTRA_PATTERN_LEN {
157                return Err(ContentReviewError::UnsafePattern(format!(
158                    "content_review.extra_patterns entries must be at most {MAX_EXTRA_PATTERN_LEN} characters"
159                )));
160            }
161            let complexity = review_pattern_complexity(trimmed);
162            if complexity > MAX_EXTRA_PATTERN_COMPLEXITY {
163                return Err(ContentReviewError::UnsafePattern(format!(
164                    "content_review.extra_patterns entries must have complexity at most {MAX_EXTRA_PATTERN_COMPLEXITY}"
165                )));
166            }
167            let re = RegexBuilder::new(trimmed)
168                .size_limit(EXTRA_PATTERN_REGEX_SIZE_LIMIT)
169                .dfa_size_limit(EXTRA_PATTERN_DFA_SIZE_LIMIT)
170                .build()
171                .map_err(|e| ContentReviewError::InvalidPattern {
172                    pattern: trimmed.to_string(),
173                    source: e,
174                })?;
175            extra_patterns.push(re);
176        }
177        let banned_words = rules
178            .banned_words
179            .iter()
180            .map(|w| w.to_ascii_lowercase())
181            .collect();
182        Ok(Self {
183            detect_pii: rules.detect_pii,
184            detect_profanity: rules.detect_profanity,
185            banned_words,
186            extra_patterns,
187            max_scan_bytes: rules.max_scan_bytes.max(1),
188        })
189    }
190}
191
/// Score a regex source string by counting unescaped metacharacters.
///
/// Alternation and the quantifiers `*`, `+`, `?` cost 4 each; the openers
/// `{`, `[`, `(` cost 2 each.  A backslash and the character it escapes
/// cost nothing.  The sum saturates rather than overflowing.
fn review_pattern_complexity(pattern: &str) -> usize {
    let mut chars = pattern.chars();
    let mut total = 0usize;
    while let Some(ch) = chars.next() {
        let weight = match ch {
            '\\' => {
                // Swallow the escaped character (if any) so it is never scored.
                chars.next();
                0
            }
            '|' | '*' | '+' | '?' => 4,
            '{' | '[' | '(' => 2,
            _ => 0,
        };
        total = total.saturating_add(weight);
    }
    total
}
209
210/// Guard that runs content review on outbound SaaS / payment / comms
211/// calls.
212pub struct ContentReviewGuard {
213    enabled: bool,
214    default_rules: CompiledRules,
215    per_service: HashMap<String, CompiledRules>,
216}
217
218impl ContentReviewGuard {
219    /// Build a guard with default configuration.
220    pub fn new() -> Self {
221        match Self::with_config(ContentReviewConfig::default()) {
222            Ok(g) => g,
223            Err(_) => Self {
224                enabled: true,
225                default_rules: CompiledRules {
226                    detect_pii: true,
227                    detect_profanity: true,
228                    banned_words: HashSet::new(),
229                    extra_patterns: Vec::new(),
230                    max_scan_bytes: default_max_scan_bytes(),
231                },
232                per_service: HashMap::new(),
233            },
234        }
235    }
236
237    /// Build a guard with explicit configuration.  Returns
238    /// [`ContentReviewError::InvalidPattern`] if any regex fails to
239    /// compile.
240    pub fn with_config(config: ContentReviewConfig) -> Result<Self, ContentReviewError> {
241        let default_rules = CompiledRules::compile(&config.default_rules)?;
242        let mut per_service = HashMap::with_capacity(config.per_service.len());
243        for (service, rules) in &config.per_service {
244            per_service.insert(service.clone(), CompiledRules::compile(rules)?);
245        }
246        Ok(Self {
247            enabled: config.enabled,
248            default_rules,
249            per_service,
250        })
251    }
252
253    /// Fetch compiled rules for a service, falling back to defaults.
254    fn rules_for(&self, service: &str) -> &CompiledRules {
255        self.per_service.get(service).unwrap_or(&self.default_rules)
256    }
257}
258
259impl Default for ContentReviewGuard {
260    fn default() -> Self {
261        Self::new()
262    }
263}
264
265impl Guard for ContentReviewGuard {
266    fn name(&self) -> &str {
267        "content-review"
268    }
269
270    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
271        if !self.enabled {
272            return Ok(Verdict::Allow);
273        }
274
275        let action = extract_action(&ctx.request.tool_name, &ctx.request.arguments);
276        let (service, endpoint) = match action {
277            ToolAction::ExternalApiCall { service, endpoint } => (service, endpoint),
278            _ => return Ok(Verdict::Allow),
279        };
280
281        // 1. Monetary approval gating: check the matched grant for a
282        //    RequireApprovalAbove constraint and compare to the amount
283        //    surfaced in the request body / governed intent.
284        if let Some(verdict) = evaluate_amount_threshold(ctx, &service)? {
285            return Ok(verdict);
286        }
287
288        // 2. Extract outbound text from the common argument shapes.
289        let text = extract_outbound_text(&ctx.request.arguments);
290        let text = match text {
291            Some(t) if !t.is_empty() => t,
292            _ => return Ok(Verdict::Allow),
293        };
294
295        let rules = self.rules_for(&service);
296        let truncated = truncate_utf8(&text, rules.max_scan_bytes);
297
298        // 3. PII detection.
299        let mut categories: Vec<&'static str> = Vec::new();
300        if rules.detect_pii {
301            for (category, re) in builtin_pii_patterns() {
302                if re.is_match(truncated) {
303                    categories.push(*category);
304                }
305            }
306        }
307
308        // 4. Profanity / banned word check.
309        if rules.detect_profanity && contains_banned_word(truncated, &rules.banned_words) {
310            categories.push("profanity");
311        }
312
313        // 5. Extra user regex patterns.
314        for re in &rules.extra_patterns {
315            if re.is_match(truncated) {
316                categories.push("custom");
317            }
318        }
319
320        if !categories.is_empty() {
321            tracing::warn!(
322                guard = "content-review",
323                service = %service,
324                endpoint = %endpoint,
325                detected_categories = ?categories,
326                "content-review denied outbound message"
327            );
328            return Ok(Verdict::Deny);
329        }
330
331        Ok(Verdict::Allow)
332    }
333}
334
335/// Inspect the matched grant for a [`Constraint::RequireApprovalAbove`]
336/// and compare the requested amount to its threshold.  When the call is
337/// a payment-service call (`stripe`, `paypal`, ...) and the amount meets
338/// the threshold, emit [`Verdict::PendingApproval`] so the kernel's
339/// HITL surface can take over.
340fn evaluate_amount_threshold(
341    ctx: &GuardContext,
342    service: &str,
343) -> Result<Option<Verdict>, KernelError> {
344    if !is_payment_service(service) {
345        return Ok(None);
346    }
347    let Some(grant) = ctx
348        .matched_grant_index
349        .and_then(|idx| ctx.scope.grants.get(idx))
350    else {
351        return Ok(None);
352    };
353
354    let threshold = grant.constraints.iter().find_map(|c| match c {
355        Constraint::RequireApprovalAbove { threshold_units } => Some(*threshold_units),
356        _ => None,
357    });
358    let Some(threshold) = threshold else {
359        return Ok(None);
360    };
361
362    let amount_units = extract_amount_units(ctx.request).or_else(|| {
363        ctx.request
364            .governed_intent
365            .as_ref()
366            .and_then(|intent| intent.max_amount.as_ref().map(|amt| amt.units))
367    });
368    let Some(units) = amount_units else {
369        // Cannot compare; leave the decision to other guards.
370        return Ok(None);
371    };
372    if units >= threshold {
373        tracing::info!(
374            guard = "content-review",
375            service = %service,
376            units,
377            threshold,
378            "content-review requires human approval for monetary threshold"
379        );
380        return Ok(Some(Verdict::PendingApproval));
381    }
382    Ok(None)
383}
384
/// Report whether `service` is one of the payment providers subject to
/// monetary threshold checks.  The comparison is exact and case-sensitive.
fn is_payment_service(service: &str) -> bool {
    const PAYMENT_SERVICES: [&str; 6] = [
        "stripe",
        "paypal",
        "square",
        "braintree",
        "adyen",
        "plaid",
    ];
    PAYMENT_SERVICES.contains(&service)
}
392
393/// Extract an amount-in-units figure from common argument names used by
394/// payment APIs.  Interprets plain numeric fields (`amount`,
395/// `amount_units`) as the minor-unit integer.
396fn extract_amount_units(request: &chio_kernel::ToolCallRequest) -> Option<u64> {
397    let args = &request.arguments;
398    for key in ["amount_units", "amountUnits", "amount"] {
399        if let Some(v) = args.get(key) {
400            if let Some(u) = v.as_u64() {
401                return Some(u);
402            }
403            if let Some(f) = v.as_f64() {
404                if f >= 0.0 && f.is_finite() {
405                    return Some(f as u64);
406                }
407            }
408        }
409    }
410    None
411}
412
413/// Extract the outbound text to review from the tool-call arguments.
414fn extract_outbound_text(arguments: &Value) -> Option<String> {
415    let mut chunks: Vec<String> = Vec::new();
416    for key in [
417        "text",
418        "body",
419        "message",
420        "content",
421        "subject",
422        "html",
423        "description",
424        "summary",
425        "note",
426    ] {
427        if let Some(v) = arguments.get(key).and_then(|v| v.as_str()) {
428            if !v.is_empty() {
429                chunks.push(v.to_string());
430            }
431        }
432    }
433    // Slack-style blocks[].text.text.
434    if let Some(arr) = arguments.get("blocks").and_then(|v| v.as_array()) {
435        for block in arr {
436            if let Some(text) = block
437                .get("text")
438                .and_then(|t| t.get("text"))
439                .and_then(|t| t.as_str())
440            {
441                chunks.push(text.to_string());
442            }
443        }
444    }
445    if chunks.is_empty() {
446        None
447    } else {
448        Some(chunks.join("\n"))
449    }
450}
451
/// Return the longest prefix of `input` that is at most `max_bytes` bytes
/// long and ends on a UTF-8 character boundary.
fn truncate_utf8(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk back from the byte budget to the nearest boundary; index 0 is
    // always a boundary, so the search cannot come up empty.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&idx| input.is_char_boundary(idx))
        .unwrap_or(0);
    &input[..cut]
}
462
/// Report whether `text` contains any word from `banned` as a substring,
/// ignoring letter case.
///
/// Case is folded one character at a time with `char::to_lowercase` on
/// both the text and each banned word, so non-ASCII letters are treated
/// case-insensitively too (`Über` matches `über`).  ASCII-only lowercasing
/// left such letters untouched, letting a banned word slip through when
/// the text used a different case.  Words that were already
/// ASCII-lowercased by the caller fold to the same result, so every match
/// found by ASCII-only folding is still found.
///
/// Empty banned words are ignored; an empty set never matches.
fn contains_banned_word(text: &str, banned: &HashSet<String>) -> bool {
    /// Context-free per-character lowercase fold.
    fn fold_case(s: &str) -> String {
        s.chars().flat_map(char::to_lowercase).collect()
    }

    if banned.is_empty() {
        return false;
    }
    let haystack = fold_case(text);
    banned
        .iter()
        .filter(|word| !word.is_empty())
        .any(|word| haystack.contains(fold_case(word).as_str()))
}
478
479/// Compiled once per process.  Built-in PII detectors keyed by the
480/// category tag surfaced in tracing evidence.
481fn builtin_pii_patterns() -> &'static [(&'static str, Regex)] {
482    static PATS: OnceLock<Vec<(&'static str, Regex)>> = OnceLock::new();
483    PATS.get_or_init(|| {
484        let sources: &[(&'static str, &'static str)] = &[
485            ("email", r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b"),
486            ("ssn", r"\b\d{3}-\d{2}-\d{4}\b"),
487            ("phone_us", r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b"),
488            ("credit_card", r"\b(?:\d[ -]*?){13,19}\b"),
489            ("ipv4", r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
490        ];
491        sources
492            .iter()
493            .filter_map(|(cat, src)| match Regex::new(src) {
494                Ok(re) => Some((*cat, re)),
495                Err(err) => {
496                    tracing::error!(error = %err, source = %src, category = %cat, "content-review: pii regex failed");
497                    None
498                }
499            })
500            .collect()
501    })
502}
503
#[cfg(test)]
mod tests {
    use super::*;

    /// Top-level fields and Slack-style block text all end up in the
    /// joined review text.
    #[test]
    fn extract_outbound_text_joins_chunks() {
        let args = serde_json::json!({
            "subject": "hi",
            "body": "hello",
            "blocks": [{"text": {"text": "b1"}}]
        });
        let joined = extract_outbound_text(&args).unwrap();
        for expected in ["hi", "hello", "b1"] {
            assert!(joined.contains(expected));
        }
    }

    /// The built-in table contains an `email` detector that matches a
    /// plain address.
    #[test]
    fn pii_patterns_detect_email() {
        let detected = builtin_pii_patterns()
            .iter()
            .filter(|(cat, _)| *cat == "email")
            .any(|(_, re)| re.is_match("user@example.com"));
        assert!(detected);
    }

    /// A budget that lands inside a multi-byte character backs off to the
    /// previous boundary.
    #[test]
    fn truncate_utf8_honors_boundaries() {
        let truncated = truncate_utf8("héllo", 2);
        assert_eq!(truncated, "h");
    }
}
535}