Skip to main content

chio_guards/
browser_automation.rs

1//! BrowserAutomationGuard -- domain allowlists, action-type restrictions,
2//! and credential detection in `Type` actions.
3//!
4//! Roadmap phase 8.2.  Complements the coarse
5//! [`crate::computer_use::ComputerUseGuard`] with fine-grained rules
6//! specifically for browser-automation tool calls:
7//!
8//! - **Navigation gating**: URLs whose host is outside
9//!   [`BrowserAutomationConfig::allowed_domains`] are denied.
10//! - **Verb gating**: [`BrowserAutomationConfig::allowed_verbs`] restricts
11//!   the action verbs an agent may issue (e.g. read-only sessions
12//!   permit `navigate` + `screenshot` but deny `type` / `click`).
//! - **Credential detection**: `type` / `input` actions whose text
//!   looks like a secret (API key, bearer token, PEM key, AWS key,
//!   `password = ...`-style assignment) are denied.
16//!
17//! Calls that are not [`ToolAction::BrowserAction`] pass through with
18//! [`Verdict::Allow`] so the guard composes cleanly with the rest of
19//! the pipeline.
20//!
21//! # Fail-closed semantics
22//!
23//! - navigation verbs (`navigate`/`goto`/`open`) without a parseable
24//!   target URL are denied when a non-empty
25//!   [`BrowserAutomationConfig::allowed_domains`] list is configured;
26//! - `type` actions with no `value` / `text` argument are allowed (the
27//!   guard has nothing to inspect);
28//! - malformed credential regex configuration causes
29//!   [`BrowserAutomationGuard::with_config`] to return
30//!   [`BrowserAutomationError::InvalidPattern`].
31
32use std::collections::HashSet;
33use std::sync::OnceLock;
34
35use regex::Regex;
36use serde::{Deserialize, Serialize};
37use serde_json::Value;
38
39use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
40
41use crate::action::{extract_action, ToolAction};
42
/// Verbs permitted when a configuration does not specify its own list:
/// session lifecycle, navigation, and read-only inspection of page state.
pub fn default_allowed_verbs() -> Vec<String> {
    const READ_ONLY_VERBS: &[&str] = &[
        "navigate",
        "goto",
        "open",
        "screenshot",
        "screen_capture",
        "capture",
        "browser_screenshot",
        "get_url",
        "get_title",
        "read",
        "get_content",
        "close",
        "back",
        "forward",
        "reload",
    ];
    READ_ONLY_VERBS
        .iter()
        .map(|verb| String::from(*verb))
        .collect()
}
63
/// Errors produced when building a [`BrowserAutomationGuard`].
///
/// Returned by [`BrowserAutomationGuard::with_config`].
#[derive(Debug, thiserror::Error)]
pub enum BrowserAutomationError {
    /// A user-supplied credential pattern was not a valid regex.
    #[error("invalid credential pattern `{pattern}`: {source}")]
    InvalidPattern {
        /// The pattern text exactly as it appeared in the configuration.
        pattern: String,
        /// The compile error reported by the regex engine.
        #[source]
        source: regex::Error,
    },
}
75
/// Configuration for [`BrowserAutomationGuard`].
///
/// Every field has a serde default, so an empty document deserializes
/// to the same values as [`BrowserAutomationConfig::default`].  Unknown
/// fields are rejected (`deny_unknown_fields`).
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct BrowserAutomationConfig {
    /// Enable/disable the guard entirely.  A disabled guard allows
    /// every call.
    #[serde(default = "default_true")]
    pub enabled: bool,
    /// Hosts the agent may navigate to.  Supports exact match and
    /// `*.suffix` wildcard patterns (the wildcard also matches the bare
    /// suffix itself).  Empty means "no allowlist"; `blocked_domains`
    /// is still enforced in that case.
    #[serde(default)]
    pub allowed_domains: Vec<String>,
    /// Blocked hosts (always denied, evaluated before the allowlist).
    /// Uses the same pattern syntax as `allowed_domains`.
    #[serde(default)]
    pub blocked_domains: Vec<String>,
    /// Verbs (actions) the agent may issue.  Compared case-insensitively
    /// (ASCII).  Empty means "any verb".
    #[serde(default = "default_allowed_verbs")]
    pub allowed_verbs: Vec<String>,
    /// When `true`, check `type` / `input` action values for
    /// credential-shaped secrets.
    #[serde(default = "default_true")]
    pub credential_detection: bool,
    /// Extra credential regex patterns layered on top of the built-in
    /// detectors.  Invalid regexes cause initialization to fail.
    #[serde(default)]
    pub extra_credential_patterns: Vec<String>,
}
103
/// Serde default helper: yields `true` for the `enabled` and
/// `credential_detection` fields of [`BrowserAutomationConfig`] when
/// they are absent from the input.
fn default_true() -> bool {
    true
}
107
108impl Default for BrowserAutomationConfig {
109    fn default() -> Self {
110        Self {
111            enabled: true,
112            allowed_domains: Vec::new(),
113            blocked_domains: Vec::new(),
114            allowed_verbs: default_allowed_verbs(),
115            credential_detection: true,
116            extra_credential_patterns: Vec::new(),
117        }
118    }
119}
120
/// Guard that enforces browser-automation policy per
/// [`BrowserAutomationConfig`].
///
/// Holds the compiled form of the configuration: verbs are stored
/// lowercased in a set and extra credential patterns are pre-compiled.
pub struct BrowserAutomationGuard {
    // When false, `evaluate` allows every call without inspection.
    enabled: bool,
    // Host patterns navigation is restricted to; empty disables the allowlist.
    allowed_domains: Vec<String>,
    // Host patterns that are always denied; checked before the allowlist.
    blocked_domains: Vec<String>,
    // ASCII-lowercased verbs the agent may issue; empty means any verb.
    allowed_verbs: HashSet<String>,
    // Whether typed text is scanned for credential-shaped secrets.
    credential_detection: bool,
    // User-supplied credential regexes, compiled in `with_config`.
    extra_patterns: Vec<Regex>,
}
131
132impl BrowserAutomationGuard {
133    /// Build a guard with default configuration.
134    pub fn new() -> Self {
135        match Self::with_config(BrowserAutomationConfig::default()) {
136            Ok(g) => g,
137            Err(_) => Self::empty_failclosed(),
138        }
139    }
140
141    /// Build an empty guard that denies every browser action.  Defensive
142    /// fallback used when the default config cannot compile (should never
143    /// happen because defaults carry no user regex).
144    fn empty_failclosed() -> Self {
145        Self {
146            enabled: true,
147            allowed_domains: Vec::new(),
148            blocked_domains: Vec::new(),
149            allowed_verbs: HashSet::new(),
150            credential_detection: true,
151            extra_patterns: Vec::new(),
152        }
153    }
154
155    /// Build a guard with explicit configuration.
156    pub fn with_config(config: BrowserAutomationConfig) -> Result<Self, BrowserAutomationError> {
157        let mut extra_patterns = Vec::with_capacity(config.extra_credential_patterns.len());
158        for pat in &config.extra_credential_patterns {
159            let re = Regex::new(pat).map_err(|e| BrowserAutomationError::InvalidPattern {
160                pattern: pat.clone(),
161                source: e,
162            })?;
163            extra_patterns.push(re);
164        }
165        let allowed_verbs: HashSet<String> = config
166            .allowed_verbs
167            .into_iter()
168            .map(|v| v.to_ascii_lowercase())
169            .collect();
170        Ok(Self {
171            enabled: config.enabled,
172            allowed_domains: config.allowed_domains,
173            blocked_domains: config.blocked_domains,
174            allowed_verbs,
175            credential_detection: config.credential_detection,
176            extra_patterns,
177        })
178    }
179
180    /// Evaluate a navigation verb against the blocked/allowed domain
181    /// sets.  Returns `Verdict::Deny` when the target is blocked or
182    /// outside a non-empty allowlist.
183    fn check_navigation(&self, target: Option<&str>) -> Verdict {
184        let empty_allow = self.allowed_domains.is_empty();
185        let empty_block = self.blocked_domains.is_empty();
186        if empty_allow && empty_block {
187            return Verdict::Allow;
188        }
189        let url = match target {
190            Some(u) if !u.trim().is_empty() => u,
191            // Missing target with a configured allowlist is fail-closed:
192            // we cannot attest the nav host, so deny.
193            _ if !empty_allow => return Verdict::Deny,
194            _ => return Verdict::Allow,
195        };
196        let host = match extract_host(url) {
197            Some(h) => h,
198            None if !empty_allow => return Verdict::Deny,
199            None => return Verdict::Allow,
200        };
201        if self
202            .blocked_domains
203            .iter()
204            .any(|pat| matches_domain(pat, &host))
205        {
206            return Verdict::Deny;
207        }
208        if !empty_allow
209            && !self
210                .allowed_domains
211                .iter()
212                .any(|pat| matches_domain(pat, &host))
213        {
214            return Verdict::Deny;
215        }
216        Verdict::Allow
217    }
218
219    /// Check whether `text` looks like a credential / secret.  Runs both
220    /// built-in detectors and any extra regexes supplied via config.
221    fn looks_like_credential(&self, text: &str) -> bool {
222        if text.trim().is_empty() {
223            return false;
224        }
225        for re in builtin_credential_patterns() {
226            if re.is_match(text) {
227                return true;
228            }
229        }
230        for re in &self.extra_patterns {
231            if re.is_match(text) {
232                return true;
233            }
234        }
235        false
236    }
237}
238
impl Default for BrowserAutomationGuard {
    /// Equivalent to [`BrowserAutomationGuard::new`].
    fn default() -> Self {
        Self::new()
    }
}
244
245impl Guard for BrowserAutomationGuard {
246    fn name(&self) -> &str {
247        "browser-automation"
248    }
249
250    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
251        if !self.enabled {
252            return Ok(Verdict::Allow);
253        }
254
255        let action = extract_action(&ctx.request.tool_name, &ctx.request.arguments);
256        let (verb, target) = match action {
257            ToolAction::BrowserAction { verb, target } => (verb, target),
258            _ => return Ok(Verdict::Allow),
259        };
260
261        let verb_lower = verb.to_ascii_lowercase();
262
263        // 1. Verb allowlist.
264        if !self.allowed_verbs.is_empty() && !self.allowed_verbs.contains(&verb_lower) {
265            return Ok(Verdict::Deny);
266        }
267
268        // 2. Navigation domain gating.
269        if is_navigation_verb(&verb_lower) {
270            let target_ref = target.as_deref().filter(|s| !is_selector_like(s));
271            return Ok(self.check_navigation(target_ref));
272        }
273
274        // 3. Credential detection on type/input verbs.
275        if self.credential_detection && is_type_verb(&verb_lower) {
276            if let Some(text) = extract_type_text(&ctx.request.arguments) {
277                if self.looks_like_credential(&text) {
278                    return Ok(Verdict::Deny);
279                }
280            }
281        }
282
283        Ok(Verdict::Allow)
284    }
285}
286
/// Decide whether `s` has the shape of an element locator (CSS id /
/// class / attribute selector, absolute path or xpath) instead of a URL.
/// Scheme-relative URLs (`//host/...`) are not considered selectors.
fn is_selector_like(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.starts_with("xpath=") {
        return true;
    }
    match candidate.chars().next() {
        Some('#') | Some('.') | Some('[') => true,
        // A single leading slash is a path/xpath; a double slash is a
        // scheme-relative URL.
        Some('/') => !candidate.starts_with("//"),
        _ => false,
    }
}
297
/// True for verbs that move the browser to a new location.  Expects an
/// already-lowercased verb; the comparison is exact.
fn is_navigation_verb(verb: &str) -> bool {
    const NAVIGATION_VERBS: [&str; 5] = ["navigate", "goto", "open", "load", "browse"];
    NAVIGATION_VERBS.contains(&verb)
}
301
/// True for verbs that enter text into the page.  Expects an
/// already-lowercased verb; the comparison is exact.
fn is_type_verb(verb: &str) -> bool {
    const TEXT_ENTRY_VERBS: [&str; 7] = [
        "type",
        "input",
        "fill",
        "browser_type",
        "type_text",
        "enter_text",
        "send_keys",
    ];
    TEXT_ENTRY_VERBS.iter().any(|known| *known == verb)
}
308
309/// Extract the string the agent wants to type.  Looks at common argument
310/// names: `text`, `value`, `content`, `input`, `keys`.
311fn extract_type_text(arguments: &Value) -> Option<String> {
312    for key in ["text", "value", "content", "input", "keys"] {
313        if let Some(v) = arguments.get(key).and_then(|v| v.as_str()) {
314            if !v.is_empty() {
315                return Some(v.to_string());
316            }
317        }
318    }
319    None
320}
321
/// Test `host` against a single domain `pattern`.
///
/// Both sides are trimmed and compared ASCII case-insensitively.  A
/// `*.suffix` pattern accepts the suffix itself and any host ending in
/// `.suffix`; every other pattern must equal the host exactly.  An
/// empty pattern or host never matches.
fn matches_domain(pattern: &str, host: &str) -> bool {
    let wanted = pattern.trim().to_ascii_lowercase();
    let actual = host.trim().to_ascii_lowercase();
    if wanted.is_empty() || actual.is_empty() {
        return false;
    }
    match wanted.strip_prefix("*.") {
        // Whatever precedes the suffix must be nothing at all or end on
        // a label boundary (a dot).
        Some(suffix) => actual
            .strip_suffix(suffix)
            .is_some_and(|leading| leading.is_empty() || leading.ends_with('.')),
        None => wanted == actual,
    }
}
335
/// Extract the lowercased host portion of a navigation target.
///
/// Returns `None` for blank input, selector-shaped input (leading `#`,
/// `.` or `[`), host-less schemes (`data:`, `javascript:`, `about:`,
/// `file:`), malformed bracketed IPv6 literals, and inputs whose host
/// turns out empty.
///
/// Parsing is deliberately lenient in the same directions browsers are,
/// so that a URL a browser would resolve to a blocked host cannot slip
/// past the domain lists:
///
/// - `http:` / `https:` may be followed by any number of `/` or `\`;
/// - any other well-formed `scheme://` prefix (e.g. `ftp://`, `wss://`)
///   is stripped instead of being mistaken for the host;
/// - `\` ends the authority just like `/`, `?` and `#`;
/// - userinfo (`user:pass@`), the port, and trailing dots are removed.
fn extract_host(url: &str) -> Option<String> {
    // RFC 3986 scheme: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ).
    fn is_url_scheme(candidate: &str) -> bool {
        let mut chars = candidate.chars();
        chars.next().is_some_and(|c| c.is_ascii_alphabetic())
            && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
    }

    let url = url.trim();
    if url.is_empty() {
        return None;
    }
    if url.starts_with('#') || url.starts_with('.') || url.starts_with('[') {
        return None;
    }
    // ASCII lowercasing keeps byte offsets identical, so prefixes found
    // in `lowered` can be sliced off `url` directly.
    let lowered = url.to_ascii_lowercase();
    if ["data:", "javascript:", "about:", "file:"]
        .iter()
        .any(|scheme| lowered.starts_with(*scheme))
    {
        return None;
    }

    let after_http_scheme = ["https:", "http:"]
        .iter()
        .find(|prefix| lowered.starts_with(**prefix))
        .map(|prefix| &url[prefix.len()..]);
    let rest = if let Some(after_scheme) = after_http_scheme {
        // Browsers skip every slash/backslash between an http(s) scheme
        // and the authority (`https:/host`, `https:///host`, `https:\\host`).
        after_scheme.trim_start_matches(|c: char| c == '/' || c == '\\')
    } else if let Some((_, after_scheme)) = url
        .split_once("://")
        .filter(|(scheme, _)| is_url_scheme(scheme))
    {
        // Only a syntactically valid scheme counts, so a schemeless
        // `host/path?u=http://x` is not split at the embedded `://`.
        after_scheme
    } else if let Some(after_slashes) = url.strip_prefix("//") {
        after_slashes
    } else {
        url
    };

    // The authority ends at the first path, query or fragment delimiter;
    // backslash is included because browsers treat it as `/` here.
    let authority = rest.split(['/', '\\', '?', '#']).next().unwrap_or(rest);
    let host_with_port = authority
        .rsplit_once('@')
        .map(|(_, host)| host)
        .unwrap_or(authority);
    let raw_host = if let Some(bracketed) = host_with_port.strip_prefix('[') {
        // Bracketed IPv6 literal: only an optional `:port` may follow `]`.
        let (literal, remainder) = bracketed.split_once(']')?;
        if !remainder.is_empty() && !remainder.starts_with(':') {
            return None;
        }
        literal
    } else {
        host_with_port
            .rsplit_once(':')
            .map(|(h, _)| h)
            .unwrap_or(host_with_port)
    };
    let host = raw_host.trim_matches(|c: char| c == '/' || c == '.');
    if host.is_empty() {
        return None;
    }
    Some(host.to_ascii_lowercase())
}
385
386/// Compiled once per process.  Returns regexes that match common
387/// credential / secret shapes appearing in Type action text.
388fn builtin_credential_patterns() -> &'static [Regex] {
389    static PATS: OnceLock<Vec<Regex>> = OnceLock::new();
390    PATS.get_or_init(|| {
391        let sources = [
392            // AWS access key ID.
393            r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b",
394            // GitHub personal access tokens.
395            r"\bgh[pousr]_[A-Za-z0-9]{36,}\b",
396            // Slack bot/user tokens.
397            r"\bxox[abopsr]-[A-Za-z0-9-]{10,}\b",
398            // JWT shape.
399            r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b",
400            // PEM private keys.
401            r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
402            // Generic `password = ...` / `token = ...` assignment shapes.
403            r"(?i)\b(?:password|passwd|pwd|token|api[_-]?key|secret|bearer)\s*[:=]\s*\S{6,}",
404            // OpenAI-style API key prefix.
405            r"\bsk-[A-Za-z0-9]{20,}\b",
406            // Stripe secret key prefix.
407            r"\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b",
408        ];
409        sources
410            .iter()
411            .filter_map(|s| match Regex::new(s) {
412                Ok(re) => Some(re),
413                Err(err) => {
414                    tracing::error!(error = %err, source = %s, "browser-automation: builtin credential regex failed");
415                    None
416                }
417            })
418            .collect()
419    })
420}
421
// Unit tests for the URL/domain helpers, the built-in credential
// detectors, and `check_navigation` against blocklist-only configs.
#[cfg(test)]
mod tests {
    use super::*;

    // Host extraction across schemes, case, userinfo, ports, bracketed
    // IPv6, scheme-relative URLs, and host-less inputs.
    #[test]
    fn extract_host_basic() {
        assert_eq!(
            extract_host("https://example.com/x"),
            Some("example.com".into())
        );
        assert_eq!(
            extract_host("HTTPS://Blocked.Example/x"),
            Some("blocked.example".into())
        );
        assert_eq!(
            extract_host("https://user:pass@blocked.example:8443/path"),
            Some("blocked.example".into())
        );
        assert_eq!(
            extract_host("https://user@[fd00:ec2::254]:8443/path"),
            Some("fd00:ec2::254".into())
        );
        assert_eq!(
            extract_host("//blocked.example/path"),
            Some("blocked.example".into())
        );
        assert_eq!(
            extract_host("https://blocked.example?redir=1"),
            Some("blocked.example".into())
        );
        assert_eq!(
            extract_host("https://blocked.example#anchor"),
            Some("blocked.example".into())
        );
        assert_eq!(extract_host("#submit"), None);
        assert_eq!(extract_host("data:text/plain,hi"), None);
    }

    // `*.suffix` matches subdomains but not unrelated hosts; plain
    // patterns match exactly.
    #[test]
    fn matches_domain_wildcard() {
        assert!(matches_domain("*.example.com", "api.example.com"));
        assert!(!matches_domain("*.example.com", "example.org"));
        assert!(matches_domain("example.com", "example.com"));
    }

    // Built-in detectors flag well-known secret shapes and ignore plain
    // or empty text.
    #[test]
    fn builtin_detects_common_tokens() {
        let guard = BrowserAutomationGuard::new();
        assert!(guard.looks_like_credential("AKIAABCDEFGHIJKLMNOP"));
        assert!(guard.looks_like_credential("password=hunter2345"));
        assert!(guard.looks_like_credential("sk-0123456789abcdef01234567"));
        assert!(!guard.looks_like_credential("hello world"));
        assert!(!guard.looks_like_credential(""));
    }

    // CSS-style selectors are recognized; absolute and scheme-relative
    // URLs are not.
    #[test]
    fn is_selector_like_classifies() {
        assert!(is_selector_like("#submit"));
        assert!(is_selector_like(".login"));
        assert!(is_selector_like("[data-id=1]"));
        assert!(!is_selector_like("https://example.com/x"));
        assert!(!is_selector_like("//example.com"));
    }

    // A blocked host must be denied even when the URL omits its scheme.
    #[test]
    fn check_navigation_blocks_scheme_relative_urls() {
        let guard = BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec!["blocked.example".into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile");

        assert_eq!(
            guard.check_navigation(Some("//blocked.example/path")),
            Verdict::Deny
        );
    }

    // Userinfo in front of the host must not hide a blocked host.
    #[test]
    fn check_navigation_blocks_urls_with_userinfo() {
        let guard = BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec!["blocked.example".into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile");

        assert_eq!(
            guard.check_navigation(Some("https://user@blocked.example/path")),
            Verdict::Deny
        );
    }

    // Bracketed IPv6 literals are compared without their brackets.
    #[test]
    fn check_navigation_blocks_bracketed_ipv6_hosts() {
        let guard = BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec!["fd00:ec2::254".into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile");

        assert_eq!(
            guard.check_navigation(Some("https://[fd00:ec2::254]/latest")),
            Verdict::Deny
        );
    }

    // Scheme matching is case-insensitive.
    #[test]
    fn check_navigation_blocks_mixed_case_scheme_urls() {
        let guard = BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec!["blocked.example".into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile");

        assert_eq!(
            guard.check_navigation(Some("HTTPS://blocked.example/path")),
            Verdict::Deny
        );
    }

    // A query or fragment directly after the host (no path) must still
    // resolve to the blocked host.
    #[test]
    fn check_navigation_blocks_query_and_fragment_only_urls() {
        let guard = BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec!["blocked.example".into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile");

        assert_eq!(
            guard.check_navigation(Some("https://blocked.example?redir=1")),
            Verdict::Deny
        );
        assert_eq!(
            guard.check_navigation(Some("https://blocked.example#anchor")),
            Verdict::Deny
        );
    }
}