use std::collections::HashSet;
use std::sync::OnceLock;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
use crate::action::{extract_action, ToolAction};
/// Returns the verb allowlist used when a configuration does not supply one.
///
/// The list covers navigation (`navigate`, `goto`, `open`), screenshot and
/// page-reading verbs, and basic history/tab controls. It contains no
/// typing or clicking verbs.
pub fn default_allowed_verbs() -> Vec<String> {
    const VERBS: [&str; 15] = [
        "navigate",
        "goto",
        "open",
        "screenshot",
        "screen_capture",
        "capture",
        "browser_screenshot",
        "get_url",
        "get_title",
        "read",
        "get_content",
        "close",
        "back",
        "forward",
        "reload",
    ];
    VERBS.iter().map(|verb| (*verb).to_owned()).collect()
}
/// Errors produced while constructing a [`BrowserAutomationGuard`] from
/// configuration.
#[derive(Debug, thiserror::Error)]
pub enum BrowserAutomationError {
/// An entry of `extra_credential_patterns` failed to compile as a regex.
#[error("invalid credential pattern `{pattern}`: {source}")]
InvalidPattern {
/// The offending pattern text, copied from the configuration.
pattern: String,
/// The compile error reported by the `regex` crate.
#[source]
source: regex::Error,
},
}
/// Serde-backed settings for [`BrowserAutomationGuard`].
///
/// Unknown fields are rejected during deserialization, and every field has a
/// serde default, so an empty document deserializes successfully.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct BrowserAutomationConfig {
/// Master switch; when `false` the guard allows every request. Defaults to `true`.
#[serde(default = "default_true")]
pub enabled: bool,
/// Hosts that navigation may target. Entries are exact hosts or `*.suffix`
/// wildcards. An empty list means no allowlist is enforced.
#[serde(default)]
pub allowed_domains: Vec<String>,
/// Hosts that navigation must never target, using the same pattern syntax as
/// `allowed_domains`. Checked before the allowlist.
#[serde(default)]
pub blocked_domains: Vec<String>,
/// Browser verbs the guard lets through (ASCII-lowercased when the guard is
/// built). An empty list disables the verb check entirely.
#[serde(default = "default_allowed_verbs")]
pub allowed_verbs: Vec<String>,
/// Whether text passed to typing verbs is scanned for credential-looking
/// content. Defaults to `true`.
#[serde(default = "default_true")]
pub credential_detection: bool,
/// Additional regex sources matched against typed text, on top of the
/// built-in credential patterns.
#[serde(default)]
pub extra_credential_patterns: Vec<String>,
}
/// Serde default provider for boolean settings that are on unless disabled.
fn default_true() -> bool {
true
}
impl Default for BrowserAutomationConfig {
fn default() -> Self {
Self {
enabled: true,
allowed_domains: Vec::new(),
blocked_domains: Vec::new(),
allowed_verbs: default_allowed_verbs(),
credential_detection: true,
extra_credential_patterns: Vec::new(),
}
}
}
/// Guard that vets browser-automation tool calls by verb, navigation target,
/// and typed text.
pub struct BrowserAutomationGuard {
/// When `false`, `evaluate` allows every request without inspecting it.
enabled: bool,
/// Host patterns navigation is restricted to; empty means unrestricted.
allowed_domains: Vec<String>,
/// Host patterns navigation is never allowed to reach.
blocked_domains: Vec<String>,
/// ASCII-lowercased verbs that pass the verb check; an empty set disables the check.
allowed_verbs: HashSet<String>,
/// Whether typed text is scanned for credential-looking content.
credential_detection: bool,
/// Compiled user-supplied credential patterns, checked after the built-in ones.
extra_patterns: Vec<Regex>,
}
impl BrowserAutomationGuard {
    /// Builds a guard from [`BrowserAutomationConfig::default`].
    ///
    /// The default configuration carries no extra patterns, so construction is
    /// not expected to fail. If it ever does, the guard falls back to an
    /// instance that denies every browser action.
    pub fn new() -> Self {
        Self::with_config(BrowserAutomationConfig::default())
            .unwrap_or_else(|_| Self::empty_failclosed())
    }

    /// Fallback guard that denies every browser action.
    fn empty_failclosed() -> Self {
        // `evaluate` treats an *empty* verb allowlist as "no verb restriction",
        // so an empty set here would fail open. Verbs are ASCII-lowercased
        // before the lookup, so an entry containing uppercase letters can never
        // match: the set is non-empty (the check runs) yet unsatisfiable
        // (every verb is denied).
        const UNMATCHABLE_VERB: &str = "FAIL-CLOSED";
        Self {
            enabled: true,
            allowed_domains: Vec::new(),
            blocked_domains: Vec::new(),
            allowed_verbs: HashSet::from([UNMATCHABLE_VERB.to_string()]),
            credential_detection: true,
            extra_patterns: Vec::new(),
        }
    }

    /// Builds a guard from an explicit configuration.
    ///
    /// Allowed verbs are ASCII-lowercased so the verb check is
    /// case-insensitive. Domain lists are stored as given.
    ///
    /// # Errors
    ///
    /// Returns [`BrowserAutomationError::InvalidPattern`] for the first entry
    /// of `extra_credential_patterns` that fails to compile.
    pub fn with_config(config: BrowserAutomationConfig) -> Result<Self, BrowserAutomationError> {
        let extra_patterns = config
            .extra_credential_patterns
            .iter()
            .map(|pat| {
                Regex::new(pat).map_err(|e| BrowserAutomationError::InvalidPattern {
                    pattern: pat.clone(),
                    source: e,
                })
            })
            .collect::<Result<Vec<_>, _>>()?;
        let allowed_verbs: HashSet<String> = config
            .allowed_verbs
            .into_iter()
            .map(|v| v.to_ascii_lowercase())
            .collect();
        Ok(Self {
            enabled: config.enabled,
            allowed_domains: config.allowed_domains,
            blocked_domains: config.blocked_domains,
            allowed_verbs,
            credential_detection: config.credential_detection,
            extra_patterns,
        })
    }

    /// Decides whether navigating to `target` is permitted.
    ///
    /// With no domain lists configured everything is allowed. Otherwise the
    /// blocklist is consulted first, then the allowlist (if any). A target
    /// that is missing, blank, or has no extractable host is denied when an
    /// allowlist exists and allowed when only a blocklist exists.
    fn check_navigation(&self, target: Option<&str>) -> Verdict {
        let has_allowlist = !self.allowed_domains.is_empty();
        if !has_allowlist && self.blocked_domains.is_empty() {
            return Verdict::Allow;
        }
        // Verdict for targets whose host cannot be determined: an allowlist
        // cannot be satisfied without a host, a blocklist cannot be violated.
        let without_host = || {
            if has_allowlist {
                Verdict::Deny
            } else {
                Verdict::Allow
            }
        };
        let Some(url) = target.filter(|u| !u.trim().is_empty()) else {
            return without_host();
        };
        let Some(host) = extract_host(url) else {
            return without_host();
        };
        let listed_in =
            |patterns: &[String]| patterns.iter().any(|pat| matches_domain(pat, &host));
        if listed_in(&self.blocked_domains) {
            return Verdict::Deny;
        }
        if has_allowlist && !listed_in(&self.allowed_domains) {
            return Verdict::Deny;
        }
        Verdict::Allow
    }

    /// Returns `true` when `text` matches any built-in or user-supplied
    /// credential pattern. Blank text never matches.
    fn looks_like_credential(&self, text: &str) -> bool {
        if text.trim().is_empty() {
            return false;
        }
        builtin_credential_patterns()
            .iter()
            .chain(&self.extra_patterns)
            .any(|re| re.is_match(text))
    }
}
impl Default for BrowserAutomationGuard {
/// Equivalent to [`BrowserAutomationGuard::new`].
fn default() -> Self {
Self::new()
}
}
impl Guard for BrowserAutomationGuard {
    /// Stable identifier of this guard.
    fn name(&self) -> &str {
        "browser-automation"
    }

    /// Evaluates one tool call.
    ///
    /// Requests pass untouched when the guard is disabled or the call is not
    /// a browser action. Otherwise the verb must pass the allowlist (when one
    /// is configured), navigation verbs are judged by their target host, and
    /// typing verbs are denied when credential detection is on and the typed
    /// text looks like a credential.
    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
        if !self.enabled {
            return Ok(Verdict::Allow);
        }

        let ToolAction::BrowserAction { verb, target } =
            extract_action(&ctx.request.tool_name, &ctx.request.arguments)
        else {
            return Ok(Verdict::Allow);
        };

        let verb = verb.to_ascii_lowercase();
        // An empty allowlist means "no verb restriction".
        let verb_permitted = self.allowed_verbs.is_empty() || self.allowed_verbs.contains(&verb);
        if !verb_permitted {
            return Ok(Verdict::Deny);
        }

        if is_navigation_verb(&verb) {
            // Selector-shaped targets are not URLs; treat them as "no target".
            let url_target = target.as_deref().filter(|s| !is_selector_like(s));
            return Ok(self.check_navigation(url_target));
        }

        let typed_credential = self.credential_detection
            && is_type_verb(&verb)
            && extract_type_text(&ctx.request.arguments)
                .is_some_and(|text| self.looks_like_credential(&text));

        Ok(if typed_credential {
            Verdict::Deny
        } else {
            Verdict::Allow
        })
    }
}
/// Reports whether `s` has the shape of a DOM selector rather than a URL.
///
/// After trimming, a value counts as a selector when it starts with `#`, `.`,
/// `[`, `xpath=`, or a single `/` (a leading `//` is left to URL handling).
fn is_selector_like(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.starts_with("xpath=") {
        return true;
    }
    match candidate.chars().next() {
        Some('#' | '.' | '[') => true,
        Some('/') => !candidate.starts_with("//"),
        _ => false,
    }
}
/// Reports whether `verb` (already lowercased by the caller) moves the
/// browser to a new location.
fn is_navigation_verb(verb: &str) -> bool {
    ["navigate", "goto", "open", "load", "browse"].contains(&verb)
}
/// Reports whether `verb` (already lowercased by the caller) enters text
/// into the page.
fn is_type_verb(verb: &str) -> bool {
    const TYPE_VERBS: [&str; 7] = [
        "type",
        "input",
        "fill",
        "browser_type",
        "type_text",
        "enter_text",
        "send_keys",
    ];
    TYPE_VERBS.iter().any(|known| *known == verb)
}
fn extract_type_text(arguments: &Value) -> Option<String> {
for key in ["text", "value", "content", "input", "keys"] {
if let Some(v) = arguments.get(key).and_then(|v| v.as_str()) {
if !v.is_empty() {
return Some(v.to_string());
}
}
}
None
}
/// Tests `host` against a single domain pattern, ignoring ASCII case and
/// surrounding whitespace.
///
/// A pattern of the form `*.suffix` matches `suffix` itself and any host
/// ending in `.suffix`; every other pattern must equal the host exactly.
/// A blank pattern or blank host never matches.
fn matches_domain(pattern: &str, host: &str) -> bool {
    let pattern = pattern.trim();
    let host = host.trim();
    if pattern.is_empty() || host.is_empty() {
        return false;
    }

    let Some(suffix) = pattern.strip_prefix("*.") else {
        return pattern.eq_ignore_ascii_case(host);
    };

    if host.eq_ignore_ascii_case(suffix) {
        return true;
    }
    if host.len() <= suffix.len() {
        return false;
    }
    // Compare on bytes so the split point never has to be a char boundary:
    // the host must be `<anything>.` followed by the suffix.
    let (labels, tail) = host.as_bytes().split_at(host.len() - suffix.len());
    labels.ends_with(b".") && tail.eq_ignore_ascii_case(suffix.as_bytes())
}
/// Extracts the lowercase host a browser would contact for `url`.
///
/// Returns `None` when no host can be identified: blank input, input starting
/// with `#`, `.`, or `[`, the opaque schemes `data:`, `javascript:`,
/// `about:`, and `file:`, malformed bracketed IPv6 literals, and inputs whose
/// authority is empty (for example a relative path such as `/a/b`).
///
/// Userinfo (`user:pass@`) and the port are discarded; IPv6 literals are
/// returned without brackets; leading/trailing dots are trimmed.
///
/// To stay in step with how browsers parse `http(s)` URLs, a backslash is
/// treated exactly like a forward slash and any run of slashes after the
/// scheme is skipped. Without this, `https://evil.example\@trusted.example/`
/// would be read as `trusted.example` while the browser connects to
/// `evil.example`, and `https:/host` or `https:\\host` would yield the
/// bogus host `https`.
fn extract_host(url: &str) -> Option<String> {
    /// Browsers accept either slash direction in http(s) URLs.
    fn is_slash(c: char) -> bool {
        c == '/' || c == '\\'
    }

    let url = url.trim();
    if url.is_empty() {
        return None;
    }
    if url.starts_with('#') || url.starts_with('.') || url.starts_with('[') {
        return None;
    }
    let lowered = url.to_ascii_lowercase();
    if lowered.starts_with("data:")
        || lowered.starts_with("javascript:")
        || lowered.starts_with("about:")
        || lowered.starts_with("file:")
    {
        return None;
    }

    // Slicing `url` by the scheme length is safe: a case-insensitive ASCII
    // prefix match guarantees those leading bytes are single-byte characters.
    let rest = if lowered.starts_with("https:") {
        url["https:".len()..].trim_start_matches(is_slash)
    } else if lowered.starts_with("http:") {
        url["http:".len()..].trim_start_matches(is_slash)
    } else {
        // Scheme-relative form: two leading slashes of either direction.
        // A single leading slash is a path, not an authority.
        let mut lead = url.chars();
        if lead.next().is_some_and(is_slash) && lead.next().is_some_and(is_slash) {
            lead.as_str().trim_start_matches(is_slash)
        } else {
            url
        }
    };

    // The authority ends at the first path, query, or fragment delimiter.
    let authority = rest
        .split(|c: char| is_slash(c) || c == '?' || c == '#')
        .next()
        .unwrap_or(rest);
    // Everything up to the last `@` is userinfo.
    let host_and_port = authority
        .rsplit_once('@')
        .map(|(_, host)| host)
        .unwrap_or(authority);

    let host = match host_and_port.strip_prefix('[') {
        Some(bracketed) => {
            let (literal, remainder) = bracketed.split_once(']')?;
            // Only an optional `:port` may follow the closing bracket.
            if !remainder.is_empty() && !remainder.starts_with(':') {
                return None;
            }
            literal
        }
        None => host_and_port
            .rsplit_once(':')
            .map(|(h, _)| h)
            .unwrap_or(host_and_port),
    };
    let host = host.trim_matches(|c: char| c == '/' || c == '.');
    if host.is_empty() {
        return None;
    }
    Some(host.to_ascii_lowercase())
}
/// Returns the lazily compiled set of built-in credential regexes.
///
/// Compilation happens once, on first use. A source that fails to compile is
/// logged through `tracing` and left out rather than aborting the whole set.
fn builtin_credential_patterns() -> &'static [Regex] {
    const SOURCES: [&str; 8] = [
        r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b",
        r"\bgh[pousr]_[A-Za-z0-9]{36,}\b",
        r"\bxox[abopsr]-[A-Za-z0-9-]{10,}\b",
        r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b",
        r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
        r"(?i)\b(?:password|passwd|pwd|token|api[_-]?key|secret|bearer)\s*[:=]\s*\S{6,}",
        r"\bsk-[A-Za-z0-9]{20,}\b",
        r"\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b",
    ];
    static COMPILED: OnceLock<Vec<Regex>> = OnceLock::new();

    COMPILED.get_or_init(|| {
        let mut compiled = Vec::new();
        for s in &SOURCES {
            match Regex::new(s) {
                Ok(re) => compiled.push(re),
                Err(err) => {
                    tracing::error!(error = %err, source = %s, "browser-automation: builtin credential regex failed");
                }
            }
        }
        compiled
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a guard from the default configuration plus one blocked domain.
    fn guard_blocking(domain: &str) -> BrowserAutomationGuard {
        BrowserAutomationGuard::with_config(BrowserAutomationConfig {
            blocked_domains: vec![domain.into()],
            ..BrowserAutomationConfig::default()
        })
        .expect("default browser automation config should compile")
    }

    #[test]
    fn extract_host_basic() {
        let resolved = [
            ("https://example.com/x", "example.com"),
            ("HTTPS://Blocked.Example/x", "blocked.example"),
            (
                "https://user:pass@blocked.example:8443/path",
                "blocked.example",
            ),
            ("https://user@[fd00:ec2::254]:8443/path", "fd00:ec2::254"),
            ("//blocked.example/path", "blocked.example"),
            ("https://blocked.example?redir=1", "blocked.example"),
            ("https://blocked.example#anchor", "blocked.example"),
        ];
        for (url, expected) in resolved {
            assert_eq!(extract_host(url), Some(expected.into()));
        }
        for hostless in ["#submit", "data:text/plain,hi"] {
            assert_eq!(extract_host(hostless), None);
        }
    }

    #[test]
    fn matches_domain_wildcard() {
        assert!(matches_domain("*.example.com", "api.example.com"));
        assert!(!matches_domain("*.example.com", "example.org"));
        assert!(matches_domain("example.com", "example.com"));
    }

    #[test]
    fn builtin_detects_common_tokens() {
        let guard = BrowserAutomationGuard::new();
        for secret in [
            "AKIAABCDEFGHIJKLMNOP",
            "password=hunter2345",
            "sk-0123456789abcdef01234567",
        ] {
            assert!(guard.looks_like_credential(secret));
        }
        for harmless in ["hello world", ""] {
            assert!(!guard.looks_like_credential(harmless));
        }
    }

    #[test]
    fn is_selector_like_classifies() {
        for selector in ["#submit", ".login", "[data-id=1]"] {
            assert!(is_selector_like(selector));
        }
        for url in ["https://example.com/x", "//example.com"] {
            assert!(!is_selector_like(url));
        }
    }

    #[test]
    fn check_navigation_blocks_scheme_relative_urls() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("//blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_urls_with_userinfo() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("https://user@blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_bracketed_ipv6_hosts() {
        let guard = guard_blocking("fd00:ec2::254");
        assert_eq!(
            guard.check_navigation(Some("https://[fd00:ec2::254]/latest")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_mixed_case_scheme_urls() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("HTTPS://blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_query_and_fragment_only_urls() {
        let guard = guard_blocking("blocked.example");
        for url in [
            "https://blocked.example?redir=1",
            "https://blocked.example#anchor",
        ] {
            assert_eq!(guard.check_navigation(Some(url)), Verdict::Deny);
        }
    }
}