1use std::collections::HashSet;
33use std::sync::OnceLock;
34
35use regex::Regex;
36use serde::{Deserialize, Serialize};
37use serde_json::Value;
38
39use chio_kernel::{Guard, GuardContext, KernelError, Verdict};
40
41use crate::action::{extract_action, ToolAction};
42
/// Returns the verb allowlist used when a configuration does not supply one.
///
/// The list contains navigation, screenshot, page-read and history/tab
/// verbs only; none of the text-entry verbs recognised by `is_type_verb`
/// appear in it.
pub fn default_allowed_verbs() -> Vec<String> {
    const DEFAULT_VERBS: &[&str] = &[
        "navigate",
        "goto",
        "open",
        "screenshot",
        "screen_capture",
        "capture",
        "browser_screenshot",
        "get_url",
        "get_title",
        "read",
        "get_content",
        "close",
        "back",
        "forward",
        "reload",
    ];
    DEFAULT_VERBS
        .iter()
        .map(|verb| String::from(*verb))
        .collect()
}
63
64#[derive(Debug, thiserror::Error)]
66pub enum BrowserAutomationError {
67 #[error("invalid credential pattern `{pattern}`: {source}")]
69 InvalidPattern {
70 pattern: String,
71 #[source]
72 source: regex::Error,
73 },
74}
75
76#[derive(Clone, Debug, Deserialize, Serialize)]
78#[serde(deny_unknown_fields)]
79pub struct BrowserAutomationConfig {
80 #[serde(default = "default_true")]
82 pub enabled: bool,
83 #[serde(default)]
87 pub allowed_domains: Vec<String>,
88 #[serde(default)]
90 pub blocked_domains: Vec<String>,
91 #[serde(default = "default_allowed_verbs")]
93 pub allowed_verbs: Vec<String>,
94 #[serde(default = "default_true")]
97 pub credential_detection: bool,
98 #[serde(default)]
101 pub extra_credential_patterns: Vec<String>,
102}
103
/// Serde default helper for boolean fields that are on unless the
/// configuration says otherwise.
fn default_true() -> bool {
    true
}
107
108impl Default for BrowserAutomationConfig {
109 fn default() -> Self {
110 Self {
111 enabled: true,
112 allowed_domains: Vec::new(),
113 blocked_domains: Vec::new(),
114 allowed_verbs: default_allowed_verbs(),
115 credential_detection: true,
116 extra_credential_patterns: Vec::new(),
117 }
118 }
119}
120
121pub struct BrowserAutomationGuard {
124 enabled: bool,
125 allowed_domains: Vec<String>,
126 blocked_domains: Vec<String>,
127 allowed_verbs: HashSet<String>,
128 credential_detection: bool,
129 extra_patterns: Vec<Regex>,
130}
131
132impl BrowserAutomationGuard {
133 pub fn new() -> Self {
135 match Self::with_config(BrowserAutomationConfig::default()) {
136 Ok(g) => g,
137 Err(_) => Self::empty_failclosed(),
138 }
139 }
140
141 fn empty_failclosed() -> Self {
145 Self {
146 enabled: true,
147 allowed_domains: Vec::new(),
148 blocked_domains: Vec::new(),
149 allowed_verbs: HashSet::new(),
150 credential_detection: true,
151 extra_patterns: Vec::new(),
152 }
153 }
154
155 pub fn with_config(config: BrowserAutomationConfig) -> Result<Self, BrowserAutomationError> {
157 let mut extra_patterns = Vec::with_capacity(config.extra_credential_patterns.len());
158 for pat in &config.extra_credential_patterns {
159 let re = Regex::new(pat).map_err(|e| BrowserAutomationError::InvalidPattern {
160 pattern: pat.clone(),
161 source: e,
162 })?;
163 extra_patterns.push(re);
164 }
165 let allowed_verbs: HashSet<String> = config
166 .allowed_verbs
167 .into_iter()
168 .map(|v| v.to_ascii_lowercase())
169 .collect();
170 Ok(Self {
171 enabled: config.enabled,
172 allowed_domains: config.allowed_domains,
173 blocked_domains: config.blocked_domains,
174 allowed_verbs,
175 credential_detection: config.credential_detection,
176 extra_patterns,
177 })
178 }
179
180 fn check_navigation(&self, target: Option<&str>) -> Verdict {
184 let empty_allow = self.allowed_domains.is_empty();
185 let empty_block = self.blocked_domains.is_empty();
186 if empty_allow && empty_block {
187 return Verdict::Allow;
188 }
189 let url = match target {
190 Some(u) if !u.trim().is_empty() => u,
191 _ if !empty_allow => return Verdict::Deny,
194 _ => return Verdict::Allow,
195 };
196 let host = match extract_host(url) {
197 Some(h) => h,
198 None if !empty_allow => return Verdict::Deny,
199 None => return Verdict::Allow,
200 };
201 if self
202 .blocked_domains
203 .iter()
204 .any(|pat| matches_domain(pat, &host))
205 {
206 return Verdict::Deny;
207 }
208 if !empty_allow
209 && !self
210 .allowed_domains
211 .iter()
212 .any(|pat| matches_domain(pat, &host))
213 {
214 return Verdict::Deny;
215 }
216 Verdict::Allow
217 }
218
219 fn looks_like_credential(&self, text: &str) -> bool {
222 if text.trim().is_empty() {
223 return false;
224 }
225 for re in builtin_credential_patterns() {
226 if re.is_match(text) {
227 return true;
228 }
229 }
230 for re in &self.extra_patterns {
231 if re.is_match(text) {
232 return true;
233 }
234 }
235 false
236 }
237}
238
239impl Default for BrowserAutomationGuard {
240 fn default() -> Self {
241 Self::new()
242 }
243}
244
245impl Guard for BrowserAutomationGuard {
246 fn name(&self) -> &str {
247 "browser-automation"
248 }
249
250 fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
251 if !self.enabled {
252 return Ok(Verdict::Allow);
253 }
254
255 let action = extract_action(&ctx.request.tool_name, &ctx.request.arguments);
256 let (verb, target) = match action {
257 ToolAction::BrowserAction { verb, target } => (verb, target),
258 _ => return Ok(Verdict::Allow),
259 };
260
261 let verb_lower = verb.to_ascii_lowercase();
262
263 if !self.allowed_verbs.is_empty() && !self.allowed_verbs.contains(&verb_lower) {
265 return Ok(Verdict::Deny);
266 }
267
268 if is_navigation_verb(&verb_lower) {
270 let target_ref = target.as_deref().filter(|s| !is_selector_like(s));
271 return Ok(self.check_navigation(target_ref));
272 }
273
274 if self.credential_detection && is_type_verb(&verb_lower) {
276 if let Some(text) = extract_type_text(&ctx.request.arguments) {
277 if self.looks_like_credential(&text) {
278 return Ok(Verdict::Deny);
279 }
280 }
281 }
282
283 Ok(Verdict::Allow)
284 }
285}
286
/// Heuristically reports whether `s` is a DOM selector rather than a URL.
///
/// After trimming, a value counts as a selector when it starts with `#`,
/// `.`, `[` or `xpath=`, or with a single `/` (a leading `//` is left to be
/// treated as a scheme-relative URL).
fn is_selector_like(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.starts_with(['#', '.', '[']) || candidate.starts_with("xpath=") {
        return true;
    }
    let single_leading_slash = candidate.starts_with('/') && !candidate.starts_with("//");
    single_leading_slash
}
297
/// Reports whether `verb` (expected to be lowercase already) moves the
/// browser to a new location.
fn is_navigation_verb(verb: &str) -> bool {
    const NAVIGATION_VERBS: [&str; 5] = ["navigate", "goto", "open", "load", "browse"];
    NAVIGATION_VERBS.contains(&verb)
}
301
/// Reports whether `verb` (expected to be lowercase already) enters text
/// into the page.
fn is_type_verb(verb: &str) -> bool {
    const TYPE_VERBS: [&str; 7] = [
        "type",
        "input",
        "fill",
        "browser_type",
        "type_text",
        "enter_text",
        "send_keys",
    ];
    TYPE_VERBS.iter().any(|known| *known == verb)
}
308
309fn extract_type_text(arguments: &Value) -> Option<String> {
312 for key in ["text", "value", "content", "input", "keys"] {
313 if let Some(v) = arguments.get(key).and_then(|v| v.as_str()) {
314 if !v.is_empty() {
315 return Some(v.to_string());
316 }
317 }
318 }
319 None
320}
321
/// Case-insensitively matches `host` against a domain `pattern`.
///
/// A pattern of the form `*.suffix` matches `suffix` itself and any host
/// ending in `.suffix`; every other pattern must equal the host exactly.
/// Blank patterns or hosts never match.
fn matches_domain(pattern: &str, host: &str) -> bool {
    let wanted = pattern.trim().to_ascii_lowercase();
    let actual = host.trim().to_ascii_lowercase();
    if wanted.is_empty() || actual.is_empty() {
        return false;
    }
    match wanted.strip_prefix("*.") {
        // Whatever precedes the suffix must be empty (the bare domain) or
        // end on a label boundary (a subdomain).
        Some(suffix) => match actual.strip_suffix(suffix) {
            Some(leading) => leading.is_empty() || leading.ends_with('.'),
            None => false,
        },
        None => wanted == actual,
    }
}
335
/// Extracts the lowercase host from a navigation target, or `None` when the
/// target has no usable host.
///
/// `None` is returned for blank input, for targets starting with `#`, `.`
/// or `[`, and for `data:`, `javascript:`, `about:` and `file:` URLs.
///
/// Parsing is deliberately lenient in the same ways browsers are, so the
/// host checked here is the host a browser would actually contact:
///
/// * ASCII tab / newline characters anywhere in the input are dropped.
/// * For `http:` / `https:` (any case) any run of `/` or `\` after the
///   colon is skipped, so `https:\\host`, `https:///host` and `https:host`
///   all resolve to `host`.
/// * Scheme-relative input may start with any mix of two or more `/` / `\`.
/// * The authority ends at the first `/`, `\`, `?` or `#`; treating `\` as a
///   terminator stops `https://blocked\@allowed/` from being read as
///   `allowed`.
///
/// Userinfo (`user:pass@`), ports, IPv6 brackets and surrounding dots are
/// stripped from the result. Schemes other than `http`/`https` are not
/// specially recognised.
fn extract_host(url: &str) -> Option<String> {
    fn is_slash(c: char) -> bool {
        c == '/' || c == '\\'
    }

    // Browsers remove these characters before parsing, so a host split by
    // one of them must still be recognised.
    let cleaned: String = url
        .chars()
        .filter(|c| !matches!(c, '\t' | '\n' | '\r'))
        .collect();
    let url = cleaned.trim();
    if url.is_empty() || url.starts_with(['#', '.', '[']) {
        return None;
    }

    // ASCII lowercasing keeps byte offsets identical, so prefix lengths
    // measured on `lowered` are valid slice indices into `url`.
    let lowered = url.to_ascii_lowercase();
    let hostless = ["data:", "javascript:", "about:", "file:"];
    if hostless.iter().any(|scheme| lowered.starts_with(*scheme)) {
        return None;
    }

    let rest = if lowered.starts_with("https:") {
        url["https:".len()..].trim_start_matches(is_slash)
    } else if lowered.starts_with("http:") {
        url["http:".len()..].trim_start_matches(is_slash)
    } else if url.chars().take(2).filter(|c| is_slash(*c)).count() == 2 {
        // Scheme-relative (`//host`, `\\host`, `/\host`, ...).
        url.trim_start_matches(is_slash)
    } else {
        url
    };

    let authority = rest.split(['/', '\\', '?', '#']).next().unwrap_or(rest);
    // Everything up to the last `@` is userinfo.
    let host_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, after_userinfo)| after_userinfo);

    let host = if let Some(bracketed) = host_port.strip_prefix('[') {
        // Bracketed IPv6 literal: only an optional `:port` may follow `]`.
        let (inner, after) = bracketed.split_once(']')?;
        if !after.is_empty() && !after.starts_with(':') {
            return None;
        }
        inner
    } else {
        host_port
            .rsplit_once(':')
            .map_or(host_port, |(name, _port)| name)
    };
    let host = host.trim_matches(|c: char| c == '/' || c == '.');
    if host.is_empty() {
        return None;
    }
    Some(host.to_ascii_lowercase())
}
385
386fn builtin_credential_patterns() -> &'static [Regex] {
389 static PATS: OnceLock<Vec<Regex>> = OnceLock::new();
390 PATS.get_or_init(|| {
391 let sources = [
392 r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b",
394 r"\bgh[pousr]_[A-Za-z0-9]{36,}\b",
396 r"\bxox[abopsr]-[A-Za-z0-9-]{10,}\b",
398 r"\beyJ[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\.[A-Za-z0-9_-]{8,}\b",
400 r"-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----",
402 r"(?i)\b(?:password|passwd|pwd|token|api[_-]?key|secret|bearer)\s*[:=]\s*\S{6,}",
404 r"\bsk-[A-Za-z0-9]{20,}\b",
406 r"\bsk_(?:live|test)_[A-Za-z0-9]{16,}\b",
408 ];
409 sources
410 .iter()
411 .filter_map(|s| match Regex::new(s) {
412 Ok(re) => Some(re),
413 Err(err) => {
414 tracing::error!(error = %err, source = %s, "browser-automation: builtin credential regex failed");
415 None
416 }
417 })
418 .collect()
419 })
420}
421
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a guard that differs from the default only by blocking `domain`.
    fn guard_blocking(domain: &str) -> BrowserAutomationGuard {
        let config = BrowserAutomationConfig {
            blocked_domains: vec![domain.into()],
            ..BrowserAutomationConfig::default()
        };
        BrowserAutomationGuard::with_config(config)
            .expect("default browser automation config should compile")
    }

    #[test]
    fn extract_host_basic() {
        let cases: [(&str, Option<&str>); 9] = [
            ("https://example.com/x", Some("example.com")),
            ("HTTPS://Blocked.Example/x", Some("blocked.example")),
            (
                "https://user:pass@blocked.example:8443/path",
                Some("blocked.example"),
            ),
            (
                "https://user@[fd00:ec2::254]:8443/path",
                Some("fd00:ec2::254"),
            ),
            ("//blocked.example/path", Some("blocked.example")),
            ("https://blocked.example?redir=1", Some("blocked.example")),
            ("https://blocked.example#anchor", Some("blocked.example")),
            ("#submit", None),
            ("data:text/plain,hi", None),
        ];
        for (input, expected) in cases {
            assert_eq!(extract_host(input), expected.map(String::from));
        }
    }

    #[test]
    fn matches_domain_wildcard() {
        assert!(matches_domain("*.example.com", "api.example.com"));
        assert!(!matches_domain("*.example.com", "example.org"));
        assert!(matches_domain("example.com", "example.com"));
    }

    #[test]
    fn builtin_detects_common_tokens() {
        let guard = BrowserAutomationGuard::new();
        let secrets = [
            "AKIAABCDEFGHIJKLMNOP",
            "password=hunter2345",
            "sk-0123456789abcdef01234567",
        ];
        for secret in secrets {
            assert!(guard.looks_like_credential(secret));
        }
        for harmless in ["hello world", ""] {
            assert!(!guard.looks_like_credential(harmless));
        }
    }

    #[test]
    fn is_selector_like_classifies() {
        for selector in ["#submit", ".login", "[data-id=1]"] {
            assert!(is_selector_like(selector));
        }
        for url in ["https://example.com/x", "//example.com"] {
            assert!(!is_selector_like(url));
        }
    }

    #[test]
    fn check_navigation_blocks_scheme_relative_urls() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("//blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_urls_with_userinfo() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("https://user@blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_bracketed_ipv6_hosts() {
        let guard = guard_blocking("fd00:ec2::254");
        assert_eq!(
            guard.check_navigation(Some("https://[fd00:ec2::254]/latest")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_mixed_case_scheme_urls() {
        let guard = guard_blocking("blocked.example");
        assert_eq!(
            guard.check_navigation(Some("HTTPS://blocked.example/path")),
            Verdict::Deny
        );
    }

    #[test]
    fn check_navigation_blocks_query_and_fragment_only_urls() {
        let guard = guard_blocking("blocked.example");
        for url in [
            "https://blocked.example?redir=1",
            "https://blocked.example#anchor",
        ] {
            assert_eq!(guard.check_navigation(Some(url)), Verdict::Deny);
        }
    }
}