1use super::traits::{Tool, ToolResult};
9use crate::security::SecurityPolicy;
10use anyhow::Context;
11use async_trait::async_trait;
12use serde::{Deserialize, Serialize};
13use serde_json::{Value, json};
14use std::net::ToSocketAddrs;
15use std::process::Stdio;
16use std::sync::Arc;
17use std::time::Duration;
18use tokio::process::Command;
19use tracing::debug;
20
/// Settings for the computer-use sidecar that carries out OS-level actions.
#[derive(Clone)]
pub struct ComputerUseConfig {
    /// HTTP(S) URL that action requests are POSTed to.
    pub endpoint: String,
    /// Optional bearer token sent with each request; not shown by `Debug`.
    pub api_key: Option<String>,
    /// Per-request timeout in milliseconds.
    pub timeout_ms: u64,
    /// Whether an endpoint on a non-private host is accepted.
    pub allow_remote_endpoint: bool,
    /// Window allowlist forwarded to the sidecar as part of the request policy.
    pub window_allowlist: Vec<String>,
    /// Upper bound for X coordinates, when set.
    pub max_coordinate_x: Option<i64>,
    /// Upper bound for Y coordinates, when set.
    pub max_coordinate_y: Option<i64>,
}

impl std::fmt::Debug for ComputerUseConfig {
    /// Formats every field except `api_key`; the trailing `..` signals the omission.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut printer = f.debug_struct("ComputerUseConfig");
        printer.field("endpoint", &self.endpoint);
        printer.field("timeout_ms", &self.timeout_ms);
        printer.field("allow_remote_endpoint", &self.allow_remote_endpoint);
        printer.field("window_allowlist", &self.window_allowlist);
        printer.field("max_coordinate_x", &self.max_coordinate_x);
        printer.field("max_coordinate_y", &self.max_coordinate_y);
        printer.finish_non_exhaustive()
    }
}

impl Default for ComputerUseConfig {
    /// Loopback sidecar endpoint, 15 s timeout, no API key, no allowlist and no coordinate limits.
    fn default() -> Self {
        Self {
            endpoint: String::from("http://127.0.0.1:8787/v1/actions"),
            api_key: None,
            timeout_ms: 15_000,
            allow_remote_endpoint: false,
            window_allowlist: vec![],
            max_coordinate_x: None,
            max_coordinate_y: None,
        }
    }
}
59
/// Browser automation tool: holds the security policy, the domain allowlist
/// and the settings for each supported backend.
pub struct BrowserTool {
    /// Policy consulted at the start of `execute` (autonomy check and rate limiting).
    security: Arc<SecurityPolicy>,
    /// Domain allowlist, stored after passing through `normalize_domains`.
    allowed_domains: Vec<String>,
    /// Optional session name; passed to agent-browser as `--session` and
    /// included in computer-use request metadata.
    session_name: Option<String>,
    /// Raw backend name from configuration; parsed on demand by `configured_backend`.
    backend: String,
    /// Headless flag handed to the rust-native backend.
    native_headless: bool,
    /// WebDriver URL handed to the rust-native backend.
    native_webdriver_url: String,
    /// Optional Chrome binary path handed to the rust-native backend.
    native_chrome_path: Option<String>,
    /// Settings for the computer-use sidecar backend.
    computer_use: ComputerUseConfig,
    /// Rust-native session state; only present with the `browser-native` feature.
    #[cfg(feature = "browser-native")]
    native_state: tokio::sync::Mutex<native_backend::NativeBrowserState>,
}
73
/// Backend choice as written in configuration, including the `auto` option.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BrowserBackendKind {
    /// The `agent-browser` CLI.
    AgentBrowser,
    /// The rust-native backend (requires the `browser-native` build feature).
    RustNative,
    /// The computer-use sidecar.
    ComputerUse,
    /// Pick the first usable backend when an action is executed.
    Auto,
}
81
/// Concrete backend chosen by `resolve_backend`; unlike `BrowserBackendKind`
/// there is no `Auto` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ResolvedBackend {
    /// Actions are run through the `agent-browser` CLI.
    AgentBrowser,
    /// Actions are run through the rust-native backend.
    RustNative,
    /// Actions are forwarded to the computer-use sidecar.
    ComputerUse,
}
88
89impl BrowserBackendKind {
90 fn parse(raw: &str) -> anyhow::Result<Self> {
91 let key = raw.trim().to_ascii_lowercase().replace('-', "_");
92 match key.as_str() {
93 "agent_browser" | "agentbrowser" => Ok(Self::AgentBrowser),
94 "rust_native" | "native" => Ok(Self::RustNative),
95 "computer_use" | "computeruse" => Ok(Self::ComputerUse),
96 "auto" => Ok(Self::Auto),
97 _ => anyhow::bail!(
98 "Unsupported browser backend '{raw}'. Use 'agent_browser', 'rust_native', 'computer_use', or 'auto'"
99 ),
100 }
101 }
102
103 fn as_str(self) -> &'static str {
104 match self {
105 Self::AgentBrowser => "agent_browser",
106 Self::RustNative => "rust_native",
107 Self::ComputerUse => "computer_use",
108 Self::Auto => "auto",
109 }
110 }
111}
112
/// JSON envelope parsed from the stdout of the `agent-browser` CLI.
#[derive(Debug, Deserialize)]
struct AgentBrowserResponse {
    /// Whether the CLI reported the command as successful.
    success: bool,
    /// Command payload, if any.
    data: Option<Value>,
    /// Error message, if any.
    error: Option<String>,
}
120
/// JSON envelope parsed from the computer-use sidecar response body.
///
/// Every field is optional and defaults to `None` when absent.
#[derive(Debug, Deserialize)]
struct ComputerUseResponse {
    /// Explicit success flag; the caller treats a missing flag as success
    /// when the HTTP status is a success status.
    #[serde(default)]
    success: Option<bool>,
    /// Action payload, if any.
    #[serde(default)]
    data: Option<Value>,
    /// Error message, if any.
    #[serde(default)]
    error: Option<String>,
}
131
/// Actions handled by the agent-browser and rust-native backends.
///
/// Variant names are (de)serialized in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BrowserAction {
    /// Navigate to `url`.
    Open { url: String },
    /// Take a snapshot of the current page.
    Snapshot {
        /// Passed to agent-browser as `-i`.
        #[serde(default)]
        interactive_only: bool,
        /// Passed to agent-browser as `-c`.
        #[serde(default)]
        compact: bool,
        /// Passed to agent-browser as `-d <depth>`.
        #[serde(default)]
        depth: Option<u32>,
    },
    /// Click the element matched by `selector`.
    Click { selector: String },
    /// Fill the element matched by `selector` with `value`.
    Fill { selector: String, value: String },
    /// Type `text` into the element matched by `selector`.
    Type { selector: String, text: String },
    /// Read the text of the element matched by `selector`.
    GetText { selector: String },
    /// Read the page title.
    GetTitle,
    /// Read the current URL.
    GetUrl,
    /// Capture a screenshot.
    Screenshot {
        /// Optional output file path.
        #[serde(default)]
        path: Option<String>,
        /// Passed to agent-browser as `--full`.
        #[serde(default)]
        full_page: bool,
    },
    /// Wait for a selector, a duration, or a text; the first one present,
    /// in that order, is used.
    Wait {
        #[serde(default)]
        selector: Option<String>,
        #[serde(default)]
        ms: Option<u64>,
        #[serde(default)]
        text: Option<String>,
    },
    /// Press `key`.
    Press { key: String },
    /// Hover over the element matched by `selector`.
    Hover { selector: String },
    /// Scroll in `direction`, optionally by `pixels`.
    Scroll {
        direction: String,
        #[serde(default)]
        pixels: Option<u32>,
    },
    /// Check whether the element matched by `selector` is visible.
    IsVisible { selector: String },
    /// Close the browser session.
    Close,
    /// Locate an element with locator kind `by` and value `value`, then run
    /// `action` on it.
    Find {
        by: String,
        value: String,
        action: String,
        /// Extra argument appended after `action` when present.
        #[serde(default)]
        fill_value: Option<String>,
    },
}
198
199impl BrowserTool {
200 pub fn new(
201 security: Arc<SecurityPolicy>,
202 allowed_domains: Vec<String>,
203 session_name: Option<String>,
204 ) -> Self {
205 Self::new_with_backend(
206 security,
207 allowed_domains,
208 session_name,
209 "agent_browser".into(),
210 true,
211 "http://127.0.0.1:9515".into(),
212 None,
213 ComputerUseConfig::default(),
214 )
215 }
216
217 #[allow(clippy::too_many_arguments)]
218 pub fn new_with_backend(
219 security: Arc<SecurityPolicy>,
220 allowed_domains: Vec<String>,
221 session_name: Option<String>,
222 backend: String,
223 native_headless: bool,
224 native_webdriver_url: String,
225 native_chrome_path: Option<String>,
226 computer_use: ComputerUseConfig,
227 ) -> Self {
228 Self {
229 security,
230 allowed_domains: normalize_domains(allowed_domains),
231 session_name,
232 backend,
233 native_headless,
234 native_webdriver_url,
235 native_chrome_path,
236 computer_use,
237 #[cfg(feature = "browser-native")]
238 native_state: tokio::sync::Mutex::new(native_backend::NativeBrowserState::default()),
239 }
240 }
241
242 pub async fn is_agent_browser_available() -> bool {
244 let cmd = if cfg!(target_os = "windows") {
245 "agent-browser.cmd"
246 } else {
247 "agent-browser"
248 };
249 Command::new(cmd)
250 .arg("--version")
251 .stdout(Stdio::null())
252 .stderr(Stdio::null())
253 .status()
254 .await
255 .map(|s| s.success())
256 .unwrap_or(false)
257 }
258
/// Reports whether the `agent-browser` CLI is available; delegates to
/// `is_agent_browser_available`.
pub async fn is_available() -> bool {
    Self::is_agent_browser_available().await
}
263
/// Parses the configured backend name.
///
/// # Errors
/// Fails when the stored name is not a supported backend.
fn configured_backend(&self) -> anyhow::Result<BrowserBackendKind> {
    BrowserBackendKind::parse(&self.backend)
}
267
/// Returns `true` when the crate was built with the `browser-native` feature.
fn rust_native_compiled() -> bool {
    cfg!(feature = "browser-native")
}
271
/// Reports whether the rust-native backend can be used right now.
///
/// With the `browser-native` feature this asks `NativeBrowserState::is_available`
/// using the configured native settings; without the feature it is always `false`.
fn rust_native_available(&self) -> bool {
    #[cfg(feature = "browser-native")]
    {
        native_backend::NativeBrowserState::is_available(
            self.native_headless,
            &self.native_webdriver_url,
            self.native_chrome_path.as_deref(),
        )
    }
    #[cfg(not(feature = "browser-native"))]
    {
        false
    }
}
286
287 fn computer_use_endpoint_url(&self) -> anyhow::Result<reqwest::Url> {
288 if self.computer_use.timeout_ms == 0 {
289 anyhow::bail!("browser.computer_use.timeout_ms must be > 0");
290 }
291
292 let endpoint = self.computer_use.endpoint.trim();
293 if endpoint.is_empty() {
294 anyhow::bail!("browser.computer_use.endpoint cannot be empty");
295 }
296
297 let parsed = reqwest::Url::parse(endpoint).map_err(|_| {
298 anyhow::anyhow!(
299 "Invalid browser.computer_use.endpoint: '{endpoint}'. Expected http(s) URL"
300 )
301 })?;
302
303 let scheme = parsed.scheme();
304 if scheme != "http" && scheme != "https" {
305 anyhow::bail!("browser.computer_use.endpoint must use http:// or https://");
306 }
307
308 let host = parsed
309 .host_str()
310 .ok_or_else(|| anyhow::anyhow!("browser.computer_use.endpoint must include host"))?;
311
312 let host_is_private = is_private_host(host);
313 if !self.computer_use.allow_remote_endpoint && !host_is_private {
314 anyhow::bail!(
315 "browser.computer_use.endpoint host '{host}' is public. Set browser.computer_use.allow_remote_endpoint=true to allow it"
316 );
317 }
318
319 if self.computer_use.allow_remote_endpoint && !host_is_private && scheme != "https" {
320 anyhow::bail!(
321 "browser.computer_use.endpoint must use https:// when allow_remote_endpoint=true and host is public"
322 );
323 }
324
325 Ok(parsed)
326 }
327
328 fn computer_use_available(&self) -> anyhow::Result<bool> {
329 let endpoint = self.computer_use_endpoint_url()?;
330 Ok(endpoint_reachable(&endpoint, Duration::from_millis(500)))
331 }
332
333 async fn resolve_backend(&self) -> anyhow::Result<ResolvedBackend> {
334 let configured = self.configured_backend()?;
335
336 match configured {
337 BrowserBackendKind::AgentBrowser => {
338 if Self::is_agent_browser_available().await {
339 Ok(ResolvedBackend::AgentBrowser)
340 } else {
341 #[cfg(target_os = "windows")]
342 let install_hint = "Install with: npm install -g agent-browser (ensure npm global bin is in PATH)";
343 #[cfg(not(target_os = "windows"))]
344 let install_hint = "Install with: npm install -g agent-browser";
345 anyhow::bail!(
346 "browser.backend='{}' but agent-browser CLI is unavailable. {}",
347 configured.as_str(),
348 install_hint
349 )
350 }
351 }
352 BrowserBackendKind::RustNative => {
353 if !Self::rust_native_compiled() {
354 anyhow::bail!(
355 "browser.backend='rust_native' requires build feature 'browser-native'"
356 );
357 }
358 if !self.rust_native_available() {
359 anyhow::bail!(
360 "Rust-native browser backend is enabled but WebDriver endpoint is unreachable. Set browser.native_webdriver_url and start a compatible driver"
361 );
362 }
363 Ok(ResolvedBackend::RustNative)
364 }
365 BrowserBackendKind::ComputerUse => {
366 if !self.computer_use_available()? {
367 anyhow::bail!(
368 "browser.backend='computer_use' but sidecar endpoint is unreachable. Check browser.computer_use.endpoint and sidecar status"
369 );
370 }
371 Ok(ResolvedBackend::ComputerUse)
372 }
373 BrowserBackendKind::Auto => {
374 if Self::rust_native_compiled() && self.rust_native_available() {
375 return Ok(ResolvedBackend::RustNative);
376 }
377 if Self::is_agent_browser_available().await {
378 return Ok(ResolvedBackend::AgentBrowser);
379 }
380
381 let computer_use_err = match self.computer_use_available() {
382 Ok(true) => return Ok(ResolvedBackend::ComputerUse),
383 Ok(false) => None,
384 Err(err) => Some(err.to_string()),
385 };
386
387 if Self::rust_native_compiled() {
388 if let Some(err) = computer_use_err {
389 anyhow::bail!(
390 "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use invalid: {err})"
391 );
392 }
393 anyhow::bail!(
394 "browser.backend='auto' found no usable backend (agent-browser missing, rust-native unavailable, computer-use sidecar unreachable)"
395 )
396 }
397
398 if let Some(err) = computer_use_err {
399 anyhow::bail!(
400 "browser.backend='auto' needs agent-browser CLI, browser-native, or valid computer-use sidecar (error: {err})"
401 );
402 }
403
404 anyhow::bail!(
405 "browser.backend='auto' needs agent-browser CLI, browser-native, or computer-use sidecar"
406 )
407 }
408 }
409 }
410
411 fn validate_url(&self, url: &str) -> anyhow::Result<()> {
413 let url = url.trim();
414
415 if url.is_empty() {
416 anyhow::bail!("URL cannot be empty");
417 }
418
419 if url.starts_with("file://") {
422 anyhow::bail!("file:// URLs are not allowed in browser automation");
423 }
424
425 if !url.starts_with("https://") && !url.starts_with("http://") {
426 anyhow::bail!("Only http:// and https:// URLs are allowed");
427 }
428
429 if self.allowed_domains.is_empty() {
430 anyhow::bail!(
431 "Browser tool enabled but no allowed_domains configured. \
432 Add [browser].allowed_domains in config.toml"
433 );
434 }
435
436 let host = extract_host(url)?;
437
438 if is_private_host(&host) {
439 anyhow::bail!("Blocked local/private host: {host}");
440 }
441
442 if !host_matches_allowlist(&host, &self.allowed_domains) {
443 anyhow::bail!("Host '{host}' not in browser.allowed_domains");
444 }
445
446 Ok(())
447 }
448
449 async fn run_command(&self, args: &[&str]) -> anyhow::Result<AgentBrowserResponse> {
451 let agent_browser_bin = if cfg!(target_os = "windows") {
452 "agent-browser.cmd"
453 } else {
454 "agent-browser"
455 };
456 let mut cmd = Command::new(agent_browser_bin);
457
458 if is_service_environment() {
461 ensure_browser_env(&mut cmd);
462 }
463
464 if let Some(ref session) = self.session_name {
466 cmd.arg("--session").arg(session);
467 }
468
469 cmd.args(args).arg("--json");
471
472 debug!("Running: agent-browser {} --json", args.join(" "));
473
474 let output = cmd
475 .stdout(Stdio::piped())
476 .stderr(Stdio::piped())
477 .output()
478 .await?;
479
480 let stdout = String::from_utf8_lossy(&output.stdout);
481 let stderr = String::from_utf8_lossy(&output.stderr);
482
483 if !stderr.is_empty() {
484 debug!("agent-browser stderr: {}", stderr);
485 }
486
487 if let Ok(resp) = serde_json::from_str::<AgentBrowserResponse>(&stdout) {
489 return Ok(resp);
490 }
491
492 if output.status.success() {
494 Ok(AgentBrowserResponse {
495 success: true,
496 data: Some(json!({ "output": stdout.trim() })),
497 error: None,
498 })
499 } else {
500 Ok(AgentBrowserResponse {
501 success: false,
502 data: None,
503 error: Some(stderr.trim().to_string()),
504 })
505 }
506 }
507
508 #[allow(clippy::too_many_lines)]
510 async fn execute_agent_browser_action(
511 &self,
512 action: BrowserAction,
513 ) -> anyhow::Result<ToolResult> {
514 match action {
515 BrowserAction::Open { url } => {
516 self.validate_url(&url)?;
517 let resp = self.run_command(&["open", &url]).await?;
518 self.to_result(resp)
519 }
520
521 BrowserAction::Snapshot {
522 interactive_only,
523 compact,
524 depth,
525 } => {
526 let mut args = vec!["snapshot"];
527 if interactive_only {
528 args.push("-i");
529 }
530 if compact {
531 args.push("-c");
532 }
533 let depth_str;
534 if let Some(d) = depth {
535 args.push("-d");
536 depth_str = d.to_string();
537 args.push(&depth_str);
538 }
539 let resp = self.run_command(&args).await?;
540 self.to_result(resp)
541 }
542
543 BrowserAction::Click { selector } => {
544 let resp = self.run_command(&["click", &selector]).await?;
545 self.to_result(resp)
546 }
547
548 BrowserAction::Fill { selector, value } => {
549 let resp = self.run_command(&["fill", &selector, &value]).await?;
550 self.to_result(resp)
551 }
552
553 BrowserAction::Type { selector, text } => {
554 let resp = self.run_command(&["type", &selector, &text]).await?;
555 self.to_result(resp)
556 }
557
558 BrowserAction::GetText { selector } => {
559 let resp = self.run_command(&["get", "text", &selector]).await?;
560 self.to_result(resp)
561 }
562
563 BrowserAction::GetTitle => {
564 let resp = self.run_command(&["get", "title"]).await?;
565 self.to_result(resp)
566 }
567
568 BrowserAction::GetUrl => {
569 let resp = self.run_command(&["get", "url"]).await?;
570 self.to_result(resp)
571 }
572
573 BrowserAction::Screenshot { path, full_page } => {
574 let mut args = vec!["screenshot"];
575 if let Some(ref p) = path {
576 args.push(p);
577 }
578 if full_page {
579 args.push("--full");
580 }
581 let resp = self.run_command(&args).await?;
582 self.to_result(resp)
583 }
584
585 BrowserAction::Wait { selector, ms, text } => {
586 let mut args = vec!["wait"];
587 let ms_str;
588 if let Some(sel) = selector.as_ref() {
589 args.push(sel);
590 } else if let Some(millis) = ms {
591 ms_str = millis.to_string();
592 args.push(&ms_str);
593 } else if let Some(ref t) = text {
594 args.push("--text");
595 args.push(t);
596 }
597 let resp = self.run_command(&args).await?;
598 self.to_result(resp)
599 }
600
601 BrowserAction::Press { key } => {
602 let resp = self.run_command(&["press", &key]).await?;
603 self.to_result(resp)
604 }
605
606 BrowserAction::Hover { selector } => {
607 let resp = self.run_command(&["hover", &selector]).await?;
608 self.to_result(resp)
609 }
610
611 BrowserAction::Scroll { direction, pixels } => {
612 let mut args = vec!["scroll", &direction];
613 let px_str;
614 if let Some(px) = pixels {
615 px_str = px.to_string();
616 args.push(&px_str);
617 }
618 let resp = self.run_command(&args).await?;
619 self.to_result(resp)
620 }
621
622 BrowserAction::IsVisible { selector } => {
623 let resp = self.run_command(&["is", "visible", &selector]).await?;
624 self.to_result(resp)
625 }
626
627 BrowserAction::Close => {
628 let resp = self.run_command(&["close"]).await?;
629 self.to_result(resp)
630 }
631
632 BrowserAction::Find {
633 by,
634 value,
635 action,
636 fill_value,
637 } => {
638 let mut args = vec!["find", &by, &value, &action];
639 if let Some(ref fv) = fill_value {
640 args.push(fv);
641 }
642 let resp = self.run_command(&args).await?;
643 self.to_result(resp)
644 }
645 }
646 }
647
648 #[allow(clippy::unused_async)]
649 async fn execute_rust_native_action(
650 &self,
651 action: BrowserAction,
652 ) -> anyhow::Result<ToolResult> {
653 #[cfg(feature = "browser-native")]
654 {
655 let mut state = self.native_state.lock().await;
656
657 let first_attempt = state
658 .execute_action(
659 action.clone(),
660 self.native_headless,
661 &self.native_webdriver_url,
662 self.native_chrome_path.as_deref(),
663 )
664 .await;
665
666 let output = match first_attempt {
667 Ok(output) => output,
668 Err(err) => {
669 if !is_recoverable_rust_native_error(&err) {
670 return Err(err);
671 }
672
673 state.reset_session().await;
674 state
675 .execute_action(
676 action,
677 self.native_headless,
678 &self.native_webdriver_url,
679 self.native_chrome_path.as_deref(),
680 )
681 .await
682 .with_context(|| "rust_native backend retry after session reset failed")?
683 }
684 };
685
686 Ok(ToolResult {
687 success: true,
688 output: serde_json::to_string_pretty(&output).unwrap_or_default(),
689 error: None,
690 })
691 }
692
693 #[cfg(not(feature = "browser-native"))]
694 {
695 let _ = action;
696 anyhow::bail!(
697 "Rust-native browser backend is not compiled. Rebuild with --features browser-native"
698 )
699 }
700 }
701
702 fn validate_coordinate(&self, key: &str, value: i64, max: Option<i64>) -> anyhow::Result<()> {
703 if value < 0 {
704 anyhow::bail!("'{key}' must be >= 0")
705 }
706 if let Some(limit) = max {
707 if limit < 0 {
708 anyhow::bail!("Configured coordinate limit for '{key}' must be >= 0")
709 }
710 if value > limit {
711 anyhow::bail!("'{key}'={value} exceeds configured limit {limit}")
712 }
713 }
714 Ok(())
715 }
716
717 fn read_required_i64(
718 &self,
719 params: &serde_json::Map<String, Value>,
720 key: &str,
721 ) -> anyhow::Result<i64> {
722 params
723 .get(key)
724 .and_then(Value::as_i64)
725 .ok_or_else(|| anyhow::anyhow!("Missing or invalid '{key}' parameter"))
726 }
727
728 fn validate_computer_use_action(
729 &self,
730 action: &str,
731 params: &serde_json::Map<String, Value>,
732 ) -> anyhow::Result<()> {
733 match action {
734 "open" => {
735 let url = params
736 .get("url")
737 .and_then(Value::as_str)
738 .ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
739 self.validate_url(url)?;
740 }
741 "mouse_move" | "mouse_click" => {
742 let x = self.read_required_i64(params, "x")?;
743 let y = self.read_required_i64(params, "y")?;
744 self.validate_coordinate("x", x, self.computer_use.max_coordinate_x)?;
745 self.validate_coordinate("y", y, self.computer_use.max_coordinate_y)?;
746 }
747 "mouse_drag" => {
748 let from_x = self.read_required_i64(params, "from_x")?;
749 let from_y = self.read_required_i64(params, "from_y")?;
750 let to_x = self.read_required_i64(params, "to_x")?;
751 let to_y = self.read_required_i64(params, "to_y")?;
752 self.validate_coordinate("from_x", from_x, self.computer_use.max_coordinate_x)?;
753 self.validate_coordinate("to_x", to_x, self.computer_use.max_coordinate_x)?;
754 self.validate_coordinate("from_y", from_y, self.computer_use.max_coordinate_y)?;
755 self.validate_coordinate("to_y", to_y, self.computer_use.max_coordinate_y)?;
756 }
757 _ => {}
758 }
759 Ok(())
760 }
761
762 async fn execute_computer_use_action(
763 &self,
764 action: &str,
765 args: &Value,
766 ) -> anyhow::Result<ToolResult> {
767 let endpoint = self.computer_use_endpoint_url()?;
768
769 let mut params = args
770 .as_object()
771 .cloned()
772 .ok_or_else(|| anyhow::anyhow!("browser args must be a JSON object"))?;
773 params.remove("action");
774
775 self.validate_computer_use_action(action, ¶ms)?;
776
777 let payload = json!({
778 "action": action,
779 "params": params,
780 "policy": {
781 "allowed_domains": self.allowed_domains,
782 "window_allowlist": self.computer_use.window_allowlist,
783 "max_coordinate_x": self.computer_use.max_coordinate_x,
784 "max_coordinate_y": self.computer_use.max_coordinate_y,
785 },
786 "metadata": {
787 "session_name": self.session_name,
788 "source": "construct.browser",
789 "version": env!("CARGO_PKG_VERSION"),
790 }
791 });
792
793 let client = crate::config::build_runtime_proxy_client("tool.browser");
794 let mut request = client
795 .post(endpoint)
796 .timeout(Duration::from_millis(self.computer_use.timeout_ms))
797 .json(&payload);
798
799 if let Some(api_key) = self.computer_use.api_key.as_deref() {
800 let token = api_key.trim();
801 if !token.is_empty() {
802 request = request.bearer_auth(token);
803 }
804 }
805
806 let response = request.send().await.with_context(|| {
807 format!(
808 "Failed to call computer-use sidecar at {}",
809 self.computer_use.endpoint
810 )
811 })?;
812
813 let status = response.status();
814 let body = response
815 .text()
816 .await
817 .context("Failed to read computer-use sidecar response body")?;
818
819 if let Ok(parsed) = serde_json::from_str::<ComputerUseResponse>(&body) {
820 if status.is_success() && parsed.success.unwrap_or(true) {
821 let output = parsed
822 .data
823 .map(|data| serde_json::to_string_pretty(&data).unwrap_or_default())
824 .unwrap_or_else(|| {
825 serde_json::to_string_pretty(&json!({
826 "backend": "computer_use",
827 "action": action,
828 "ok": true,
829 }))
830 .unwrap_or_default()
831 });
832
833 return Ok(ToolResult {
834 success: true,
835 output,
836 error: None,
837 });
838 }
839
840 let error = parsed.error.or_else(|| {
841 if status.is_success() && parsed.success == Some(false) {
842 Some("computer-use sidecar returned success=false".to_string())
843 } else {
844 Some(format!(
845 "computer-use sidecar request failed with status {status}"
846 ))
847 }
848 });
849
850 return Ok(ToolResult {
851 success: false,
852 output: String::new(),
853 error,
854 });
855 }
856
857 if status.is_success() {
858 return Ok(ToolResult {
859 success: true,
860 output: body,
861 error: None,
862 });
863 }
864
865 Ok(ToolResult {
866 success: false,
867 output: String::new(),
868 error: Some(format!(
869 "computer-use sidecar request failed with status {status}: {}",
870 body.trim()
871 )),
872 })
873 }
874
/// Dispatches a parsed `action` to the agent-browser or rust-native backend.
///
/// # Errors
/// Propagates backend errors. Passing `ResolvedBackend::ComputerUse` is an
/// internal error, because that backend is handled before the action is parsed.
async fn execute_action(
    &self,
    action: BrowserAction,
    backend: ResolvedBackend,
) -> anyhow::Result<ToolResult> {
    match backend {
        ResolvedBackend::AgentBrowser => self.execute_agent_browser_action(action).await,
        ResolvedBackend::RustNative => self.execute_rust_native_action(action).await,
        ResolvedBackend::ComputerUse => anyhow::bail!(
            "Internal error: computer_use backend must be handled before BrowserAction parsing"
        ),
    }
}
888
889 #[allow(clippy::unnecessary_wraps, clippy::unused_self)]
890 fn to_result(&self, resp: AgentBrowserResponse) -> anyhow::Result<ToolResult> {
891 if resp.success {
892 let output = resp
893 .data
894 .map(|d| serde_json::to_string_pretty(&d).unwrap_or_default())
895 .unwrap_or_default();
896 Ok(ToolResult {
897 success: true,
898 output,
899 error: None,
900 })
901 } else {
902 Ok(ToolResult {
903 success: false,
904 output: String::new(),
905 error: resp.error,
906 })
907 }
908 }
909}
910
911#[async_trait]
912impl Tool for BrowserTool {
/// Name under which this tool is exposed: `browser`.
fn name(&self) -> &str {
    "browser"
}
916
/// Human-readable summary of the tool, assembled at compile time from
/// several string literals.
fn description(&self) -> &str {
    concat!(
        "Web/browser automation with pluggable backends (agent-browser, rust-native, computer_use). ",
        "Supports DOM actions plus optional OS-level actions (mouse_move, mouse_click, mouse_drag, ",
        "key_type, key_press, screen_capture) through a computer-use sidecar. Use 'snapshot' to map ",
        "interactive elements to refs (@e1, @e2). Enforces browser.allowed_domains for open actions."
    )
}
925
926 fn parameters_schema(&self) -> Value {
927 json!({
928 "type": "object",
929 "properties": {
930 "action": {
931 "type": "string",
932 "enum": ["open", "snapshot", "click", "fill", "type", "get_text",
933 "get_title", "get_url", "screenshot", "wait", "press",
934 "hover", "scroll", "is_visible", "close", "find",
935 "mouse_move", "mouse_click", "mouse_drag", "key_type",
936 "key_press", "screen_capture"],
937 "description": "Browser action to perform (OS-level actions require backend=computer_use)"
938 },
939 "url": {
940 "type": "string",
941 "description": "URL to navigate to (for 'open' action)"
942 },
943 "selector": {
944 "type": "string",
945 "description": "Element selector: @ref (e.g. @e1), CSS (#id, .class), or text=..."
946 },
947 "value": {
948 "type": "string",
949 "description": "Value to fill or type"
950 },
951 "text": {
952 "type": "string",
953 "description": "Text to type or wait for"
954 },
955 "key": {
956 "type": "string",
957 "description": "Key to press (Enter, Tab, Escape, etc.)"
958 },
959 "x": {
960 "type": "integer",
961 "description": "Screen X coordinate (computer_use: mouse_move/mouse_click)"
962 },
963 "y": {
964 "type": "integer",
965 "description": "Screen Y coordinate (computer_use: mouse_move/mouse_click)"
966 },
967 "from_x": {
968 "type": "integer",
969 "description": "Drag source X coordinate (computer_use: mouse_drag)"
970 },
971 "from_y": {
972 "type": "integer",
973 "description": "Drag source Y coordinate (computer_use: mouse_drag)"
974 },
975 "to_x": {
976 "type": "integer",
977 "description": "Drag target X coordinate (computer_use: mouse_drag)"
978 },
979 "to_y": {
980 "type": "integer",
981 "description": "Drag target Y coordinate (computer_use: mouse_drag)"
982 },
983 "button": {
984 "type": "string",
985 "enum": ["left", "right", "middle"],
986 "description": "Mouse button for computer_use mouse_click"
987 },
988 "direction": {
989 "type": "string",
990 "enum": ["up", "down", "left", "right"],
991 "description": "Scroll direction"
992 },
993 "pixels": {
994 "type": "integer",
995 "description": "Pixels to scroll"
996 },
997 "interactive_only": {
998 "type": "boolean",
999 "description": "For snapshot: only show interactive elements"
1000 },
1001 "compact": {
1002 "type": "boolean",
1003 "description": "For snapshot: remove empty structural elements"
1004 },
1005 "depth": {
1006 "type": "integer",
1007 "description": "For snapshot: limit tree depth"
1008 },
1009 "full_page": {
1010 "type": "boolean",
1011 "description": "For screenshot: capture full page"
1012 },
1013 "path": {
1014 "type": "string",
1015 "description": "File path for screenshot"
1016 },
1017 "ms": {
1018 "type": "integer",
1019 "description": "Milliseconds to wait"
1020 },
1021 "by": {
1022 "type": "string",
1023 "enum": ["role", "text", "label", "placeholder", "testid"],
1024 "description": "For find: semantic locator type"
1025 },
1026 "find_action": {
1027 "type": "string",
1028 "enum": ["click", "fill", "text", "hover", "check"],
1029 "description": "For find: action to perform on found element"
1030 },
1031 "fill_value": {
1032 "type": "string",
1033 "description": "For find with fill action: value to fill"
1034 }
1035 },
1036 "required": ["action"]
1037 })
1038 }
1039
1040 async fn execute(&self, args: Value) -> anyhow::Result<ToolResult> {
1041 if !self.security.can_act() {
1043 return Ok(ToolResult {
1044 success: false,
1045 output: String::new(),
1046 error: Some("Action blocked: autonomy is read-only".into()),
1047 });
1048 }
1049
1050 if !self.security.record_action() {
1051 return Ok(ToolResult {
1052 success: false,
1053 output: String::new(),
1054 error: Some("Action blocked: rate limit exceeded".into()),
1055 });
1056 }
1057
1058 let backend = match self.resolve_backend().await {
1059 Ok(selected) => selected,
1060 Err(error) => {
1061 return Ok(ToolResult {
1062 success: false,
1063 output: String::new(),
1064 error: Some(error.to_string()),
1065 });
1066 }
1067 };
1068
1069 let action_str = args
1071 .get("action")
1072 .and_then(|v| v.as_str())
1073 .ok_or_else(|| anyhow::anyhow!("Missing 'action' parameter"))?;
1074
1075 if !is_supported_browser_action(action_str) {
1076 return Ok(ToolResult {
1077 success: false,
1078 output: String::new(),
1079 error: Some(format!("Unknown action: {action_str}")),
1080 });
1081 }
1082
1083 if backend == ResolvedBackend::ComputerUse {
1084 return self.execute_computer_use_action(action_str, &args).await;
1085 }
1086
1087 if is_computer_use_only_action(action_str) {
1088 return Ok(ToolResult {
1089 success: false,
1090 output: String::new(),
1091 error: Some(unavailable_action_for_backend_error(action_str, backend)),
1092 });
1093 }
1094
1095 let action = match parse_browser_action(action_str, &args) {
1096 Ok(a) => a,
1097 Err(e) => {
1098 return Ok(ToolResult {
1099 success: false,
1100 output: String::new(),
1101 error: Some(e.to_string()),
1102 });
1103 }
1104 };
1105
1106 self.execute_action(action, backend).await
1107 }
1108}
1109
1110#[cfg(feature = "browser-native")]
1111mod native_backend {
1112 use super::BrowserAction;
1113 use anyhow::{Context, Result};
1114 use base64::Engine;
1115 use fantoccini::actions::{InputSource, MouseActions, PointerAction};
1116 use fantoccini::key::Key;
1117 use fantoccini::{Client, ClientBuilder, Locator};
1118 use serde_json::{Map, Value, json};
1119 use std::net::{TcpStream, ToSocketAddrs};
1120 use std::time::Duration;
1121
/// Session state of the rust-native backend.
#[derive(Default)]
pub struct NativeBrowserState {
    /// WebDriver client, when one has been stored; `None` by default.
    client: Option<Client>,
}
1126
1127 impl NativeBrowserState {
1128 pub fn is_available(
1129 _headless: bool,
1130 webdriver_url: &str,
1131 _chrome_path: Option<&str>,
1132 ) -> bool {
1133 webdriver_endpoint_reachable(webdriver_url, Duration::from_millis(500))
1134 }
1135
1136 #[allow(clippy::too_many_lines)]
1137 pub async fn execute_action(
1138 &mut self,
1139 action: BrowserAction,
1140 headless: bool,
1141 webdriver_url: &str,
1142 chrome_path: Option<&str>,
1143 ) -> Result<Value> {
1144 match action {
1145 BrowserAction::Open { url } => {
1146 self.ensure_session(headless, webdriver_url, chrome_path)
1147 .await?;
1148 let client = self.active_client()?;
1149 client
1150 .goto(&url)
1151 .await
1152 .with_context(|| format!("Failed to open URL: {url}"))?;
1153 let current_url = client
1154 .current_url()
1155 .await
1156 .context("Failed to read current URL after navigation")?;
1157
1158 Ok(json!({
1159 "backend": "rust_native",
1160 "action": "open",
1161 "url": current_url.as_str(),
1162 }))
1163 }
1164 BrowserAction::Snapshot {
1165 interactive_only,
1166 compact,
1167 depth,
1168 } => {
1169 let client = self.active_client()?;
1170 let snapshot = client
1171 .execute(
1172 &snapshot_script(interactive_only, compact, depth.map(i64::from)),
1173 vec![],
1174 )
1175 .await
1176 .context("Failed to evaluate snapshot script")?;
1177
1178 Ok(json!({
1179 "backend": "rust_native",
1180 "action": "snapshot",
1181 "data": snapshot,
1182 }))
1183 }
1184 BrowserAction::Click { selector } => {
1185 let client = self.active_client()?;
1186 find_element(client, &selector).await?.click().await?;
1187
1188 Ok(json!({
1189 "backend": "rust_native",
1190 "action": "click",
1191 "selector": selector,
1192 }))
1193 }
1194 BrowserAction::Fill { selector, value } => {
1195 let client = self.active_client()?;
1196 let element = find_element(client, &selector).await?;
1197 let _ = element.clear().await;
1198 element.send_keys(&value).await?;
1199
1200 Ok(json!({
1201 "backend": "rust_native",
1202 "action": "fill",
1203 "selector": selector,
1204 }))
1205 }
1206 BrowserAction::Type { selector, text } => {
1207 let client = self.active_client()?;
1208 find_element(client, &selector)
1209 .await?
1210 .send_keys(&text)
1211 .await?;
1212
1213 Ok(json!({
1214 "backend": "rust_native",
1215 "action": "type",
1216 "selector": selector,
1217 "typed": text.len(),
1218 }))
1219 }
1220 BrowserAction::GetText { selector } => {
1221 let client = self.active_client()?;
1222 let text = find_element(client, &selector).await?.text().await?;
1223
1224 Ok(json!({
1225 "backend": "rust_native",
1226 "action": "get_text",
1227 "selector": selector,
1228 "text": text,
1229 }))
1230 }
1231 BrowserAction::GetTitle => {
1232 let client = self.active_client()?;
1233 let title = client.title().await.context("Failed to read page title")?;
1234
1235 Ok(json!({
1236 "backend": "rust_native",
1237 "action": "get_title",
1238 "title": title,
1239 }))
1240 }
1241 BrowserAction::GetUrl => {
1242 let client = self.active_client()?;
1243 let url = client
1244 .current_url()
1245 .await
1246 .context("Failed to read current URL")?;
1247
1248 Ok(json!({
1249 "backend": "rust_native",
1250 "action": "get_url",
1251 "url": url.as_str(),
1252 }))
1253 }
1254 BrowserAction::Screenshot { path, full_page } => {
1255 let client = self.active_client()?;
1256 let png = client
1257 .screenshot()
1258 .await
1259 .context("Failed to capture screenshot")?;
1260 let mut payload = json!({
1261 "backend": "rust_native",
1262 "action": "screenshot",
1263 "full_page": full_page,
1264 "bytes": png.len(),
1265 });
1266
1267 if let Some(path_str) = path {
1268 tokio::fs::write(&path_str, &png)
1269 .await
1270 .with_context(|| format!("Failed to write screenshot to {path_str}"))?;
1271 payload["path"] = Value::String(path_str);
1272 } else {
1273 payload["png_base64"] =
1274 Value::String(base64::engine::general_purpose::STANDARD.encode(&png));
1275 }
1276
1277 Ok(payload)
1278 }
1279 BrowserAction::Wait { selector, ms, text } => {
1280 let client = self.active_client()?;
1281 if let Some(sel) = selector.as_ref() {
1282 wait_for_selector(client, sel).await?;
1283 Ok(json!({
1284 "backend": "rust_native",
1285 "action": "wait",
1286 "selector": sel,
1287 }))
1288 } else if let Some(duration_ms) = ms {
1289 tokio::time::sleep(Duration::from_millis(duration_ms)).await;
1290 Ok(json!({
1291 "backend": "rust_native",
1292 "action": "wait",
1293 "ms": duration_ms,
1294 }))
1295 } else if let Some(needle) = text.as_ref() {
1296 let xpath = xpath_contains_text(needle);
1297 client
1298 .wait()
1299 .for_element(Locator::XPath(&xpath))
1300 .await
1301 .with_context(|| {
1302 format!("Timed out waiting for text to appear: {needle}")
1303 })?;
1304 Ok(json!({
1305 "backend": "rust_native",
1306 "action": "wait",
1307 "text": needle,
1308 }))
1309 } else {
1310 tokio::time::sleep(Duration::from_millis(250)).await;
1311 Ok(json!({
1312 "backend": "rust_native",
1313 "action": "wait",
1314 "ms": 250,
1315 }))
1316 }
1317 }
1318 BrowserAction::Press { key } => {
1319 let client = self.active_client()?;
1320 let key_input = webdriver_key(&key);
1321 match client.active_element().await {
1322 Ok(element) => {
1323 element.send_keys(&key_input).await?;
1324 }
1325 Err(_) => {
1326 find_element(client, "body")
1327 .await?
1328 .send_keys(&key_input)
1329 .await?;
1330 }
1331 }
1332
1333 Ok(json!({
1334 "backend": "rust_native",
1335 "action": "press",
1336 "key": key,
1337 }))
1338 }
1339 BrowserAction::Hover { selector } => {
1340 let client = self.active_client()?;
1341 let element = find_element(client, &selector).await?;
1342 hover_element(client, &element).await?;
1343
1344 Ok(json!({
1345 "backend": "rust_native",
1346 "action": "hover",
1347 "selector": selector,
1348 }))
1349 }
1350 BrowserAction::Scroll { direction, pixels } => {
1351 let client = self.active_client()?;
1352 let amount = i64::from(pixels.unwrap_or(600));
1353 let (dx, dy) = match direction.as_str() {
1354 "up" => (0, -amount),
1355 "down" => (0, amount),
1356 "left" => (-amount, 0),
1357 "right" => (amount, 0),
1358 _ => anyhow::bail!(
1359 "Unsupported scroll direction '{direction}'. Use up/down/left/right"
1360 ),
1361 };
1362
1363 let position = client
1364 .execute(
1365 "window.scrollBy(arguments[0], arguments[1]); return { x: window.scrollX, y: window.scrollY };",
1366 vec![json!(dx), json!(dy)],
1367 )
1368 .await
1369 .context("Failed to execute scroll script")?;
1370
1371 Ok(json!({
1372 "backend": "rust_native",
1373 "action": "scroll",
1374 "position": position,
1375 }))
1376 }
1377 BrowserAction::IsVisible { selector } => {
1378 let client = self.active_client()?;
1379 let visible = find_element(client, &selector)
1380 .await?
1381 .is_displayed()
1382 .await?;
1383
1384 Ok(json!({
1385 "backend": "rust_native",
1386 "action": "is_visible",
1387 "selector": selector,
1388 "visible": visible,
1389 }))
1390 }
1391 BrowserAction::Close => {
1392 self.reset_session().await;
1393
1394 Ok(json!({
1395 "backend": "rust_native",
1396 "action": "close",
1397 "closed": true,
1398 }))
1399 }
1400 BrowserAction::Find {
1401 by,
1402 value,
1403 action,
1404 fill_value,
1405 } => {
1406 let client = self.active_client()?;
1407 let selector = selector_for_find(&by, &value);
1408 let element = find_element(client, &selector).await?;
1409
1410 let payload = match action.as_str() {
1411 "click" => {
1412 element.click().await?;
1413 json!({"result": "clicked"})
1414 }
1415 "fill" => {
1416 let fill = fill_value.ok_or_else(|| {
1417 anyhow::anyhow!("find_action='fill' requires fill_value")
1418 })?;
1419 let _ = element.clear().await;
1420 element.send_keys(&fill).await?;
1421 json!({"result": "filled", "typed": fill.len()})
1422 }
1423 "text" => {
1424 let text = element.text().await?;
1425 json!({"result": "text", "text": text})
1426 }
1427 "hover" => {
1428 hover_element(client, &element).await?;
1429 json!({"result": "hovered"})
1430 }
1431 "check" => {
1432 let checked_before = element_checked(&element).await?;
1433 if !checked_before {
1434 element.click().await?;
1435 }
1436 let checked_after = element_checked(&element).await?;
1437 json!({
1438 "result": "checked",
1439 "checked_before": checked_before,
1440 "checked_after": checked_after,
1441 })
1442 }
1443 _ => anyhow::bail!(
1444 "Unsupported find_action '{action}'. Use click/fill/text/hover/check"
1445 ),
1446 };
1447
1448 Ok(json!({
1449 "backend": "rust_native",
1450 "action": "find",
1451 "by": by,
1452 "value": value,
1453 "selector": selector,
1454 "data": payload,
1455 }))
1456 }
1457 }
1458 }
1459
1460 pub async fn reset_session(&mut self) {
1461 if let Some(client) = self.client.take() {
1462 let _ = client.close().await;
1463 }
1464 }
1465
1466 async fn ensure_session(
1467 &mut self,
1468 headless: bool,
1469 webdriver_url: &str,
1470 chrome_path: Option<&str>,
1471 ) -> Result<()> {
1472 if self.client.is_some() {
1473 return Ok(());
1474 }
1475
1476 let mut capabilities: Map<String, Value> = Map::new();
1477 let mut chrome_options: Map<String, Value> = Map::new();
1478 let mut args: Vec<Value> = Vec::new();
1479
1480 if headless {
1481 args.push(Value::String("--headless=new".to_string()));
1482 args.push(Value::String("--disable-gpu".to_string()));
1483 }
1484
1485 if super::is_service_environment() {
1489 args.push(Value::String("--no-sandbox".to_string()));
1490 args.push(Value::String("--disable-dev-shm-usage".to_string()));
1491 }
1492
1493 if !args.is_empty() {
1494 chrome_options.insert("args".to_string(), Value::Array(args));
1495 }
1496
1497 if let Some(path) = chrome_path {
1498 let trimmed = path.trim();
1499 if !trimmed.is_empty() {
1500 chrome_options.insert("binary".to_string(), Value::String(trimmed.to_string()));
1501 }
1502 }
1503
1504 if !chrome_options.is_empty() {
1505 capabilities.insert(
1506 "goog:chromeOptions".to_string(),
1507 Value::Object(chrome_options),
1508 );
1509 }
1510
1511 let mut builder =
1512 ClientBuilder::rustls().context("Failed to initialize rustls connector")?;
1513 if !capabilities.is_empty() {
1514 builder.capabilities(capabilities);
1515 }
1516
1517 let client = builder
1518 .connect(webdriver_url)
1519 .await
1520 .with_context(|| {
1521 format!(
1522 "Failed to connect to WebDriver at {webdriver_url}. Start chromedriver/geckodriver first"
1523 )
1524 })?;
1525
1526 self.client = Some(client);
1527 Ok(())
1528 }
1529
1530 fn active_client(&self) -> Result<&Client> {
1531 self.client.as_ref().ok_or_else(|| {
1532 anyhow::anyhow!("No active native browser session. Run browser action='open' first")
1533 })
1534 }
1535 }
1536
1537 fn webdriver_endpoint_reachable(webdriver_url: &str, timeout: Duration) -> bool {
1538 let parsed = match reqwest::Url::parse(webdriver_url) {
1539 Ok(url) => url,
1540 Err(_) => return false,
1541 };
1542
1543 if parsed.scheme() != "http" && parsed.scheme() != "https" {
1544 return false;
1545 }
1546
1547 let host = match parsed.host_str() {
1548 Some(h) if !h.is_empty() => h,
1549 _ => return false,
1550 };
1551
1552 let port = parsed.port_or_known_default().unwrap_or(4444);
1553 let mut addrs = match (host, port).to_socket_addrs() {
1554 Ok(iter) => iter,
1555 Err(_) => return false,
1556 };
1557
1558 let addr = match addrs.next() {
1559 Some(a) => a,
1560 None => return false,
1561 };
1562
1563 TcpStream::connect_timeout(&addr, timeout).is_ok()
1564 }
1565
1566 fn selector_for_find(by: &str, value: &str) -> String {
1567 let escaped = css_attr_escape(value);
1568 match by {
1569 "role" => format!(r#"[role=\"{escaped}\"]"#),
1570 "label" => format!("label={value}"),
1571 "placeholder" => format!(r#"[placeholder=\"{escaped}\"]"#),
1572 "testid" => format!(r#"[data-testid=\"{escaped}\"]"#),
1573 _ => format!("text={value}"),
1574 }
1575 }
1576
1577 async fn wait_for_selector(client: &Client, selector: &str) -> Result<()> {
1578 match parse_selector(selector) {
1579 SelectorKind::Css(css) => {
1580 client
1581 .wait()
1582 .for_element(Locator::Css(&css))
1583 .await
1584 .with_context(|| format!("Timed out waiting for selector '{selector}'"))?;
1585 }
1586 SelectorKind::XPath(xpath) => {
1587 client
1588 .wait()
1589 .for_element(Locator::XPath(&xpath))
1590 .await
1591 .with_context(|| format!("Timed out waiting for selector '{selector}'"))?;
1592 }
1593 }
1594 Ok(())
1595 }
1596
1597 async fn find_element(
1598 client: &Client,
1599 selector: &str,
1600 ) -> Result<fantoccini::elements::Element> {
1601 let element = match parse_selector(selector) {
1602 SelectorKind::Css(css) => client
1603 .find(Locator::Css(&css))
1604 .await
1605 .with_context(|| format!("Failed to find element by CSS '{css}'"))?,
1606 SelectorKind::XPath(xpath) => client
1607 .find(Locator::XPath(&xpath))
1608 .await
1609 .with_context(|| format!("Failed to find element by XPath '{xpath}'"))?,
1610 };
1611 Ok(element)
1612 }
1613
1614 async fn hover_element(client: &Client, element: &fantoccini::elements::Element) -> Result<()> {
1615 let actions = MouseActions::new("mouse".to_string()).then(PointerAction::MoveToElement {
1616 element: element.clone(),
1617 duration: Some(Duration::from_millis(150)),
1618 x: 0.0,
1619 y: 0.0,
1620 });
1621
1622 client
1623 .perform_actions(actions)
1624 .await
1625 .context("Failed to perform hover action")?;
1626 let _ = client.release_actions().await;
1627 Ok(())
1628 }
1629
1630 async fn element_checked(element: &fantoccini::elements::Element) -> Result<bool> {
1631 let checked = element
1632 .prop("checked")
1633 .await
1634 .context("Failed to read checkbox checked property")?
1635 .unwrap_or_default()
1636 .to_ascii_lowercase();
1637 Ok(matches!(checked.as_str(), "true" | "checked" | "1"))
1638 }
1639
1640 enum SelectorKind {
1641 Css(String),
1642 XPath(String),
1643 }
1644
1645 fn parse_selector(selector: &str) -> SelectorKind {
1646 let trimmed = selector.trim();
1647 if let Some(text_query) = trimmed.strip_prefix("text=") {
1648 return SelectorKind::XPath(xpath_contains_text(text_query));
1649 }
1650
1651 if let Some(label_query) = trimmed.strip_prefix("label=") {
1652 let literal = xpath_literal(label_query);
1653 return SelectorKind::XPath(format!(
1654 "(//label[contains(normalize-space(.), {literal})]/following::*[self::input or self::textarea or self::select][1] | //*[@aria-label and contains(normalize-space(@aria-label), {literal})] | //label[contains(normalize-space(.), {literal})])"
1655 ));
1656 }
1657
1658 if trimmed.starts_with('@') {
1659 let escaped = css_attr_escape(trimmed);
1660 return SelectorKind::Css(format!(r#"[data-zc-ref=\"{escaped}\"]"#));
1661 }
1662
1663 SelectorKind::Css(trimmed.to_string())
1664 }
1665
1666 fn css_attr_escape(input: &str) -> String {
1667 input
1668 .replace('\\', "\\\\")
1669 .replace('"', "\\\"")
1670 .replace('\n', " ")
1671 }
1672
1673 fn xpath_contains_text(text: &str) -> String {
1674 format!("//*[contains(normalize-space(.), {})]", xpath_literal(text))
1675 }
1676
1677 fn xpath_literal(input: &str) -> String {
1678 if !input.contains('"') {
1679 return format!("\"{input}\"");
1680 }
1681 if !input.contains('\'') {
1682 return format!("'{input}'");
1683 }
1684
1685 let segments: Vec<&str> = input.split('"').collect();
1686 let mut parts: Vec<String> = Vec::new();
1687 for (index, part) in segments.iter().enumerate() {
1688 if !part.is_empty() {
1689 parts.push(format!("\"{part}\""));
1690 }
1691 if index + 1 < segments.len() {
1692 parts.push("'\"'".to_string());
1693 }
1694 }
1695
1696 if parts.is_empty() {
1697 "\"\"".to_string()
1698 } else {
1699 format!("concat({})", parts.join(","))
1700 }
1701 }
1702
1703 fn webdriver_key(key: &str) -> String {
1704 match key.trim().to_ascii_lowercase().as_str() {
1705 "enter" => Key::Enter.to_string(),
1706 "return" => Key::Return.to_string(),
1707 "tab" => Key::Tab.to_string(),
1708 "escape" | "esc" => Key::Escape.to_string(),
1709 "backspace" => Key::Backspace.to_string(),
1710 "delete" => Key::Delete.to_string(),
1711 "space" => Key::Space.to_string(),
1712 "arrowup" | "up" => Key::Up.to_string(),
1713 "arrowdown" | "down" => Key::Down.to_string(),
1714 "arrowleft" | "left" => Key::Left.to_string(),
1715 "arrowright" | "right" => Key::Right.to_string(),
1716 "home" => Key::Home.to_string(),
1717 "end" => Key::End.to_string(),
1718 "pageup" => Key::PageUp.to_string(),
1719 "pagedown" => Key::PageDown.to_string(),
1720 other => other.to_string(),
1721 }
1722 }
1723
1724 fn snapshot_script(interactive_only: bool, compact: bool, depth: Option<i64>) -> String {
1725 let depth_literal = depth
1726 .map(|level| level.to_string())
1727 .unwrap_or_else(|| "null".to_string());
1728
1729 format!(
1730 r#"(() => {{
1731 const interactiveOnly = {interactive_only};
1732 const compact = {compact};
1733 const maxDepth = {depth_literal};
1734 const nodes = [];
1735 const root = document.body || document.documentElement;
1736 let counter = 0;
1737
1738 const isVisible = (el) => {{
1739 const style = window.getComputedStyle(el);
1740 if (style.display === 'none' || style.visibility === 'hidden' || Number(style.opacity || 1) === 0) {{
1741 return false;
1742 }}
1743 const rect = el.getBoundingClientRect();
1744 return rect.width > 0 && rect.height > 0;
1745 }};
1746
1747 const isInteractive = (el) => {{
1748 if (el.matches('a,button,input,select,textarea,summary,[role],*[tabindex]')) return true;
1749 return typeof el.onclick === 'function';
1750 }};
1751
1752 const describe = (el, depth) => {{
1753 const interactive = isInteractive(el);
1754 const text = (el.innerText || el.textContent || '').trim().replace(/\s+/g, ' ').slice(0, 140);
1755 if (interactiveOnly && !interactive) return;
1756 if (compact && !interactive && !text) return;
1757
1758 const ref = '@e' + (++counter);
1759 el.setAttribute('data-zc-ref', ref);
1760 nodes.push({{
1761 ref,
1762 depth,
1763 tag: el.tagName.toLowerCase(),
1764 id: el.id || null,
1765 role: el.getAttribute('role'),
1766 text,
1767 interactive,
1768 }});
1769 }};
1770
1771 const walk = (el, depth) => {{
1772 if (!(el instanceof Element)) return;
1773 if (maxDepth !== null && depth > maxDepth) return;
1774 if (isVisible(el)) {{
1775 describe(el, depth);
1776 }}
1777 for (const child of el.children) {{
1778 walk(child, depth + 1);
1779 if (nodes.length >= 400) return;
1780 }}
1781 }};
1782
1783 if (root) walk(root, 0);
1784
1785 return {{
1786 title: document.title,
1787 url: window.location.href,
1788 count: nodes.length,
1789 nodes,
1790 }};
1791}})();"#
1792 )
1793 }
1794}
1795
1796fn parse_browser_action(action_str: &str, args: &Value) -> anyhow::Result<BrowserAction> {
1800 match action_str {
1801 "open" => {
1802 let url = args
1803 .get("url")
1804 .and_then(|v| v.as_str())
1805 .ok_or_else(|| anyhow::anyhow!("Missing 'url' for open action"))?;
1806 Ok(BrowserAction::Open { url: url.into() })
1807 }
1808 "snapshot" => Ok(BrowserAction::Snapshot {
1809 interactive_only: args
1810 .get("interactive_only")
1811 .and_then(serde_json::Value::as_bool)
1812 .unwrap_or(true),
1813 compact: args
1814 .get("compact")
1815 .and_then(serde_json::Value::as_bool)
1816 .unwrap_or(true),
1817 depth: args
1818 .get("depth")
1819 .and_then(serde_json::Value::as_u64)
1820 .map(|d| u32::try_from(d).unwrap_or(u32::MAX)),
1821 }),
1822 "click" => {
1823 let selector = args
1824 .get("selector")
1825 .and_then(|v| v.as_str())
1826 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for click"))?;
1827 Ok(BrowserAction::Click {
1828 selector: selector.into(),
1829 })
1830 }
1831 "fill" => {
1832 let selector = args
1833 .get("selector")
1834 .and_then(|v| v.as_str())
1835 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for fill"))?;
1836 let value = args
1837 .get("value")
1838 .and_then(|v| v.as_str())
1839 .ok_or_else(|| anyhow::anyhow!("Missing 'value' for fill"))?;
1840 Ok(BrowserAction::Fill {
1841 selector: selector.into(),
1842 value: value.into(),
1843 })
1844 }
1845 "type" => {
1846 let selector = args
1847 .get("selector")
1848 .and_then(|v| v.as_str())
1849 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for type"))?;
1850 let text = args
1851 .get("text")
1852 .and_then(|v| v.as_str())
1853 .ok_or_else(|| anyhow::anyhow!("Missing 'text' for type"))?;
1854 Ok(BrowserAction::Type {
1855 selector: selector.into(),
1856 text: text.into(),
1857 })
1858 }
1859 "get_text" => {
1860 let selector = args
1861 .get("selector")
1862 .and_then(|v| v.as_str())
1863 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for get_text"))?;
1864 Ok(BrowserAction::GetText {
1865 selector: selector.into(),
1866 })
1867 }
1868 "get_title" => Ok(BrowserAction::GetTitle),
1869 "get_url" => Ok(BrowserAction::GetUrl),
1870 "screenshot" => Ok(BrowserAction::Screenshot {
1871 path: args.get("path").and_then(|v| v.as_str()).map(String::from),
1872 full_page: args
1873 .get("full_page")
1874 .and_then(serde_json::Value::as_bool)
1875 .unwrap_or(false),
1876 }),
1877 "wait" => Ok(BrowserAction::Wait {
1878 selector: args
1879 .get("selector")
1880 .and_then(|v| v.as_str())
1881 .map(String::from),
1882 ms: args.get("ms").and_then(serde_json::Value::as_u64),
1883 text: args.get("text").and_then(|v| v.as_str()).map(String::from),
1884 }),
1885 "press" => {
1886 let key = args
1887 .get("key")
1888 .and_then(|v| v.as_str())
1889 .ok_or_else(|| anyhow::anyhow!("Missing 'key' for press"))?;
1890 Ok(BrowserAction::Press { key: key.into() })
1891 }
1892 "hover" => {
1893 let selector = args
1894 .get("selector")
1895 .and_then(|v| v.as_str())
1896 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for hover"))?;
1897 Ok(BrowserAction::Hover {
1898 selector: selector.into(),
1899 })
1900 }
1901 "scroll" => {
1902 let direction = args
1903 .get("direction")
1904 .and_then(|v| v.as_str())
1905 .ok_or_else(|| anyhow::anyhow!("Missing 'direction' for scroll"))?;
1906 Ok(BrowserAction::Scroll {
1907 direction: direction.into(),
1908 pixels: args
1909 .get("pixels")
1910 .and_then(serde_json::Value::as_u64)
1911 .map(|p| u32::try_from(p).unwrap_or(u32::MAX)),
1912 })
1913 }
1914 "is_visible" => {
1915 let selector = args
1916 .get("selector")
1917 .and_then(|v| v.as_str())
1918 .ok_or_else(|| anyhow::anyhow!("Missing 'selector' for is_visible"))?;
1919 Ok(BrowserAction::IsVisible {
1920 selector: selector.into(),
1921 })
1922 }
1923 "close" => Ok(BrowserAction::Close),
1924 "find" => {
1925 let by = args
1926 .get("by")
1927 .and_then(|v| v.as_str())
1928 .ok_or_else(|| anyhow::anyhow!("Missing 'by' for find"))?;
1929 let value = args
1930 .get("value")
1931 .and_then(|v| v.as_str())
1932 .ok_or_else(|| anyhow::anyhow!("Missing 'value' for find"))?;
1933 let action = args
1934 .get("find_action")
1935 .and_then(|v| v.as_str())
1936 .ok_or_else(|| anyhow::anyhow!("Missing 'find_action' for find"))?;
1937 Ok(BrowserAction::Find {
1938 by: by.into(),
1939 value: value.into(),
1940 action: action.into(),
1941 fill_value: args
1942 .get("fill_value")
1943 .and_then(|v| v.as_str())
1944 .map(String::from),
1945 })
1946 }
1947 other => anyhow::bail!("Unsupported browser action: {other}"),
1948 }
1949}
1950
/// Returns `true` when `action` is one of the action names listed here.
///
/// The comparison is exact and case-sensitive.
fn is_supported_browser_action(action: &str) -> bool {
    const SUPPORTED_ACTIONS: &[&str] = &[
        "open",
        "snapshot",
        "click",
        "fill",
        "type",
        "get_text",
        "get_title",
        "get_url",
        "screenshot",
        "wait",
        "press",
        "hover",
        "scroll",
        "is_visible",
        "close",
        "find",
        "mouse_move",
        "mouse_click",
        "mouse_drag",
        "key_type",
        "key_press",
        "screen_capture",
    ];

    SUPPORTED_ACTIONS.contains(&action)
}
1980
/// Returns `true` for the mouse, keyboard and screen-capture action names
/// listed here.
///
/// The comparison is exact and case-sensitive.
fn is_computer_use_only_action(action: &str) -> bool {
    const COMPUTER_USE_ACTIONS: &[&str] = &[
        "mouse_move",
        "mouse_click",
        "mouse_drag",
        "key_type",
        "key_press",
        "screen_capture",
    ];

    COMPUTER_USE_ACTIONS.contains(&action)
}
1987
1988fn backend_name(backend: ResolvedBackend) -> &'static str {
1989 match backend {
1990 ResolvedBackend::AgentBrowser => "agent_browser",
1991 ResolvedBackend::RustNative => "rust_native",
1992 ResolvedBackend::ComputerUse => "computer_use",
1993 }
1994}
1995
1996fn unavailable_action_for_backend_error(action: &str, backend: ResolvedBackend) -> String {
1997 format!(
1998 "Action '{action}' is unavailable for backend '{}'",
1999 backend_name(backend)
2000 )
2001}
2002
2003fn is_recoverable_rust_native_error(err: &anyhow::Error) -> bool {
2004 let message = format!("{err:#}").to_ascii_lowercase();
2005
2006 if message.contains("invalid session id")
2007 || message.contains("no such window")
2008 || message.contains("session not created")
2009 || message.contains("connection reset")
2010 || message.contains("broken pipe")
2011 {
2012 return true;
2013 }
2014
2015 message.contains("webdriver") && (message.contains("timed out") || message.contains("timeout"))
2016}
2017
/// Trims and lowercases each domain, dropping entries that end up empty.
///
/// The order of the remaining entries is preserved.
fn normalize_domains(domains: Vec<String>) -> Vec<String> {
    let mut cleaned = Vec::with_capacity(domains.len());
    for raw in domains {
        let lowered = raw.trim().to_lowercase();
        if !lowered.is_empty() {
            cleaned.push(lowered);
        }
    }
    cleaned
}
2025
2026fn endpoint_reachable(endpoint: &reqwest::Url, timeout: Duration) -> bool {
2027 let host = match endpoint.host_str() {
2028 Some(host) if !host.is_empty() => host,
2029 _ => return false,
2030 };
2031
2032 let port = match endpoint.port_or_known_default() {
2033 Some(port) => port,
2034 None => return false,
2035 };
2036
2037 let mut addrs = match (host, port).to_socket_addrs() {
2038 Ok(addrs) => addrs,
2039 Err(_) => return false,
2040 };
2041
2042 let addr = match addrs.next() {
2043 Some(addr) => addr,
2044 None => return false,
2045 };
2046
2047 std::net::TcpStream::connect_timeout(&addr, timeout).is_ok()
2048}
2049
2050fn extract_host(url_str: &str) -> anyhow::Result<String> {
2051 let url = url_str.trim();
2053 let without_scheme = url
2054 .strip_prefix("https://")
2055 .or_else(|| url.strip_prefix("http://"))
2056 .or_else(|| url.strip_prefix("file://"))
2057 .unwrap_or(url);
2058
2059 let authority = without_scheme.split('/').next().unwrap_or(without_scheme);
2061
2062 let host = if authority.starts_with('[') {
2063 authority.find(']').map_or(authority, |i| &authority[..=i])
2065 } else {
2066 authority.split(':').next().unwrap_or(authority)
2068 };
2069
2070 if host.is_empty() {
2071 anyhow::bail!("Invalid URL: no host");
2072 }
2073
2074 Ok(host.to_lowercase())
2075}
2076
2077fn is_private_host(host: &str) -> bool {
2078 let bare = host
2080 .strip_prefix('[')
2081 .and_then(|h| h.strip_suffix(']'))
2082 .unwrap_or(host);
2083
2084 if bare == "localhost" || bare.ends_with(".localhost") {
2085 return true;
2086 }
2087
2088 if bare
2090 .rsplit('.')
2091 .next()
2092 .is_some_and(|label| label == "local")
2093 {
2094 return true;
2095 }
2096
2097 if let Ok(ip) = bare.parse::<std::net::IpAddr>() {
2099 return match ip {
2100 std::net::IpAddr::V4(v4) => is_non_global_v4(v4),
2101 std::net::IpAddr::V6(v6) => is_non_global_v6(v6),
2102 };
2103 }
2104
2105 false
2106}
2107
/// Returns `true` for IPv4 addresses treated as not publicly routable.
///
/// Covers loopback, private, link-local, unspecified, broadcast and multicast
/// addresses, plus these ranges matched on the first two octets only:
///
/// * `100.64.0.0` - `100.127.255.255`
/// * `240.0.0.0` and above
/// * `192.0.x.x`, `198.51.x.x` and `203.0.x.x`
/// * `198.18.x.x` and `198.19.x.x`
///
/// Because only two octets are compared, each of the `192.0`, `198.51` and
/// `203.0` entries covers a whole /16.
fn is_non_global_v4(v4: std::net::Ipv4Addr) -> bool {
    let std_classified = v4.is_loopback()
        || v4.is_private()
        || v4.is_link_local()
        || v4.is_unspecified()
        || v4.is_broadcast()
        || v4.is_multicast();
    if std_classified {
        return true;
    }

    let octets = v4.octets();
    match (octets[0], octets[1]) {
        (100, 64..=127) => true,
        (240..=255, _) => true,
        (192, 0) | (198, 51) | (203, 0) => true,
        (198, 18..=19) => true,
        _ => false,
    }
}
2128
2129fn is_non_global_v6(v6: std::net::Ipv6Addr) -> bool {
2131 let segs = v6.segments();
2132 v6.is_loopback()
2133 || v6.is_unspecified()
2134 || v6.is_multicast()
2135 || (segs[0] & 0xfe00) == 0xfc00
2137 || (segs[0] & 0xffc0) == 0xfe80
2139 || v6.to_ipv4_mapped().is_some_and(is_non_global_v4)
2141}
2142
/// Guesses whether the process runs as a background service rather than in an
/// interactive user session.
///
/// Returns `true` when `INVOCATION_ID` or `JOURNAL_STREAM` is set (presumably
/// by a service manager; confirm against deployment docs). On Linux it also
/// returns `true` when `HOME` is unset. The `/run/openrc` probe only matters
/// together with a missing `HOME`, which already yields `true` by itself.
fn is_service_environment() -> bool {
    let has_service_marker = ["INVOCATION_ID", "JOURNAL_STREAM"]
        .iter()
        .any(|name| std::env::var_os(name).is_some());
    if has_service_marker {
        return true;
    }

    #[cfg(target_os = "linux")]
    {
        let home_missing = std::env::var_os("HOME").is_none();
        if home_missing && std::path::Path::new("/run/openrc").exists() {
            return true;
        }
        if home_missing {
            return true;
        }
    }

    false
}
2163
2164fn ensure_browser_env(cmd: &mut Command) {
2167 if std::env::var_os("HOME").is_none() {
2168 cmd.env("HOME", "/tmp");
2169 }
2170 let existing = std::env::var("CHROMIUM_FLAGS").unwrap_or_default();
2171 if !existing.contains("--no-sandbox") {
2172 let new_flags = if existing.is_empty() {
2173 "--no-sandbox --disable-dev-shm-usage".to_string()
2174 } else {
2175 format!("{existing} --no-sandbox --disable-dev-shm-usage")
2176 };
2177 cmd.env("CHROMIUM_FLAGS", new_flags);
2178 }
2179}
2180
/// Checks `host` against the allowlist patterns.
///
/// * `*` matches every host.
/// * `*.example.com` and `example.com` behave the same: both match the bare
///   domain and any subdomain of it.
///
/// Matching is an exact, case-sensitive string comparison.
fn host_matches_allowlist(host: &str, allowed: &[String]) -> bool {
    for pattern in allowed {
        if pattern == "*" {
            return true;
        }

        // With or without a leading "*.", the rule is "the base domain itself
        // or anything ending in '.' + base".
        let base = pattern.strip_prefix("*.").unwrap_or(pattern.as_str());
        let is_subdomain = host
            .strip_suffix(base)
            .is_some_and(|head| head.ends_with('.'));
        if host == base || is_subdomain {
            return true;
        }
    }
    false
}
2196
2197#[cfg(test)]
2198mod tests {
2199 use super::*;
2200
2201 #[test]
2202 fn normalize_domains_works() {
2203 let domains = vec![
2204 " Example.COM ".into(),
2205 "docs.example.com".into(),
2206 String::new(),
2207 ];
2208 let normalized = normalize_domains(domains);
2209 assert_eq!(normalized, vec!["example.com", "docs.example.com"]);
2210 }
2211
2212 #[test]
2213 fn extract_host_works() {
2214 assert_eq!(
2215 extract_host("https://example.com/path").unwrap(),
2216 "example.com"
2217 );
2218 assert_eq!(
2219 extract_host("https://Sub.Example.COM:8080/").unwrap(),
2220 "sub.example.com"
2221 );
2222 }
2223
2224 #[test]
2225 fn extract_host_handles_ipv6() {
2226 assert_eq!(extract_host("https://[::1]/path").unwrap(), "[::1]");
2228 assert_eq!(
2230 extract_host("https://[2001:db8::1]:8080/path").unwrap(),
2231 "[2001:db8::1]"
2232 );
2233 assert_eq!(extract_host("https://[fe80::1]/").unwrap(), "[fe80::1]");
2235 }
2236
2237 #[test]
2238 fn is_private_host_detects_local() {
2239 assert!(is_private_host("localhost"));
2240 assert!(is_private_host("app.localhost"));
2241 assert!(is_private_host("printer.local"));
2242 assert!(is_private_host("127.0.0.1"));
2243 assert!(is_private_host("192.168.1.1"));
2244 assert!(is_private_host("10.0.0.1"));
2245 assert!(!is_private_host("example.com"));
2246 assert!(!is_private_host("google.com"));
2247 }
2248
2249 #[test]
2250 fn is_private_host_blocks_multicast_and_reserved() {
2251 assert!(is_private_host("224.0.0.1")); assert!(is_private_host("255.255.255.255")); assert!(is_private_host("100.64.0.1")); assert!(is_private_host("240.0.0.1")); assert!(is_private_host("192.0.2.1")); assert!(is_private_host("198.51.100.1")); assert!(is_private_host("203.0.113.1")); assert!(is_private_host("198.18.0.1")); }
2260
2261 #[test]
2262 fn is_private_host_catches_ipv6() {
2263 assert!(is_private_host("::1"));
2264 assert!(is_private_host("[::1]"));
2265 assert!(is_private_host("0.0.0.0"));
2266 }
2267
2268 #[test]
2269 fn is_private_host_catches_mapped_ipv4() {
2270 assert!(is_private_host("::ffff:127.0.0.1"));
2272 assert!(is_private_host("::ffff:10.0.0.1"));
2273 assert!(is_private_host("::ffff:192.168.1.1"));
2274 }
2275
2276 #[test]
2277 fn is_private_host_catches_ipv6_private_ranges() {
2278 assert!(is_private_host("fd00::1"));
2280 assert!(is_private_host("fc00::1"));
2281 assert!(is_private_host("fe80::1"));
2283 assert!(!is_private_host("2001:db8::1"));
2285 }
2286
2287 #[test]
2288 fn validate_url_blocks_ipv6_ssrf() {
2289 let security = Arc::new(SecurityPolicy::default());
2290 let tool = BrowserTool::new(security, vec!["*".into()], None);
2291 assert!(tool.validate_url("https://[::1]/").is_err());
2292 assert!(tool.validate_url("https://[::ffff:127.0.0.1]/").is_err());
2293 assert!(
2294 tool.validate_url("https://[::ffff:10.0.0.1]:8080/")
2295 .is_err()
2296 );
2297 }
2298
2299 #[test]
2300 fn host_matches_allowlist_exact() {
2301 let allowed = vec!["example.com".into()];
2302 assert!(host_matches_allowlist("example.com", &allowed));
2303 assert!(host_matches_allowlist("sub.example.com", &allowed));
2304 assert!(!host_matches_allowlist("notexample.com", &allowed));
2305 }
2306
2307 #[test]
2308 fn host_matches_allowlist_wildcard() {
2309 let allowed = vec!["*.example.com".into()];
2310 assert!(host_matches_allowlist("sub.example.com", &allowed));
2311 assert!(host_matches_allowlist("example.com", &allowed));
2312 assert!(!host_matches_allowlist("other.com", &allowed));
2313 }
2314
2315 #[test]
2316 fn host_matches_allowlist_star() {
2317 let allowed = vec!["*".into()];
2318 assert!(host_matches_allowlist("anything.com", &allowed));
2319 assert!(host_matches_allowlist("example.org", &allowed));
2320 }
2321
2322 #[test]
2323 fn browser_backend_parser_accepts_supported_values() {
2324 assert_eq!(
2325 BrowserBackendKind::parse("agent_browser").unwrap(),
2326 BrowserBackendKind::AgentBrowser
2327 );
2328 assert_eq!(
2329 BrowserBackendKind::parse("rust-native").unwrap(),
2330 BrowserBackendKind::RustNative
2331 );
2332 assert_eq!(
2333 BrowserBackendKind::parse("computer_use").unwrap(),
2334 BrowserBackendKind::ComputerUse
2335 );
2336 assert_eq!(
2337 BrowserBackendKind::parse("auto").unwrap(),
2338 BrowserBackendKind::Auto
2339 );
2340 }
2341
2342 #[test]
2343 fn browser_backend_parser_rejects_unknown_values() {
2344 assert!(BrowserBackendKind::parse("playwright").is_err());
2345 }
2346
2347 #[test]
2348 fn browser_tool_default_backend_is_agent_browser() {
2349 let security = Arc::new(SecurityPolicy::default());
2350 let tool = BrowserTool::new(security, vec!["example.com".into()], None);
2351 assert_eq!(
2352 tool.configured_backend().unwrap(),
2353 BrowserBackendKind::AgentBrowser
2354 );
2355 }
2356
2357 #[test]
2358 fn browser_tool_accepts_auto_backend_config() {
2359 let security = Arc::new(SecurityPolicy::default());
2360 let tool = BrowserTool::new_with_backend(
2361 security,
2362 vec!["example.com".into()],
2363 None,
2364 "auto".into(),
2365 true,
2366 "http://127.0.0.1:9515".into(),
2367 None,
2368 ComputerUseConfig::default(),
2369 );
2370 assert_eq!(tool.configured_backend().unwrap(), BrowserBackendKind::Auto);
2371 }
2372
2373 #[test]
2374 fn browser_tool_accepts_computer_use_backend_config() {
2375 let security = Arc::new(SecurityPolicy::default());
2376 let tool = BrowserTool::new_with_backend(
2377 security,
2378 vec!["example.com".into()],
2379 None,
2380 "computer_use".into(),
2381 true,
2382 "http://127.0.0.1:9515".into(),
2383 None,
2384 ComputerUseConfig::default(),
2385 );
2386 assert_eq!(
2387 tool.configured_backend().unwrap(),
2388 BrowserBackendKind::ComputerUse
2389 );
2390 }
2391
2392 #[test]
2393 fn computer_use_endpoint_rejects_public_http_by_default() {
2394 let security = Arc::new(SecurityPolicy::default());
2395 let tool = BrowserTool::new_with_backend(
2396 security,
2397 vec!["example.com".into()],
2398 None,
2399 "computer_use".into(),
2400 true,
2401 "http://127.0.0.1:9515".into(),
2402 None,
2403 ComputerUseConfig {
2404 endpoint: "http://computer-use.example.com/v1/actions".into(),
2405 ..ComputerUseConfig::default()
2406 },
2407 );
2408
2409 assert!(tool.computer_use_endpoint_url().is_err());
2410 }
2411
2412 #[test]
2413 fn computer_use_endpoint_requires_https_for_public_remote() {
2414 let security = Arc::new(SecurityPolicy::default());
2415 let tool = BrowserTool::new_with_backend(
2416 security,
2417 vec!["example.com".into()],
2418 None,
2419 "computer_use".into(),
2420 true,
2421 "http://127.0.0.1:9515".into(),
2422 None,
2423 ComputerUseConfig {
2424 endpoint: "https://computer-use.example.com/v1/actions".into(),
2425 allow_remote_endpoint: true,
2426 ..ComputerUseConfig::default()
2427 },
2428 );
2429
2430 assert!(tool.computer_use_endpoint_url().is_ok());
2431 }
2432
2433 #[test]
2434 fn computer_use_coordinate_validation_applies_limits() {
2435 let security = Arc::new(SecurityPolicy::default());
2436 let tool = BrowserTool::new_with_backend(
2437 security,
2438 vec!["example.com".into()],
2439 None,
2440 "computer_use".into(),
2441 true,
2442 "http://127.0.0.1:9515".into(),
2443 None,
2444 ComputerUseConfig {
2445 max_coordinate_x: Some(100),
2446 max_coordinate_y: Some(100),
2447 ..ComputerUseConfig::default()
2448 },
2449 );
2450
2451 assert!(
2452 tool.validate_coordinate("x", 50, tool.computer_use.max_coordinate_x)
2453 .is_ok()
2454 );
2455 assert!(
2456 tool.validate_coordinate("x", 101, tool.computer_use.max_coordinate_x)
2457 .is_err()
2458 );
2459 assert!(
2460 tool.validate_coordinate("y", -1, tool.computer_use.max_coordinate_y)
2461 .is_err()
2462 );
2463 }
2464
2465 #[test]
2466 fn browser_tool_name() {
2467 let security = Arc::new(SecurityPolicy::default());
2468 let tool = BrowserTool::new(security, vec!["example.com".into()], None);
2469 assert_eq!(tool.name(), "browser");
2470 }
2471
2472 #[test]
2473 fn browser_tool_validates_url() {
2474 let security = Arc::new(SecurityPolicy::default());
2475 let tool = BrowserTool::new(security, vec!["example.com".into()], None);
2476
2477 assert!(tool.validate_url("https://example.com").is_ok());
2479 assert!(tool.validate_url("https://sub.example.com/path").is_ok());
2480
2481 assert!(tool.validate_url("https://other.com").is_err());
2483
2484 assert!(tool.validate_url("https://localhost").is_err());
2486 assert!(tool.validate_url("https://127.0.0.1").is_err());
2487
2488 assert!(tool.validate_url("ftp://example.com").is_err());
2490
2491 assert!(tool.validate_url("file:///tmp/test.html").is_err());
2493 }
2494
2495 #[test]
2496 fn browser_tool_empty_allowlist_blocks() {
2497 let security = Arc::new(SecurityPolicy::default());
2498 let tool = BrowserTool::new(security, vec![], None);
2499 assert!(tool.validate_url("https://example.com").is_err());
2500 }
2501
2502 #[test]
2503 fn computer_use_only_action_detection_is_correct() {
2504 assert!(is_computer_use_only_action("mouse_move"));
2505 assert!(is_computer_use_only_action("mouse_click"));
2506 assert!(is_computer_use_only_action("mouse_drag"));
2507 assert!(is_computer_use_only_action("key_type"));
2508 assert!(is_computer_use_only_action("key_press"));
2509 assert!(is_computer_use_only_action("screen_capture"));
2510 assert!(!is_computer_use_only_action("open"));
2511 assert!(!is_computer_use_only_action("snapshot"));
2512 }
2513
2514 #[test]
2515 fn unavailable_action_error_preserves_backend_context() {
2516 assert_eq!(
2517 unavailable_action_for_backend_error("mouse_move", ResolvedBackend::AgentBrowser),
2518 "Action 'mouse_move' is unavailable for backend 'agent_browser'"
2519 );
2520 assert_eq!(
2521 unavailable_action_for_backend_error("mouse_move", ResolvedBackend::RustNative),
2522 "Action 'mouse_move' is unavailable for backend 'rust_native'"
2523 );
2524 }
2525
2526 #[test]
2527 fn recoverable_error_detection_matches_session_patterns() {
2528 for message in [
2529 "invalid session id",
2530 "No Such Window",
2531 "session not created",
2532 "connection reset by peer",
2533 "broken pipe while writing webdriver command",
2534 "WebDriver request timed out",
2535 ] {
2536 let err = anyhow::anyhow!(message);
2537 assert!(is_recoverable_rust_native_error(&err), "{message}");
2538 }
2539
2540 let allowlist_error =
2541 anyhow::anyhow!("URL host 'localhost' is not in browser allowlist [example.com]");
2542 assert!(!is_recoverable_rust_native_error(&allowlist_error));
2543 }
2544
2545 #[test]
2546 fn non_recoverable_error_detection_rejects_policy_errors() {
2547 for message in [
2548 "Blocked by security policy",
2549 "URL host '127.0.0.1' is private and disallowed",
2550 "Action 'mouse_move' is unavailable for backend 'rust_native'",
2551 ] {
2552 let err = anyhow::anyhow!(message);
2553 assert!(!is_recoverable_rust_native_error(&err), "{message}");
2554 }
2555 }
2556
2557 #[cfg(feature = "browser-native")]
2558 #[test]
2559 fn reset_session_is_idempotent_without_client() {
2560 tokio_test::block_on(async {
2561 let mut state = native_backend::NativeBrowserState::default();
2562 state.reset_session().await;
2563 state.reset_session().await;
2564 });
2565 }
2566
2567 #[test]
2568 fn ensure_browser_env_sets_home_when_missing() {
2569 let original_home = std::env::var_os("HOME");
2570 unsafe { std::env::remove_var("HOME") };
2571
2572 let mut cmd = Command::new("true");
2573 ensure_browser_env(&mut cmd);
2574 if let Some(home) = original_home {
2577 unsafe { std::env::set_var("HOME", home) };
2578 }
2579 }
2580
2581 #[test]
2582 fn ensure_browser_env_sets_chromium_flags() {
2583 let original = std::env::var_os("CHROMIUM_FLAGS");
2584 unsafe { std::env::remove_var("CHROMIUM_FLAGS") };
2585
2586 let mut cmd = Command::new("true");
2587 ensure_browser_env(&mut cmd);
2588
2589 if let Some(val) = original {
2590 unsafe { std::env::set_var("CHROMIUM_FLAGS", val) };
2591 }
2592 }
2593
2594 #[test]
2595 fn is_service_environment_detects_invocation_id() {
2596 let original = std::env::var_os("INVOCATION_ID");
2597 unsafe { std::env::set_var("INVOCATION_ID", "test-unit-id") };
2598
2599 assert!(is_service_environment());
2600
2601 if let Some(val) = original {
2602 unsafe { std::env::set_var("INVOCATION_ID", val) };
2603 } else {
2604 unsafe { std::env::remove_var("INVOCATION_ID") };
2605 }
2606 }
2607
2608 #[test]
2609 fn is_service_environment_detects_journal_stream() {
2610 let original = std::env::var_os("JOURNAL_STREAM");
2611 unsafe { std::env::set_var("JOURNAL_STREAM", "8:12345") };
2612
2613 assert!(is_service_environment());
2614
2615 if let Some(val) = original {
2616 unsafe { std::env::set_var("JOURNAL_STREAM", val) };
2617 } else {
2618 unsafe { std::env::remove_var("JOURNAL_STREAM") };
2619 }
2620 }
2621
2622 #[test]
2623 fn is_service_environment_false_in_normal_context() {
2624 let inv = std::env::var_os("INVOCATION_ID");
2625 let journal = std::env::var_os("JOURNAL_STREAM");
2626 unsafe { std::env::remove_var("INVOCATION_ID") };
2627 unsafe { std::env::remove_var("JOURNAL_STREAM") };
2628
2629 if std::env::var_os("HOME").is_some() {
2630 assert!(!is_service_environment());
2631 }
2632
2633 if let Some(val) = inv {
2634 unsafe { std::env::set_var("INVOCATION_ID", val) };
2635 }
2636 if let Some(val) = journal {
2637 unsafe { std::env::set_var("JOURNAL_STREAM", val) };
2638 }
2639 }
2640
2641 #[test]
2642 fn windows_command_name_selection() {
2643 let cmd = if cfg!(target_os = "windows") {
2646 "agent-browser.cmd"
2647 } else {
2648 "agent-browser"
2649 };
2650
2651 if cfg!(target_os = "windows") {
2652 assert_eq!(cmd, "agent-browser.cmd");
2653 } else {
2654 assert_eq!(cmd, "agent-browser");
2655 }
2656 }
2657}