1use std::collections::HashMap;
2use std::net::{IpAddr, Ipv6Addr};
3use std::path::{Path, PathBuf};
4use std::process::Stdio;
5use std::sync::atomic::{AtomicBool, Ordering};
6use std::sync::Arc;
7
8use anyhow::{anyhow, Context};
9use async_trait::async_trait;
10use base64::Engine;
11use flate2::read::GzDecoder;
12use serde::{Deserialize, Serialize};
13use serde_json::{json, Value};
14use tandem_browser::{
15 detect_sidecar_binary_path, run_doctor, BrowserActionResult, BrowserArtifactRef,
16 BrowserBlockingIssue, BrowserCloseParams, BrowserCloseResult, BrowserDoctorOptions,
17 BrowserExtractParams, BrowserExtractResult, BrowserNavigateParams, BrowserNavigateResult,
18 BrowserOpenRequest, BrowserOpenResult, BrowserPressParams, BrowserRpcRequest,
19 BrowserRpcResponse, BrowserScreenshotParams, BrowserScreenshotResult, BrowserSnapshotParams,
20 BrowserSnapshotResult, BrowserStatus, BrowserTypeParams, BrowserViewport, BrowserWaitCondition,
21 BrowserWaitParams, BROWSER_PROTOCOL_VERSION,
22};
23use tandem_core::{resolve_shared_paths, BrowserConfig};
24use tandem_tools::{Tool, ToolRegistry};
25use tandem_types::{EngineEvent, ToolResult, ToolSchema};
26use tokio::fs;
27use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
28use tokio::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command};
29use tokio::sync::{Mutex, RwLock};
30use uuid::Uuid;
31
32use crate::{now_ms, AppState, RoutineRunArtifact, RuntimeState};
33
/// Maximum age, in milliseconds, of the cached `BrowserStatus` before
/// `BrowserSubsystem::status_snapshot` re-evaluates it.
const STATUS_CACHE_MAX_AGE_MS: u64 = 30_000;
/// Extracted content larger than this many bytes is stored as an artifact by
/// `browser_extract` instead of being returned inline.
const INLINE_EXTRACT_LIMIT_BYTES: usize = 24_000;
/// Artifact label used for screenshots captured by `browser_snapshot` when the
/// caller asked for a screenshot.
const SNAPSHOT_SCREENSHOT_LABEL: &str = "browser snapshot";
// NOTE(review): the three constants below are not referenced in this part of
// the file; presumably they are consumed by the sidecar installer — confirm.
const RELEASE_REPO: &str = "frumu-ai/tandem";
const RELEASES_URL_ENV: &str = "TANDEM_BROWSER_RELEASES_URL";
const BROWSER_INSTALL_USER_AGENT: &str = "tandem-browser-installer";
40
/// A spawned sidecar process together with its stdio pipes.
///
/// Requests are written to `stdin` as one JSON line each and the matching
/// response is read back as one line from `stdout` (see `call_raw`).
#[derive(Debug)]
struct BrowserSidecarClient {
    /// Child process handle; stored so it lives as long as the client but not
    /// read anywhere in the visible code.
    _child: Child,
    /// Pipe the JSON-RPC requests are written to.
    stdin: ChildStdin,
    /// Pipe the JSON-RPC responses are read from, one line per response.
    stdout: BufReader<ChildStdout>,
    /// Only read after `stdout` reports EOF, to enrich the error message.
    stderr: BufReader<ChildStderr>,
    /// Id assigned to the next request; incremented with saturation.
    next_id: u64,
}
49
/// Bookkeeping the engine keeps for each browser session it opened.
#[derive(Debug, Clone)]
struct ManagedBrowserSession {
    /// Engine (model) session that opened this browser session, if known.
    /// `load_session` rejects callers whose session id differs from it.
    owner_session_id: Option<String>,
    /// Last URL recorded for the session; used for allow-list checks before
    /// reads and actions.
    current_url: String,
    /// Timestamp (from `now_ms`) taken when the entry was inserted.
    _created_at_ms: u64,
    /// Timestamp (from `now_ms`) of the last URL update.
    updated_at_ms: u64,
}
57
/// Condensed view of the cached browser status, built by
/// `BrowserSubsystem::health_summary`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BrowserHealthSummary {
    /// Copied from `BrowserStatus::enabled`.
    pub enabled: bool,
    /// Copied from `BrowserStatus::runnable`.
    pub runnable: bool,
    /// Value of the subsystem's `tools_registered` flag.
    pub tools_registered: bool,
    /// Copied from `BrowserStatus::sidecar.found`.
    pub sidecar_found: bool,
    /// Copied from `BrowserStatus::browser.found`.
    pub browser_found: bool,
    /// Copied from `BrowserStatus::browser.version`; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub browser_version: Option<String>,
    /// Copied from `BrowserStatus::last_checked_at_ms`; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_checked_at_ms: Option<u64>,
    /// Copied from `BrowserStatus::last_error`; omitted when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
}
72
/// Outcome of installing the browser sidecar, as returned by
/// `BrowserSubsystem::install_sidecar`.
///
/// NOTE(review): the fields other than `status` are filled in by
/// `install_browser_sidecar`, which is outside this view; their descriptions
/// are inferred from names — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSidecarInstallResult {
    /// Presumably the installed release version.
    pub version: String,
    /// Presumably the name of the downloaded release asset.
    pub asset_name: String,
    /// Presumably where the sidecar binary was written.
    pub installed_path: String,
    /// Presumably the number of bytes downloaded.
    pub downloaded_bytes: u64,
    /// Browser status; `install_sidecar` overwrites it with a freshly
    /// refreshed status after the install completes.
    pub status: BrowserStatus,
}
81
/// Report produced by `BrowserSubsystem::smoke_test`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSmokeTestResult {
    /// Set to `true` by `smoke_test` whenever a report is produced.
    pub ok: bool,
    /// Status snapshot taken before the test ran.
    pub status: BrowserStatus,
    /// URL the test asked the browser to open.
    pub url: String,
    /// URL reported by the page snapshot.
    pub final_url: String,
    /// Page title reported by the snapshot.
    pub title: String,
    /// Load state reported by the snapshot.
    pub load_state: String,
    /// Number of elements returned by the snapshot.
    pub element_count: usize,
    /// Excerpt of the extracted visible text; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt: Option<String>,
    /// Whether the sidecar reported the test session as closed.
    pub closed: bool,
}
95
/// Deserialized release record.
///
/// NOTE(review): not used in the visible part of the file; presumably mirrors
/// the GitHub releases API response consumed by the installer — confirm.
#[derive(Debug, Clone, Deserialize)]
struct GitHubRelease {
    /// Release tag.
    tag_name: String,
    /// Assets attached to the release.
    assets: Vec<GitHubAsset>,
}
101
/// Deserialized release asset record (element of `GitHubRelease::assets`).
///
/// NOTE(review): field meanings are inferred from their names; the consumer
/// is outside this view — confirm.
#[derive(Debug, Clone, Deserialize)]
struct GitHubAsset {
    /// Asset file name.
    name: String,
    /// Download URL for the asset.
    browser_download_url: String,
    /// Presumably the asset size in bytes.
    size: u64,
}
108
/// Shared handle to the browser integration: configuration, cached status,
/// the lazily spawned sidecar client and the table of open sessions.
///
/// Cloning is cheap for the shared parts: status, flag, client and sessions
/// are all behind `Arc`, so clones observe the same state.
#[derive(Clone)]
pub struct BrowserSubsystem {
    /// Browser configuration supplied at construction.
    config: BrowserConfig,
    /// Last evaluated status; refreshed by `refresh_status`.
    status: Arc<RwLock<BrowserStatus>>,
    /// Whether the full set of browser tools is currently registered.
    tools_registered: Arc<AtomicBool>,
    /// Sidecar client, spawned on first use by `call_sidecar`; the mutex is
    /// held for the duration of each RPC.
    client: Arc<Mutex<Option<BrowserSidecarClient>>>,
    /// Open browser sessions keyed by the sidecar's session id.
    sessions: Arc<RwLock<HashMap<String, ManagedBrowserSession>>>,
    /// Directory computed in `new` as `<shared root>/browser-artifacts`.
    artifact_root: PathBuf,
}
118
/// Selects which browser operation a `BrowserTool` instance performs.
///
/// Each variant maps to one `execute_*` method in `BrowserTool::execute_impl`
/// and, apart from `Status`, to one `browser_*` tool name registered in
/// `BrowserSubsystem::register_tools`.
#[derive(Clone, Copy)]
enum BrowserToolKind {
    Status,
    Open,
    Navigate,
    Snapshot,
    Click,
    Type,
    Press,
    Wait,
    Extract,
    Screenshot,
    Close,
}
133
/// One registered browser tool: an operation kind bound to the shared
/// browser subsystem.
#[derive(Clone)]
pub struct BrowserTool {
    /// Operation this tool instance dispatches to.
    kind: BrowserToolKind,
    /// Shared subsystem used for sidecar calls and session bookkeeping.
    browser: BrowserSubsystem,
    /// Optional application state; when present it is consulted for
    /// per-session allowed hosts in `effective_allowed_hosts`.
    state: Option<AppState>,
}
140
/// Arguments accepted by the `browser_type` tool.
///
/// Every field except `session_id` is optional and defaults when absent.
#[derive(Debug, Deserialize)]
struct BrowserTypeToolArgs {
    /// Browser session to type into.
    session_id: String,
    /// Forwarded unchanged to the sidecar's type request.
    #[serde(default)]
    element_id: Option<String>,
    /// Forwarded unchanged to the sidecar's type request.
    #[serde(default)]
    selector: Option<String>,
    /// Literal text; passed to `resolve_text_input` together with `secret_ref`.
    #[serde(default)]
    text: Option<String>,
    /// Secret reference; passed to `resolve_text_input` together with `text`.
    #[serde(default)]
    secret_ref: Option<String>,
    /// Forwarded unchanged to the sidecar's type request.
    #[serde(default)]
    replace: bool,
    /// Forwarded unchanged to the sidecar's type request.
    #[serde(default)]
    submit: bool,
    /// Forwarded unchanged to the sidecar's type request.
    #[serde(default)]
    timeout_ms: Option<u64>,
}
159
/// Loosely typed wait condition as supplied by a caller; all fields optional.
///
/// NOTE(review): the code that interprets these fields is outside this view
/// (presumably `parse_browser_wait_args`) — confirm how they are combined.
#[derive(Debug, Deserialize, Default)]
struct BrowserWaitConditionArgs {
    /// Condition kind; also accepted under the key `type`.
    #[serde(default, alias = "type")]
    kind: Option<String>,
    #[serde(default)]
    value: Option<String>,
    #[serde(default)]
    selector: Option<String>,
    #[serde(default)]
    text: Option<String>,
    #[serde(default)]
    url: Option<String>,
}
173
/// Arguments accepted by the `browser_wait` tool.
///
/// The condition can be given either as a nested object (`condition`, also
/// accepted as `wait_for` / `waitFor`) or through the flat `kind` / `value` /
/// `selector` / `text` / `url` fields declared directly on this struct.
///
/// NOTE(review): how nested and flat forms are reconciled is decided outside
/// this view (presumably in `parse_browser_wait_args`) — confirm.
#[derive(Debug, Deserialize)]
struct BrowserWaitToolArgs {
    /// Browser session to wait on; also accepted as `sessionId`.
    #[serde(alias = "sessionId")]
    session_id: String,
    /// Nested condition object.
    #[serde(default, alias = "wait_for", alias = "waitFor")]
    condition: Option<BrowserWaitConditionArgs>,
    /// Optional timeout; also accepted as `timeoutMs`.
    #[serde(default, alias = "timeoutMs")]
    timeout_ms: Option<u64>,
    /// Flat condition kind; also accepted under the key `type`.
    #[serde(default, alias = "type")]
    kind: Option<String>,
    #[serde(default)]
    value: Option<String>,
    #[serde(default)]
    selector: Option<String>,
    #[serde(default)]
    text: Option<String>,
    #[serde(default)]
    url: Option<String>,
}
193
/// Caller context carried inside the tool arguments.
#[derive(Debug, Deserialize)]
struct BrowserToolContext {
    /// Engine (model) session id, read from the `__session_id` argument key;
    /// `None` when the key is absent.
    #[serde(default, rename = "__session_id")]
    model_session_id: Option<String>,
}
199
200impl BrowserSidecarClient {
201 async fn spawn(config: &BrowserConfig) -> anyhow::Result<Self> {
202 let sidecar_path = detect_sidecar_binary_path(config.sidecar_path.as_deref())
203 .ok_or_else(|| anyhow!("browser_sidecar_not_found"))?;
204 let mut cmd = Command::new(&sidecar_path);
205 cmd.arg("serve")
206 .arg("--transport")
207 .arg("stdio")
208 .stdin(Stdio::piped())
209 .stdout(Stdio::piped())
210 .stderr(Stdio::piped());
211 if let Some(path) = config
212 .executable_path
213 .as_deref()
214 .filter(|v| !v.trim().is_empty())
215 {
216 cmd.env("TANDEM_BROWSER_EXECUTABLE", path);
217 }
218 if let Some(path) = config
219 .user_data_root
220 .as_deref()
221 .filter(|v| !v.trim().is_empty())
222 {
223 cmd.env("TANDEM_BROWSER_USER_DATA_ROOT", path);
224 }
225 cmd.env(
226 "TANDEM_BROWSER_ALLOW_NO_SANDBOX",
227 bool_env_value(config.allow_no_sandbox),
228 );
229 cmd.env(
230 "TANDEM_BROWSER_HEADLESS",
231 bool_env_value(config.headless_default),
232 );
233
234 let mut child = cmd.spawn().with_context(|| {
235 format!(
236 "failed to spawn tandem-browser sidecar at `{}`",
237 sidecar_path.display()
238 )
239 })?;
240 let stdin = child
241 .stdin
242 .take()
243 .ok_or_else(|| anyhow!("browser sidecar stdin unavailable"))?;
244 let stdout = child
245 .stdout
246 .take()
247 .ok_or_else(|| anyhow!("browser sidecar stdout unavailable"))?;
248 let stderr = child
249 .stderr
250 .take()
251 .ok_or_else(|| anyhow!("browser sidecar stderr unavailable"))?;
252 let mut client = Self {
253 _child: child,
254 stdin,
255 stdout: BufReader::new(stdout),
256 stderr: BufReader::new(stderr),
257 next_id: 1,
258 };
259 let version: Value = client.call_raw("browser.version", json!({})).await?;
260 let protocol = version
261 .get("protocol_version")
262 .and_then(Value::as_str)
263 .unwrap_or("");
264 if protocol != BROWSER_PROTOCOL_VERSION {
265 anyhow::bail!(
266 "protocol_mismatch: expected browser protocol {}, got {}",
267 BROWSER_PROTOCOL_VERSION,
268 protocol
269 );
270 }
271 Ok(client)
272 }
273
274 async fn call_raw(&mut self, method: &str, params: Value) -> anyhow::Result<Value> {
275 let id = self.next_id;
276 self.next_id = self.next_id.saturating_add(1);
277 let request = BrowserRpcRequest {
278 jsonrpc: "2.0".to_string(),
279 id: json!(id),
280 method: method.to_string(),
281 params,
282 };
283 let raw = serde_json::to_string(&request)?;
284 self.stdin.write_all(raw.as_bytes()).await?;
285 self.stdin.write_all(b"\n").await?;
286 self.stdin.flush().await?;
287
288 let mut line = String::new();
289 let read = self.stdout.read_line(&mut line).await?;
290 if read == 0 {
291 let mut stderr = String::new();
292 let _ = self.stderr.read_to_string(&mut stderr).await;
293 let stderr = stderr.trim();
294 if stderr.is_empty() {
295 anyhow::bail!("browser sidecar closed the stdio connection");
296 }
297 anyhow::bail!(
298 "browser sidecar closed the stdio connection: {}",
299 smoke_excerpt(stderr, 600)
300 );
301 }
302 let response: BrowserRpcResponse =
303 serde_json::from_str(line.trim()).context("invalid browser sidecar response")?;
304 if let Some(error) = response.error {
305 anyhow::bail!("{}", error.message);
306 }
307 response
308 .result
309 .ok_or_else(|| anyhow!("browser sidecar returned an empty result"))
310 }
311
312 async fn call<T: Serialize, R: for<'de> Deserialize<'de>>(
313 &mut self,
314 method: &str,
315 params: T,
316 ) -> anyhow::Result<R> {
317 let value = self.call_raw(method, serde_json::to_value(params)?).await?;
318 serde_json::from_value(value).context("invalid browser sidecar payload")
319 }
320
321 async fn call_value<R: for<'de> Deserialize<'de>>(
322 &mut self,
323 method: &str,
324 params: Value,
325 ) -> anyhow::Result<R> {
326 let value = self.call_raw(method, params).await?;
327 serde_json::from_value(value).context("invalid browser sidecar payload")
328 }
329}
330
331impl BrowserSubsystem {
332 pub fn new(config: BrowserConfig) -> Self {
333 let artifact_root = resolve_shared_paths()
334 .map(|paths| paths.canonical_root.join("browser-artifacts"))
335 .unwrap_or_else(|_| PathBuf::from(".tandem").join("browser-artifacts"));
336 Self {
337 config,
338 status: Arc::new(RwLock::new(BrowserStatus::default())),
339 tools_registered: Arc::new(AtomicBool::new(false)),
340 client: Arc::new(Mutex::new(None)),
341 sessions: Arc::new(RwLock::new(HashMap::new())),
342 artifact_root,
343 }
344 }
345
    /// Returns the browser configuration this subsystem was created with.
    pub fn config(&self) -> &BrowserConfig {
        &self.config
    }
349
350 pub async fn install_sidecar(&self) -> anyhow::Result<BrowserSidecarInstallResult> {
351 let mut result = install_browser_sidecar(&self.config).await?;
352 result.status = self.refresh_status().await;
353 Ok(result)
354 }
355
356 pub async fn smoke_test(&self, url: Option<String>) -> anyhow::Result<BrowserSmokeTestResult> {
357 let status = self.status_snapshot().await;
358 if !status.runnable {
359 anyhow::bail!(
360 "browser_not_runnable: run browser doctor first; current status is not runnable"
361 );
362 }
363
364 let target_url = url
365 .map(|value| value.trim().to_string())
366 .filter(|value| !value.is_empty())
367 .unwrap_or_else(|| "https://example.com".to_string());
368 let request = BrowserOpenRequest {
369 url: target_url.clone(),
370 profile_id: None,
371 headless: Some(self.config.headless_default),
372 viewport: Some(BrowserViewport {
373 width: self.config.default_viewport.width,
374 height: self.config.default_viewport.height,
375 }),
376 wait_until: Some("navigation".to_string()),
377 executable_path: self.config.executable_path.clone(),
378 user_data_root: self.config.user_data_root.clone(),
379 allow_no_sandbox: self.config.allow_no_sandbox,
380 headless_default: self.config.headless_default,
381 };
382 let opened: BrowserOpenResult = self.call_sidecar("browser.open", request).await?;
383 let session_id = opened.session_id.clone();
384
385 let result = async {
386 let snapshot: BrowserSnapshotResult = self
387 .call_sidecar(
388 "browser.snapshot",
389 BrowserSnapshotParams {
390 session_id: session_id.clone(),
391 max_elements: Some(25),
392 include_screenshot: false,
393 },
394 )
395 .await?;
396 let extract: BrowserExtractResult = self
397 .call_sidecar(
398 "browser.extract",
399 BrowserExtractParams {
400 session_id: session_id.clone(),
401 format: "visible_text".to_string(),
402 max_bytes: Some(4_000),
403 },
404 )
405 .await?;
406 Ok::<BrowserSmokeTestResult, anyhow::Error>(BrowserSmokeTestResult {
407 ok: true,
408 status,
409 url: target_url,
410 final_url: snapshot.url,
411 title: snapshot.title,
412 load_state: snapshot.load_state,
413 element_count: snapshot.elements.len(),
414 excerpt: Some(smoke_excerpt(&extract.content, 400)),
415 closed: false,
416 })
417 }
418 .await;
419
420 let close_result: BrowserCloseResult = self
421 .call_sidecar(
422 "browser.close",
423 BrowserCloseParams {
424 session_id: session_id.clone(),
425 },
426 )
427 .await
428 .unwrap_or(BrowserCloseResult {
429 session_id,
430 closed: false,
431 });
432
433 let mut smoke = result?;
434 smoke.closed = close_result.closed;
435 Ok(smoke)
436 }
437
438 pub async fn refresh_status(&self) -> BrowserStatus {
439 let config = self.config.clone();
440 let evaluated = tokio::task::spawn_blocking(move || evaluate_browser_status(config))
441 .await
442 .unwrap_or_else(|err| BrowserStatus {
443 enabled: false,
444 runnable: false,
445 headless_default: true,
446 sidecar: Default::default(),
447 browser: Default::default(),
448 blocking_issues: vec![BrowserBlockingIssue {
449 code: "browser_launch_failed".to_string(),
450 message: format!("browser readiness task failed: {}", err),
451 }],
452 recommendations: vec![
453 "Run `tandem-engine browser doctor --json` on the same host.".to_string(),
454 ],
455 install_hints: Vec::new(),
456 last_checked_at_ms: Some(now_ms()),
457 last_error: Some(err.to_string()),
458 });
459 *self.status.write().await = evaluated.clone();
460 evaluated
461 }
462
463 pub async fn status_snapshot(&self) -> BrowserStatus {
464 let current = self.status.read().await.clone();
465 if current
466 .last_checked_at_ms
467 .is_some_and(|ts| now_ms().saturating_sub(ts) <= STATUS_CACHE_MAX_AGE_MS)
468 {
469 current
470 } else {
471 self.refresh_status().await
472 }
473 }
474
475 pub async fn health_summary(&self) -> BrowserHealthSummary {
476 let status = self.status.read().await.clone();
477 BrowserHealthSummary {
478 enabled: status.enabled,
479 runnable: status.runnable,
480 tools_registered: self.tools_registered.load(Ordering::Relaxed),
481 sidecar_found: status.sidecar.found,
482 browser_found: status.browser.found,
483 browser_version: status.browser.version,
484 last_checked_at_ms: status.last_checked_at_ms,
485 last_error: status.last_error,
486 }
487 }
488
    /// Records whether the browser tools are currently registered; the value
    /// is surfaced through `health_summary`.
    pub fn set_tools_registered(&self, value: bool) {
        self.tools_registered.store(value, Ordering::Relaxed);
    }
492
493 pub async fn register_tools(
494 &self,
495 tools: &ToolRegistry,
496 state: Option<AppState>,
497 ) -> anyhow::Result<()> {
498 tools.unregister_by_prefix("browser_").await;
499 tools
500 .register_tool(
501 "browser_status".to_string(),
502 Arc::new(BrowserTool::new(
503 BrowserToolKind::Status,
504 self.clone(),
505 state.clone(),
506 )),
507 )
508 .await;
509
510 let status = self.status_snapshot().await;
511 if !status.enabled || !status.runnable {
512 self.set_tools_registered(false);
513 return Ok(());
514 }
515
516 for (name, kind) in [
517 ("browser_open", BrowserToolKind::Open),
518 ("browser_navigate", BrowserToolKind::Navigate),
519 ("browser_snapshot", BrowserToolKind::Snapshot),
520 ("browser_click", BrowserToolKind::Click),
521 ("browser_type", BrowserToolKind::Type),
522 ("browser_press", BrowserToolKind::Press),
523 ("browser_wait", BrowserToolKind::Wait),
524 ("browser_extract", BrowserToolKind::Extract),
525 ("browser_screenshot", BrowserToolKind::Screenshot),
526 ("browser_close", BrowserToolKind::Close),
527 ] {
528 tools
529 .register_tool(
530 name.to_string(),
531 Arc::new(BrowserTool::new(kind, self.clone(), state.clone())),
532 )
533 .await;
534 }
535 self.set_tools_registered(true);
536 Ok(())
537 }
538
539 async fn update_last_error(&self, message: impl Into<String>) {
540 let mut status = self.status.write().await;
541 status.last_error = Some(message.into());
542 status.last_checked_at_ms = Some(now_ms());
543 }
544
    /// Performs one RPC against the sidecar, spawning the sidecar on first use.
    ///
    /// The client mutex is held for the whole call, so requests from
    /// concurrent callers are processed one at a time.
    ///
    /// On any error the current client is discarded and the error text is
    /// recorded as the status' last error. If the error text indicates that
    /// the sidecar closed its stdio connection, a new sidecar is spawned and
    /// the same request is retried once; the retry's outcome is returned
    /// as-is.
    ///
    /// NOTE(review): RPC-level errors returned by a healthy sidecar also
    /// discard the client here — confirm this reset-on-any-error policy is
    /// intended.
    ///
    /// # Errors
    /// Serialization failures, spawn failures, and errors from the call
    /// (or from the single retry).
    async fn call_sidecar<T: Serialize, R: for<'de> Deserialize<'de>>(
        &self,
        method: &str,
        params: T,
    ) -> anyhow::Result<R> {
        let params = serde_json::to_value(params)?;
        let mut guard = self.client.lock().await;
        // Lazily start the sidecar the first time it is needed.
        if guard.is_none() {
            *guard = Some(BrowserSidecarClient::spawn(&self.config).await?);
        }
        // `params` is cloned because the retry path below needs it again.
        let result = guard
            .as_mut()
            .expect("browser sidecar client initialized")
            .call_value(method, params.clone())
            .await;
        if let Err(err) = &result {
            // Drop the client so the next call starts from a fresh sidecar.
            *guard = None;
            self.update_last_error(err.to_string()).await;
            // The closed-connection case is detected by matching the message
            // produced in `call_raw`.
            if err
                .to_string()
                .contains("browser sidecar closed the stdio connection")
            {
                *guard = Some(BrowserSidecarClient::spawn(&self.config).await?);
                return guard
                    .as_mut()
                    .expect("browser sidecar client reinitialized")
                    .call_value(method, params)
                    .await;
            }
        }
        result
    }
577
578 async fn insert_session(
579 &self,
580 browser_session_id: String,
581 owner_session_id: Option<String>,
582 current_url: String,
583 ) {
584 self.sessions.write().await.insert(
585 browser_session_id,
586 ManagedBrowserSession {
587 owner_session_id,
588 current_url,
589 _created_at_ms: now_ms(),
590 updated_at_ms: now_ms(),
591 },
592 );
593 }
594
    /// Returns a copy of the tracked session with the given id, or `None`
    /// when no such session is tracked.
    async fn session(&self, browser_session_id: &str) -> Option<ManagedBrowserSession> {
        self.sessions.read().await.get(browser_session_id).cloned()
    }
598
599 async fn update_session_url(
600 &self,
601 browser_session_id: &str,
602 current_url: String,
603 ) -> Option<ManagedBrowserSession> {
604 let mut sessions = self.sessions.write().await;
605 let session = sessions.get_mut(browser_session_id)?;
606 session.current_url = current_url;
607 session.updated_at_ms = now_ms();
608 Some(session.clone())
609 }
610
    /// Stops tracking the given session and returns its entry, or `None` when
    /// it was not tracked. Does not contact the sidecar.
    async fn remove_session(&self, browser_session_id: &str) -> Option<ManagedBrowserSession> {
        self.sessions.write().await.remove(browser_session_id)
    }
614
615 pub async fn close_sessions_for_owner(&self, owner_session_id: &str) -> usize {
616 let session_ids = self
617 .sessions
618 .read()
619 .await
620 .iter()
621 .filter_map(|(session_id, session)| {
622 (session.owner_session_id.as_deref() == Some(owner_session_id))
623 .then_some(session_id.clone())
624 })
625 .collect::<Vec<_>>();
626 self.close_session_ids(session_ids).await
627 }
628
629 pub async fn close_all_sessions(&self) -> usize {
630 let session_ids = self
631 .sessions
632 .read()
633 .await
634 .keys()
635 .cloned()
636 .collect::<Vec<_>>();
637 self.close_session_ids(session_ids).await
638 }
639
640 async fn close_session_ids(&self, session_ids: Vec<String>) -> usize {
641 let mut closed = 0usize;
642 for session_id in session_ids {
643 let _ = self
644 .call_sidecar::<_, BrowserCloseResult>(
645 "browser.close",
646 BrowserCloseParams {
647 session_id: session_id.clone(),
648 },
649 )
650 .await;
651 if self.remove_session(&session_id).await.is_some() {
652 closed += 1;
653 }
654 }
655 closed
656 }
657}
658
659impl BrowserTool {
    /// Creates a tool that performs `kind` against the given subsystem.
    /// `state`, when present, is used for per-session allowed-host lookups.
    fn new(kind: BrowserToolKind, browser: BrowserSubsystem, state: Option<AppState>) -> Self {
        Self {
            kind,
            browser,
            state,
        }
    }
667
    /// Dispatches to the `execute_*` method matching this tool's kind.
    ///
    /// `args` is the raw JSON argument object; `Status` takes no arguments and
    /// ignores it.
    async fn execute_impl(&self, args: Value) -> anyhow::Result<ToolResult> {
        match self.kind {
            BrowserToolKind::Status => self.execute_status().await,
            BrowserToolKind::Open => self.execute_open(args).await,
            BrowserToolKind::Navigate => self.execute_navigate(args).await,
            BrowserToolKind::Snapshot => self.execute_snapshot(args).await,
            BrowserToolKind::Click => self.execute_click(args).await,
            BrowserToolKind::Type => self.execute_type(args).await,
            BrowserToolKind::Press => self.execute_press(args).await,
            BrowserToolKind::Wait => self.execute_wait(args).await,
            BrowserToolKind::Extract => self.execute_extract(args).await,
            BrowserToolKind::Screenshot => self.execute_screenshot(args).await,
            BrowserToolKind::Close => self.execute_close(args).await,
        }
    }
683
684 async fn execute_status(&self) -> anyhow::Result<ToolResult> {
685 let status = self.browser.status_snapshot().await;
686 ok_tool_result(
687 serde_json::to_value(&status)?,
688 json!({
689 "enabled": status.enabled,
690 "runnable": status.runnable,
691 "sidecar_found": status.sidecar.found,
692 "browser_found": status.browser.found,
693 }),
694 )
695 }
696
697 async fn execute_open(&self, args: Value) -> anyhow::Result<ToolResult> {
698 let ctx = parse_tool_context(&args);
699 let mut request: BrowserOpenRequest =
700 serde_json::from_value(args.clone()).context("invalid browser_open arguments")?;
701 normalize_browser_open_request(&mut request);
702 let status = self.browser.status_snapshot().await;
703 if !status.runnable {
704 return browser_not_runnable_result(&status);
705 }
706 ensure_allowed_browser_url(
707 &request.url,
708 &self
709 .effective_allowed_hosts(ctx.model_session_id.as_deref())
710 .await,
711 )?;
712 request.executable_path = self.browser.config.executable_path.clone();
713 request.user_data_root = self.browser.config.user_data_root.clone();
714 request.allow_no_sandbox = self.browser.config.allow_no_sandbox;
715 request.headless_default = self.browser.config.headless_default;
716 if request.viewport.is_none() {
717 request.viewport = Some(BrowserViewport {
718 width: self.browser.config.default_viewport.width,
719 height: self.browser.config.default_viewport.height,
720 });
721 }
722 let result: BrowserOpenResult = self.browser.call_sidecar("browser.open", request).await?;
723 ensure_allowed_browser_url(
724 &result.final_url,
725 &self
726 .effective_allowed_hosts(ctx.model_session_id.as_deref())
727 .await,
728 )
729 .map_err(|err| anyhow!("host_not_allowed: {}", err))?;
730 self.browser
731 .insert_session(
732 result.session_id.clone(),
733 ctx.model_session_id.clone(),
734 result.final_url.clone(),
735 )
736 .await;
737 ok_tool_result(
738 serde_json::to_value(&result)?,
739 json!({
740 "session_id": result.session_id,
741 "url": result.final_url,
742 "headless": result.headless,
743 }),
744 )
745 }
746
747 async fn execute_navigate(&self, args: Value) -> anyhow::Result<ToolResult> {
748 let ctx = parse_tool_context(&args);
749 let params: BrowserNavigateParams =
750 serde_json::from_value(args.clone()).context("invalid browser_navigate arguments")?;
751 let session = self
752 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
753 .await?;
754 ensure_allowed_browser_url(
755 ¶ms.url,
756 &self
757 .effective_allowed_hosts(session.owner_session_id.as_deref())
758 .await,
759 )?;
760 let result: BrowserNavigateResult = self
761 .browser
762 .call_sidecar("browser.navigate", params.clone())
763 .await?;
764 self.enforce_post_navigation(
765 ¶ms.session_id,
766 &result.final_url,
767 session.owner_session_id.as_deref(),
768 )
769 .await?;
770 ok_tool_result(
771 serde_json::to_value(&result)?,
772 json!({
773 "session_id": result.session_id,
774 "url": result.final_url,
775 }),
776 )
777 }
778
779 async fn execute_snapshot(&self, args: Value) -> anyhow::Result<ToolResult> {
780 let ctx = parse_tool_context(&args);
781 let params: BrowserSnapshotParams =
782 serde_json::from_value(args.clone()).context("invalid browser_snapshot arguments")?;
783 let session = self
784 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
785 .await?;
786 self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
787 .await?;
788 let mut result: BrowserSnapshotResult = self
789 .browser
790 .call_sidecar("browser.snapshot", params.clone())
791 .await?;
792 self.browser
793 .update_session_url(¶ms.session_id, result.url.clone())
794 .await;
795
796 let screenshot_artifact = if let Some(base64) = result.screenshot_base64.take() {
797 Some(
798 self.store_artifact(
799 ctx.model_session_id.as_deref(),
800 ¶ms.session_id,
801 "screenshot",
802 params
803 .include_screenshot
804 .then_some(SNAPSHOT_SCREENSHOT_LABEL.to_string()),
805 "png",
806 &base64::engine::general_purpose::STANDARD
807 .decode(base64.as_bytes())
808 .context("invalid snapshot screenshot payload")?,
809 Some(json!({
810 "source": "browser_snapshot",
811 "url": result.url,
812 })),
813 )
814 .await?,
815 )
816 } else {
817 None
818 };
819 let payload = json!({
820 "session_id": result.session_id,
821 "url": result.url,
822 "title": result.title,
823 "load_state": result.load_state,
824 "viewport": result.viewport,
825 "elements": result.elements,
826 "notices": result.notices,
827 "screenshot_artifact": screenshot_artifact,
828 });
829 ok_tool_result(
830 payload.clone(),
831 json!({
832 "session_id": payload.get("session_id"),
833 "url": payload.get("url"),
834 "element_count": payload.get("elements").and_then(Value::as_array).map(|rows| rows.len()).unwrap_or(0),
835 }),
836 )
837 }
838
839 async fn execute_click(&self, args: Value) -> anyhow::Result<ToolResult> {
840 let ctx = parse_tool_context(&args);
841 let params: tandem_browser::BrowserClickParams =
842 serde_json::from_value(args.clone()).context("invalid browser_click arguments")?;
843 let session = self
844 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
845 .await?;
846 self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
847 .await?;
848 let result: BrowserActionResult = self
849 .browser
850 .call_sidecar("browser.click", params.clone())
851 .await?;
852 self.update_action_url(
853 ¶ms.session_id,
854 result.final_url.as_deref(),
855 session.owner_session_id.as_deref(),
856 )
857 .await?;
858 ok_tool_result(
859 serde_json::to_value(&result)?,
860 json!({
861 "session_id": result.session_id,
862 "success": result.success,
863 "url": result.final_url,
864 }),
865 )
866 }
867
868 async fn execute_type(&self, args: Value) -> anyhow::Result<ToolResult> {
869 let ctx = parse_tool_context(&args);
870 let params: BrowserTypeToolArgs =
871 serde_json::from_value(args.clone()).context("invalid browser_type arguments")?;
872 let session = self
873 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
874 .await?;
875 self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
876 .await?;
877 let text = resolve_text_input(params.text.clone(), params.secret_ref.clone())?;
878 let request = BrowserTypeParams {
879 session_id: params.session_id.clone(),
880 element_id: params.element_id.clone(),
881 selector: params.selector.clone(),
882 text,
883 replace: params.replace,
884 submit: params.submit,
885 timeout_ms: params.timeout_ms,
886 };
887 let result: BrowserActionResult =
888 self.browser.call_sidecar("browser.type", request).await?;
889 self.update_action_url(
890 ¶ms.session_id,
891 result.final_url.as_deref(),
892 session.owner_session_id.as_deref(),
893 )
894 .await?;
895 ok_tool_result(
896 serde_json::to_value(&result)?,
897 json!({
898 "session_id": result.session_id,
899 "success": result.success,
900 "used_secret_ref": params.secret_ref.is_some(),
901 "url": result.final_url,
902 }),
903 )
904 }
905
906 async fn execute_press(&self, args: Value) -> anyhow::Result<ToolResult> {
907 let ctx = parse_tool_context(&args);
908 let params: BrowserPressParams =
909 serde_json::from_value(args.clone()).context("invalid browser_press arguments")?;
910 let session = self
911 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
912 .await?;
913 self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
914 .await?;
915 let result: BrowserActionResult = self
916 .browser
917 .call_sidecar("browser.press", params.clone())
918 .await?;
919 self.update_action_url(
920 ¶ms.session_id,
921 result.final_url.as_deref(),
922 session.owner_session_id.as_deref(),
923 )
924 .await?;
925 ok_tool_result(
926 serde_json::to_value(&result)?,
927 json!({
928 "session_id": result.session_id,
929 "success": result.success,
930 "url": result.final_url,
931 }),
932 )
933 }
934
935 async fn execute_wait(&self, args: Value) -> anyhow::Result<ToolResult> {
936 let ctx = parse_tool_context(&args);
937 let params = parse_browser_wait_args(&args).context("invalid browser_wait arguments")?;
938 let session = self
939 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
940 .await?;
941 self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
942 .await?;
943 let result: BrowserActionResult = self
944 .browser
945 .call_sidecar("browser.wait", params.clone())
946 .await?;
947 self.update_action_url(
948 ¶ms.session_id,
949 result.final_url.as_deref(),
950 session.owner_session_id.as_deref(),
951 )
952 .await?;
953 ok_tool_result(
954 serde_json::to_value(&result)?,
955 json!({
956 "session_id": result.session_id,
957 "success": result.success,
958 "url": result.final_url,
959 }),
960 )
961 }
962
963 async fn execute_extract(&self, args: Value) -> anyhow::Result<ToolResult> {
964 let ctx = parse_tool_context(&args);
965 let params: BrowserExtractParams =
966 serde_json::from_value(args.clone()).context("invalid browser_extract arguments")?;
967 let session = self
968 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
969 .await?;
970 self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
971 .await?;
972 let result: BrowserExtractResult = self
973 .browser
974 .call_sidecar("browser.extract", params.clone())
975 .await?;
976 let bytes = result.content.as_bytes();
977 let artifact = if bytes.len() > INLINE_EXTRACT_LIMIT_BYTES {
978 Some(
979 self.store_artifact(
980 ctx.model_session_id.as_deref(),
981 ¶ms.session_id,
982 "extract",
983 Some(format!("browser extract ({})", result.format)),
984 extension_for_extract_format(&result.format),
985 bytes,
986 Some(json!({
987 "format": result.format,
988 "truncated": result.truncated,
989 "source": "browser_extract",
990 })),
991 )
992 .await?,
993 )
994 } else {
995 None
996 };
997 let payload = json!({
998 "session_id": result.session_id,
999 "format": result.format,
1000 "content": artifact.is_none().then_some(result.content),
1001 "truncated": result.truncated,
1002 "artifact": artifact,
1003 });
1004 ok_tool_result(
1005 payload.clone(),
1006 json!({
1007 "session_id": payload.get("session_id"),
1008 "format": payload.get("format"),
1009 "artifact": payload.get("artifact").is_some(),
1010 }),
1011 )
1012 }
1013
1014 async fn execute_screenshot(&self, args: Value) -> anyhow::Result<ToolResult> {
1015 let ctx = parse_tool_context(&args);
1016 let params: BrowserScreenshotParams =
1017 serde_json::from_value(args.clone()).context("invalid browser_screenshot arguments")?;
1018 let session = self
1019 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
1020 .await?;
1021 self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
1022 .await?;
1023 let result: BrowserScreenshotResult = self
1024 .browser
1025 .call_sidecar("browser.screenshot", params.clone())
1026 .await?;
1027 let bytes = base64::engine::general_purpose::STANDARD
1028 .decode(result.data_base64.as_bytes())
1029 .context("invalid screenshot payload")?;
1030 let artifact = self
1031 .store_artifact(
1032 ctx.model_session_id.as_deref(),
1033 ¶ms.session_id,
1034 "screenshot",
1035 result.label.clone(),
1036 "png",
1037 &bytes,
1038 Some(json!({
1039 "mime_type": result.mime_type,
1040 "bytes": result.bytes,
1041 "source": "browser_screenshot",
1042 })),
1043 )
1044 .await?;
1045 ok_tool_result(
1046 json!({
1047 "session_id": result.session_id,
1048 "artifact": artifact,
1049 "summary": format!("Saved screenshot artifact ({} bytes).", result.bytes),
1050 }),
1051 json!({
1052 "session_id": result.session_id,
1053 "artifact_id": artifact.artifact_id,
1054 }),
1055 )
1056 }
1057
1058 async fn execute_close(&self, args: Value) -> anyhow::Result<ToolResult> {
1059 let ctx = parse_tool_context(&args);
1060 let params: BrowserCloseParams =
1061 serde_json::from_value(args.clone()).context("invalid browser_close arguments")?;
1062 let _ = self
1063 .load_session(¶ms.session_id, ctx.model_session_id.as_deref())
1064 .await?;
1065 let result: BrowserCloseResult = self
1066 .browser
1067 .call_sidecar("browser.close", params.clone())
1068 .await?;
1069 self.browser.remove_session(¶ms.session_id).await;
1070 ok_tool_result(
1071 serde_json::to_value(&result)?,
1072 json!({
1073 "session_id": result.session_id,
1074 "closed": result.closed,
1075 }),
1076 )
1077 }
1078
1079 async fn load_session(
1080 &self,
1081 browser_session_id: &str,
1082 model_session_id: Option<&str>,
1083 ) -> anyhow::Result<ManagedBrowserSession> {
1084 let session = self
1085 .browser
1086 .session(browser_session_id)
1087 .await
1088 .ok_or_else(|| anyhow!("session `{}` not found", browser_session_id))?;
1089 if let (Some(owner), Some(model_session_id)) =
1090 (session.owner_session_id.as_deref(), model_session_id)
1091 {
1092 if owner != model_session_id {
1093 anyhow::bail!(
1094 "browser session `{}` belongs to a different engine session",
1095 browser_session_id
1096 );
1097 }
1098 }
1099 Ok(session)
1100 }
1101
1102 async fn effective_allowed_hosts(&self, model_session_id: Option<&str>) -> Vec<String> {
1103 if let Some(model_session_id) = model_session_id {
1104 if let Some(state) = self.state.as_ref() {
1105 if let Some(instance) = state
1106 .agent_teams
1107 .instance_for_session(model_session_id)
1108 .await
1109 {
1110 if !instance.capabilities.net_scopes.allow_hosts.is_empty() {
1111 return normalize_allowed_hosts(
1112 instance.capabilities.net_scopes.allow_hosts,
1113 );
1114 }
1115 }
1116 }
1117 }
1118 normalize_allowed_hosts(self.browser.config.allowed_hosts.clone())
1119 }
1120
1121 async fn ensure_page_read_allowed(
1122 &self,
1123 model_session_id: Option<&str>,
1124 current_url: &str,
1125 ) -> anyhow::Result<()> {
1126 ensure_allowed_browser_url(
1127 current_url,
1128 &self.effective_allowed_hosts(model_session_id).await,
1129 )?;
1130 Ok(())
1131 }
1132
1133 async fn ensure_action_allowed(
1134 &self,
1135 model_session_id: Option<&str>,
1136 current_url: &str,
1137 ) -> anyhow::Result<()> {
1138 self.ensure_page_read_allowed(model_session_id, current_url)
1139 .await?;
1140 let host = browser_url_host(current_url)?;
1141 if !is_local_or_private_host(&host)
1142 && !self.external_integrations_allowed(model_session_id).await
1143 {
1144 anyhow::bail!(
1145 "external integrations are disabled for this routine session on host `{}`",
1146 host
1147 );
1148 }
1149 Ok(())
1150 }
1151
1152 async fn external_integrations_allowed(&self, model_session_id: Option<&str>) -> bool {
1153 let Some(model_session_id) = model_session_id else {
1154 return true;
1155 };
1156 let Some(state) = self.state.as_ref() else {
1157 return true;
1158 };
1159 let Some(policy) = state.routine_session_policy(model_session_id).await else {
1160 return true;
1161 };
1162 state
1163 .get_routine(&policy.routine_id)
1164 .await
1165 .map(|routine| routine.external_integrations_allowed)
1166 .unwrap_or(true)
1167 }
1168
1169 async fn enforce_post_navigation(
1170 &self,
1171 browser_session_id: &str,
1172 final_url: &str,
1173 model_session_id: Option<&str>,
1174 ) -> anyhow::Result<()> {
1175 if let Err(err) = ensure_allowed_browser_url(
1176 final_url,
1177 &self.effective_allowed_hosts(model_session_id).await,
1178 ) {
1179 let _ = self
1180 .browser
1181 .call_sidecar::<_, BrowserCloseResult>(
1182 "browser.close",
1183 BrowserCloseParams {
1184 session_id: browser_session_id.to_string(),
1185 },
1186 )
1187 .await;
1188 self.browser.remove_session(browser_session_id).await;
1189 return Err(anyhow!("host_not_allowed: {}", err));
1190 }
1191 self.browser
1192 .update_session_url(browser_session_id, final_url.to_string())
1193 .await;
1194 Ok(())
1195 }
1196
1197 async fn update_action_url(
1198 &self,
1199 browser_session_id: &str,
1200 final_url: Option<&str>,
1201 model_session_id: Option<&str>,
1202 ) -> anyhow::Result<()> {
1203 if let Some(final_url) = final_url {
1204 self.enforce_post_navigation(browser_session_id, final_url, model_session_id)
1205 .await?;
1206 }
1207 Ok(())
1208 }
1209
1210 async fn store_artifact(
1211 &self,
1212 model_session_id: Option<&str>,
1213 browser_session_id: &str,
1214 kind: &str,
1215 label: Option<String>,
1216 extension: &str,
1217 bytes: &[u8],
1218 metadata: Option<Value>,
1219 ) -> anyhow::Result<BrowserArtifactRef> {
1220 fs::create_dir_all(&self.browser.artifact_root).await?;
1221 let artifact_id = format!("artifact-{}", Uuid::new_v4());
1222 let file_name = format!("{artifact_id}.{extension}");
1223 let target = self.browser.artifact_root.join(file_name);
1224 fs::write(&target, bytes)
1225 .await
1226 .with_context(|| format!("failed to write browser artifact `{}`", target.display()))?;
1227 let artifact = BrowserArtifactRef {
1228 artifact_id: artifact_id.clone(),
1229 uri: target.to_string_lossy().to_string(),
1230 kind: kind.to_string(),
1231 label,
1232 created_at_ms: now_ms(),
1233 metadata,
1234 };
1235 self.append_routine_artifact_if_needed(
1236 model_session_id,
1237 artifact.clone(),
1238 browser_session_id,
1239 )
1240 .await;
1241 Ok(artifact)
1242 }
1243
1244 async fn append_routine_artifact_if_needed(
1245 &self,
1246 model_session_id: Option<&str>,
1247 artifact: BrowserArtifactRef,
1248 browser_session_id: &str,
1249 ) {
1250 let Some(model_session_id) = model_session_id else {
1251 return;
1252 };
1253 let Some(state) = self.state.as_ref() else {
1254 return;
1255 };
1256 let Some(policy) = state.routine_session_policy(model_session_id).await else {
1257 return;
1258 };
1259 let run_artifact = RoutineRunArtifact {
1260 artifact_id: artifact.artifact_id.clone(),
1261 uri: artifact.uri.clone(),
1262 kind: artifact.kind.clone(),
1263 label: artifact.label.clone(),
1264 created_at_ms: artifact.created_at_ms,
1265 metadata: artifact.metadata.clone(),
1266 };
1267 let _ = state
1268 .append_routine_run_artifact(&policy.run_id, run_artifact.clone())
1269 .await;
1270 state.event_bus.publish(EngineEvent::new(
1271 "routine.run.artifact_added",
1272 json!({
1273 "runID": policy.run_id,
1274 "routineID": policy.routine_id,
1275 "browserSessionID": browser_session_id,
1276 "artifact": run_artifact,
1277 }),
1278 ));
1279 }
1280}