Skip to main content

tandem_server/browser_parts/
part01.rs

1use std::collections::HashMap;
2use std::net::{IpAddr, Ipv6Addr};
3use std::path::{Path, PathBuf};
4use std::process::Stdio;
5use std::sync::atomic::{AtomicBool, Ordering};
6use std::sync::Arc;
7
8use anyhow::{anyhow, Context};
9use async_trait::async_trait;
10use base64::Engine;
11use flate2::read::GzDecoder;
12use serde::{Deserialize, Serialize};
13use serde_json::{json, Value};
14use tandem_browser::{
15    detect_sidecar_binary_path, run_doctor, BrowserActionResult, BrowserArtifactRef,
16    BrowserBlockingIssue, BrowserCloseParams, BrowserCloseResult, BrowserDoctorOptions,
17    BrowserExtractParams, BrowserExtractResult, BrowserNavigateParams, BrowserNavigateResult,
18    BrowserOpenRequest, BrowserOpenResult, BrowserPressParams, BrowserRpcRequest,
19    BrowserRpcResponse, BrowserScreenshotParams, BrowserScreenshotResult, BrowserSnapshotParams,
20    BrowserSnapshotResult, BrowserStatus, BrowserTypeParams, BrowserViewport, BrowserWaitCondition,
21    BrowserWaitParams, BROWSER_PROTOCOL_VERSION,
22};
23use tandem_core::{resolve_shared_paths, BrowserConfig};
24use tandem_tools::{Tool, ToolRegistry};
25use tandem_types::{EngineEvent, ToolResult, ToolSchema};
26use tokio::fs;
27use tokio::io::{AsyncBufReadExt, AsyncReadExt, AsyncWriteExt, BufReader};
28use tokio::process::{Child, ChildStderr, ChildStdin, ChildStdout, Command};
29use tokio::sync::{Mutex, RwLock};
30use uuid::Uuid;
31
32use crate::{now_ms, AppState, RoutineRunArtifact, RuntimeState};
33
/// Maximum age, in milliseconds, of the cached `BrowserStatus` before
/// `BrowserSubsystem::status_snapshot` re-evaluates it.
const STATUS_CACHE_MAX_AGE_MS: u64 = 30_000;
/// Byte limit for inline extract output.
/// NOTE(review): not referenced in the visible part of this file — confirm where it is applied.
const INLINE_EXTRACT_LIMIT_BYTES: usize = 24_000;
/// Value passed in the label position of `store_artifact` for screenshots captured by
/// `browser_snapshot`.
const SNAPSHOT_SCREENSHOT_LABEL: &str = "browser snapshot";
/// Presumably the GitHub `owner/repo` slug that sidecar releases are fetched from — confirm
/// against the installer code.
const RELEASE_REPO: &str = "frumu-ai/tandem";
/// Name of an environment variable; presumably overrides the releases URL — confirm against
/// the installer code.
const RELEASES_URL_ENV: &str = "TANDEM_BROWSER_RELEASES_URL";
/// Presumably the `User-Agent` sent by the sidecar installer's HTTP requests — confirm.
const BROWSER_INSTALL_USER_AGENT: &str = "tandem-browser-installer";
40
/// Handle to a spawned `tandem-browser` sidecar process and its piped stdio.
///
/// Requests are written to `stdin` as one JSON line each and the reply is read back as one
/// line from `stdout` (see `call_raw`).
#[derive(Debug)]
struct BrowserSidecarClient {
    // Held so the child handle lives as long as the client; never read directly here.
    _child: Child,
    // Request channel: one serialized `BrowserRpcRequest` per line.
    stdin: ChildStdin,
    // Response channel: one `BrowserRpcResponse` per line.
    stdout: BufReader<ChildStdout>,
    // Only read when stdout hits EOF, to attach diagnostics to the error.
    stderr: BufReader<ChildStderr>,
    // Id assigned to the next request; starts at 1 and increments (saturating).
    next_id: u64,
}
49
/// Server-side bookkeeping for one browser session, keyed by the browser session id in
/// `BrowserSubsystem::sessions`.
#[derive(Debug, Clone)]
struct ManagedBrowserSession {
    // Session id of the caller that opened this browser session, when known; matched by
    // `close_sessions_for_owner`.
    owner_session_id: Option<String>,
    // Last URL recorded for the session (set on insert, refreshed by `update_session_url`).
    current_url: String,
    // `now_ms()` at insertion time; currently unused (hence the leading underscore).
    _created_at_ms: u64,
    // `now_ms()` at insertion or at the most recent `update_session_url`.
    updated_at_ms: u64,
}
57
/// Compact health view produced by `BrowserSubsystem::health_summary` from the cached
/// `BrowserStatus` plus the tools-registered flag.
///
/// The three optional fields are omitted from serialized output when `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct BrowserHealthSummary {
    /// Copied from `BrowserStatus::enabled`.
    pub enabled: bool,
    /// Copied from `BrowserStatus::runnable`.
    pub runnable: bool,
    /// Value of the subsystem's `tools_registered` flag at the time of the summary.
    pub tools_registered: bool,
    /// Copied from `BrowserStatus::sidecar.found`.
    pub sidecar_found: bool,
    /// Copied from `BrowserStatus::browser.found`.
    pub browser_found: bool,
    /// Copied from `BrowserStatus::browser.version`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub browser_version: Option<String>,
    /// Copied from `BrowserStatus::last_checked_at_ms`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_checked_at_ms: Option<u64>,
    /// Copied from `BrowserStatus::last_error`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub last_error: Option<String>,
}
72
/// Outcome of `BrowserSubsystem::install_sidecar`.
///
/// NOTE(review): `version`, `asset_name`, `installed_path` and `downloaded_bytes` are filled in
/// by `install_browser_sidecar`, which is not visible in this part of the file — the field
/// descriptions below are inferred from their names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSidecarInstallResult {
    /// Presumably the release version/tag that was installed.
    pub version: String,
    /// Presumably the name of the downloaded release asset.
    pub asset_name: String,
    /// Presumably the filesystem path the sidecar was installed to.
    pub installed_path: String,
    /// Presumably the size of the download in bytes.
    pub downloaded_bytes: u64,
    /// Browser status; `install_sidecar` overwrites this with a fresh `refresh_status()` result.
    pub status: BrowserStatus,
}
81
/// Report produced by `BrowserSubsystem::smoke_test` (open → snapshot → extract → close).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BrowserSmokeTestResult {
    /// Set to `true` by `smoke_test` whenever a result is produced.
    pub ok: bool,
    /// Status snapshot taken before the test ran.
    pub status: BrowserStatus,
    /// URL that was requested (trimmed input, or `https://example.com` when none was given).
    pub url: String,
    /// URL reported by the snapshot.
    pub final_url: String,
    /// Page title reported by the snapshot.
    pub title: String,
    /// Load state reported by the snapshot.
    pub load_state: String,
    /// Number of elements returned by the snapshot.
    pub element_count: usize,
    /// Excerpt of the extracted visible text; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub excerpt: Option<String>,
    /// Whether the sidecar reported the session as closed (`false` if the close call failed).
    pub closed: bool,
}
95
/// Deserialization target for a release record.
/// NOTE(review): presumably the subset of a GitHub Releases API response that the installer
/// needs; the code that fetches it is not visible in this part of the file.
#[derive(Debug, Clone, Deserialize)]
struct GitHubRelease {
    // Tag name of the release.
    tag_name: String,
    // Downloadable assets attached to the release.
    assets: Vec<GitHubAsset>,
}
101
/// Deserialization target for one asset of a [`GitHubRelease`].
/// NOTE(review): field meanings are taken from their names; the consumer is not visible here.
#[derive(Debug, Clone, Deserialize)]
struct GitHubAsset {
    // Asset file name.
    name: String,
    // Download URL for the asset.
    browser_download_url: String,
    // Asset size; presumably in bytes — confirm.
    size: u64,
}
108
/// Shared entry point for browser automation: owns the configuration, the cached status,
/// the lazily spawned sidecar client and the table of open browser sessions.
///
/// Cloning is cheap for the mutable state: `status`, `tools_registered`, `client` and
/// `sessions` sit behind `Arc`, so clones share them.
#[derive(Clone)]
pub struct BrowserSubsystem {
    // Configuration supplied to `new`.
    config: BrowserConfig,
    // Most recently evaluated status; refreshed by `refresh_status`.
    status: Arc<RwLock<BrowserStatus>>,
    // Set by `register_tools` / `set_tools_registered`; reported in the health summary.
    tools_registered: Arc<AtomicBool>,
    // Sidecar client, spawned on first use by `call_sidecar`; the mutex serializes calls.
    client: Arc<Mutex<Option<BrowserSidecarClient>>>,
    // Open sessions keyed by browser session id.
    sessions: Arc<RwLock<HashMap<String, ManagedBrowserSession>>>,
    // Directory for stored artifacts: `<canonical_root>/browser-artifacts`, or
    // `.tandem/browser-artifacts` when shared paths cannot be resolved.
    artifact_root: PathBuf,
}
118
/// Selects which browser operation a [`BrowserTool`] instance performs.
///
/// `register_tools` registers one tool per variant under the name `browser_<variant>`
/// in lowercase (e.g. `Open` → `browser_open`).
#[derive(Clone, Copy)]
enum BrowserToolKind {
    Status,
    Open,
    Navigate,
    Snapshot,
    Click,
    Type,
    Press,
    Wait,
    Extract,
    Screenshot,
    Close,
}
133
/// A single registered browser tool: one [`BrowserToolKind`] bound to the shared subsystem.
#[derive(Clone)]
pub struct BrowserTool {
    // Operation dispatched by `execute_impl`.
    kind: BrowserToolKind,
    // Shared browser subsystem (a clone of the registering instance).
    browser: BrowserSubsystem,
    // Optional application state handed in at registration time.
    state: Option<AppState>,
}
140
/// Arguments accepted by the `browser_type` tool before they are converted into
/// `BrowserTypeParams`. Everything except `session_id` is optional and defaults when absent.
#[derive(Debug, Deserialize)]
struct BrowserTypeToolArgs {
    // Browser session to type into (required).
    session_id: String,
    // Forwarded unchanged to `BrowserTypeParams::element_id`.
    #[serde(default)]
    element_id: Option<String>,
    // Forwarded unchanged to `BrowserTypeParams::selector`.
    #[serde(default)]
    selector: Option<String>,
    // Literal text; passed with `secret_ref` to `resolve_text_input` to obtain the final text.
    #[serde(default)]
    text: Option<String>,
    // Alternative text source handed to `resolve_text_input`.
    // NOTE(review): resolution semantics live in `resolve_text_input`, not visible here.
    #[serde(default)]
    secret_ref: Option<String>,
    // Forwarded unchanged to `BrowserTypeParams::replace`; defaults to `false`.
    #[serde(default)]
    replace: bool,
    // Forwarded unchanged to `BrowserTypeParams::submit`; defaults to `false`.
    #[serde(default)]
    submit: bool,
    // Forwarded unchanged to `BrowserTypeParams::timeout_ms`.
    #[serde(default)]
    timeout_ms: Option<u64>,
}
159
/// Loosely-typed wait condition as supplied by a tool caller. Every field is optional.
///
/// NOTE(review): the conversion into `BrowserWaitCondition` is not visible in this part of
/// the file, so how `kind` combines with the value fields should be confirmed there.
#[derive(Debug, Deserialize, Default)]
struct BrowserWaitConditionArgs {
    // Condition kind; the JSON key `type` is accepted as an alias.
    #[serde(default, alias = "type")]
    kind: Option<String>,
    #[serde(default)]
    value: Option<String>,
    #[serde(default)]
    selector: Option<String>,
    #[serde(default)]
    text: Option<String>,
    #[serde(default)]
    url: Option<String>,
}
173
/// Arguments accepted by the `browser_wait` tool. Both snake_case and camelCase spellings
/// are accepted for the aliased keys.
///
/// The condition may arrive nested (under `condition`, `wait_for` or `waitFor`); the
/// top-level `kind`/`value`/`selector`/`text`/`url` fields presumably allow the same
/// condition to be given flattened — confirm against `execute_wait`, which is not visible here.
#[derive(Debug, Deserialize)]
struct BrowserWaitToolArgs {
    // Browser session to wait on (required); `sessionId` is accepted as an alias.
    #[serde(alias = "sessionId")]
    session_id: String,
    // Nested condition object.
    #[serde(default, alias = "wait_for", alias = "waitFor")]
    condition: Option<BrowserWaitConditionArgs>,
    // Optional timeout in milliseconds; `timeoutMs` is accepted as an alias.
    #[serde(default, alias = "timeoutMs")]
    timeout_ms: Option<u64>,
    // Top-level condition kind; the JSON key `type` is accepted as an alias.
    #[serde(default, alias = "type")]
    kind: Option<String>,
    #[serde(default)]
    value: Option<String>,
    #[serde(default)]
    selector: Option<String>,
    #[serde(default)]
    text: Option<String>,
    #[serde(default)]
    url: Option<String>,
}
193
/// Out-of-band context carried inside the tool arguments under reserved keys.
/// NOTE(review): presumably what `parse_tool_context` returns — that helper is not visible here.
#[derive(Debug, Deserialize)]
struct BrowserToolContext {
    // Read from the `__session_id` key; the tool methods use it as the owner of any browser
    // session they open or load.
    #[serde(default, rename = "__session_id")]
    model_session_id: Option<String>,
}
199
200impl BrowserSidecarClient {
201    async fn spawn(config: &BrowserConfig) -> anyhow::Result<Self> {
202        let sidecar_path = detect_sidecar_binary_path(config.sidecar_path.as_deref())
203            .ok_or_else(|| anyhow!("browser_sidecar_not_found"))?;
204        let mut cmd = Command::new(&sidecar_path);
205        cmd.arg("serve")
206            .arg("--transport")
207            .arg("stdio")
208            .stdin(Stdio::piped())
209            .stdout(Stdio::piped())
210            .stderr(Stdio::piped());
211        if let Some(path) = config
212            .executable_path
213            .as_deref()
214            .filter(|v| !v.trim().is_empty())
215        {
216            cmd.env("TANDEM_BROWSER_EXECUTABLE", path);
217        }
218        if let Some(path) = config
219            .user_data_root
220            .as_deref()
221            .filter(|v| !v.trim().is_empty())
222        {
223            cmd.env("TANDEM_BROWSER_USER_DATA_ROOT", path);
224        }
225        cmd.env(
226            "TANDEM_BROWSER_ALLOW_NO_SANDBOX",
227            bool_env_value(config.allow_no_sandbox),
228        );
229        cmd.env(
230            "TANDEM_BROWSER_HEADLESS",
231            bool_env_value(config.headless_default),
232        );
233
234        let mut child = cmd.spawn().with_context(|| {
235            format!(
236                "failed to spawn tandem-browser sidecar at `{}`",
237                sidecar_path.display()
238            )
239        })?;
240        let stdin = child
241            .stdin
242            .take()
243            .ok_or_else(|| anyhow!("browser sidecar stdin unavailable"))?;
244        let stdout = child
245            .stdout
246            .take()
247            .ok_or_else(|| anyhow!("browser sidecar stdout unavailable"))?;
248        let stderr = child
249            .stderr
250            .take()
251            .ok_or_else(|| anyhow!("browser sidecar stderr unavailable"))?;
252        let mut client = Self {
253            _child: child,
254            stdin,
255            stdout: BufReader::new(stdout),
256            stderr: BufReader::new(stderr),
257            next_id: 1,
258        };
259        let version: Value = client.call_raw("browser.version", json!({})).await?;
260        let protocol = version
261            .get("protocol_version")
262            .and_then(Value::as_str)
263            .unwrap_or("");
264        if protocol != BROWSER_PROTOCOL_VERSION {
265            anyhow::bail!(
266                "protocol_mismatch: expected browser protocol {}, got {}",
267                BROWSER_PROTOCOL_VERSION,
268                protocol
269            );
270        }
271        Ok(client)
272    }
273
274    async fn call_raw(&mut self, method: &str, params: Value) -> anyhow::Result<Value> {
275        let id = self.next_id;
276        self.next_id = self.next_id.saturating_add(1);
277        let request = BrowserRpcRequest {
278            jsonrpc: "2.0".to_string(),
279            id: json!(id),
280            method: method.to_string(),
281            params,
282        };
283        let raw = serde_json::to_string(&request)?;
284        self.stdin.write_all(raw.as_bytes()).await?;
285        self.stdin.write_all(b"\n").await?;
286        self.stdin.flush().await?;
287
288        let mut line = String::new();
289        let read = self.stdout.read_line(&mut line).await?;
290        if read == 0 {
291            let mut stderr = String::new();
292            let _ = self.stderr.read_to_string(&mut stderr).await;
293            let stderr = stderr.trim();
294            if stderr.is_empty() {
295                anyhow::bail!("browser sidecar closed the stdio connection");
296            }
297            anyhow::bail!(
298                "browser sidecar closed the stdio connection: {}",
299                smoke_excerpt(stderr, 600)
300            );
301        }
302        let response: BrowserRpcResponse =
303            serde_json::from_str(line.trim()).context("invalid browser sidecar response")?;
304        if let Some(error) = response.error {
305            anyhow::bail!("{}", error.message);
306        }
307        response
308            .result
309            .ok_or_else(|| anyhow!("browser sidecar returned an empty result"))
310    }
311
312    async fn call<T: Serialize, R: for<'de> Deserialize<'de>>(
313        &mut self,
314        method: &str,
315        params: T,
316    ) -> anyhow::Result<R> {
317        let value = self.call_raw(method, serde_json::to_value(params)?).await?;
318        serde_json::from_value(value).context("invalid browser sidecar payload")
319    }
320
321    async fn call_value<R: for<'de> Deserialize<'de>>(
322        &mut self,
323        method: &str,
324        params: Value,
325    ) -> anyhow::Result<R> {
326        let value = self.call_raw(method, params).await?;
327        serde_json::from_value(value).context("invalid browser sidecar payload")
328    }
329}
330
331impl BrowserSubsystem {
332    pub fn new(config: BrowserConfig) -> Self {
333        let artifact_root = resolve_shared_paths()
334            .map(|paths| paths.canonical_root.join("browser-artifacts"))
335            .unwrap_or_else(|_| PathBuf::from(".tandem").join("browser-artifacts"));
336        Self {
337            config,
338            status: Arc::new(RwLock::new(BrowserStatus::default())),
339            tools_registered: Arc::new(AtomicBool::new(false)),
340            client: Arc::new(Mutex::new(None)),
341            sessions: Arc::new(RwLock::new(HashMap::new())),
342            artifact_root,
343        }
344    }
345
    /// Returns the configuration this subsystem was created with.
    pub fn config(&self) -> &BrowserConfig {
        &self.config
    }
349
350    pub async fn install_sidecar(&self) -> anyhow::Result<BrowserSidecarInstallResult> {
351        let mut result = install_browser_sidecar(&self.config).await?;
352        result.status = self.refresh_status().await;
353        Ok(result)
354    }
355
356    pub async fn smoke_test(&self, url: Option<String>) -> anyhow::Result<BrowserSmokeTestResult> {
357        let status = self.status_snapshot().await;
358        if !status.runnable {
359            anyhow::bail!(
360                "browser_not_runnable: run browser doctor first; current status is not runnable"
361            );
362        }
363
364        let target_url = url
365            .map(|value| value.trim().to_string())
366            .filter(|value| !value.is_empty())
367            .unwrap_or_else(|| "https://example.com".to_string());
368        let request = BrowserOpenRequest {
369            url: target_url.clone(),
370            profile_id: None,
371            headless: Some(self.config.headless_default),
372            viewport: Some(BrowserViewport {
373                width: self.config.default_viewport.width,
374                height: self.config.default_viewport.height,
375            }),
376            wait_until: Some("navigation".to_string()),
377            executable_path: self.config.executable_path.clone(),
378            user_data_root: self.config.user_data_root.clone(),
379            allow_no_sandbox: self.config.allow_no_sandbox,
380            headless_default: self.config.headless_default,
381        };
382        let opened: BrowserOpenResult = self.call_sidecar("browser.open", request).await?;
383        let session_id = opened.session_id.clone();
384
385        let result = async {
386            let snapshot: BrowserSnapshotResult = self
387                .call_sidecar(
388                    "browser.snapshot",
389                    BrowserSnapshotParams {
390                        session_id: session_id.clone(),
391                        max_elements: Some(25),
392                        include_screenshot: false,
393                    },
394                )
395                .await?;
396            let extract: BrowserExtractResult = self
397                .call_sidecar(
398                    "browser.extract",
399                    BrowserExtractParams {
400                        session_id: session_id.clone(),
401                        format: "visible_text".to_string(),
402                        max_bytes: Some(4_000),
403                    },
404                )
405                .await?;
406            Ok::<BrowserSmokeTestResult, anyhow::Error>(BrowserSmokeTestResult {
407                ok: true,
408                status,
409                url: target_url,
410                final_url: snapshot.url,
411                title: snapshot.title,
412                load_state: snapshot.load_state,
413                element_count: snapshot.elements.len(),
414                excerpt: Some(smoke_excerpt(&extract.content, 400)),
415                closed: false,
416            })
417        }
418        .await;
419
420        let close_result: BrowserCloseResult = self
421            .call_sidecar(
422                "browser.close",
423                BrowserCloseParams {
424                    session_id: session_id.clone(),
425                },
426            )
427            .await
428            .unwrap_or(BrowserCloseResult {
429                session_id,
430                closed: false,
431            });
432
433        let mut smoke = result?;
434        smoke.closed = close_result.closed;
435        Ok(smoke)
436    }
437
438    pub async fn refresh_status(&self) -> BrowserStatus {
439        let config = self.config.clone();
440        let evaluated = tokio::task::spawn_blocking(move || evaluate_browser_status(config))
441            .await
442            .unwrap_or_else(|err| BrowserStatus {
443                enabled: false,
444                runnable: false,
445                headless_default: true,
446                sidecar: Default::default(),
447                browser: Default::default(),
448                blocking_issues: vec![BrowserBlockingIssue {
449                    code: "browser_launch_failed".to_string(),
450                    message: format!("browser readiness task failed: {}", err),
451                }],
452                recommendations: vec![
453                    "Run `tandem-engine browser doctor --json` on the same host.".to_string(),
454                ],
455                install_hints: Vec::new(),
456                last_checked_at_ms: Some(now_ms()),
457                last_error: Some(err.to_string()),
458            });
459        *self.status.write().await = evaluated.clone();
460        evaluated
461    }
462
463    pub async fn status_snapshot(&self) -> BrowserStatus {
464        let current = self.status.read().await.clone();
465        if current
466            .last_checked_at_ms
467            .is_some_and(|ts| now_ms().saturating_sub(ts) <= STATUS_CACHE_MAX_AGE_MS)
468        {
469            current
470        } else {
471            self.refresh_status().await
472        }
473    }
474
475    pub async fn health_summary(&self) -> BrowserHealthSummary {
476        let status = self.status.read().await.clone();
477        BrowserHealthSummary {
478            enabled: status.enabled,
479            runnable: status.runnable,
480            tools_registered: self.tools_registered.load(Ordering::Relaxed),
481            sidecar_found: status.sidecar.found,
482            browser_found: status.browser.found,
483            browser_version: status.browser.version,
484            last_checked_at_ms: status.last_checked_at_ms,
485            last_error: status.last_error,
486        }
487    }
488
    /// Sets the flag reported as `tools_registered` in the health summary.
    pub fn set_tools_registered(&self, value: bool) {
        self.tools_registered.store(value, Ordering::Relaxed);
    }
492
493    pub async fn register_tools(
494        &self,
495        tools: &ToolRegistry,
496        state: Option<AppState>,
497    ) -> anyhow::Result<()> {
498        tools.unregister_by_prefix("browser_").await;
499        tools
500            .register_tool(
501                "browser_status".to_string(),
502                Arc::new(BrowserTool::new(
503                    BrowserToolKind::Status,
504                    self.clone(),
505                    state.clone(),
506                )),
507            )
508            .await;
509
510        let status = self.status_snapshot().await;
511        if !status.enabled || !status.runnable {
512            self.set_tools_registered(false);
513            return Ok(());
514        }
515
516        for (name, kind) in [
517            ("browser_open", BrowserToolKind::Open),
518            ("browser_navigate", BrowserToolKind::Navigate),
519            ("browser_snapshot", BrowserToolKind::Snapshot),
520            ("browser_click", BrowserToolKind::Click),
521            ("browser_type", BrowserToolKind::Type),
522            ("browser_press", BrowserToolKind::Press),
523            ("browser_wait", BrowserToolKind::Wait),
524            ("browser_extract", BrowserToolKind::Extract),
525            ("browser_screenshot", BrowserToolKind::Screenshot),
526            ("browser_close", BrowserToolKind::Close),
527        ] {
528            tools
529                .register_tool(
530                    name.to_string(),
531                    Arc::new(BrowserTool::new(kind, self.clone(), state.clone())),
532                )
533                .await;
534        }
535        self.set_tools_registered(true);
536        Ok(())
537    }
538
539    async fn update_last_error(&self, message: impl Into<String>) {
540        let mut status = self.status.write().await;
541        status.last_error = Some(message.into());
542        status.last_checked_at_ms = Some(now_ms());
543    }
544
545    async fn call_sidecar<T: Serialize, R: for<'de> Deserialize<'de>>(
546        &self,
547        method: &str,
548        params: T,
549    ) -> anyhow::Result<R> {
550        let params = serde_json::to_value(params)?;
551        let mut guard = self.client.lock().await;
552        if guard.is_none() {
553            *guard = Some(BrowserSidecarClient::spawn(&self.config).await?);
554        }
555        let result = guard
556            .as_mut()
557            .expect("browser sidecar client initialized")
558            .call_value(method, params.clone())
559            .await;
560        if let Err(err) = &result {
561            *guard = None;
562            self.update_last_error(err.to_string()).await;
563            if err
564                .to_string()
565                .contains("browser sidecar closed the stdio connection")
566            {
567                *guard = Some(BrowserSidecarClient::spawn(&self.config).await?);
568                return guard
569                    .as_mut()
570                    .expect("browser sidecar client reinitialized")
571                    .call_value(method, params)
572                    .await;
573            }
574        }
575        result
576    }
577
578    async fn insert_session(
579        &self,
580        browser_session_id: String,
581        owner_session_id: Option<String>,
582        current_url: String,
583    ) {
584        self.sessions.write().await.insert(
585            browser_session_id,
586            ManagedBrowserSession {
587                owner_session_id,
588                current_url,
589                _created_at_ms: now_ms(),
590                updated_at_ms: now_ms(),
591            },
592        );
593    }
594
595    async fn session(&self, browser_session_id: &str) -> Option<ManagedBrowserSession> {
596        self.sessions.read().await.get(browser_session_id).cloned()
597    }
598
599    async fn update_session_url(
600        &self,
601        browser_session_id: &str,
602        current_url: String,
603    ) -> Option<ManagedBrowserSession> {
604        let mut sessions = self.sessions.write().await;
605        let session = sessions.get_mut(browser_session_id)?;
606        session.current_url = current_url;
607        session.updated_at_ms = now_ms();
608        Some(session.clone())
609    }
610
611    async fn remove_session(&self, browser_session_id: &str) -> Option<ManagedBrowserSession> {
612        self.sessions.write().await.remove(browser_session_id)
613    }
614
615    pub async fn close_sessions_for_owner(&self, owner_session_id: &str) -> usize {
616        let session_ids = self
617            .sessions
618            .read()
619            .await
620            .iter()
621            .filter_map(|(session_id, session)| {
622                (session.owner_session_id.as_deref() == Some(owner_session_id))
623                    .then_some(session_id.clone())
624            })
625            .collect::<Vec<_>>();
626        self.close_session_ids(session_ids).await
627    }
628
629    pub async fn close_all_sessions(&self) -> usize {
630        let session_ids = self
631            .sessions
632            .read()
633            .await
634            .keys()
635            .cloned()
636            .collect::<Vec<_>>();
637        self.close_session_ids(session_ids).await
638    }
639
640    async fn close_session_ids(&self, session_ids: Vec<String>) -> usize {
641        let mut closed = 0usize;
642        for session_id in session_ids {
643            let _ = self
644                .call_sidecar::<_, BrowserCloseResult>(
645                    "browser.close",
646                    BrowserCloseParams {
647                        session_id: session_id.clone(),
648                    },
649                )
650                .await;
651            if self.remove_session(&session_id).await.is_some() {
652                closed += 1;
653            }
654        }
655        closed
656    }
657}
658
659impl BrowserTool {
    /// Creates a tool of the given `kind` bound to `browser`, with optional application state.
    fn new(kind: BrowserToolKind, browser: BrowserSubsystem, state: Option<AppState>) -> Self {
        Self {
            kind,
            browser,
            state,
        }
    }
667
    /// Dispatches to the handler for this tool's kind, passing `args` through unchanged
    /// (the status tool takes no arguments).
    ///
    /// The match is exhaustive on purpose: adding a `BrowserToolKind` variant without a
    /// handler is a compile error.
    async fn execute_impl(&self, args: Value) -> anyhow::Result<ToolResult> {
        match self.kind {
            BrowserToolKind::Status => self.execute_status().await,
            BrowserToolKind::Open => self.execute_open(args).await,
            BrowserToolKind::Navigate => self.execute_navigate(args).await,
            BrowserToolKind::Snapshot => self.execute_snapshot(args).await,
            BrowserToolKind::Click => self.execute_click(args).await,
            BrowserToolKind::Type => self.execute_type(args).await,
            BrowserToolKind::Press => self.execute_press(args).await,
            BrowserToolKind::Wait => self.execute_wait(args).await,
            BrowserToolKind::Extract => self.execute_extract(args).await,
            BrowserToolKind::Screenshot => self.execute_screenshot(args).await,
            BrowserToolKind::Close => self.execute_close(args).await,
        }
    }
683
684    async fn execute_status(&self) -> anyhow::Result<ToolResult> {
685        let status = self.browser.status_snapshot().await;
686        ok_tool_result(
687            serde_json::to_value(&status)?,
688            json!({
689                "enabled": status.enabled,
690                "runnable": status.runnable,
691                "sidecar_found": status.sidecar.found,
692                "browser_found": status.browser.found,
693            }),
694        )
695    }
696
697    async fn execute_open(&self, args: Value) -> anyhow::Result<ToolResult> {
698        let ctx = parse_tool_context(&args);
699        let mut request: BrowserOpenRequest =
700            serde_json::from_value(args.clone()).context("invalid browser_open arguments")?;
701        normalize_browser_open_request(&mut request);
702        let status = self.browser.status_snapshot().await;
703        if !status.runnable {
704            return browser_not_runnable_result(&status);
705        }
706        ensure_allowed_browser_url(
707            &request.url,
708            &self
709                .effective_allowed_hosts(ctx.model_session_id.as_deref())
710                .await,
711        )?;
712        request.executable_path = self.browser.config.executable_path.clone();
713        request.user_data_root = self.browser.config.user_data_root.clone();
714        request.allow_no_sandbox = self.browser.config.allow_no_sandbox;
715        request.headless_default = self.browser.config.headless_default;
716        if request.viewport.is_none() {
717            request.viewport = Some(BrowserViewport {
718                width: self.browser.config.default_viewport.width,
719                height: self.browser.config.default_viewport.height,
720            });
721        }
722        let result: BrowserOpenResult = self.browser.call_sidecar("browser.open", request).await?;
723        ensure_allowed_browser_url(
724            &result.final_url,
725            &self
726                .effective_allowed_hosts(ctx.model_session_id.as_deref())
727                .await,
728        )
729        .map_err(|err| anyhow!("host_not_allowed: {}", err))?;
730        self.browser
731            .insert_session(
732                result.session_id.clone(),
733                ctx.model_session_id.clone(),
734                result.final_url.clone(),
735            )
736            .await;
737        ok_tool_result(
738            serde_json::to_value(&result)?,
739            json!({
740                "session_id": result.session_id,
741                "url": result.final_url,
742                "headless": result.headless,
743            }),
744        )
745    }
746
747    async fn execute_navigate(&self, args: Value) -> anyhow::Result<ToolResult> {
748        let ctx = parse_tool_context(&args);
749        let params: BrowserNavigateParams =
750            serde_json::from_value(args.clone()).context("invalid browser_navigate arguments")?;
751        let session = self
752            .load_session(&params.session_id, ctx.model_session_id.as_deref())
753            .await?;
754        ensure_allowed_browser_url(
755            &params.url,
756            &self
757                .effective_allowed_hosts(session.owner_session_id.as_deref())
758                .await,
759        )?;
760        let result: BrowserNavigateResult = self
761            .browser
762            .call_sidecar("browser.navigate", params.clone())
763            .await?;
764        self.enforce_post_navigation(
765            &params.session_id,
766            &result.final_url,
767            session.owner_session_id.as_deref(),
768        )
769        .await?;
770        ok_tool_result(
771            serde_json::to_value(&result)?,
772            json!({
773                "session_id": result.session_id,
774                "url": result.final_url,
775            }),
776        )
777    }
778
779    async fn execute_snapshot(&self, args: Value) -> anyhow::Result<ToolResult> {
780        let ctx = parse_tool_context(&args);
781        let params: BrowserSnapshotParams =
782            serde_json::from_value(args.clone()).context("invalid browser_snapshot arguments")?;
783        let session = self
784            .load_session(&params.session_id, ctx.model_session_id.as_deref())
785            .await?;
786        self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
787            .await?;
788        let mut result: BrowserSnapshotResult = self
789            .browser
790            .call_sidecar("browser.snapshot", params.clone())
791            .await?;
792        self.browser
793            .update_session_url(&params.session_id, result.url.clone())
794            .await;
795
796        let screenshot_artifact = if let Some(base64) = result.screenshot_base64.take() {
797            Some(
798                self.store_artifact(
799                    ctx.model_session_id.as_deref(),
800                    &params.session_id,
801                    "screenshot",
802                    params
803                        .include_screenshot
804                        .then_some(SNAPSHOT_SCREENSHOT_LABEL.to_string()),
805                    "png",
806                    &base64::engine::general_purpose::STANDARD
807                        .decode(base64.as_bytes())
808                        .context("invalid snapshot screenshot payload")?,
809                    Some(json!({
810                        "source": "browser_snapshot",
811                        "url": result.url,
812                    })),
813                )
814                .await?,
815            )
816        } else {
817            None
818        };
819        let payload = json!({
820            "session_id": result.session_id,
821            "url": result.url,
822            "title": result.title,
823            "load_state": result.load_state,
824            "viewport": result.viewport,
825            "elements": result.elements,
826            "notices": result.notices,
827            "screenshot_artifact": screenshot_artifact,
828        });
829        ok_tool_result(
830            payload.clone(),
831            json!({
832                "session_id": payload.get("session_id"),
833                "url": payload.get("url"),
834                "element_count": payload.get("elements").and_then(Value::as_array).map(|rows| rows.len()).unwrap_or(0),
835            }),
836        )
837    }
838
839    async fn execute_click(&self, args: Value) -> anyhow::Result<ToolResult> {
840        let ctx = parse_tool_context(&args);
841        let params: tandem_browser::BrowserClickParams =
842            serde_json::from_value(args.clone()).context("invalid browser_click arguments")?;
843        let session = self
844            .load_session(&params.session_id, ctx.model_session_id.as_deref())
845            .await?;
846        self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
847            .await?;
848        let result: BrowserActionResult = self
849            .browser
850            .call_sidecar("browser.click", params.clone())
851            .await?;
852        self.update_action_url(
853            &params.session_id,
854            result.final_url.as_deref(),
855            session.owner_session_id.as_deref(),
856        )
857        .await?;
858        ok_tool_result(
859            serde_json::to_value(&result)?,
860            json!({
861                "session_id": result.session_id,
862                "success": result.success,
863                "url": result.final_url,
864            }),
865        )
866    }
867
868    async fn execute_type(&self, args: Value) -> anyhow::Result<ToolResult> {
869        let ctx = parse_tool_context(&args);
870        let params: BrowserTypeToolArgs =
871            serde_json::from_value(args.clone()).context("invalid browser_type arguments")?;
872        let session = self
873            .load_session(&params.session_id, ctx.model_session_id.as_deref())
874            .await?;
875        self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
876            .await?;
877        let text = resolve_text_input(params.text.clone(), params.secret_ref.clone())?;
878        let request = BrowserTypeParams {
879            session_id: params.session_id.clone(),
880            element_id: params.element_id.clone(),
881            selector: params.selector.clone(),
882            text,
883            replace: params.replace,
884            submit: params.submit,
885            timeout_ms: params.timeout_ms,
886        };
887        let result: BrowserActionResult =
888            self.browser.call_sidecar("browser.type", request).await?;
889        self.update_action_url(
890            &params.session_id,
891            result.final_url.as_deref(),
892            session.owner_session_id.as_deref(),
893        )
894        .await?;
895        ok_tool_result(
896            serde_json::to_value(&result)?,
897            json!({
898                "session_id": result.session_id,
899                "success": result.success,
900                "used_secret_ref": params.secret_ref.is_some(),
901                "url": result.final_url,
902            }),
903        )
904    }
905
906    async fn execute_press(&self, args: Value) -> anyhow::Result<ToolResult> {
907        let ctx = parse_tool_context(&args);
908        let params: BrowserPressParams =
909            serde_json::from_value(args.clone()).context("invalid browser_press arguments")?;
910        let session = self
911            .load_session(&params.session_id, ctx.model_session_id.as_deref())
912            .await?;
913        self.ensure_action_allowed(session.owner_session_id.as_deref(), &session.current_url)
914            .await?;
915        let result: BrowserActionResult = self
916            .browser
917            .call_sidecar("browser.press", params.clone())
918            .await?;
919        self.update_action_url(
920            &params.session_id,
921            result.final_url.as_deref(),
922            session.owner_session_id.as_deref(),
923        )
924        .await?;
925        ok_tool_result(
926            serde_json::to_value(&result)?,
927            json!({
928                "session_id": result.session_id,
929                "success": result.success,
930                "url": result.final_url,
931            }),
932        )
933    }
934
935    async fn execute_wait(&self, args: Value) -> anyhow::Result<ToolResult> {
936        let ctx = parse_tool_context(&args);
937        let params = parse_browser_wait_args(&args).context("invalid browser_wait arguments")?;
938        let session = self
939            .load_session(&params.session_id, ctx.model_session_id.as_deref())
940            .await?;
941        self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
942            .await?;
943        let result: BrowserActionResult = self
944            .browser
945            .call_sidecar("browser.wait", params.clone())
946            .await?;
947        self.update_action_url(
948            &params.session_id,
949            result.final_url.as_deref(),
950            session.owner_session_id.as_deref(),
951        )
952        .await?;
953        ok_tool_result(
954            serde_json::to_value(&result)?,
955            json!({
956                "session_id": result.session_id,
957                "success": result.success,
958                "url": result.final_url,
959            }),
960        )
961    }
962
963    async fn execute_extract(&self, args: Value) -> anyhow::Result<ToolResult> {
964        let ctx = parse_tool_context(&args);
965        let params: BrowserExtractParams =
966            serde_json::from_value(args.clone()).context("invalid browser_extract arguments")?;
967        let session = self
968            .load_session(&params.session_id, ctx.model_session_id.as_deref())
969            .await?;
970        self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
971            .await?;
972        let result: BrowserExtractResult = self
973            .browser
974            .call_sidecar("browser.extract", params.clone())
975            .await?;
976        let bytes = result.content.as_bytes();
977        let artifact = if bytes.len() > INLINE_EXTRACT_LIMIT_BYTES {
978            Some(
979                self.store_artifact(
980                    ctx.model_session_id.as_deref(),
981                    &params.session_id,
982                    "extract",
983                    Some(format!("browser extract ({})", result.format)),
984                    extension_for_extract_format(&result.format),
985                    bytes,
986                    Some(json!({
987                        "format": result.format,
988                        "truncated": result.truncated,
989                        "source": "browser_extract",
990                    })),
991                )
992                .await?,
993            )
994        } else {
995            None
996        };
997        let payload = json!({
998            "session_id": result.session_id,
999            "format": result.format,
1000            "content": artifact.is_none().then_some(result.content),
1001            "truncated": result.truncated,
1002            "artifact": artifact,
1003        });
1004        ok_tool_result(
1005            payload.clone(),
1006            json!({
1007                "session_id": payload.get("session_id"),
1008                "format": payload.get("format"),
1009                "artifact": payload.get("artifact").is_some(),
1010            }),
1011        )
1012    }
1013
1014    async fn execute_screenshot(&self, args: Value) -> anyhow::Result<ToolResult> {
1015        let ctx = parse_tool_context(&args);
1016        let params: BrowserScreenshotParams =
1017            serde_json::from_value(args.clone()).context("invalid browser_screenshot arguments")?;
1018        let session = self
1019            .load_session(&params.session_id, ctx.model_session_id.as_deref())
1020            .await?;
1021        self.ensure_page_read_allowed(session.owner_session_id.as_deref(), &session.current_url)
1022            .await?;
1023        let result: BrowserScreenshotResult = self
1024            .browser
1025            .call_sidecar("browser.screenshot", params.clone())
1026            .await?;
1027        let bytes = base64::engine::general_purpose::STANDARD
1028            .decode(result.data_base64.as_bytes())
1029            .context("invalid screenshot payload")?;
1030        let artifact = self
1031            .store_artifact(
1032                ctx.model_session_id.as_deref(),
1033                &params.session_id,
1034                "screenshot",
1035                result.label.clone(),
1036                "png",
1037                &bytes,
1038                Some(json!({
1039                    "mime_type": result.mime_type,
1040                    "bytes": result.bytes,
1041                    "source": "browser_screenshot",
1042                })),
1043            )
1044            .await?;
1045        ok_tool_result(
1046            json!({
1047                "session_id": result.session_id,
1048                "artifact": artifact,
1049                "summary": format!("Saved screenshot artifact ({} bytes).", result.bytes),
1050            }),
1051            json!({
1052                "session_id": result.session_id,
1053                "artifact_id": artifact.artifact_id,
1054            }),
1055        )
1056    }
1057
1058    async fn execute_close(&self, args: Value) -> anyhow::Result<ToolResult> {
1059        let ctx = parse_tool_context(&args);
1060        let params: BrowserCloseParams =
1061            serde_json::from_value(args.clone()).context("invalid browser_close arguments")?;
1062        let _ = self
1063            .load_session(&params.session_id, ctx.model_session_id.as_deref())
1064            .await?;
1065        let result: BrowserCloseResult = self
1066            .browser
1067            .call_sidecar("browser.close", params.clone())
1068            .await?;
1069        self.browser.remove_session(&params.session_id).await;
1070        ok_tool_result(
1071            serde_json::to_value(&result)?,
1072            json!({
1073                "session_id": result.session_id,
1074                "closed": result.closed,
1075            }),
1076        )
1077    }
1078
1079    async fn load_session(
1080        &self,
1081        browser_session_id: &str,
1082        model_session_id: Option<&str>,
1083    ) -> anyhow::Result<ManagedBrowserSession> {
1084        let session = self
1085            .browser
1086            .session(browser_session_id)
1087            .await
1088            .ok_or_else(|| anyhow!("session `{}` not found", browser_session_id))?;
1089        if let (Some(owner), Some(model_session_id)) =
1090            (session.owner_session_id.as_deref(), model_session_id)
1091        {
1092            if owner != model_session_id {
1093                anyhow::bail!(
1094                    "browser session `{}` belongs to a different engine session",
1095                    browser_session_id
1096                );
1097            }
1098        }
1099        Ok(session)
1100    }
1101
1102    async fn effective_allowed_hosts(&self, model_session_id: Option<&str>) -> Vec<String> {
1103        if let Some(model_session_id) = model_session_id {
1104            if let Some(state) = self.state.as_ref() {
1105                if let Some(instance) = state
1106                    .agent_teams
1107                    .instance_for_session(model_session_id)
1108                    .await
1109                {
1110                    if !instance.capabilities.net_scopes.allow_hosts.is_empty() {
1111                        return normalize_allowed_hosts(
1112                            instance.capabilities.net_scopes.allow_hosts,
1113                        );
1114                    }
1115                }
1116            }
1117        }
1118        normalize_allowed_hosts(self.browser.config.allowed_hosts.clone())
1119    }
1120
1121    async fn ensure_page_read_allowed(
1122        &self,
1123        model_session_id: Option<&str>,
1124        current_url: &str,
1125    ) -> anyhow::Result<()> {
1126        ensure_allowed_browser_url(
1127            current_url,
1128            &self.effective_allowed_hosts(model_session_id).await,
1129        )?;
1130        Ok(())
1131    }
1132
1133    async fn ensure_action_allowed(
1134        &self,
1135        model_session_id: Option<&str>,
1136        current_url: &str,
1137    ) -> anyhow::Result<()> {
1138        self.ensure_page_read_allowed(model_session_id, current_url)
1139            .await?;
1140        let host = browser_url_host(current_url)?;
1141        if !is_local_or_private_host(&host)
1142            && !self.external_integrations_allowed(model_session_id).await
1143        {
1144            anyhow::bail!(
1145                "external integrations are disabled for this routine session on host `{}`",
1146                host
1147            );
1148        }
1149        Ok(())
1150    }
1151
1152    async fn external_integrations_allowed(&self, model_session_id: Option<&str>) -> bool {
1153        let Some(model_session_id) = model_session_id else {
1154            return true;
1155        };
1156        let Some(state) = self.state.as_ref() else {
1157            return true;
1158        };
1159        let Some(policy) = state.routine_session_policy(model_session_id).await else {
1160            return true;
1161        };
1162        state
1163            .get_routine(&policy.routine_id)
1164            .await
1165            .map(|routine| routine.external_integrations_allowed)
1166            .unwrap_or(true)
1167    }
1168
1169    async fn enforce_post_navigation(
1170        &self,
1171        browser_session_id: &str,
1172        final_url: &str,
1173        model_session_id: Option<&str>,
1174    ) -> anyhow::Result<()> {
1175        if let Err(err) = ensure_allowed_browser_url(
1176            final_url,
1177            &self.effective_allowed_hosts(model_session_id).await,
1178        ) {
1179            let _ = self
1180                .browser
1181                .call_sidecar::<_, BrowserCloseResult>(
1182                    "browser.close",
1183                    BrowserCloseParams {
1184                        session_id: browser_session_id.to_string(),
1185                    },
1186                )
1187                .await;
1188            self.browser.remove_session(browser_session_id).await;
1189            return Err(anyhow!("host_not_allowed: {}", err));
1190        }
1191        self.browser
1192            .update_session_url(browser_session_id, final_url.to_string())
1193            .await;
1194        Ok(())
1195    }
1196
1197    async fn update_action_url(
1198        &self,
1199        browser_session_id: &str,
1200        final_url: Option<&str>,
1201        model_session_id: Option<&str>,
1202    ) -> anyhow::Result<()> {
1203        if let Some(final_url) = final_url {
1204            self.enforce_post_navigation(browser_session_id, final_url, model_session_id)
1205                .await?;
1206        }
1207        Ok(())
1208    }
1209
1210    async fn store_artifact(
1211        &self,
1212        model_session_id: Option<&str>,
1213        browser_session_id: &str,
1214        kind: &str,
1215        label: Option<String>,
1216        extension: &str,
1217        bytes: &[u8],
1218        metadata: Option<Value>,
1219    ) -> anyhow::Result<BrowserArtifactRef> {
1220        fs::create_dir_all(&self.browser.artifact_root).await?;
1221        let artifact_id = format!("artifact-{}", Uuid::new_v4());
1222        let file_name = format!("{artifact_id}.{extension}");
1223        let target = self.browser.artifact_root.join(file_name);
1224        fs::write(&target, bytes)
1225            .await
1226            .with_context(|| format!("failed to write browser artifact `{}`", target.display()))?;
1227        let artifact = BrowserArtifactRef {
1228            artifact_id: artifact_id.clone(),
1229            uri: target.to_string_lossy().to_string(),
1230            kind: kind.to_string(),
1231            label,
1232            created_at_ms: now_ms(),
1233            metadata,
1234        };
1235        self.append_routine_artifact_if_needed(
1236            model_session_id,
1237            artifact.clone(),
1238            browser_session_id,
1239        )
1240        .await;
1241        Ok(artifact)
1242    }
1243
1244    async fn append_routine_artifact_if_needed(
1245        &self,
1246        model_session_id: Option<&str>,
1247        artifact: BrowserArtifactRef,
1248        browser_session_id: &str,
1249    ) {
1250        let Some(model_session_id) = model_session_id else {
1251            return;
1252        };
1253        let Some(state) = self.state.as_ref() else {
1254            return;
1255        };
1256        let Some(policy) = state.routine_session_policy(model_session_id).await else {
1257            return;
1258        };
1259        let run_artifact = RoutineRunArtifact {
1260            artifact_id: artifact.artifact_id.clone(),
1261            uri: artifact.uri.clone(),
1262            kind: artifact.kind.clone(),
1263            label: artifact.label.clone(),
1264            created_at_ms: artifact.created_at_ms,
1265            metadata: artifact.metadata.clone(),
1266        };
1267        let _ = state
1268            .append_routine_run_artifact(&policy.run_id, run_artifact.clone())
1269            .await;
1270        state.event_bus.publish(EngineEvent::new(
1271            "routine.run.artifact_added",
1272            json!({
1273                "runID": policy.run_id,
1274                "routineID": policy.routine_id,
1275                "browserSessionID": browser_session_id,
1276                "artifact": run_artifact,
1277            }),
1278        ));
1279    }
1280}