Skip to main content

roder_api/
chrome.rs

1//! Shared Chrome browser-bridge contract.
2//!
3//! This module is the single source of truth that ties together the four layers
4//! of the Roder Chrome integration:
5//!
6//! * the Manifest V3 browser extension (`/Users/pz/w/roder-web-extention`), which
7//!   speaks the JSON bridge envelope described below over the remote WebSocket
8//!   app-server;
9//! * the remote WebSocket transport in `roder-app-server` (`remote.rs`), which
10//!   registers each connected extension with the [`ChromeBridge`] and forwards
11//!   commands to it;
12//! * the `chrome/*` app-server methods, which call [`ChromeController::dispatch`];
13//! * the model-facing `chrome_*` tools in `roder-ext-chrome`, which are generic
14//!   over an injected [`ChromeController`] so they can be unit-tested against a
15//!   fake bridge.
16//!
17//! The single live [`ChromeBridge`] instance is reachable from every layer via
18//! [`bridge`], a process-global singleton (there is exactly one browser bridge
19//! per Roder process). Keeping the bridge here in `roder-api` lets both
20//! `roder-app-server` and `roder-ext-chrome` share it without `roder-ext-chrome`
21//! depending on `roder-core`.
22//!
23//! # Wire envelope
24//!
25//! * Roder → extension (command): `{ "type": "<command>", "id": "<corr>", ...params }`
26//! * extension → Roder (command result):
27//!   `{ "type": "command/result", "id": "<corr>", "ok": bool, "result"?: any, "error"?: string }`
28//! * extension → Roder (unsolicited event):
29//!   `{ "type": "hello"|"state"|"tabs/list"|"tab/updated"|"page/snapshot"|"activity"|"chat"|"output"|"debug/console"|"debug/network", ... }`
30//!
31//! Browser page content, DOM text, console output and network metadata are
32//! **untrusted input** and are tagged with `untrusted: true` so model prompts do
33//! not treat page content as user or system instructions.
34
35use std::collections::HashMap;
36use std::sync::atomic::{AtomicU64, Ordering};
37use std::sync::{Arc, Mutex, OnceLock};
38use std::time::Duration;
39
40use serde::{Deserialize, Serialize};
41use tokio::sync::{mpsc, oneshot};
42
43/// How aggressively Roder may act in the browser on the user's behalf.
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
45#[serde(rename_all = "snake_case")]
46pub enum ChromePermissionMode {
47    /// Chat, tab status and connection state only.
48    Observe,
49    /// Inspect actions can run when the site permits; privileged actions queue
50    /// for approval.
51    #[default]
52    Assist,
53    /// Enabled actions execute inside the approved plan and site scope;
54    /// protected actions still require explicit approval.
55    Control,
56}
57
58impl ChromePermissionMode {
59    pub fn as_str(self) -> &'static str {
60        match self {
61            ChromePermissionMode::Observe => "observe",
62            ChromePermissionMode::Assist => "assist",
63            ChromePermissionMode::Control => "control",
64        }
65    }
66
67    pub fn parse(value: &str) -> Option<Self> {
68        match value.trim().to_ascii_lowercase().as_str() {
69            "observe" => Some(ChromePermissionMode::Observe),
70            "assist" => Some(ChromePermissionMode::Assist),
71            "control" => Some(ChromePermissionMode::Control),
72            _ => None,
73        }
74    }
75}
76
77/// A browser the extension can drive (Chrome is P0, Edge is P1).
78#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
79#[serde(rename_all = "camelCase")]
80pub struct ChromeBrowser {
81    pub id: String,
82    pub name: String,
83    /// `chrome` or `edge`.
84    pub kind: String,
85}
86
87/// A tab visible to the extension.
88#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
89#[serde(rename_all = "camelCase")]
90pub struct ChromeTab {
91    pub id: i64,
92    #[serde(default)]
93    pub window_id: Option<i64>,
94    #[serde(default)]
95    pub title: Option<String>,
96    #[serde(default)]
97    pub url: Option<String>,
98    #[serde(default)]
99    pub fav_icon_url: Option<String>,
100    #[serde(default)]
101    pub active: bool,
102}
103
104/// Per-origin site permission record stored by the extension.
105#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
106#[serde(rename_all = "camelCase")]
107pub struct ChromeSitePermission {
108    pub origin: String,
109    #[serde(default)]
110    pub inspect: bool,
111    #[serde(default)]
112    pub interact: bool,
113    #[serde(default)]
114    pub eval: bool,
115    #[serde(default)]
116    pub debugger: bool,
117    #[serde(default)]
118    pub download: bool,
119    #[serde(default)]
120    pub upload: bool,
121    #[serde(default)]
122    pub recording: bool,
123    #[serde(default)]
124    pub schedule: bool,
125    #[serde(default)]
126    pub always_allow: bool,
127}
128
129/// Snapshot of the connection, mirrored to TUI/CLI/SDK clients via `chrome/status`.
130#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
131#[serde(rename_all = "camelCase")]
132pub struct ChromeStatus {
133    /// At least one extension is connected.
134    pub connected: bool,
135    /// Number of connected extension clients.
136    pub client_count: usize,
137    /// Whether Chrome tools are enabled for the session.
138    pub enabled: bool,
139    /// Capabilities advertised by the connected extension's `hello`.
140    pub capabilities: Vec<String>,
141    pub mode: ChromePermissionMode,
142    #[serde(default)]
143    pub active_tab: Option<ChromeTab>,
144    #[serde(default)]
145    pub browser: Option<ChromeBrowser>,
146    #[serde(default)]
147    pub last_error: Option<String>,
148    #[serde(default)]
149    pub remote_addr: Option<String>,
150}
151
152/// A command Roder asks the extension to run. `kind` is the wire `type`
153/// (e.g. `"page/snapshot"`); `params` is merged into the outgoing frame.
154#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
155pub struct ChromeCommand {
156    pub kind: String,
157    #[serde(default)]
158    pub params: serde_json::Value,
159}
160
161impl ChromeCommand {
162    pub fn new(kind: impl Into<String>) -> Self {
163        Self {
164            kind: kind.into(),
165            params: serde_json::Value::Null,
166        }
167    }
168
169    pub fn with_params(kind: impl Into<String>, params: serde_json::Value) -> Self {
170        Self {
171            kind: kind.into(),
172            params,
173        }
174    }
175}
176
/// Ways a browser command can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ChromeError {
    /// No extension is connected.
    NotConnected,
    /// Chrome tools are not enabled for this session.
    Disabled,
    /// The command was rejected by the user or by site/mode policy.
    Rejected(String),
    /// The extension did not respond within the dispatch deadline.
    Timeout,
    /// The connection dropped before a result arrived.
    Disconnected,
    /// The extension returned an error result.
    Remote(String),
}

impl std::fmt::Display for ChromeError {
    /// Writes the user-facing description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // Fixed messages are written directly; only the variants that
            // carry a payload need formatting.
            Self::NotConnected => f.write_str("no Chrome extension is connected"),
            Self::Disabled => f.write_str("Chrome tools are not enabled for this session"),
            Self::Timeout => f.write_str("Chrome extension did not respond in time"),
            Self::Disconnected => f.write_str("Chrome extension disconnected before responding"),
            Self::Rejected(reason) => write!(f, "browser action rejected: {reason}"),
            Self::Remote(message) => write!(f, "Chrome extension error: {message}"),
        }
    }
}

impl std::error::Error for ChromeError {}
210
/// Default deadline (30 seconds) that a `dispatch` call waits for the
/// extension's answer before giving up.
pub const CHROME_DISPATCH_TIMEOUT: Duration = Duration::from_secs(30);
213
214/// Abstraction over the live browser bridge so model tools can be tested against
215/// a fake. Implemented by [`ChromeBridge`].
216#[async_trait::async_trait]
217pub trait ChromeController: Send + Sync + 'static {
218    /// Current connection status.
219    fn status(&self) -> ChromeStatus;
220
221    /// Whether Chrome tools are enabled for the session.
222    fn is_enabled(&self) -> bool {
223        self.status().enabled
224    }
225
226    /// Enable/disable Chrome tools for the session.
227    fn set_enabled(&self, enabled: bool);
228
229    /// Set the permission mode.
230    fn set_mode(&self, mode: ChromePermissionMode);
231
232    /// Forward a command to the connected extension and await its result.
233    async fn dispatch(&self, command: ChromeCommand) -> Result<serde_json::Value, ChromeError>;
234}
235
236/// Handle returned to the transport when an extension connects: the transport
237/// drains `commands` and writes each frame to the socket.
238pub struct ChromeClientRegistration {
239    pub client_id: u64,
240    pub commands: mpsc::UnboundedReceiver<serde_json::Value>,
241}
242
243struct ClientHandle {
244    commands: mpsc::UnboundedSender<serde_json::Value>,
245    capabilities: Vec<String>,
246    remote_addr: Option<String>,
247    active_tab: Option<ChromeTab>,
248    browser: Option<ChromeBrowser>,
249}
250
251#[derive(Default)]
252struct BridgeState {
253    clients: HashMap<u64, ClientHandle>,
254    pending: HashMap<String, oneshot::Sender<Result<serde_json::Value, String>>>,
255    enabled: bool,
256    mode: ChromePermissionMode,
257    last_error: Option<String>,
258}
259
260impl BridgeState {
261    fn new() -> Self {
262        Self {
263            clients: HashMap::new(),
264            pending: HashMap::new(),
265            enabled: true,
266            mode: ChromePermissionMode::default(),
267            last_error: None,
268        }
269    }
270}
271
272/// The live connection registry. One instance per process (see [`bridge`]).
273///
274/// Transport integration (in `roder-app-server::remote`), on each connection:
275/// ```ignore
276/// // on first `hello` frame:
277/// let reg = bridge().register_client(Some(remote_addr.clone()), &frame);
278/// // spawn a task draining reg.commands -> websocket as Message::Text(JSON)
279/// // for every inbound bridge frame: bridge().ingest_frame(Some(reg.client_id), frame);
280/// // on disconnect: bridge().unregister_client(reg.client_id);
281/// ```
282pub struct ChromeBridge {
283    state: Mutex<BridgeState>,
284    next_client_id: AtomicU64,
285    next_corr_id: AtomicU64,
286    dispatch_timeout: Duration,
287}
288
289impl Default for ChromeBridge {
290    fn default() -> Self {
291        Self {
292            state: Mutex::new(BridgeState::new()),
293            next_client_id: AtomicU64::new(1),
294            next_corr_id: AtomicU64::new(1),
295            dispatch_timeout: CHROME_DISPATCH_TIMEOUT,
296        }
297    }
298}
299
300impl ChromeBridge {
301    pub fn new() -> Self {
302        Self::default()
303    }
304
305    #[cfg(test)]
306    fn with_timeout(timeout: Duration) -> Self {
307        Self {
308            dispatch_timeout: timeout,
309            ..Self::default()
310        }
311    }
312
313    fn lock(&self) -> std::sync::MutexGuard<'_, BridgeState> {
314        self.state.lock().expect("chrome bridge mutex poisoned")
315    }
316
317    /// Register a newly-connected extension. Returns the receiver the transport
318    /// must drain to deliver commands to that extension.
319    pub fn register_client(
320        &self,
321        remote_addr: Option<String>,
322        hello: &serde_json::Value,
323    ) -> ChromeClientRegistration {
324        let client_id = self.next_client_id.fetch_add(1, Ordering::SeqCst);
325        let (tx, rx) = mpsc::unbounded_channel();
326        let capabilities = string_array(hello.get("capabilities"));
327        let mut state = self.lock();
328        state.clients.insert(
329            client_id,
330            ClientHandle {
331                commands: tx,
332                capabilities,
333                remote_addr,
334                active_tab: None,
335                browser: None,
336            },
337        );
338        state.last_error = None;
339        ChromeClientRegistration {
340            client_id,
341            commands: rx,
342        }
343    }
344
345    /// Remove a disconnected extension and fail any of its pending commands.
346    pub fn unregister_client(&self, client_id: u64) {
347        let mut state = self.lock();
348        state.clients.remove(&client_id);
349        if state.clients.is_empty() {
350            // Fail in-flight commands; the sockets are gone.
351            for (_, tx) in state.pending.drain() {
352                let _ = tx.send(Err("extension disconnected".to_string()));
353            }
354        }
355    }
356
357    /// Route an inbound bridge frame from the extension: resolve a pending
358    /// command result or fold an unsolicited event into bridge state.
359    pub fn ingest_frame(&self, client_id: Option<u64>, frame: serde_json::Value) {
360        let kind = frame.get("type").and_then(|v| v.as_str()).unwrap_or("");
361        match kind {
362            "command/result" => {
363                let Some(id) = frame.get("id").and_then(|v| v.as_str()) else {
364                    return;
365                };
366                let sender = {
367                    let mut state = self.lock();
368                    state.pending.remove(id)
369                };
370                if let Some(sender) = sender {
371                    let ok = frame.get("ok").and_then(|v| v.as_bool()).unwrap_or(false);
372                    let payload = if ok {
373                        Ok(frame
374                            .get("result")
375                            .cloned()
376                            .unwrap_or(serde_json::Value::Null))
377                    } else {
378                        Err(frame
379                            .get("error")
380                            .and_then(|v| v.as_str())
381                            .unwrap_or("unknown extension error")
382                            .to_string())
383                    };
384                    let _ = sender.send(payload);
385                }
386            }
387            "hello" => {
388                let caps = string_array(frame.get("capabilities"));
389                let mut state = self.lock();
390                if let Some(id) = client_id
391                    && let Some(handle) = state.clients.get_mut(&id)
392                {
393                    handle.capabilities = caps;
394                }
395            }
396            "state" => {
397                let tab = frame
398                    .get("state")
399                    .and_then(|s| s.get("activeTab"))
400                    .and_then(|t| serde_json::from_value::<ChromeTab>(t.clone()).ok());
401                let mut state = self.lock();
402                if let Some(id) = client_id
403                    && let Some(handle) = state.clients.get_mut(&id)
404                {
405                    handle.active_tab = tab;
406                }
407            }
408            "tab/updated" => {
409                let tab = frame
410                    .get("tab")
411                    .and_then(|t| serde_json::from_value::<ChromeTab>(t.clone()).ok());
412                let mut state = self.lock();
413                if let Some(id) = client_id
414                    && let Some(handle) = state.clients.get_mut(&id)
415                {
416                    handle.active_tab = tab;
417                }
418            }
419            _ => {}
420        }
421    }
422
423    fn next_correlation_id(&self) -> String {
424        format!("rc-{}", self.next_corr_id.fetch_add(1, Ordering::SeqCst))
425    }
426}
427
428#[async_trait::async_trait]
429impl ChromeController for ChromeBridge {
430    fn status(&self) -> ChromeStatus {
431        let state = self.lock();
432        let primary = state.clients.values().next();
433        ChromeStatus {
434            connected: !state.clients.is_empty(),
435            client_count: state.clients.len(),
436            enabled: state.enabled,
437            capabilities: primary.map(|c| c.capabilities.clone()).unwrap_or_default(),
438            mode: state.mode,
439            active_tab: primary.and_then(|c| c.active_tab.clone()),
440            browser: primary.and_then(|c| c.browser.clone()),
441            last_error: state.last_error.clone(),
442            remote_addr: primary.and_then(|c| c.remote_addr.clone()),
443        }
444    }
445
446    fn set_enabled(&self, enabled: bool) {
447        self.lock().enabled = enabled;
448    }
449
450    fn set_mode(&self, mode: ChromePermissionMode) {
451        self.lock().mode = mode;
452    }
453
454    async fn dispatch(&self, command: ChromeCommand) -> Result<serde_json::Value, ChromeError> {
455        let corr = self.next_correlation_id();
456        let (res_tx, res_rx) = oneshot::channel();
457
458        // Build the outgoing frame `{ type, id, ...params }` and enqueue it on the
459        // first connected client. Hold the lock only for the send.
460        {
461            let mut state = self.lock();
462            if !state.enabled {
463                return Err(ChromeError::Disabled);
464            }
465            let Some(handle) = state.clients.values().next() else {
466                return Err(ChromeError::NotConnected);
467            };
468            let mut frame = serde_json::Map::new();
469            frame.insert("type".to_string(), serde_json::Value::String(command.kind));
470            frame.insert("id".to_string(), serde_json::Value::String(corr.clone()));
471            if let serde_json::Value::Object(params) = command.params {
472                for (key, value) in params {
473                    if key != "type" && key != "id" {
474                        frame.insert(key, value);
475                    }
476                }
477            }
478            if handle
479                .commands
480                .send(serde_json::Value::Object(frame))
481                .is_err()
482            {
483                return Err(ChromeError::Disconnected);
484            }
485            state.pending.insert(corr.clone(), res_tx);
486        }
487
488        match tokio::time::timeout(self.dispatch_timeout, res_rx).await {
489            Ok(Ok(Ok(result))) => Ok(result),
490            Ok(Ok(Err(message))) => Err(ChromeError::Remote(message)),
491            Ok(Err(_)) => Err(ChromeError::Disconnected),
492            Err(_) => {
493                self.lock().pending.remove(&corr);
494                Err(ChromeError::Timeout)
495            }
496        }
497    }
498}
499
500/// The process-global browser bridge. Every layer shares this instance.
501pub fn bridge() -> Arc<ChromeBridge> {
502    static BRIDGE: OnceLock<Arc<ChromeBridge>> = OnceLock::new();
503    BRIDGE.get_or_init(|| Arc::new(ChromeBridge::new())).clone()
504}
505
506fn string_array(value: Option<&serde_json::Value>) -> Vec<String> {
507    value
508        .and_then(|v| v.as_array())
509        .map(|items| {
510            items
511                .iter()
512                .filter_map(|item| item.as_str().map(ToString::to_string))
513                .collect()
514        })
515        .unwrap_or_default()
516}
517
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// A fresh bridge has no clients, is enabled and starts in `Assist` mode.
    #[test]
    fn status_reports_disconnected_by_default() {
        let status = ChromeBridge::new().status();
        assert!(!status.connected);
        assert_eq!(status.client_count, 0);
        assert!(status.enabled);
        assert_eq!(status.mode, ChromePermissionMode::Assist);
    }

    /// The disabled check wins even when a client is registered.
    #[tokio::test]
    async fn dispatch_respects_explicit_disable() {
        let bridge = ChromeBridge::new();
        bridge.set_enabled(false);
        let registration = bridge.register_client(None, &json!({ "capabilities": ["chat"] }));
        drop(registration.commands);

        let outcome = bridge.dispatch(ChromeCommand::new("tabs/list")).await;
        assert_eq!(outcome.unwrap_err(), ChromeError::Disabled);
    }

    /// A command reaches the client and its result comes back to the caller.
    #[tokio::test]
    async fn dispatch_round_trips_command_and_result() {
        let bridge = Arc::new(ChromeBridge::new());
        bridge.set_enabled(true);
        let mut reg = bridge.register_client(
            Some("127.0.0.1:9".to_string()),
            &json!({ "capabilities": ["tabs.list"] }),
        );

        // Stand-in for the extension: take the command frame and answer it.
        let responder = Arc::clone(&bridge);
        let extension = tokio::spawn(async move {
            let frame = reg.commands.recv().await.expect("command frame");
            assert_eq!(frame["type"], "page/snapshot");
            assert_eq!(frame["tabId"], 7);
            let id = frame["id"].as_str().unwrap().to_string();
            responder.ingest_frame(
                Some(reg.client_id),
                json!({ "type": "command/result", "id": id, "ok": true, "result": { "title": "Example" } }),
            );
        });

        let command = ChromeCommand::with_params("page/snapshot", json!({ "tabId": 7 }));
        let result = bridge.dispatch(command).await.expect("dispatch ok");
        assert_eq!(result["title"], "Example");
        extension.await.unwrap();

        let status = bridge.status();
        assert!(status.connected);
        assert_eq!(status.capabilities, vec!["tabs.list".to_string()]);
    }

    /// With a connected but silent client, dispatch gives up at the deadline.
    #[tokio::test]
    async fn dispatch_times_out_without_response() {
        let bridge = ChromeBridge::with_timeout(Duration::from_millis(20));
        bridge.set_enabled(true);
        let reg = bridge.register_client(None, &json!({}));
        // Hold the receiver so the send succeeds, but never reply.
        let _keep = reg.commands;

        let outcome = bridge.dispatch(ChromeCommand::new("tabs/list")).await;
        assert_eq!(outcome.unwrap_err(), ChromeError::Timeout);
    }

    /// Without any registered client there is nowhere to send the command.
    #[tokio::test]
    async fn not_connected_when_no_clients() {
        let bridge = ChromeBridge::new();
        bridge.set_enabled(true);

        let outcome = bridge.dispatch(ChromeCommand::new("tabs/list")).await;
        assert_eq!(outcome.unwrap_err(), ChromeError::NotConnected);
    }
}
607}