Skip to main content

chrome_cli/chrome/
discovery.rs

1use std::io::{Read, Write};
2use std::net::TcpStream;
3use std::time::Duration;
4
5use serde::Deserialize;
6
7use super::ChromeError;
8use super::platform;
9
10/// Browser version information returned by `/json/version`.
11#[derive(Debug, Deserialize)]
12#[serde(rename_all = "camelCase")]
13#[allow(dead_code)]
14pub struct BrowserVersion {
15    /// The browser name and version (e.g. "Chrome/120.0.6099.71").
16    #[serde(rename = "Browser")]
17    pub browser: String,
18
19    /// The CDP protocol version (e.g. "1.3").
20    #[serde(rename = "Protocol-Version")]
21    pub protocol_version: String,
22
23    /// The browser-level WebSocket debugger URL.
24    #[serde(rename = "webSocketDebuggerUrl")]
25    pub ws_debugger_url: String,
26}
27
28/// Information about a single debuggable target (tab, service worker, etc.).
29#[derive(Debug, Clone, Deserialize)]
30#[serde(rename_all = "camelCase")]
31#[allow(dead_code)]
32pub struct TargetInfo {
33    /// Unique target identifier.
34    pub id: String,
35
36    /// Target type (e.g. "page", "`background_page`").
37    #[serde(rename = "type")]
38    pub target_type: String,
39
40    /// Page title.
41    pub title: String,
42
43    /// Current URL.
44    pub url: String,
45
46    /// WebSocket URL to debug this specific target.
47    #[serde(rename = "webSocketDebuggerUrl")]
48    pub ws_debugger_url: Option<String>,
49}
50
51/// Query Chrome's `/json/version` endpoint.
52///
53/// # Errors
54///
55/// Returns `ChromeError::HttpError` on connection failure or `ChromeError::ParseError`
56/// if the response cannot be deserialized.
57pub async fn query_version(host: &str, port: u16) -> Result<BrowserVersion, ChromeError> {
58    let body = http_get(host, port, "/json/version").await?;
59    serde_json::from_str(&body).map_err(|e| ChromeError::ParseError(e.to_string()))
60}
61
62/// Query Chrome's `/json/list` endpoint for debuggable targets.
63///
64/// # Errors
65///
66/// Returns `ChromeError::HttpError` on connection failure or `ChromeError::ParseError`
67/// if the response cannot be deserialized.
68#[allow(dead_code)]
69pub async fn query_targets(host: &str, port: u16) -> Result<Vec<TargetInfo>, ChromeError> {
70    let body = http_get(host, port, "/json/list").await?;
71    serde_json::from_str(&body).map_err(|e| ChromeError::ParseError(e.to_string()))
72}
73
74/// Read the `DevToolsActivePort` file from the default user data directory.
75///
76/// Returns `(port, ws_path)` on success.
77///
78/// # Errors
79///
80/// Returns `ChromeError::NoActivePort` if the file is missing or unreadable,
81/// or `ChromeError::ParseError` if the contents are malformed.
82pub fn read_devtools_active_port() -> Result<(u16, String), ChromeError> {
83    let data_dir = platform::default_user_data_dir().ok_or(ChromeError::NoActivePort)?;
84    read_devtools_active_port_from(&data_dir)
85}
86
87/// Read the `DevToolsActivePort` file from a specific directory.
88///
89/// This is the parameterized version of [`read_devtools_active_port`] that accepts
90/// an explicit data directory, enabling unit testing without relying on
91/// platform-specific defaults.
92///
93/// # Errors
94///
95/// Returns `ChromeError::NoActivePort` if the file is missing or unreadable,
96/// or `ChromeError::ParseError` if the contents are malformed.
97pub fn read_devtools_active_port_from(
98    data_dir: &std::path::Path,
99) -> Result<(u16, String), ChromeError> {
100    let path = data_dir.join("DevToolsActivePort");
101    let contents = std::fs::read_to_string(&path).map_err(|_| ChromeError::NoActivePort)?;
102    parse_devtools_active_port(&contents)
103}
104
105/// Parse the contents of a `DevToolsActivePort` file.
106///
107/// The file has two lines: a port number and a WebSocket path.
108fn parse_devtools_active_port(contents: &str) -> Result<(u16, String), ChromeError> {
109    let mut lines = contents.lines();
110    let port_str = lines.next().ok_or(ChromeError::NoActivePort)?;
111    let port: u16 = port_str.trim().parse().map_err(|_| {
112        ChromeError::ParseError(format!("invalid port in DevToolsActivePort: {port_str}"))
113    })?;
114    let ws_path = lines
115        .next()
116        .ok_or(ChromeError::NoActivePort)?
117        .trim()
118        .to_string();
119    Ok((port, ws_path))
120}
121
122/// Attempt to discover a running Chrome instance.
123///
124/// Tries `DevToolsActivePort` file first, then falls back to the given host/port.
125/// Returns the WebSocket URL and port on success.
126///
127/// # Errors
128///
129/// Returns `ChromeError::NotRunning` if no Chrome instance can be discovered.
130pub async fn discover_chrome(host: &str, port: u16) -> Result<(String, u16), ChromeError> {
131    // Try DevToolsActivePort file first
132    if let Ok((file_port, _ws_path)) = read_devtools_active_port() {
133        if let Ok(version) = query_version("127.0.0.1", file_port).await {
134            return Ok((version.ws_debugger_url, file_port));
135        }
136    }
137
138    // Fall back to the explicitly given host/port
139    query_version(host, port)
140        .await
141        .map(|version| (version.ws_debugger_url, port))
142        .map_err(|e| ChromeError::NotRunning(format!("discovery failed on {host}:{port}: {e}")))
143}
144
145/// Check whether `buf` contains a complete HTTP response (headers + full body per Content-Length).
146fn is_http_response_complete(buf: &[u8]) -> bool {
147    let Some(header_end) = find_header_end(buf) else {
148        return false;
149    };
150    let body_start = header_end + 4; // skip past \r\n\r\n
151    let headers = &buf[..header_end];
152    match parse_content_length(headers) {
153        Some(cl) => buf.len() >= body_start + cl,
154        None => true, // no Content-Length; headers are complete, assume body is too
155    }
156}
157
/// Locate the first `\r\n\r\n` sequence in `buf`.
///
/// Returns the offset of the sequence's leading `\r`, or `None` when the
/// sequence does not occur.
fn find_header_end(buf: &[u8]) -> Option<usize> {
    const TERMINATOR: &[u8] = b"\r\n\r\n";
    buf.windows(TERMINATOR.len())
        .enumerate()
        .find_map(|(offset, window)| (window == TERMINATOR).then_some(offset))
}
162
/// Extract the `Content-Length` value from raw header bytes.
///
/// The header name is matched case-insensitively. Only the first matching
/// header is considered; if its value is not a valid `usize`, or the bytes are
/// not UTF-8, or no such header exists, the result is `None`.
fn parse_content_length(headers: &[u8]) -> Option<usize> {
    std::str::from_utf8(headers)
        .ok()?
        .lines()
        .filter_map(|line| line.split_once(':'))
        .find(|(name, _)| name.trim().eq_ignore_ascii_case("content-length"))
        .and_then(|(_, value)| value.trim().parse().ok())
}
175
176/// Parse a raw HTTP response buffer into the body string.
177///
178/// Validates the status line is 200 OK and extracts the body after headers.
179fn parse_http_response(buf: &[u8]) -> Result<String, ChromeError> {
180    let header_end = find_header_end(buf)
181        .ok_or_else(|| ChromeError::HttpError("malformed HTTP response".into()))?;
182    let body_start = header_end + 4;
183
184    let headers = std::str::from_utf8(&buf[..header_end])
185        .map_err(|e| ChromeError::HttpError(format!("invalid UTF-8 in headers: {e}")))?;
186
187    // Check for HTTP 200 status
188    let status_line = headers
189        .lines()
190        .next()
191        .ok_or_else(|| ChromeError::HttpError("empty response".into()))?;
192    if !status_line.contains(" 200 ") {
193        return Err(ChromeError::HttpError(format!(
194            "unexpected HTTP status: {status_line}"
195        )));
196    }
197
198    // Extract body: use Content-Length if available, otherwise take everything after headers
199    let body_bytes = if let Some(cl) = parse_content_length(&buf[..header_end]) {
200        let end = (body_start + cl).min(buf.len());
201        &buf[body_start..end]
202    } else {
203        &buf[body_start..]
204    };
205
206    String::from_utf8(body_bytes.to_vec())
207        .map_err(|e| ChromeError::HttpError(format!("invalid UTF-8 in body: {e}")))
208}
209
210/// Perform a simple HTTP GET request using blocking I/O in a `spawn_blocking` context.
211async fn http_get(host: &str, port: u16, path: &str) -> Result<String, ChromeError> {
212    let addr = format!("{host}:{port}");
213    let request = format!("GET {path} HTTP/1.1\r\nHost: {addr}\r\nConnection: close\r\n\r\n");
214
215    let (addr_clone, request_clone) = (addr.clone(), request);
216    tokio::task::spawn_blocking(move || {
217        let mut stream = TcpStream::connect_timeout(
218            &addr_clone
219                .parse()
220                .map_err(|e| ChromeError::HttpError(format!("invalid address: {e}")))?,
221            Duration::from_secs(2),
222        )
223        .map_err(|e| ChromeError::HttpError(format!("connection failed to {addr_clone}: {e}")))?;
224
225        stream.set_read_timeout(Some(Duration::from_secs(5))).ok();
226
227        stream
228            .write_all(request_clone.as_bytes())
229            .map_err(|e| ChromeError::HttpError(format!("write failed: {e}")))?;
230
231        // Read response incrementally, stopping once we have Content-Length bytes
232        // of body. This avoids blocking on EOF when Chrome keeps the connection open.
233        let mut buf = Vec::with_capacity(4096);
234        let mut tmp = [0u8; 4096];
235        loop {
236            match stream.read(&mut tmp) {
237                Ok(0) => break, // EOF
238                Ok(n) => {
239                    buf.extend_from_slice(&tmp[..n]);
240                    if is_http_response_complete(&buf) {
241                        break;
242                    }
243                }
244                Err(e)
245                    if e.kind() == std::io::ErrorKind::WouldBlock
246                        || e.kind() == std::io::ErrorKind::TimedOut =>
247                {
248                    // Timeout/EAGAIN: if we already have a complete response, use it
249                    if is_http_response_complete(&buf) {
250                        break;
251                    }
252                    return Err(ChromeError::HttpError(format!("read timed out: {e}")));
253                }
254                Err(e) => {
255                    return Err(ChromeError::HttpError(format!("read failed: {e}")));
256                }
257            }
258        }
259
260        parse_http_response(&buf)
261    })
262    .await
263    .map_err(|e| ChromeError::HttpError(format!("task join failed: {e}")))?
264}
265
/// Unit tests for JSON decoding, `DevToolsActivePort` parsing, and the
/// hand-rolled HTTP response helpers. None of them touch the network.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_browser_version() {
        // Extra keys in the payload must be tolerated.
        let json = r#"{
            "Browser": "Chrome/120.0.6099.71",
            "Protocol-Version": "1.3",
            "User-Agent": "Mozilla/5.0",
            "V8-Version": "12.0.267.8",
            "WebKit-Version": "537.36",
            "webSocketDebuggerUrl": "ws://127.0.0.1:9222/devtools/browser/abc-123"
        }"#;
        let v: BrowserVersion = serde_json::from_str(json).unwrap();
        assert_eq!(v.browser, "Chrome/120.0.6099.71");
        assert_eq!(v.protocol_version, "1.3");
        assert!(v.ws_debugger_url.contains("ws://"));
    }

    #[test]
    fn parse_target_info() {
        let json = r#"[{
            "description": "",
            "devtoolsFrontendUrl": "/devtools/inspector.html",
            "id": "ABCDEF",
            "title": "New Tab",
            "type": "page",
            "url": "chrome://newtab/",
            "webSocketDebuggerUrl": "ws://127.0.0.1:9222/devtools/page/ABCDEF"
        }]"#;
        let targets: Vec<TargetInfo> = serde_json::from_str(json).unwrap();
        assert_eq!(targets.len(), 1);
        assert_eq!(targets[0].id, "ABCDEF");
        assert_eq!(targets[0].target_type, "page");
        assert_eq!(targets[0].title, "New Tab");
        assert!(targets[0].ws_debugger_url.is_some());
    }

    #[test]
    fn parse_devtools_active_port_valid() {
        let contents = "9222\n/devtools/browser/abc-123\n";
        let (port, path) = parse_devtools_active_port(contents).unwrap();
        assert_eq!(port, 9222);
        assert_eq!(path, "/devtools/browser/abc-123");
    }

    #[test]
    fn parse_devtools_active_port_empty() {
        let result = parse_devtools_active_port("");
        assert!(result.is_err());
    }

    #[test]
    fn parse_devtools_active_port_invalid_port() {
        let result = parse_devtools_active_port("notaport\n/ws/path\n");
        assert!(result.is_err());
    }

    #[test]
    fn parse_devtools_active_port_missing_ws_path() {
        // A port line with no second line is rejected.
        let result = parse_devtools_active_port("9222\n");
        assert!(result.is_err());
    }

    #[test]
    fn read_devtools_active_port_from_dir() {
        // Include the process id so concurrent test runs sharing one temp
        // directory never read, overwrite, or delete each other's fixture.
        let dir = std::env::temp_dir().join(format!(
            "chrome-cli-test-devtools-port-{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).unwrap();
        let file = dir.join("DevToolsActivePort");
        std::fs::write(&file, "9333\n/devtools/browser/xyz-789\n").unwrap();

        let result = read_devtools_active_port_from(&dir);

        // Clean up before asserting so a failing assertion cannot leak the directory.
        let _ = std::fs::remove_dir_all(&dir);

        let (port, path) = result.unwrap();
        assert_eq!(port, 9333);
        assert_eq!(path, "/devtools/browser/xyz-789");
    }

    #[test]
    fn read_devtools_active_port_from_missing_dir() {
        let dir = std::path::Path::new("/nonexistent/chrome-cli-test");
        let result = read_devtools_active_port_from(dir);
        assert!(result.is_err());
    }

    #[test]
    fn parse_http_response_with_content_length() {
        let raw = b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello, world!";
        let body = parse_http_response(raw).unwrap();
        assert_eq!(body, "Hello, world!");
    }

    #[test]
    fn parse_http_response_without_content_length() {
        let raw = b"HTTP/1.1 200 OK\r\nConnection: close\r\n\r\n{\"ok\":true}";
        let body = parse_http_response(raw).unwrap();
        assert_eq!(body, "{\"ok\":true}");
    }

    #[test]
    fn parse_http_response_content_length_zero() {
        let raw = b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n";
        let body = parse_http_response(raw).unwrap();
        assert_eq!(body, "");
    }

    #[test]
    fn parse_http_response_malformed_no_separator() {
        let raw = b"HTTP/1.1 200 OK\nno double crlf here";
        let result = parse_http_response(raw);
        assert!(result.is_err());
    }

    #[test]
    fn parse_http_response_non_200_status() {
        let raw = b"HTTP/1.1 404 Not Found\r\nContent-Length: 0\r\n\r\n";
        let result = parse_http_response(raw);
        assert!(result.is_err());
    }

    #[test]
    fn is_http_response_complete_with_content_length() {
        let partial = b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nHe";
        assert!(!is_http_response_complete(partial));

        let complete = b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nHello";
        assert!(is_http_response_complete(complete));
    }

    #[test]
    fn is_http_response_complete_no_headers_yet() {
        assert!(!is_http_response_complete(b"HTTP/1.1 200 OK\r\n"));
    }

    #[test]
    fn is_http_response_complete_without_content_length() {
        let response = b"HTTP/1.1 200 OK\r\nConnection: close\r\n\r\nbody";
        assert!(is_http_response_complete(response));
    }

    #[test]
    fn find_header_end_reports_offset_of_terminator() {
        assert_eq!(find_header_end(b"HTTP/1.1 200 OK\r\n\r\nbody"), Some(15));
        assert_eq!(find_header_end(b"\r\n\r"), None);
    }

    #[test]
    fn parse_content_length_is_case_insensitive() {
        let headers = b"HTTP/1.1 200 OK\r\ncontent-LENGTH: 42";
        assert_eq!(parse_content_length(headers), Some(42));
    }
}