Skip to main content

browser_control/cli/
fetch.rs

1//! `browser-control fetch` — run an HTTP request from the page's context.
2//!
3//! The request is executed by injecting [`crate::dom::scripts::FETCH_JS`] into
4//! the active page via the engine-agnostic [`PageSession`] and parsing the
5//! `{status, statusText, headers, body}` envelope it returns.
6//!
7//! Output mirrors `curl`:
8//! - `--include` prepends `HTTP/1.1 <code> <text>\r\n` and response headers.
9//! - `--output PATH` writes the body to PATH (and `chmod 0600` on Unix).
10//! - Without `--output`, the body is written to stdout.
11//!
12//! Transport errors (script failure, attach failure) exit non-zero; HTTP
13//! status is reported verbatim and does not change the exit code.
14
15use std::path::{Path, PathBuf};
16
17use anyhow::{anyhow, bail, Context, Result};
18use serde_json::{json, Map, Value};
19
20use crate::cli::mcp::resolve_browser;
21use crate::dom::scripts::FETCH_JS;
22use crate::session::PageSession;
23
24#[allow(clippy::too_many_arguments)]
25pub async fn run(
26    browser: Option<String>,
27    url: String,
28    method: String,
29    headers: Vec<String>,
30    data: Option<String>,
31    target: Option<String>,
32    include: bool,
33    output: Option<PathBuf>,
34) -> Result<()> {
35    let header_map = parse_headers(&headers)?;
36    let expr = build_fetch_expr(&url, &method, &header_map, data.as_deref())?;
37
38    let resolved = resolve_browser(browser).await?;
39    let session =
40        PageSession::attach(&resolved.endpoint, resolved.engine, target.as_deref()).await?;
41    let result = session.evaluate(&expr, true).await;
42    session.close().await;
43    let result = result?;
44
45    let envelope = parse_envelope(&result)?;
46
47    let mut bytes = Vec::new();
48    if include {
49        bytes.extend_from_slice(format_status_and_headers(&envelope).as_bytes());
50    }
51    bytes.extend_from_slice(envelope.body.as_bytes());
52
53    match output {
54        Some(path) => {
55            write_file(&path, &bytes)?;
56            tracing::info!(target = "fetch", "wrote {} bytes to {}", bytes.len(), path.display());
57            eprintln!("wrote {} bytes to {}", bytes.len(), path.display());
58        }
59        None => {
60            use std::io::Write;
61            let mut out = std::io::stdout().lock();
62            out.write_all(&bytes)?;
63        }
64    }
65    Ok(())
66}
67
/// Parsed `{status, statusText, headers, body}` envelope from `FETCH_JS`.
///
/// Built by `parse_envelope` from the JSON string the fetch script returns.
#[derive(Debug, Clone, PartialEq)]
struct FetchEnvelope {
    /// HTTP status code, taken from the envelope's `status` field.
    status: u16,
    /// Reason phrase from `statusText`; empty when that field is absent or
    /// not a string.
    status_text: String,
    /// Response headers as `(name, value)` pairs, in the order the envelope's
    /// `headers` object yields them.
    headers: Vec<(String, String)>,
    /// Response body from `body`; empty when that field is absent or not a
    /// string.
    body: String,
}
76
77fn parse_headers(headers: &[String]) -> Result<Map<String, Value>> {
78    let mut map = Map::new();
79    for raw in headers {
80        let (k, v) = raw
81            .split_once(':')
82            .ok_or_else(|| anyhow!("malformed header `{raw}`: expected `Key: Value`"))?;
83        let key = k.trim();
84        if key.is_empty() {
85            bail!("malformed header `{raw}`: empty key");
86        }
87        // Per RFC 7230, header names are tokens; reject whitespace/control in name.
88        if key.chars().any(|c| c.is_whitespace() || c.is_control()) {
89            bail!("malformed header `{raw}`: invalid character in name");
90        }
91        let value = v.trim();
92        map.insert(key.to_string(), Value::String(value.to_string()));
93    }
94    Ok(map)
95}
96
97/// Build the JS expression that invokes `FETCH_JS` with a JSON-encoded arg
98/// string. All user-controlled fields are JSON-encoded twice (once inside the
99/// args object, once when we embed the args string as a JS string literal)
100/// so the page can't be tricked into evaluating arbitrary expressions.
101fn build_fetch_expr(
102    url: &str,
103    method: &str,
104    headers: &Map<String, Value>,
105    body: Option<&str>,
106) -> Result<String> {
107    let args = json!({
108        "url": url,
109        "method": method,
110        "headers": Value::Object(headers.clone()),
111        "body": body,
112    });
113    let args_json = serde_json::to_string(&args)?;
114    let args_literal = serde_json::to_string(&args_json)?;
115    Ok(format!("({FETCH_JS})({args_literal})"))
116}
117
118/// `FETCH_JS` returns `JSON.stringify({...})`, so the evaluator hands us a
119/// JSON value of *type string*. Decode the inner JSON.
120fn parse_envelope(v: &Value) -> Result<FetchEnvelope> {
121    let s = v
122        .as_str()
123        .ok_or_else(|| anyhow!("fetch script returned non-string value: {v}"))?;
124    let inner: Value = serde_json::from_str(s)
125        .with_context(|| format!("fetch script returned invalid JSON: {s}"))?;
126    let status = inner
127        .get("status")
128        .and_then(|x| x.as_u64())
129        .ok_or_else(|| anyhow!("fetch envelope missing `status`"))? as u16;
130    let status_text = inner
131        .get("statusText")
132        .and_then(|x| x.as_str())
133        .unwrap_or("")
134        .to_string();
135    let headers = inner
136        .get("headers")
137        .and_then(|x| x.as_object())
138        .map(|m| {
139            m.iter()
140                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or_default().to_string()))
141                .collect::<Vec<_>>()
142        })
143        .unwrap_or_default();
144    let body = inner
145        .get("body")
146        .and_then(|x| x.as_str())
147        .unwrap_or("")
148        .to_string();
149    Ok(FetchEnvelope {
150        status,
151        status_text,
152        headers,
153        body,
154    })
155}
156
157fn format_status_and_headers(env: &FetchEnvelope) -> String {
158    let mut s = format!("HTTP/1.1 {} {}\r\n", env.status, env.status_text);
159    for (k, v) in &env.headers {
160        s.push_str(&format!("{k}: {v}\r\n"));
161    }
162    s.push_str("\r\n");
163    s
164}
165
166fn write_file(path: &Path, body: &[u8]) -> Result<()> {
167    std::fs::write(path, body)
168        .with_context(|| format!("failed to write {}", path.display()))?;
169    #[cfg(unix)]
170    {
171        use std::os::unix::fs::PermissionsExt;
172        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))
173            .with_context(|| format!("failed to chmod 600 {}", path.display()))?;
174    }
175    Ok(())
176}
177
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned `Vec<String>` that `parse_headers` borrows.
    fn owned(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| line.to_string()).collect()
    }

    /// Strip the `(FETCH_JS)(` … `)` wrapper and return the JS string literal
    /// passed as the single argument.
    fn literal_arg(expr: &str) -> &str {
        let prefix = format!("({FETCH_JS})(");
        expr.strip_prefix(&prefix)
            .unwrap()
            .strip_suffix(')')
            .unwrap()
    }

    /// Undo both JSON encodings applied by `build_fetch_expr`.
    fn decode_args(literal: &str) -> Value {
        let encoded_once: String = serde_json::from_str(literal).unwrap();
        serde_json::from_str(&encoded_once).unwrap()
    }

    #[test]
    fn parse_headers_basic() {
        let parsed = parse_headers(&owned(&["Accept: application/json", "X-Token: abc"])).unwrap();
        assert_eq!(parsed.get("Accept"), Some(&json!("application/json")));
        assert_eq!(parsed.get("X-Token"), Some(&json!("abc")));
    }

    #[test]
    fn parse_headers_trims_extra_spaces() {
        let parsed = parse_headers(&owned(&["  Accept   :   text/plain  "])).unwrap();
        assert_eq!(parsed.get("Accept"), Some(&json!("text/plain")));
    }

    #[test]
    fn parse_headers_value_with_colon_kept_intact() {
        // The split happens at the first `:` only, so colons inside the value survive.
        let parsed = parse_headers(&owned(&["Authorization: Bearer a:b:c"])).unwrap();
        assert_eq!(parsed.get("Authorization"), Some(&json!("Bearer a:b:c")));
    }

    #[test]
    fn parse_headers_rejects_missing_colon() {
        let message = parse_headers(&owned(&["NoColonHere"]))
            .unwrap_err()
            .to_string();
        assert!(message.contains("malformed header"));
    }

    #[test]
    fn parse_headers_rejects_empty_key() {
        let message = parse_headers(&owned(&[": value"])).unwrap_err().to_string();
        assert!(message.contains("empty key"));
    }

    #[test]
    fn parse_headers_rejects_whitespace_in_name() {
        let message = parse_headers(&owned(&["bad name: v"]))
            .unwrap_err()
            .to_string();
        assert!(message.contains("invalid character"));
    }

    #[test]
    fn build_expr_json_escapes_url_and_body() {
        let mut request_headers = Map::new();
        request_headers.insert("X".to_string(), json!("y"));
        // Quotes, backslashes and newlines are exactly what naive string
        // interpolation would get wrong.
        let url = "https://x.test/?q=\"hi\"";
        let body = "line1\n\"line2\"\\end";
        let expr = build_fetch_expr(url, "POST", &request_headers, Some(body)).unwrap();
        // The call must receive exactly one JSON-encoded string argument.
        let literal = literal_arg(&expr);
        // Inside a JSON literal every quote is `\"` and every newline is `\n`,
        // so no real newline byte may show up.
        assert!(!literal.contains('\n'));
        // Decoding twice must give back the original values unchanged.
        let args = decode_args(literal);
        assert_eq!(args["url"], url);
        assert_eq!(args["body"], body);
        assert_eq!(args["method"], "POST");
    }

    #[test]
    fn build_expr_method_and_headers_round_trip() {
        let mut request_headers = Map::new();
        request_headers.insert("Accept".to_string(), json!("*/*"));
        let expr = build_fetch_expr("https://x.test/", "GET", &request_headers, None).unwrap();
        // Pull out the string-literal argument and decode it twice.
        let args = decode_args(literal_arg(&expr));
        assert_eq!(args["url"], "https://x.test/");
        assert_eq!(args["method"], "GET");
        assert_eq!(args["headers"]["Accept"], "*/*");
        assert!(args["body"].is_null());
    }

    #[test]
    fn parse_envelope_decodes_inner_json() {
        let payload = json!({
            "status": 200,
            "statusText": "OK",
            "headers": {"content-type": "text/plain"},
            "body": "hello"
        })
        .to_string();
        let env = parse_envelope(&Value::String(payload)).unwrap();
        assert_eq!(env.status, 200);
        assert_eq!(env.status_text, "OK");
        assert_eq!(env.body, "hello");
        assert_eq!(
            env.headers,
            vec![("content-type".to_string(), "text/plain".to_string())]
        );
    }

    #[test]
    fn parse_envelope_rejects_non_string() {
        // An object (rather than a string holding JSON) must be refused.
        let not_a_string = json!({"status": 200});
        assert!(parse_envelope(&not_a_string).is_err());
    }

    #[test]
    fn format_include_emits_status_and_headers() {
        let env = FetchEnvelope {
            status: 404,
            status_text: "Not Found".to_string(),
            headers: vec![
                ("content-type".to_string(), "text/plain".to_string()),
                ("x-trace".to_string(), "abc".to_string()),
            ],
            body: "missing".to_string(),
        };
        let expected = concat!(
            "HTTP/1.1 404 Not Found\r\n",
            "content-type: text/plain\r\n",
            "x-trace: abc\r\n",
            "\r\n",
        );
        assert_eq!(format_status_and_headers(&env), expected);
    }

    #[test]
    fn write_file_chmods_0600_on_unix() {
        // Scratch directory under the crate's `target/`, file name keyed by PID.
        let scratch = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("target")
            .join("fetch-test-scratch");
        std::fs::create_dir_all(&scratch).unwrap();
        let file_path = scratch.join(format!("out-{}.bin", std::process::id()));

        write_file(&file_path, b"hello").unwrap();
        assert_eq!(std::fs::read(&file_path).unwrap(), b"hello");
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let permission_bits =
                std::fs::metadata(&file_path).unwrap().permissions().mode() & 0o777;
            assert_eq!(permission_bits, 0o600);
        }

        // Best-effort cleanup; failures here are deliberately ignored.
        let _ = std::fs::remove_file(&file_path);
        let _ = std::fs::remove_dir(&scratch);
    }
}