Skip to main content

browser_control/cli/
fetch.rs

//! `browser-control fetch` — run an HTTP request from the page's context.
//!
//! The request is executed by injecting [`crate::dom::scripts::FETCH_JS`] into
//! the active page via the engine-agnostic [`PageSession`] and parsing the
//! `{status, statusText, headers, body}` envelope it returns.
//!
//! Output mirrors `curl`:
//! - `--include` prepends `HTTP/1.1 <code> <text>\r\n`, the response headers,
//!   and a blank line to the body.
//! - `--output PATH` writes the output (including the `--include` header
//!   block, if requested) to PATH, restricts it to mode `0600` on Unix, and
//!   reports the byte count on stderr.
//! - Without `--output`, the output is written to stdout.
//!
//! Transport errors (script failure, attach failure) exit non-zero; HTTP
//! status is reported verbatim and does not change the exit code.
14
15use std::path::{Path, PathBuf};
16
17use anyhow::{anyhow, bail, Context, Result};
18use serde_json::{json, Map, Value};
19
20use crate::cli::mcp::resolve_browser;
21use crate::dom::scripts::FETCH_JS;
22use crate::session::PageSession;
23
24#[allow(clippy::too_many_arguments)]
25pub async fn run(
26    browser: Option<String>,
27    url: String,
28    method: String,
29    headers: Vec<String>,
30    data: Option<String>,
31    target: Option<String>,
32    include: bool,
33    output: Option<PathBuf>,
34) -> Result<()> {
35    let header_map = parse_headers(&headers)?;
36    let expr = build_fetch_expr(&url, &method, &header_map, data.as_deref())?;
37
38    let resolved = resolve_browser(browser).await?;
39    let session = match target.as_deref() {
40        Some(regex) => {
41            PageSession::attach(&resolved.endpoint, resolved.engine, Some(regex)).await?
42        }
43        None => PageSession::attach_for_origin(&resolved.endpoint, resolved.engine, &url).await?,
44    };
45    let result = session.evaluate(&expr, true).await;
46    session.close().await;
47    let result = result?;
48
49    let envelope = parse_envelope(&result)?;
50
51    let mut bytes = Vec::new();
52    if include {
53        bytes.extend_from_slice(format_status_and_headers(&envelope).as_bytes());
54    }
55    bytes.extend_from_slice(envelope.body.as_bytes());
56
57    match output {
58        Some(path) => {
59            write_file(&path, &bytes)?;
60            tracing::info!(
61                target = "fetch",
62                "wrote {} bytes to {}",
63                bytes.len(),
64                path.display()
65            );
66            eprintln!("wrote {} bytes to {}", bytes.len(), path.display());
67        }
68        None => {
69            use std::io::Write;
70            let mut out = std::io::stdout().lock();
71            out.write_all(&bytes)?;
72        }
73    }
74    Ok(())
75}
76
/// Decoded form of the `{status, statusText, headers, body}` object that
/// `FETCH_JS` hands back.
#[derive(Clone, Debug, PartialEq)]
struct FetchEnvelope {
    /// Numeric HTTP status code.
    status: u16,
    /// Reason phrase accompanying the status (may be empty).
    status_text: String,
    /// Response headers as `(name, value)` pairs.
    headers: Vec<(String, String)>,
    /// Response body as text.
    body: String,
}
85
86fn parse_headers(headers: &[String]) -> Result<Map<String, Value>> {
87    let mut map = Map::new();
88    for raw in headers {
89        let (k, v) = raw
90            .split_once(':')
91            .ok_or_else(|| anyhow!("malformed header `{raw}`: expected `Key: Value`"))?;
92        let key = k.trim();
93        if key.is_empty() {
94            bail!("malformed header `{raw}`: empty key");
95        }
96        // Per RFC 7230, header names are tokens; reject whitespace/control in name.
97        if key.chars().any(|c| c.is_whitespace() || c.is_control()) {
98            bail!("malformed header `{raw}`: invalid character in name");
99        }
100        let value = v.trim();
101        map.insert(key.to_string(), Value::String(value.to_string()));
102    }
103    Ok(map)
104}
105
106/// Build the JS expression that invokes `FETCH_JS` with a JSON-encoded arg
107/// string. All user-controlled fields are JSON-encoded twice (once inside the
108/// args object, once when we embed the args string as a JS string literal)
109/// so the page can't be tricked into evaluating arbitrary expressions.
110fn build_fetch_expr(
111    url: &str,
112    method: &str,
113    headers: &Map<String, Value>,
114    body: Option<&str>,
115) -> Result<String> {
116    let args = json!({
117        "url": url,
118        "method": method,
119        "headers": Value::Object(headers.clone()),
120        "body": body,
121    });
122    let args_json = serde_json::to_string(&args)?;
123    let args_literal = serde_json::to_string(&args_json)?;
124    Ok(format!("({FETCH_JS})({args_literal})"))
125}
126
127/// `FETCH_JS` returns `JSON.stringify({...})`, so the evaluator hands us a
128/// JSON value of *type string*. Decode the inner JSON.
129fn parse_envelope(v: &Value) -> Result<FetchEnvelope> {
130    let s = v
131        .as_str()
132        .ok_or_else(|| anyhow!("fetch script returned non-string value: {v}"))?;
133    let inner: Value = serde_json::from_str(s)
134        .with_context(|| format!("fetch script returned invalid JSON: {s}"))?;
135    let status = inner
136        .get("status")
137        .and_then(|x| x.as_u64())
138        .ok_or_else(|| anyhow!("fetch envelope missing `status`"))? as u16;
139    let status_text = inner
140        .get("statusText")
141        .and_then(|x| x.as_str())
142        .unwrap_or("")
143        .to_string();
144    let headers = inner
145        .get("headers")
146        .and_then(|x| x.as_object())
147        .map(|m| {
148            m.iter()
149                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or_default().to_string()))
150                .collect::<Vec<_>>()
151        })
152        .unwrap_or_default();
153    let body = inner
154        .get("body")
155        .and_then(|x| x.as_str())
156        .unwrap_or("")
157        .to_string();
158    Ok(FetchEnvelope {
159        status,
160        status_text,
161        headers,
162        body,
163    })
164}
165
166fn format_status_and_headers(env: &FetchEnvelope) -> String {
167    let mut s = format!("HTTP/1.1 {} {}\r\n", env.status, env.status_text);
168    for (k, v) in &env.headers {
169        s.push_str(&format!("{k}: {v}\r\n"));
170    }
171    s.push_str("\r\n");
172    s
173}
174
175fn write_file(path: &Path, body: &[u8]) -> Result<()> {
176    std::fs::write(path, body).with_context(|| format!("failed to write {}", path.display()))?;
177    #[cfg(unix)]
178    {
179        use std::os::unix::fs::PermissionsExt;
180        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))
181            .with_context(|| format!("failed to chmod 600 {}", path.display()))?;
182    }
183    Ok(())
184}
185
/// Unit tests for the pure helpers in this module (header parsing, expression
/// building, envelope decoding, head formatting) and for `write_file`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Strip the `(FETCH_JS)(` … `)` wrapper and return the string-literal
    /// argument exactly as it appears in the expression.
    fn literal_arg(expr: &str) -> &str {
        let prefix = format!("({FETCH_JS})(");
        expr.strip_prefix(&prefix)
            .expect("expression must start with the wrapped FETCH_JS call")
            .strip_suffix(')')
            .expect("expression must end with the closing call paren")
    }

    /// Undo both JSON encodings applied by `build_fetch_expr`.
    fn decode_args(literal: &str) -> Value {
        let args_str: String = serde_json::from_str(literal).unwrap();
        serde_json::from_str(&args_str).unwrap()
    }

    /// Wrap an envelope object the way the evaluator delivers it: as a JSON
    /// string containing the serialized object.
    fn as_script_result(inner: &Value) -> Value {
        Value::String(inner.to_string())
    }

    /// Per-process scratch path in the system temp directory, so the tests do
    /// not depend on the source tree being writable.
    fn scratch_file(tag: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!(
            "browser-control-fetch-{tag}-{}.bin",
            std::process::id()
        ))
    }

    #[cfg(unix)]
    fn mode_of(path: &std::path::Path) -> u32 {
        use std::os::unix::fs::PermissionsExt;
        std::fs::metadata(path).unwrap().permissions().mode() & 0o777
    }

    #[test]
    fn parse_headers_basic() {
        let m = parse_headers(&[
            "Accept: application/json".to_string(),
            "X-Token: abc".to_string(),
        ])
        .unwrap();
        assert_eq!(m.get("Accept").unwrap(), &json!("application/json"));
        assert_eq!(m.get("X-Token").unwrap(), &json!("abc"));
    }

    #[test]
    fn parse_headers_empty_input_yields_empty_map() {
        let m = parse_headers(&[]).unwrap();
        assert!(m.is_empty());
    }

    #[test]
    fn parse_headers_trims_extra_spaces() {
        let m = parse_headers(&["  Accept   :   text/plain  ".to_string()]).unwrap();
        assert_eq!(m.get("Accept").unwrap(), &json!("text/plain"));
    }

    #[test]
    fn parse_headers_value_with_colon_kept_intact() {
        // Only the first `:` separates key/value; the value may contain colons.
        let m = parse_headers(&["Authorization: Bearer a:b:c".to_string()]).unwrap();
        assert_eq!(m.get("Authorization").unwrap(), &json!("Bearer a:b:c"));
    }

    #[test]
    fn parse_headers_rejects_missing_colon() {
        let err = parse_headers(&["NoColonHere".to_string()]).unwrap_err();
        assert!(err.to_string().contains("malformed header"));
    }

    #[test]
    fn parse_headers_rejects_empty_key() {
        let err = parse_headers(&[": value".to_string()]).unwrap_err();
        assert!(err.to_string().contains("empty key"));
    }

    #[test]
    fn parse_headers_rejects_whitespace_in_name() {
        let err = parse_headers(&["bad name: v".to_string()]).unwrap_err();
        assert!(err.to_string().contains("invalid character"));
    }

    #[test]
    fn build_expr_json_escapes_url_and_body() {
        let mut h = Map::new();
        h.insert("X".to_string(), json!("y"));
        // Body and URL contain quotes / backslashes / newlines that would
        // break naive string interpolation.
        let url = "https://x.test/?q=\"hi\"";
        let body = "line1\n\"line2\"\\end";
        let expr = build_fetch_expr(url, "POST", &h, Some(body)).unwrap();
        // The expression must wrap a single JSON-encoded string argument.
        let literal = literal_arg(&expr);
        // No raw user-controlled quote or newline can appear unescaped at the
        // top level — the literal is JSON, so quotes inside are `\"` and the
        // string contains no real newline byte.
        assert!(!literal.contains('\n'));
        // Decode the literal back twice and confirm round-trip equality.
        let args = decode_args(literal);
        assert_eq!(args["url"], url);
        assert_eq!(args["body"], body);
        assert_eq!(args["method"], "POST");
    }

    #[test]
    fn build_expr_method_and_headers_round_trip() {
        let mut h = Map::new();
        h.insert("Accept".to_string(), json!("*/*"));
        let expr = build_fetch_expr("https://x.test/", "GET", &h, None).unwrap();
        let args = decode_args(literal_arg(&expr));
        assert_eq!(args["url"], "https://x.test/");
        assert_eq!(args["method"], "GET");
        assert_eq!(args["headers"]["Accept"], "*/*");
        assert!(args["body"].is_null());
    }

    #[test]
    fn parse_envelope_decodes_inner_json() {
        let v = as_script_result(&json!({
            "status": 200,
            "statusText": "OK",
            "headers": {"content-type": "text/plain"},
            "body": "hello"
        }));
        let env = parse_envelope(&v).unwrap();
        assert_eq!(env.status, 200);
        assert_eq!(env.status_text, "OK");
        assert_eq!(env.body, "hello");
        assert_eq!(
            env.headers,
            vec![("content-type".to_string(), "text/plain".to_string())]
        );
    }

    #[test]
    fn parse_envelope_defaults_optional_fields() {
        let env = parse_envelope(&as_script_result(&json!({"status": 204}))).unwrap();
        assert_eq!(env.status, 204);
        assert_eq!(env.status_text, "");
        assert!(env.headers.is_empty());
        assert_eq!(env.body, "");
    }

    #[test]
    fn parse_envelope_rejects_non_string() {
        let v = json!({"status": 200});
        assert!(parse_envelope(&v).is_err());
    }

    #[test]
    fn parse_envelope_rejects_missing_status() {
        let err = parse_envelope(&as_script_result(&json!({"body": "x"}))).unwrap_err();
        assert!(err.to_string().contains("missing `status`"));
    }

    #[test]
    fn parse_envelope_rejects_invalid_inner_json() {
        let v = Value::String("not json".to_string());
        assert!(parse_envelope(&v).is_err());
    }

    #[test]
    fn format_include_emits_status_and_headers() {
        let env = FetchEnvelope {
            status: 404,
            status_text: "Not Found".to_string(),
            headers: vec![
                ("content-type".to_string(), "text/plain".to_string()),
                ("x-trace".to_string(), "abc".to_string()),
            ],
            body: "missing".to_string(),
        };
        let s = format_status_and_headers(&env);
        assert_eq!(
            s,
            "HTTP/1.1 404 Not Found\r\n\
             content-type: text/plain\r\n\
             x-trace: abc\r\n\
             \r\n"
        );
    }

    #[test]
    fn format_include_without_headers() {
        let env = FetchEnvelope {
            status: 204,
            status_text: "No Content".to_string(),
            headers: Vec::new(),
            body: String::new(),
        };
        assert_eq!(
            format_status_and_headers(&env),
            "HTTP/1.1 204 No Content\r\n\r\n"
        );
    }

    #[test]
    fn write_file_chmods_0600_on_unix() {
        let p = scratch_file("create");
        let _ = std::fs::remove_file(&p);
        write_file(&p, b"hello").unwrap();
        assert_eq!(std::fs::read(&p).unwrap(), b"hello");
        #[cfg(unix)]
        assert_eq!(mode_of(&p), 0o600);
        let _ = std::fs::remove_file(&p);
    }

    #[test]
    fn write_file_replaces_existing_file() {
        let p = scratch_file("overwrite");
        std::fs::write(&p, b"a much longer previous body").unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
        }
        write_file(&p, b"new").unwrap();
        // Old content must be gone entirely, not merely overwritten in place.
        assert_eq!(std::fs::read(&p).unwrap(), b"new");
        #[cfg(unix)]
        assert_eq!(mode_of(&p), 0o600);
        let _ = std::fs::remove_file(&p);
    }
}
344}