Skip to main content

browser_control/cli/
fetch.rs

//! `browser-control fetch` — run an HTTP request from the page's context.
//!
//! The request is executed by injecting [`crate::dom::scripts::FETCH_JS`] into
//! the active page via the engine-agnostic [`PageSession`] and parsing the
//! `{status, statusText, headers, body}` envelope it returns.
//!
//! Output mirrors `curl`:
//! - `--include` prepends `HTTP/1.1 <code> <text>\r\n`, the response headers,
//!   and a blank line to the body.
//! - `--output PATH` writes the output (the `--include` preamble, if requested,
//!   followed by the body) to PATH, and `chmod 0600`s it on Unix.
//! - Without `--output`, the same output is written to stdout.
//!
//! Transport errors (script failure, attach failure) exit non-zero; HTTP
//! status is reported verbatim and does not change the exit code.

15use std::path::{Path, PathBuf};
16
17use anyhow::{anyhow, bail, Context, Result};
18use serde_json::{json, Map, Value};
19
20use crate::cli::mcp::resolve_browser;
21use crate::dom::scripts::FETCH_JS;
22use crate::session::PageSession;
23
24#[allow(clippy::too_many_arguments)]
25pub async fn run(
26    browser: Option<String>,
27    url: String,
28    method: String,
29    headers: Vec<String>,
30    data: Option<String>,
31    target: Option<String>,
32    include: bool,
33    output: Option<PathBuf>,
34) -> Result<()> {
35    let header_map = parse_headers(&headers)?;
36    let expr = build_fetch_expr(&url, &method, &header_map, data.as_deref())?;
37
38    let resolved = resolve_browser(browser).await?;
39    let session =
40        PageSession::attach(&resolved.endpoint, resolved.engine, target.as_deref()).await?;
41    let result = session.evaluate(&expr, true).await;
42    session.close().await;
43    let result = result?;
44
45    let envelope = parse_envelope(&result)?;
46
47    let mut bytes = Vec::new();
48    if include {
49        bytes.extend_from_slice(format_status_and_headers(&envelope).as_bytes());
50    }
51    bytes.extend_from_slice(envelope.body.as_bytes());
52
53    match output {
54        Some(path) => {
55            write_file(&path, &bytes)?;
56            tracing::info!(
57                target = "fetch",
58                "wrote {} bytes to {}",
59                bytes.len(),
60                path.display()
61            );
62            eprintln!("wrote {} bytes to {}", bytes.len(), path.display());
63        }
64        None => {
65            use std::io::Write;
66            let mut out = std::io::stdout().lock();
67            out.write_all(&bytes)?;
68        }
69    }
70    Ok(())
71}
72
/// The response envelope produced by `FETCH_JS`: the decoded form of its
/// `{status, statusText, headers, body}` object.
#[derive(Clone, Debug, PartialEq)]
struct FetchEnvelope {
    /// Numeric HTTP status code.
    status: u16,
    /// Text accompanying the status (`statusText` in the envelope); may be empty.
    status_text: String,
    /// Response headers as `(name, value)` pairs.
    headers: Vec<(String, String)>,
    /// Response body as text.
    body: String,
}
81
82fn parse_headers(headers: &[String]) -> Result<Map<String, Value>> {
83    let mut map = Map::new();
84    for raw in headers {
85        let (k, v) = raw
86            .split_once(':')
87            .ok_or_else(|| anyhow!("malformed header `{raw}`: expected `Key: Value`"))?;
88        let key = k.trim();
89        if key.is_empty() {
90            bail!("malformed header `{raw}`: empty key");
91        }
92        // Per RFC 7230, header names are tokens; reject whitespace/control in name.
93        if key.chars().any(|c| c.is_whitespace() || c.is_control()) {
94            bail!("malformed header `{raw}`: invalid character in name");
95        }
96        let value = v.trim();
97        map.insert(key.to_string(), Value::String(value.to_string()));
98    }
99    Ok(map)
100}
101
102/// Build the JS expression that invokes `FETCH_JS` with a JSON-encoded arg
103/// string. All user-controlled fields are JSON-encoded twice (once inside the
104/// args object, once when we embed the args string as a JS string literal)
105/// so the page can't be tricked into evaluating arbitrary expressions.
106fn build_fetch_expr(
107    url: &str,
108    method: &str,
109    headers: &Map<String, Value>,
110    body: Option<&str>,
111) -> Result<String> {
112    let args = json!({
113        "url": url,
114        "method": method,
115        "headers": Value::Object(headers.clone()),
116        "body": body,
117    });
118    let args_json = serde_json::to_string(&args)?;
119    let args_literal = serde_json::to_string(&args_json)?;
120    Ok(format!("({FETCH_JS})({args_literal})"))
121}
122
123/// `FETCH_JS` returns `JSON.stringify({...})`, so the evaluator hands us a
124/// JSON value of *type string*. Decode the inner JSON.
125fn parse_envelope(v: &Value) -> Result<FetchEnvelope> {
126    let s = v
127        .as_str()
128        .ok_or_else(|| anyhow!("fetch script returned non-string value: {v}"))?;
129    let inner: Value = serde_json::from_str(s)
130        .with_context(|| format!("fetch script returned invalid JSON: {s}"))?;
131    let status = inner
132        .get("status")
133        .and_then(|x| x.as_u64())
134        .ok_or_else(|| anyhow!("fetch envelope missing `status`"))? as u16;
135    let status_text = inner
136        .get("statusText")
137        .and_then(|x| x.as_str())
138        .unwrap_or("")
139        .to_string();
140    let headers = inner
141        .get("headers")
142        .and_then(|x| x.as_object())
143        .map(|m| {
144            m.iter()
145                .map(|(k, v)| (k.clone(), v.as_str().unwrap_or_default().to_string()))
146                .collect::<Vec<_>>()
147        })
148        .unwrap_or_default();
149    let body = inner
150        .get("body")
151        .and_then(|x| x.as_str())
152        .unwrap_or("")
153        .to_string();
154    Ok(FetchEnvelope {
155        status,
156        status_text,
157        headers,
158        body,
159    })
160}
161
162fn format_status_and_headers(env: &FetchEnvelope) -> String {
163    let mut s = format!("HTTP/1.1 {} {}\r\n", env.status, env.status_text);
164    for (k, v) in &env.headers {
165        s.push_str(&format!("{k}: {v}\r\n"));
166    }
167    s.push_str("\r\n");
168    s
169}
170
171fn write_file(path: &Path, body: &[u8]) -> Result<()> {
172    std::fs::write(path, body).with_context(|| format!("failed to write {}", path.display()))?;
173    #[cfg(unix)]
174    {
175        use std::os::unix::fs::PermissionsExt;
176        std::fs::set_permissions(path, std::fs::Permissions::from_mode(0o600))
177            .with_context(|| format!("failed to chmod 600 {}", path.display()))?;
178    }
179    Ok(())
180}
181
/// Unit tests for the pure helpers of the `fetch` command: header parsing,
/// expression building, envelope decoding, `--include` formatting, and file
/// output. Nothing here talks to a browser.
#[cfg(test)]
mod tests {
    use super::*;

    /// Create (and return) a scratch directory under the OS temp dir that is
    /// unique to this process and to `label`, so tests running in parallel
    /// never share or delete each other's files.
    fn scratch_dir(label: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "fetch-test-scratch-{}-{label}",
            std::process::id()
        ));
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Split `expr` into the double-encoded argument and decode it back to
    /// the args object, asserting the `(FETCH_JS)(<literal>)` shape on the way.
    fn decode_args(expr: &str) -> (String, Value) {
        let prefix = format!("({FETCH_JS})(");
        let literal = expr
            .strip_prefix(&prefix)
            .unwrap()
            .strip_suffix(')')
            .unwrap();
        let args_str: String = serde_json::from_str(literal).unwrap();
        let args: Value = serde_json::from_str(&args_str).unwrap();
        (literal.to_string(), args)
    }

    #[test]
    fn parse_headers_basic() {
        let m = parse_headers(&[
            "Accept: application/json".to_string(),
            "X-Token: abc".to_string(),
        ])
        .unwrap();
        assert_eq!(m.get("Accept").unwrap(), &json!("application/json"));
        assert_eq!(m.get("X-Token").unwrap(), &json!("abc"));
    }

    #[test]
    fn parse_headers_empty_input_yields_empty_map() {
        let m = parse_headers(&[]).unwrap();
        assert!(m.is_empty());
    }

    #[test]
    fn parse_headers_trims_extra_spaces() {
        let m = parse_headers(&["  Accept   :   text/plain  ".to_string()]).unwrap();
        assert_eq!(m.get("Accept").unwrap(), &json!("text/plain"));
    }

    #[test]
    fn parse_headers_value_with_colon_kept_intact() {
        // Only the first `:` separates key/value; the value may contain colons.
        let m = parse_headers(&["Authorization: Bearer a:b:c".to_string()]).unwrap();
        assert_eq!(m.get("Authorization").unwrap(), &json!("Bearer a:b:c"));
    }

    #[test]
    fn parse_headers_rejects_missing_colon() {
        let err = parse_headers(&["NoColonHere".to_string()]).unwrap_err();
        assert!(err.to_string().contains("malformed header"));
    }

    #[test]
    fn parse_headers_rejects_empty_key() {
        let err = parse_headers(&[": value".to_string()]).unwrap_err();
        assert!(err.to_string().contains("empty key"));
    }

    #[test]
    fn parse_headers_rejects_whitespace_in_name() {
        let err = parse_headers(&["bad name: v".to_string()]).unwrap_err();
        assert!(err.to_string().contains("invalid character"));
    }

    #[test]
    fn build_expr_json_escapes_url_and_body() {
        let mut h = Map::new();
        h.insert("X".to_string(), json!("y"));
        // Body and URL contain quotes / backslashes / newlines that would
        // break naive string interpolation.
        let url = "https://x.test/?q=\"hi\"";
        let body = "line1\n\"line2\"\\end";
        let expr = build_fetch_expr(url, "POST", &h, Some(body)).unwrap();
        // The expression must wrap a single JSON-encoded string argument.
        let (literal, args) = decode_args(&expr);
        // No raw user-controlled quote or newline can appear unescaped at the
        // top level — the literal is JSON, so quotes inside are `\"` and the
        // string contains no real newline byte.
        assert!(!literal.contains('\n'));
        // Decoding twice must round-trip the original values exactly.
        assert_eq!(args["url"], url);
        assert_eq!(args["body"], body);
        assert_eq!(args["method"], "POST");
    }

    #[test]
    fn build_expr_method_and_headers_round_trip() {
        let mut h = Map::new();
        h.insert("Accept".to_string(), json!("*/*"));
        let expr = build_fetch_expr("https://x.test/", "GET", &h, None).unwrap();
        let (_, args) = decode_args(&expr);
        assert_eq!(args["url"], "https://x.test/");
        assert_eq!(args["method"], "GET");
        assert_eq!(args["headers"]["Accept"], "*/*");
        assert!(args["body"].is_null());
    }

    #[test]
    fn parse_envelope_decodes_inner_json() {
        let inner = json!({
            "status": 200,
            "statusText": "OK",
            "headers": {"content-type": "text/plain"},
            "body": "hello"
        });
        let v = Value::String(inner.to_string());
        let env = parse_envelope(&v).unwrap();
        assert_eq!(env.status, 200);
        assert_eq!(env.status_text, "OK");
        assert_eq!(env.body, "hello");
        assert_eq!(
            env.headers,
            vec![("content-type".to_string(), "text/plain".to_string())]
        );
    }

    #[test]
    fn parse_envelope_defaults_optional_fields() {
        // Only `status` is mandatory; everything else falls back to empty.
        let v = Value::String(json!({"status": 204}).to_string());
        let env = parse_envelope(&v).unwrap();
        assert_eq!(env.status, 204);
        assert_eq!(env.status_text, "");
        assert!(env.headers.is_empty());
        assert_eq!(env.body, "");
    }

    #[test]
    fn parse_envelope_rejects_non_string() {
        let v = json!({"status": 200});
        assert!(parse_envelope(&v).is_err());
    }

    #[test]
    fn parse_envelope_rejects_invalid_inner_json() {
        let v = Value::String("definitely not json".to_string());
        let err = parse_envelope(&v).unwrap_err();
        assert!(err.to_string().contains("invalid JSON"));
    }

    #[test]
    fn parse_envelope_rejects_missing_status() {
        let v = Value::String(json!({"body": "x"}).to_string());
        let err = parse_envelope(&v).unwrap_err();
        assert!(err.to_string().contains("missing `status`"));
    }

    #[test]
    fn format_include_emits_status_and_headers() {
        let env = FetchEnvelope {
            status: 404,
            status_text: "Not Found".to_string(),
            headers: vec![
                ("content-type".to_string(), "text/plain".to_string()),
                ("x-trace".to_string(), "abc".to_string()),
            ],
            body: "missing".to_string(),
        };
        let s = format_status_and_headers(&env);
        assert_eq!(
            s,
            "HTTP/1.1 404 Not Found\r\n\
             content-type: text/plain\r\n\
             x-trace: abc\r\n\
             \r\n"
        );
    }

    #[test]
    fn format_include_without_headers_is_status_line_and_blank_line() {
        let env = FetchEnvelope {
            status: 204,
            status_text: "No Content".to_string(),
            headers: Vec::new(),
            body: String::new(),
        };
        assert_eq!(
            format_status_and_headers(&env),
            "HTTP/1.1 204 No Content\r\n\r\n"
        );
    }

    #[test]
    fn write_file_chmods_0600_on_unix() {
        let dir = scratch_dir("fresh");
        let p = dir.join("out.bin");
        write_file(&p, b"hello").unwrap();
        assert_eq!(std::fs::read(&p).unwrap(), b"hello");
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mode = std::fs::metadata(&p).unwrap().permissions().mode() & 0o777;
            assert_eq!(mode, 0o600);
        }
        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn write_file_replaces_and_tightens_existing_file() {
        let dir = scratch_dir("existing");
        let p = dir.join("out.bin");
        std::fs::write(&p, b"a much longer previous payload").unwrap();
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            std::fs::set_permissions(&p, std::fs::Permissions::from_mode(0o644)).unwrap();
        }
        write_file(&p, b"new").unwrap();
        // Old content must be gone entirely, not merely overwritten in part.
        assert_eq!(std::fs::read(&p).unwrap(), b"new");
        #[cfg(unix)]
        {
            use std::os::unix::fs::PermissionsExt;
            let mode = std::fs::metadata(&p).unwrap().permissions().mode() & 0o777;
            assert_eq!(mode, 0o600);
        }
        let _ = std::fs::remove_dir_all(&dir);
    }
}