Skip to main content

dravr_browser/
stealth.rs

1// ABOUTME: CDP-injected JS that hides automation tells and optionally captures matching network responses
2// ABOUTME: Applied via Page.addScriptToEvaluateOnNewDocument so it runs before any page JS on every frame
3//
4// SPDX-License-Identifier: MIT OR Apache-2.0
5// Copyright (c) 2026 dravr.ai
6
7use chromiumoxide::cdp::browser_protocol::page::AddScriptToEvaluateOnNewDocumentParams;
8use chromiumoxide::Page;
9
10use crate::error::{BrowserError, BrowserResult};
11
/// Name of the `window` property that the injected capture hook writes into.
///
/// The hook initialises it to an object of the form `{ byUrl: {}, last: null }`
/// and stores one record per matching request URL. Read by [`crate::capture`].
pub const CAPTURE_GLOBAL: &str = "__dravrCaptures";
14
/// Configuration for the stealth + capture script injected into a page.
///
/// The `Default` value disables capture entirely (no pattern, non-streaming).
#[derive(Debug, Clone, Default)]
pub struct StealthOptions {
    /// JavaScript regex source tested against each request URL. Responses to
    /// matching requests are recorded under [`CAPTURE_GLOBAL`]. `None` turns
    /// capture off, leaving only the stealth behaviour.
    pub capture_url_pattern: Option<String>,
    /// Selects how a matching `fetch` response body is recorded: `true` reads
    /// the body's `ReadableStream` chunk by chunk as data arrives (suited to
    /// SSE / streamed bodies); `false` records the whole body in one piece
    /// once it has resolved.
    pub streaming: bool,
}
26
27impl StealthOptions {
28    /// Stealth only — no network capture.
29    #[must_use]
30    pub const fn stealth_only() -> Self {
31        Self {
32            capture_url_pattern: None,
33            streaming: false,
34        }
35    }
36
37    /// Capture full (non-streamed) response bodies matching `pattern`.
38    #[must_use]
39    pub fn capture(pattern: impl Into<String>) -> Self {
40        Self {
41            capture_url_pattern: Some(pattern.into()),
42            streaming: false,
43        }
44    }
45
46    /// Capture streamed (SSE) response bodies matching `pattern` incrementally.
47    #[must_use]
48    pub fn capture_stream(pattern: impl Into<String>) -> Self {
49        Self {
50            capture_url_pattern: Some(pattern.into()),
51            streaming: true,
52        }
53    }
54}
55
56/// Build the JS payload for the given options.
57///
58/// The `navigator.webdriver` tell is already removed by Chrome's
59/// `--disable-blink-features=AutomationControlled` flag (emitted by
60/// chromiumoxide's `.hide()`), so JS-level navigator spoofing is intentionally
61/// omitted — `Object.defineProperty` overrides leave a detectable `.toString()`
62/// trace that modern detectors flag. The capture hook is the active payload.
63fn build_script(opts: &StealthOptions) -> String {
64    let Some(pattern) = opts.capture_url_pattern.as_ref() else {
65        // Pure stealth: nothing to inject beyond the launch-flag behavior.
66        return "(function(){})();".to_owned();
67    };
68
69    // Embed the pattern as a safe JS string literal.
70    let pattern_lit = serde_json::to_string(pattern).unwrap_or_else(|_| "\"\"".to_owned());
71
72    let capture_body = if opts.streaming {
73        // Tee the ReadableStream: push decoded chunks as they arrive.
74        "var rec = { status: r.status, chunks: [], done: false, streaming: true };
75                store.byUrl[url] = rec; store.last = url;
76                try {
77                    var reader = r.clone().body.getReader();
78                    var dec = new TextDecoder();
79                    (function pump() {
80                        reader.read().then(function(res) {
81                            if (res.done) { rec.done = true; return; }
82                            rec.chunks.push(dec.decode(res.value, { stream: true }));
83                            pump();
84                        }).catch(function() { rec.done = true; });
85                    })();
86                } catch (e) { rec.done = true; }"
87    } else {
88        // Capture the full body once it resolves.
89        "var rec = { status: r.status, chunks: [], done: false, streaming: false };
90                store.byUrl[url] = rec; store.last = url;
91                try {
92                    r.clone().text().then(function(t) {
93                        rec.chunks.push(t); rec.done = true;
94                    }).catch(function() { rec.done = true; });
95                } catch (e) { rec.done = true; }"
96    };
97
98    format!(
99        r"(function() {{
100    if (window.{CAPTURE_GLOBAL}) return;
101    var store = window.{CAPTURE_GLOBAL} = {{ byUrl: {{}}, last: null }};
102    var pattern = new RegExp({pattern_lit});
103
104    var origFetch = window.fetch;
105    window.fetch = function(input, init) {{
106        var url = typeof input === 'string' ? input : (input && input.url) || '';
107        var p = origFetch.apply(this, arguments);
108        if (pattern.test(url)) {{
109            p.then(function(r) {{
110                {capture_body}
111                return r;
112            }}).catch(function() {{}});
113        }}
114        return p;
115    }};
116
117    var origOpen = XMLHttpRequest.prototype.open;
118    var origSend = XMLHttpRequest.prototype.send;
119    XMLHttpRequest.prototype.open = function(method, url) {{
120        this.__dravrUrl = url;
121        return origOpen.apply(this, arguments);
122    }};
123    XMLHttpRequest.prototype.send = function() {{
124        var self = this;
125        var url = this.__dravrUrl || '';
126        if (pattern.test(url)) {{
127            this.addEventListener('load', function() {{
128                try {{
129                    store.byUrl[url] = {{
130                        status: self.status,
131                        chunks: [self.responseText],
132                        done: true,
133                        streaming: false
134                    }};
135                    store.last = url;
136                }} catch (e) {{}}
137            }});
138        }}
139        return origSend.apply(this, arguments);
140    }};
141}})();"
142    )
143}
144
145/// Inject the stealth + capture payload into a page.
146///
147/// Must be called after `new_page` and before navigation. Runs on every frame
148/// creation thereafter, including subsequent `page.goto(...)` calls.
149pub async fn apply_stealth(page: &Page, opts: &StealthOptions) -> BrowserResult<()> {
150    page.execute(AddScriptToEvaluateOnNewDocumentParams::new(build_script(
151        opts,
152    )))
153    .await
154    .map_err(|e| BrowserError::Browser {
155        reason: format!("Failed to inject stealth script: {e}"),
156    })?;
157    Ok(())
158}
159
#[cfg(test)]
mod tests {
    use super::*;

    /// Without a pattern the payload must not reference the capture global.
    #[test]
    fn stealth_only_has_no_capture_hook() {
        let js = build_script(&StealthOptions::stealth_only());
        assert!(!js.contains(CAPTURE_GLOBAL));
    }

    /// Full-body capture embeds the pattern and uses `text()`, not the
    /// stream reader.
    #[test]
    fn capture_script_embeds_pattern_and_global() {
        let js = build_script(&StealthOptions::capture("/completion"));
        assert!(js.contains(CAPTURE_GLOBAL));
        assert!(js.contains("/completion"));
        assert!(js.contains("r.clone().text()"));
        assert!(!js.contains("getReader"));
    }

    /// Streaming capture reads the body through a reader and does not fall
    /// back to the full-body path.
    #[test]
    fn stream_capture_script_tees_reader() {
        let js = build_script(&StealthOptions::capture_stream("/completion"));
        assert!(js.contains("getReader"));
        assert!(js.contains("streaming: true"));
        assert!(!js.contains("r.clone().text()"));
    }

    /// A double quote in the pattern must be embedded in escaped form only.
    #[test]
    fn pattern_with_quotes_is_escaped() {
        let js = build_script(&StealthOptions::capture(r#"a"b"#));
        // Embedded as a JSON string literal: the escaped form is present...
        assert!(js.contains(r#"a\"b"#));
        // ...and the raw unescaped sequence is absent, proving the quote
        // cannot terminate the JS string early.
        assert!(!js.contains(r#"a"b"#));
    }
}