// cloudscraper_rs/external_deps/interpreters/boa.rs

1use boa_engine::{Context, Source};
2use once_cell::sync::Lazy;
3use regex::{Regex, RegexBuilder};
4
5use super::{InterpreterError, InterpreterResult, JavascriptInterpreter};
6
/// JavaScript interpreter that evaluates challenge scripts with the Boa engine.
///
/// The type is a stateless unit struct: each call builds its own
/// `boa_engine::Context`, so one value can be reused for any number of pages.
#[derive(Debug, Default)]
pub struct BoaJavascriptInterpreter;
10
11impl BoaJavascriptInterpreter {
12    pub fn new() -> Self {
13        Self
14    }
15
16    fn extract_scripts<'a>(&self, html: &'a str) -> Vec<&'a str> {
17        static SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
18            RegexBuilder::new(r"(?is)<script[^>]*>(?P<body>.*?)</script>")
19                .dot_matches_new_line(true)
20                .case_insensitive(true)
21                .build()
22                .unwrap()
23        });
24
25        SCRIPT_RE
26            .captures_iter(html)
27            .filter_map(|caps| caps.name("body").map(|m| m.as_str()))
28            .collect()
29    }
30
31    fn build_prelude(&self, host: &str) -> String {
32        format!(
33            r#"
34var __host = "{host}";
35var __scheme = "https://";
36var location = {{
37    href: __scheme + __host + "/",
38    hostname: __host,
39    protocol: "https:",
40    port: ""
41}};
42var window = {{ location: location }};
43var navigator = {{
44    userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
45    language: "en-US",
46    languages: ["en-US", "en"],
47    platform: "Win32"
48}};
49window.navigator = navigator;
50var history = {{ replaceState: function() {{}} }};
51window.history = history;
52var performance = {{ now: function() {{ return Date.now(); }} }};
53window.performance = performance;
54var __state = {{
55    values: {{}},
56    setValue: function(id, value) {{ this.values[id] = value; }},
57    getValue: function(id) {{ return this.values[id]; }}
58}};
59function __absUrl(input) {{
60    if (!input) return "";
61    if (input.startsWith("http://") || input.startsWith("https://")) return input;
62    if (input.startsWith("//")) return location.protocol + input;
63    if (input.startsWith("/")) return __scheme + __host + input;
64    return __scheme + __host + (input.startsWith("?") ? "/" + input : "/" + input.replace(/^\/+/, ""));
65}}
66function __makeElement(id) {{
67    var element = {{
68        id: id,
69        style: {{}},
70        attributes: {{}},
71        children: [],
72        addEventListener: function() {{}},
73        removeEventListener: function() {{}},
74        appendChild: function(child) {{ this.children.push(child); return child; }},
75        setAttribute: function(name, value) {{ this.attributes[name] = value; }},
76        getAttribute: function(name) {{ return this.attributes[name] || ""; }},
77        submit: function() {{}}
78    }};
79    Object.defineProperty(element, "value", {{
80        get: function() {{ return __state.getValue(id); }},
81        set: function(v) {{ __state.setValue(id, v); }}
82    }});
83    Object.defineProperty(element, "innerHTML", {{
84        get: function() {{ return this._innerHTML || ""; }},
85        set: function(val) {{
86            this._innerHTML = val;
87            var match = /href\s*=\s*['"]([^'"]+)['"]/i.exec(val || "");
88            if (match) {{
89                this.firstChild = {{ href: __absUrl(match[1]) }};
90            }} else {{
91                this.firstChild = {{ href: "" }};
92            }}
93        }}
94    }});
95    Object.defineProperty(element, "href", {{
96        get: function() {{ return this._href || ""; }},
97        set: function(val) {{ this._href = __absUrl(val); }}
98    }});
99    return element;
100}}
101var document = {{
102    _cache: {{}},
103    location: location,
104    createElement: function(tag) {{ return __makeElement(tag); }},
105    querySelector: function(sel) {{ return __makeElement(sel); }},
106    querySelectorAll: function(sel) {{ return []; }},
107    getElementById: function(id) {{
108        if (!this._cache[id]) {{
109            var el = __makeElement(id);
110            if (id === "challenge-form") {{
111                try {{
112                    el.elements = new Proxy({{}}, {{
113                        get: function(_, prop) {{
114                            if (typeof prop === "string") {{
115                                return document.getElementById(prop);
116                            }}
117                            return undefined;
118                        }}
119                    }});
120                }} catch (e) {{
121                    el.elements = {{ get: function(name) {{ return document.getElementById(name); }} }};
122                }}
123            }}
124            this._cache[id] = el;
125        }}
126        return this._cache[id];
127    }}
128}};
129window.document = document;
130document.defaultView = window;
131function setTimeout(cb, delay) {{ return cb(); }}
132function clearTimeout() {{}}
133var atob = function(str) {{
134    if (typeof Buffer !== "undefined") {{
135        return Buffer.from(str, "base64").toString("binary");
136    }}
137    return str;
138}};
139var btoa = function(str) {{
140    if (typeof Buffer !== "undefined") {{
141        return Buffer.from(str, "binary").toString("base64");
142    }}
143    return str;
144}};
145"#,
146            host = host
147        )
148    }
149
150    fn read_answer(&self, context: &mut Context) -> InterpreterResult<String> {
151        let answer = context
152            .eval(Source::from_bytes("__state.getValue('jschl_answer');"))
153            .map_err(|err| InterpreterError::Execution(err.to_string()))?;
154
155        if answer.is_null() || answer.is_undefined() {
156            return Err(InterpreterError::Execution(
157                "jschl_answer not set by script".into(),
158            ));
159        }
160
161        if let Ok(number) = answer.to_number(context)
162            && number.is_finite()
163        {
164            return Ok(format!("{number:.10}", number = number));
165        }
166
167        let text = answer
168            .to_string(context)
169            .map_err(|err| InterpreterError::Execution(err.to_string()))?
170            .to_std_string()
171            .map_err(|_| InterpreterError::Other("unable to convert interpreter output".into()))?;
172
173        Ok(text)
174    }
175}
176
177impl JavascriptInterpreter for BoaJavascriptInterpreter {
178    fn solve_challenge(&self, page_html: &str, host: &str) -> InterpreterResult<String> {
179        let scripts = self.extract_scripts(page_html);
180        if scripts.is_empty() {
181            return Err(InterpreterError::Execution(
182                "no <script> tags found in challenge page".into(),
183            ));
184        }
185
186        let mut context = Context::default();
187        let prelude = self.build_prelude(host);
188
189        context
190            .eval(Source::from_bytes(&prelude))
191            .map_err(|err| InterpreterError::Other(err.to_string()))?;
192
193        let mut executed_any = false;
194        for script in scripts {
195            if script.trim().is_empty() {
196                continue;
197            }
198            executed_any = true;
199            context
200                .eval(Source::from_bytes(script))
201                .map_err(|err| InterpreterError::Execution(err.to_string()))?;
202        }
203
204        if !executed_any {
205            return Err(InterpreterError::Execution(
206                "challenge page does not contain executable JavaScript".into(),
207            ));
208        }
209
210        self.read_answer(&mut context)
211    }
212
213    fn execute(&self, script: &str, host: &str) -> InterpreterResult<String> {
214        let mut context = Context::default();
215        let prelude = self.build_prelude(host);
216
217        context
218            .eval(Source::from_bytes(&prelude))
219            .map_err(|err| InterpreterError::Other(err.to_string()))?;
220
221        let result = context
222            .eval(Source::from_bytes(script))
223            .map_err(|err| InterpreterError::Execution(err.to_string()))?;
224
225        let text = result
226            .to_string(&mut context)
227            .map_err(|err| InterpreterError::Execution(err.to_string()))?
228            .to_std_string()
229            .map_err(|_| InterpreterError::Other("unable to convert interpreter output".into()))?;
230
231        Ok(text)
232    }
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// A script that assigns `10 + 5` to the `jschl_answer` field from inside a
    /// timer callback must yield the sum rendered with ten decimal places.
    #[test]
    fn solves_basic_challenge() {
        let page = r#"
        <html>
        <body>
            <form id="challenge-form">
                <input type="hidden" id="jschl_answer" />
            </form>
            <script>
                setTimeout(function(){
                    var a = 10;
                    var b = 5;
                    document.getElementById('jschl_answer').value = a + b;
                }, 4000);
            </script>
        </body>
        </html>
        "#;

        let solved = BoaJavascriptInterpreter::new()
            .solve_challenge(page, "example.com")
            .expect("challenge script should produce an answer");
        assert_eq!(solved, "15.0000000000");
    }

    /// A page without any `<script>` element is rejected with an execution error.
    #[test]
    fn error_when_missing_script() {
        let page = "<html><body>No script</body></html>";
        let outcome = BoaJavascriptInterpreter::new().solve_challenge(page, "example.com");
        assert!(matches!(outcome, Err(InterpreterError::Execution(_))));
    }
}