cloudscraper_rs/external_deps/interpreters/
boa.rs1use boa_engine::{Context, Source};
2use once_cell::sync::Lazy;
3use regex::{Regex, RegexBuilder};
4
5use super::{InterpreterError, InterpreterResult, JavascriptInterpreter};
6
/// JavaScript interpreter backed by the Boa engine.
///
/// The type is a stateless unit struct: every call into it builds its own fresh engine
/// context, so values are interchangeable and trivially copyable. The common value traits
/// are derived so callers can clone, copy, and compare instances without ceremony.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub struct BoaJavascriptInterpreter;
10
11impl BoaJavascriptInterpreter {
12 pub fn new() -> Self {
13 Self
14 }
15
16 fn extract_scripts<'a>(&self, html: &'a str) -> Vec<&'a str> {
17 static SCRIPT_RE: Lazy<Regex> = Lazy::new(|| {
18 RegexBuilder::new(r"(?is)<script[^>]*>(?P<body>.*?)</script>")
19 .dot_matches_new_line(true)
20 .case_insensitive(true)
21 .build()
22 .unwrap()
23 });
24
25 SCRIPT_RE
26 .captures_iter(html)
27 .filter_map(|caps| caps.name("body").map(|m| m.as_str()))
28 .collect()
29 }
30
31 fn build_prelude(&self, host: &str) -> String {
32 format!(
33 r#"
34var __host = "{host}";
35var __scheme = "https://";
36var location = {{
37 href: __scheme + __host + "/",
38 hostname: __host,
39 protocol: "https:",
40 port: ""
41}};
42var window = {{ location: location }};
43var navigator = {{
44 userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
45 language: "en-US",
46 languages: ["en-US", "en"],
47 platform: "Win32"
48}};
49window.navigator = navigator;
50var history = {{ replaceState: function() {{}} }};
51window.history = history;
52var performance = {{ now: function() {{ return Date.now(); }} }};
53window.performance = performance;
54var __state = {{
55 values: {{}},
56 setValue: function(id, value) {{ this.values[id] = value; }},
57 getValue: function(id) {{ return this.values[id]; }}
58}};
59function __absUrl(input) {{
60 if (!input) return "";
61 if (input.startsWith("http://") || input.startsWith("https://")) return input;
62 if (input.startsWith("//")) return location.protocol + input;
63 if (input.startsWith("/")) return __scheme + __host + input;
64 return __scheme + __host + (input.startsWith("?") ? "/" + input : "/" + input.replace(/^\/+/, ""));
65}}
66function __makeElement(id) {{
67 var element = {{
68 id: id,
69 style: {{}},
70 attributes: {{}},
71 children: [],
72 addEventListener: function() {{}},
73 removeEventListener: function() {{}},
74 appendChild: function(child) {{ this.children.push(child); return child; }},
75 setAttribute: function(name, value) {{ this.attributes[name] = value; }},
76 getAttribute: function(name) {{ return this.attributes[name] || ""; }},
77 submit: function() {{}}
78 }};
79 Object.defineProperty(element, "value", {{
80 get: function() {{ return __state.getValue(id); }},
81 set: function(v) {{ __state.setValue(id, v); }}
82 }});
83 Object.defineProperty(element, "innerHTML", {{
84 get: function() {{ return this._innerHTML || ""; }},
85 set: function(val) {{
86 this._innerHTML = val;
87 var match = /href\s*=\s*['"]([^'"]+)['"]/i.exec(val || "");
88 if (match) {{
89 this.firstChild = {{ href: __absUrl(match[1]) }};
90 }} else {{
91 this.firstChild = {{ href: "" }};
92 }}
93 }}
94 }});
95 Object.defineProperty(element, "href", {{
96 get: function() {{ return this._href || ""; }},
97 set: function(val) {{ this._href = __absUrl(val); }}
98 }});
99 return element;
100}}
101var document = {{
102 _cache: {{}},
103 location: location,
104 createElement: function(tag) {{ return __makeElement(tag); }},
105 querySelector: function(sel) {{ return __makeElement(sel); }},
106 querySelectorAll: function(sel) {{ return []; }},
107 getElementById: function(id) {{
108 if (!this._cache[id]) {{
109 var el = __makeElement(id);
110 if (id === "challenge-form") {{
111 try {{
112 el.elements = new Proxy({{}}, {{
113 get: function(_, prop) {{
114 if (typeof prop === "string") {{
115 return document.getElementById(prop);
116 }}
117 return undefined;
118 }}
119 }});
120 }} catch (e) {{
121 el.elements = {{ get: function(name) {{ return document.getElementById(name); }} }};
122 }}
123 }}
124 this._cache[id] = el;
125 }}
126 return this._cache[id];
127 }}
128}};
129window.document = document;
130document.defaultView = window;
131function setTimeout(cb, delay) {{ return cb(); }}
132function clearTimeout() {{}}
133var atob = function(str) {{
134 if (typeof Buffer !== "undefined") {{
135 return Buffer.from(str, "base64").toString("binary");
136 }}
137 return str;
138}};
139var btoa = function(str) {{
140 if (typeof Buffer !== "undefined") {{
141 return Buffer.from(str, "binary").toString("base64");
142 }}
143 return str;
144}};
145"#,
146 host = host
147 )
148 }
149
150 fn read_answer(&self, context: &mut Context) -> InterpreterResult<String> {
151 let answer = context
152 .eval(Source::from_bytes("__state.getValue('jschl_answer');"))
153 .map_err(|err| InterpreterError::Execution(err.to_string()))?;
154
155 if answer.is_null() || answer.is_undefined() {
156 return Err(InterpreterError::Execution(
157 "jschl_answer not set by script".into(),
158 ));
159 }
160
161 if let Ok(number) = answer.to_number(context)
162 && number.is_finite()
163 {
164 return Ok(format!("{number:.10}", number = number));
165 }
166
167 let text = answer
168 .to_string(context)
169 .map_err(|err| InterpreterError::Execution(err.to_string()))?
170 .to_std_string()
171 .map_err(|_| InterpreterError::Other("unable to convert interpreter output".into()))?;
172
173 Ok(text)
174 }
175}
176
177impl JavascriptInterpreter for BoaJavascriptInterpreter {
178 fn solve_challenge(&self, page_html: &str, host: &str) -> InterpreterResult<String> {
179 let scripts = self.extract_scripts(page_html);
180 if scripts.is_empty() {
181 return Err(InterpreterError::Execution(
182 "no <script> tags found in challenge page".into(),
183 ));
184 }
185
186 let mut context = Context::default();
187 let prelude = self.build_prelude(host);
188
189 context
190 .eval(Source::from_bytes(&prelude))
191 .map_err(|err| InterpreterError::Other(err.to_string()))?;
192
193 let mut executed_any = false;
194 for script in scripts {
195 if script.trim().is_empty() {
196 continue;
197 }
198 executed_any = true;
199 context
200 .eval(Source::from_bytes(script))
201 .map_err(|err| InterpreterError::Execution(err.to_string()))?;
202 }
203
204 if !executed_any {
205 return Err(InterpreterError::Execution(
206 "challenge page does not contain executable JavaScript".into(),
207 ));
208 }
209
210 self.read_answer(&mut context)
211 }
212
213 fn execute(&self, script: &str, host: &str) -> InterpreterResult<String> {
214 let mut context = Context::default();
215 let prelude = self.build_prelude(host);
216
217 context
218 .eval(Source::from_bytes(&prelude))
219 .map_err(|err| InterpreterError::Other(err.to_string()))?;
220
221 let result = context
222 .eval(Source::from_bytes(script))
223 .map_err(|err| InterpreterError::Execution(err.to_string()))?;
224
225 let text = result
226 .to_string(&mut context)
227 .map_err(|err| InterpreterError::Execution(err.to_string()))?
228 .to_std_string()
229 .map_err(|_| InterpreterError::Other("unable to convert interpreter output".into()))?;
230
231 Ok(text)
232 }
233}
234
#[cfg(test)]
mod tests {
    use super::*;

    /// Host name handed to the interpreter in every test.
    const HOST: &str = "example.com";

    /// A numeric answer assigned through `getElementById(...).value` inside a `setTimeout`
    /// callback must come back rendered with ten decimal places.
    #[test]
    fn solves_basic_challenge() {
        let interpreter = BoaJavascriptInterpreter::new();
        let page = r#"
            <html>
            <body>
            <form id="challenge-form">
            <input type="hidden" id="jschl_answer" />
            </form>
            <script>
            setTimeout(function(){
            var a = 10;
            var b = 5;
            document.getElementById('jschl_answer').value = a + b;
            }, 4000);
            </script>
            </body>
            </html>
            "#;

        let solved = interpreter.solve_challenge(page, HOST);

        assert_eq!(solved.unwrap(), "15.0000000000");
    }

    /// A page without any `<script>` element is rejected with an `Execution` error.
    #[test]
    fn error_when_missing_script() {
        let outcome = BoaJavascriptInterpreter::new()
            .solve_challenge("<html><body>No script</body></html>", HOST);

        assert!(matches!(
            outcome.unwrap_err(),
            InterpreterError::Execution(_)
        ));
    }
}