1
2use boa_engine::{Context, Source};
3use scraper::{Html, Selector};
4use regex::Regex;
5use serde_json::{Value,Map};
6
7
8
9fn get_script_list(html_str: &String) -> Vec<String> {
10 let mut script_list: Vec<String> = Vec::new();
11
12 let html = Html::parse_document(&html_str);
13 let script_selector = Selector::parse("script").unwrap();
14
15 for script_element in html.select(&script_selector) {
16 let script_text = script_element.text().collect::<Vec<_>>().join("");
17 script_list.push(script_text);
19 }
20 script_list
21
22
23}
24
25
26
27fn deep_merge(result: &mut Map<String, Value>, item: Map<String, Value>) {
28 for (key, value) in item {
29 match value {
30 Value::Object(item_obj) => {
32 if let Some(Value::Object(result_obj)) = result.get_mut(&key) {
33 deep_merge(result_obj, item_obj);
34 } else {
35 result.insert(key, Value::Object(item_obj));
37 }
38 }
39 _ => {
41 result.insert(key, value);
42 }
43 }
44 }
45}
46
47pub fn run(html_str: &String) -> Result<Map<String, serde_json::Value>, Box<dyn std::error::Error>> {
48 let script_list: Vec<String> = get_script_list(html_str);
49
50 let mut result: Map<String, Value> = Map::new();
51
52 let mut context = Context::default();
53 context.eval(Source::from_bytes(r#"
54 var window = this;
55 var self = window;
56 var top = window;
57 var document = {};
58 var location = {};
59 var navigator = {
60 "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36",
61 };
62 "#))?;
63
64 for script_text in script_list {
65 if let Ok(parsed_data) = serde_json::from_str::<serde_json::Value>(&script_text) {
66 if let Some(obj) = parsed_data.as_object() {
67 for (key, value) in obj {
68 result.insert(key.clone(), value.clone());
69 }
70 }
71 }else{
72 match context.eval(Source::from_bytes(&script_text)) {
74 Ok(_) => {} Err(_) => {
76 let json_list = extract_all_json(&script_text);
77 for json_part in json_list{
78
79 deep_merge(&mut result, json_part);
80
81 }
82 continue; }
84 }
85 }
86
87 }
88 match context.eval(Source::from_bytes(r#"
89 var result = Object.entries(window).reduce((acc, [key, val]) => {
90 // 如果当前 key 是要跳过的,直接返回 acc
91
92 const valType = typeof val;
93
94 // 如果值是函数,跳过
95 if (valType === 'function' || valType === 'undefined') {
96 return acc;
97 }
98 try{
99 // 处理数组或对象
100 if (val && (valType === 'object' || Array.isArray(val))) {
101 try{
102 JSON.stringify(val); // 测试是否可序列化
103 acc[key] = val; // 保留有效数据
104 }catch(e){}
105 }
106 // 处理字符串(仅当字符串是合法 JSON 时)
107 else if (valType === 'string' ) {
108 try{
109 const parsedVal = JSON.parse(val); // 尝试解析
110
111 acc[key] = parsedVal;
112
113 }catch(e){
114 if(val){
115 acc['assignment_data'][key] = val
116
117 }
118
119 }
120
121
122 }else if ( valType === 'number'){
123 acc['assignment_data'][key] = val
124 }
125
126 } catch (e) {
127 // 跳过不可序列化的值
128 }
129
130 return acc;
131 }, {"assignment_data":{}})
132 JSON.stringify(result)
133 "#)){
134 Ok(window_result) => {
135
136 let js_str = window_result.to_string(&mut context)?;
138
139 let json_str = js_str.to_std_string_escaped();
141
142 let parsed_value: Value = serde_json::from_str(&json_str)?;
144
145 result.insert("window_result".to_string(), parsed_value);
147
148 }
149 Err(e) => {
150 eprintln!("Script evaluation failed: {:?}", e);
151 }
152 }
153
154
155 Ok(result)
156
157}
158
159pub fn extract_all_json(script_text:&String) -> Vec<serde_json::Map<String, Value>>{
160 let mut json_list: Vec<String> = Vec::new();
161 let mut start: i32 = -1;
162 let mut open_braces = 0;
163 let mut open_brackets = 0;
164
165 let re = Regex::new(r"[{}\[\]]").unwrap();
166 for mat in re.find_iter(script_text){
167 let ch = mat.as_str();
168 match ch {
169 "{" => {
170 if open_braces == 0 && open_brackets == 0{
171 start = mat.start() as i32;
172 }
173 open_braces += 1;
174 }
175 "}" => {
176 if open_braces > 0 {
177 open_braces -= 1
178 }
179 if open_braces == 0 && open_brackets == 0 && start != -1{
180 json_list.push(script_text[start as usize..mat.end()].to_string());
181 start = -1;
182 }
183 }
184 "[" => {
185 if open_braces == 0 && open_brackets == 0{
186 start = mat.start() as i32;
187 }
188 open_brackets += 1;
189 }
190 "]" => {
191 if open_brackets > 0 {
192 open_brackets -= 1
193 }
194 if open_braces == 0 && open_brackets == 0 && start != -1{
195 json_list.push(script_text[start as usize..mat.end()].to_string());
196 start = -1;
197 }
198 }
199 _ => {}
200 }
201 }
202 let mut result: Vec<serde_json::Map<String, Value>> = Vec::new();
203
204 if !json_list.is_empty(){
205 for json_part in json_list{
206 if let Ok(data) = serde_json::from_str::<Map<String, Value>>(&json_part) {
207 result.push(data);
208 }
209
210
211 }
212 }
213 return result;
214
215
216
217}
218
219
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extractor on a document with one inline script and checks that
    /// the numeric globals assigned by that script are reported under
    /// `window_result.assignment_data`.
    #[test]
    fn test_run() {
        let html_str = r#"
            <html>
            <body>
            <script>
            var a = 1;
            var b = 2;
            var c = a + b;
            console.log(c);
            </script>
            </body>
            </html>
        "#
        .to_string();

        let result = run(&html_str).expect("run should succeed for well-formed HTML");
        println!("result {:?}", result);

        // The three assignments execute before `console.log` is reached, so
        // they are expected to be set whether or not that call succeeds.
        let assignments = &result["window_result"]["assignment_data"];
        assert_eq!(assignments["a"].as_i64(), Some(1));
        assert_eq!(assignments["b"].as_i64(), Some(2));
        assert_eq!(assignments["c"].as_i64(), Some(3));
    }
}
256
257