1use anyhow::{anyhow, Result};
2use blockless_sdk::{BlessCrawl, CrawlOptions, MapOptions, ScrapeOptions, Viewport};
3use javy_plugin_api::javy::{
4 hold, hold_and_release,
5 quickjs::{prelude::MutFn, Function, Object, Value},
6 to_js_error, Args,
7};
8use std::collections::HashMap;
9use std::sync::Arc;
10
11pub fn bless_crawl(args: Args<'_>) -> Result<Value<'_>> {
13 let (cx, args) = args.release();
14
15 let scrape_config = if args.len() > 0 {
18 let config_obj = args[0]
19 .as_object()
20 .ok_or_else(|| anyhow!("config must be an object"))?;
21 Arc::new(Some(parse_options(config_obj)))
22 } else {
23 Arc::new(None)
24 };
25
26 let instance = Object::new(cx.clone())?;
28
29 let scrape_config_clone = scrape_config.clone();
31 instance.set(
32 "scrape",
33 Function::new(
34 cx.clone(),
35 MutFn::new(move |cx, args| {
36 let (cx, args) = hold_and_release!(cx, args);
37
38 let scrape_fn = |args: Args<'_>| {
39 let (_cx, args) = args.release();
40
41 if args.is_empty() {
42 return Err(anyhow!("URL is required"));
43 }
44
45 let url = args[0]
46 .as_string()
47 .ok_or_else(|| anyhow!("URL must be a string"))?
48 .to_string()
49 .map_err(|_| anyhow!("invalid UTF-8 in URL"))?;
50
51 let scrape_options = if args.len() > 1 {
53 let opts_obj = args[1]
54 .as_object()
55 .ok_or_else(|| anyhow!("options must be an object"))?;
56 parse_options(opts_obj)
57 } else if let Some(config) = scrape_config_clone.as_ref() {
58 config.clone()
59 } else {
60 return Err(anyhow!(
61 "No scrape options provided and no default config available"
62 ));
63 };
64
65 let result = BlessCrawl::with_config(scrape_options)
66 .map_err(|e| anyhow!("Failed to create BlessCrawl instance: {:?}", e))?
67 .scrape(&url, None)
68 .map_err(|e| anyhow!("Scrape failed: {:?}", e))?;
69
70 let json_str = serde_json::to_string(&result)
72 .map_err(|e| anyhow!("Failed to serialize result: {:?}", e))?;
73 let js_code = format!("({})", json_str);
74 let js_value = cx.eval::<Value, _>(js_code.as_bytes()).map_err(|e| {
75 anyhow!("Failed to parse JSON as JavaScript value: {:?}", e)
76 })?;
77
78 Ok(js_value)
79 };
80
81 scrape_fn(hold!(cx.clone(), args)).map_err(|e| to_js_error(cx, e))
82 }),
83 ),
84 )?;
85
86 let scrape_config_clone = scrape_config.clone();
88 instance.set(
89 "map",
90 Function::new(
91 cx.clone(),
92 MutFn::new(move |cx, args| {
93 let (cx, args) = hold_and_release!(cx, args);
94
95 let map_fn = |args: Args<'_>| {
96 let (_cx, args) = args.release();
97
98 if args.is_empty() {
99 return Err(anyhow!("URL is required"));
100 }
101
102 let url = args[0]
103 .as_string()
104 .ok_or_else(|| anyhow!("URL must be a string"))?
105 .to_string()
106 .map_err(|_| anyhow!("invalid UTF-8 in URL"))?;
107
108 let (scrape_options, map_options) = if args.len() > 1 {
109 let opts_obj = args[1]
110 .as_object()
111 .ok_or_else(|| anyhow!("options must be an object"))?;
112 (parse_options(opts_obj), parse_map_options(opts_obj))
113 } else if let Some(config) = scrape_config_clone.as_ref() {
114 (config.clone(), MapOptions::default())
115 } else {
116 return Err(anyhow!(
117 "No scrape options provided and no default config available"
118 ));
119 };
120
121 let result = BlessCrawl::with_config(scrape_options)
122 .map_err(|e| anyhow!("Failed to create BlessCrawl instance: {:?}", e))?
123 .map(&url, Some(map_options))
124 .map_err(|e| anyhow!("Map failed: {:?}", e))?;
125
126 let json_str = serde_json::to_string(&result)
127 .map_err(|e| anyhow!("Failed to serialize result: {:?}", e))?;
128
129 let js_code = format!("({})", json_str);
130 let js_value = cx.eval::<Value, _>(js_code.as_bytes()).map_err(|e| {
131 anyhow!("Failed to parse JSON as JavaScript value: {:?}", e)
132 })?;
133
134 Ok(js_value)
135 };
136
137 map_fn(hold!(cx.clone(), args)).map_err(|e| to_js_error(cx, e))
138 }),
139 ),
140 )?;
141
142 let scrape_config_clone = scrape_config.clone();
144 instance.set(
145 "crawl",
146 Function::new(
147 cx.clone(),
148 MutFn::new(move |cx, args| {
149 let (cx, args) = hold_and_release!(cx, args);
150
151 let crawl_fn = |args: Args<'_>| {
152 let (_cx, args) = args.release();
153
154 if args.is_empty() {
155 return Err(anyhow!("URL is required"));
156 }
157
158 let url = args[0]
159 .as_string()
160 .ok_or_else(|| anyhow!("URL must be a string"))?
161 .to_string()
162 .map_err(|_| anyhow!("invalid UTF-8 in URL"))?;
163
164 let (scrape_options, crawl_options) = if args.len() > 1 {
165 let opts_obj = args[1]
166 .as_object()
167 .ok_or_else(|| anyhow!("options must be an object"))?;
168 (parse_options(opts_obj), parse_crawl_options(opts_obj))
169 } else if let Some(config) = scrape_config_clone.as_ref() {
170 (config.clone(), CrawlOptions::default())
171 } else {
172 return Err(anyhow!(
173 "No scrape options provided and no default config available"
174 ));
175 };
176
177 let result = BlessCrawl::with_config(scrape_options)
178 .map_err(|e| anyhow!("Failed to create BlessCrawl instance: {:?}", e))?
179 .crawl(&url, Some(crawl_options))
180 .map_err(|e| anyhow!("Crawl failed: {:?}", e))?;
181
182 let json_str = serde_json::to_string(&result)
183 .map_err(|e| anyhow!("Failed to serialize result: {:?}", e))?;
184
185 let js_code = format!("({})", json_str);
186 let js_value = cx.eval::<Value, _>(js_code.as_bytes()).map_err(|e| {
187 anyhow!("Failed to parse JSON as JavaScript value: {:?}", e)
188 })?;
189
190 Ok(js_value)
191 };
192
193 crawl_fn(hold!(cx.clone(), args)).map_err(|e| to_js_error(cx, e))
194 }),
195 ),
196 )?;
197
198 Ok(Value::from_object(instance))
199}
200
201fn parse_options(obj: &Object) -> ScrapeOptions {
203 let mut scrape_options = ScrapeOptions::default();
205 if let Ok(Some(timeout)) = obj.get::<_, Option<f64>>("timeout") {
206 scrape_options.timeout = timeout as u32;
207 }
208 if let Ok(Some(wait_time)) = obj.get::<_, Option<f64>>("wait_time") {
209 scrape_options.wait_time = wait_time as u32;
210 }
211 if let Ok(Some(include_tags)) = obj.get::<_, Option<Vec<String>>>("include_tags") {
212 scrape_options.include_tags = Some(include_tags);
213 }
214 if let Ok(Some(exclude_tags)) = obj.get::<_, Option<Vec<String>>>("exclude_tags") {
215 scrape_options.exclude_tags = Some(exclude_tags);
216 }
217 if let Ok(Some(only_main_content)) = obj.get::<_, Option<bool>>("only_main_content") {
218 scrape_options.only_main_content = only_main_content;
219 }
220 if let Ok(Some(format)) = obj.get::<_, Option<String>>("format") {
221 scrape_options.format = format.parse().unwrap_or_default();
222 }
223 if let Ok(Some(viewport_obj)) = obj.get::<_, Option<Object>>("viewport") {
224 let mut viewport = Viewport::default();
225 if let Ok(Some(width)) = viewport_obj.get::<_, Option<u32>>("width") {
226 viewport.width = Some(width);
227 }
228 if let Ok(Some(height)) = viewport_obj.get::<_, Option<u32>>("height") {
229 viewport.height = Some(height);
230 }
231 scrape_options.viewport = Some(viewport);
232 }
233 if let Ok(Some(user_agent)) = obj.get::<_, Option<String>>("user_agent") {
234 scrape_options.user_agent = Some(user_agent);
235 }
236 if let Ok(Some(headers_obj)) = obj.get::<_, Option<HashMap<String, String>>>("headers") {
237 scrape_options.headers = Some(headers_obj);
238 }
239 scrape_options
240}
241
242fn parse_map_options(obj: &Object) -> MapOptions {
244 let mut options = MapOptions::default();
245 if let Ok(Some(base_url)) = obj.get::<_, Option<String>>("base_url") {
246 options.base_url = Some(base_url);
247 }
248 if let Ok(Some(link_types)) = obj.get::<_, Option<Vec<String>>>("link_types") {
249 options.link_types = Some(link_types);
250 }
251 if let Ok(Some(filter_extensions)) = obj.get::<_, Option<Vec<String>>>("filter_extensions") {
252 options.filter_extensions = Some(filter_extensions);
253 }
254 options
255}
256
257fn parse_crawl_options(obj: &Object) -> CrawlOptions {
259 let mut options = CrawlOptions::default();
260 if let Ok(Some(limit)) = obj.get::<_, Option<f64>>("limit") {
261 options.limit = Some(limit as u32);
262 }
263 if let Ok(Some(max_depth)) = obj.get::<_, Option<u8>>("max_depth") {
264 options.max_depth = Some(max_depth);
265 }
266 if let Ok(Some(exclude_paths)) = obj.get::<_, Option<Vec<String>>>("exclude_paths") {
267 options.exclude_paths = Some(exclude_paths);
268 }
269 if let Ok(Some(include_paths)) = obj.get::<_, Option<Vec<String>>>("include_paths") {
270 options.include_paths = Some(include_paths);
271 }
272 if let Ok(Some(follow_external)) = obj.get::<_, Option<bool>>("follow_external") {
273 options.follow_external = Some(follow_external);
274 }
275 if let Ok(Some(delay)) = obj.get::<_, Option<u32>>("delay_between_requests") {
276 options.delay_between_requests = Some(delay);
277 }
278 if let Ok(Some(parallel)) = obj.get::<_, Option<u32>>("parallel_requests") {
279 options.parallel_requests = Some(parallel);
280 }
281 options
282}