1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Parameters of a scrape request.
///
/// Deserialized with camelCase keys (`rename_all = "camelCase"`). Only `url`
/// is required; every other field falls back to a serde default when absent.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ScrapeRequest {
    /// Target URL. Required: this field has no serde default.
    pub url: String,

    /// Requested output formats. Defaults to `["markdown"]` via `default_formats`.
    #[serde(default = "default_formats")]
    pub formats: Vec<String>,

    /// String-to-string header map. Defaults to an empty map.
    // NOTE(review): presumably extra HTTP request headers — confirm with the fetcher.
    #[serde(default)]
    pub headers: HashMap<String, String>,

    /// Tags to include. Defaults to an empty list.
    // NOTE(review): exact matching semantics are not visible in this file.
    #[serde(default)]
    pub include_tags: Vec<String>,

    /// Tags to exclude. Defaults to an empty list.
    #[serde(default)]
    pub exclude_tags: Vec<String>,

    /// Defaults to `true` via `default_true`.
    #[serde(default = "default_true")]
    pub only_main_content: bool,

    /// Defaults to `30000` via `default_timeout`.
    // NOTE(review): unit is presumably milliseconds — confirm.
    #[serde(default = "default_timeout")]
    pub timeout: u64,

    /// Defaults to `0`.
    // NOTE(review): presumably a delay in milliseconds before capture — confirm.
    #[serde(default)]
    pub wait_for: u64,

    /// Defaults to `true` via `default_true`.
    #[serde(default = "default_true")]
    pub remove_base64_images: bool,

    /// Defaults to `false`.
    #[serde(default)]
    pub skip_tls_verification: bool,

    /// Engine selector string. Defaults to `"auto"` via `default_engine`.
    #[serde(default = "default_engine")]
    pub engine: String,

    /// Optional selector; `None` when absent.
    #[serde(default)]
    pub wait_for_selector: Option<String>,

    /// Browser actions, in the order given. Defaults to an empty list.
    #[serde(default)]
    pub actions: Vec<BrowserAction>,

    /// Defaults to `false`.
    #[serde(default)]
    pub screenshot: bool,

    /// Defaults to `"png"` via `default_screenshot_format`.
    #[serde(default = "default_screenshot_format")]
    pub screenshot_format: String,
}
67
/// A single browser interaction step.
///
/// Internally tagged: the variant is selected by a `"type"` field whose value
/// is the camelCase variant name (`click`, `type`, `scroll`, `wait`,
/// `waitForSelector`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "camelCase")]
pub enum BrowserAction {
    /// Carries the selector to click.
    Click { selector: String },
    /// Carries a selector and the text to type.
    Type { selector: String, text: String },
    /// Carries a free-form direction string.
    // NOTE(review): accepted direction values are not defined in this file.
    Scroll { direction: String },
    /// Carries a duration in milliseconds (per the field name).
    Wait { milliseconds: u64 },
    /// Carries the selector to wait for.
    WaitForSelector { selector: String },
}
78
/// Serde default for `formats`: a single-element list containing `"markdown"`.
fn default_formats() -> Vec<String> {
    let markdown = String::from("markdown");
    vec![markdown]
}
83
/// Serde default helper for boolean fields that are on unless the request
/// says otherwise; always returns `true`.
fn default_true() -> bool {
    true
}
87
/// Serde default for `ScrapeRequest::timeout`: `30000`.
// NOTE(review): unit is presumably milliseconds — confirm against the consumer.
fn default_timeout() -> u64 {
    30_000
}
91
/// Serde default for `ScrapeRequest::engine`: the string `"auto"`.
fn default_engine() -> String {
    String::from("auto")
}
95
/// Serde default for `ScrapeRequest::screenshot_format`: the string `"png"`.
fn default_screenshot_format() -> String {
    String::from("png")
}
99
100impl Default for ScrapeRequest {
101 fn default() -> Self {
102 Self {
103 url: String::new(),
104 formats: default_formats(),
105 headers: HashMap::new(),
106 include_tags: Vec::new(),
107 exclude_tags: Vec::new(),
108 only_main_content: default_true(),
109 timeout: default_timeout(),
110 wait_for: 0,
111 remove_base64_images: default_true(),
112 skip_tls_verification: false,
113 engine: default_engine(),
114 wait_for_selector: None,
115 actions: Vec::new(),
116 screenshot: false,
117 screenshot_format: default_screenshot_format(),
118 }
119 }
120}
121
/// Response envelope for a scrape request (serialize-only).
///
/// `success` is always emitted; each `Option` field is omitted from the
/// output when it is `None`.
// NOTE(review): there is no `rename_all` here, so `scrape_id` is serialized in
// snake_case as written — confirm this is the intended wire name.
#[derive(Debug, Clone, Serialize)]
pub struct ScrapeResponse {
    /// Whether the request succeeded.
    pub success: bool,
    /// Optional warning text.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub warning: Option<String>,
    /// The scraped document, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Document>,
    /// Error text, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional identifier for this scrape.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scrape_id: Option<String>,
}
135
/// A scraped page in its various output representations.
///
/// Uses camelCase keys (e.g. `rawHtml`). Every `Option` field is omitted from
/// serialized output when `None`; `metadata` is always emitted. The derived
/// `Default` leaves all optional fields `None` and uses `Metadata::default()`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Document {
    /// Page title, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Page description, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// URL associated with the document, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,

    /// Markdown rendering, if produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub markdown: Option<String>,

    /// HTML output, if produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub html: Option<String>,

    /// Raw HTML output, if produced; serialized as `rawHtml`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub raw_html: Option<String>,

    /// Link list, if produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Vec<String>>,

    /// Image list, if produced.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,

    /// Screenshot payload as a string, if produced.
    // NOTE(review): encoding (e.g. base64 vs. URL) is not visible here — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub screenshot: Option<String>,

    /// Page metadata; not optional, so it is required when deserializing.
    pub metadata: Metadata,
}
179
/// Metadata attached to a [`Document`].
///
/// Uses camelCase keys (e.g. `ogTitle`, `statusCode`). Every `Option` field is
/// omitted from serialized output when `None`; `status_code` is always emitted
/// and is the only field required when deserializing.
// NOTE(review): the `og_*` fields presumably hold Open Graph values — confirm
// against the extractor that fills them.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    /// Page title, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,

    /// Page description, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Language string, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub language: Option<String>,

    /// Keywords as a single string, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub keywords: Option<String>,

    /// Robots directive string, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub robots: Option<String>,

    /// Serialized as `ogTitle`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub og_title: Option<String>,

    /// Serialized as `ogDescription`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub og_description: Option<String>,

    /// Serialized as `ogUrl`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub og_url: Option<String>,

    /// Serialized as `ogImage`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub og_image: Option<String>,

    /// URL of the page, if recorded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,

    /// Serialized as `sourceUrl`.
    // NOTE(review): presumably the originally requested URL — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source_url: Option<String>,

    /// Status code; always serialized (as `statusCode`). `Default` sets it to 200.
    pub status_code: u16,

    /// Serialized as `contentType`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_type: Option<String>,

    /// Serialized as `canonicalUrl`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub canonical_url: Option<String>,

    /// Serialized as `wordCount`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub word_count: Option<usize>,

    /// Serialized as `readingTime`.
    // NOTE(review): unit (minutes? seconds?) is not visible here — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reading_time: Option<usize>,

    /// Short excerpt, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub excerpt: Option<String>,

    /// Serialized as `detectedFrameworks`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detected_frameworks: Option<Vec<String>>,

    /// Serialized as `detectionReason`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detection_reason: Option<String>,

    /// Serialized as `contentScriptRatio`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content_script_ratio: Option<f64>,
}
245
246impl Default for Metadata {
248 fn default() -> Self {
249 Self {
250 title: None,
251 description: None,
252 language: None,
253 keywords: None,
254 robots: None,
255 og_title: None,
256 og_description: None,
257 og_url: None,
258 og_image: None,
259 url: None,
260 source_url: None,
261 status_code: 200,
262 content_type: None,
263 canonical_url: None,
264 word_count: None,
265 reading_time: None,
266 excerpt: None,
267 detected_frameworks: None,
268 detection_reason: None,
269 content_script_ratio: None,
270 }
271 }
272}
273
274
/// Serde default helper for `Option<bool>` fields that should be enabled when
/// the key is absent; always returns `Some(true)`.
fn default_true_option() -> Option<bool> {
    Some(true)
}
279
280impl ScrapeResponse {
281 pub fn success(data: Document) -> Self {
282 Self {
283 success: true,
284 warning: None,
285 data: Some(data),
286 error: None,
287 scrape_id: None,
288 }
289 }
290
291 pub fn error(error: String) -> Self {
292 Self {
293 success: false,
294 warning: None,
295 data: None,
296 error: Some(error),
297 scrape_id: None,
298 }
299 }
300}
301
/// Parameters of a map request.
///
/// Deserialized with camelCase keys. Only `url` is required.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct MapRequest {
    /// Target URL. Required: this field has no serde default.
    pub url: String,

    /// Optional search string; `None` when absent.
    #[serde(default)]
    pub search: Option<String>,

    /// `None` when absent.
    #[serde(default)]
    pub ignore_sitemap: Option<bool>,

    /// `Some(true)` when absent, via `default_include_subdomains`.
    #[serde(default = "default_include_subdomains")]
    pub include_subdomains: Option<bool>,

    /// `Some(5000)` when absent, via `default_map_limit`.
    #[serde(default = "default_map_limit")]
    pub limit: Option<u32>,
}
325
/// Response envelope for a map request (serialize-only).
///
/// `success` is always emitted; each `Option` field is omitted when `None`.
// NOTE(review): there is no `rename_all` here, so `scrape_id` is serialized in
// snake_case as written — confirm this is the intended wire name.
#[derive(Debug, Clone, Serialize)]
pub struct MapResponse {
    /// Whether the request succeeded.
    pub success: bool,
    /// Link list, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub links: Option<Vec<String>>,
    /// Error text, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional identifier.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scrape_id: Option<String>,
}
337
/// Serde default for `MapRequest::include_subdomains`: `Some(true)`.
fn default_include_subdomains() -> Option<bool> {
    Some(true)
}
341
/// Serde default for `MapRequest::limit`: `Some(5000)`.
fn default_map_limit() -> Option<u32> {
    Some(5_000)
}
345
346impl MapResponse {
347 pub fn success(links: Vec<String>) -> Self {
348 Self {
349 success: true,
350 links: Some(links),
351 error: None,
352 scrape_id: None,
353 }
354 }
355
356 pub fn error(error: String) -> Self {
357 Self {
358 success: false,
359 links: None,
360 error: Some(error),
361 scrape_id: None,
362 }
363 }
364}
365
/// Parameters of a crawl request.
///
/// Uses camelCase keys for both deserialization and serialization. Only `url`
/// is required; the remaining fields fall back to serde defaults.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct CrawlRequest {
    /// Starting URL. Required: this field has no serde default.
    pub url: String,

    /// Optional path exclusion list; `None` when absent.
    // NOTE(review): pattern syntax (glob/regex/prefix) is not visible here.
    #[serde(default)]
    pub exclude_paths: Option<Vec<String>>,

    /// Optional path inclusion list; `None` when absent.
    #[serde(default)]
    pub include_paths: Option<Vec<String>>,

    /// Defaults to `2` via `default_max_depth`.
    #[serde(default = "default_max_depth")]
    pub max_depth: u32,

    /// Defaults to `100` via `default_limit`.
    #[serde(default = "default_limit")]
    pub limit: u32,

    /// `None` when absent.
    #[serde(default)]
    pub allow_backward_links: Option<bool>,

    /// `None` when absent.
    #[serde(default)]
    pub allow_external_links: Option<bool>,

    /// `None` when absent.
    #[serde(default)]
    pub ignore_sitemap: Option<bool>,

    /// `Some(true)` when absent, via `default_true_option`.
    #[serde(default = "default_true_option")]
    pub detect_pagination: Option<bool>,

    /// `Some(50)` when absent, via `default_max_pagination_pages`.
    #[serde(default = "default_max_pagination_pages")]
    pub max_pagination_pages: Option<u32>,

    /// `None` when absent.
    #[serde(default)]
    pub use_parallel: Option<bool>,
}
413
/// Response envelope for a crawl request.
///
/// `success` is always emitted; each `Option` field is omitted from
/// serialized output when `None`.
// NOTE(review): there is no `rename_all` here, so `crawl_id` is serialized in
// snake_case as written — confirm this is the intended wire name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrawlResponse {
    /// Whether the request succeeded (or was accepted, see `started`).
    pub success: bool,
    /// Crawled documents, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Vec<Document>>,
    /// Error text, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Identifier of the crawl, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub crawl_id: Option<String>,
    /// Human-readable message, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<String>,
}
429
/// Serde default for `CrawlRequest::max_depth`: `2`.
fn default_max_depth() -> u32 {
    2
}
433
/// Serde default for `CrawlRequest::limit`: `100`.
fn default_limit() -> u32 {
    100
}
437
/// Serde default for `CrawlRequest::max_pagination_pages`: `Some(50)`.
fn default_max_pagination_pages() -> Option<u32> {
    Some(50)
}
441
442impl CrawlResponse {
443 pub fn success(data: Vec<Document>) -> Self {
444 Self {
445 success: true,
446 data: Some(data),
447 error: None,
448 crawl_id: None,
449 message: None,
450 }
451 }
452
453 pub fn error(error: String) -> Self {
454 Self {
455 success: false,
456 data: None,
457 error: Some(error),
458 crawl_id: None,
459 message: None,
460 }
461 }
462
463 pub fn started(crawl_id: String) -> Self {
464 Self {
465 success: true,
466 data: None,
467 error: None,
468 crawl_id: Some(crawl_id.clone()),
469 message: Some(format!("Crawl started with ID: {}", crawl_id)),
470 }
471 }
472}
473
/// Parameters of a search request.
///
/// Deserialized with camelCase keys. Only `query` is required.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct SearchRequest {
    /// Search query. Required: this field has no serde default.
    pub query: String,

    /// Defaults to `10` via `default_search_limit`.
    #[serde(default = "default_search_limit")]
    pub limit: u32,

    /// Defaults to `false`.
    #[serde(default)]
    pub scrape_results: bool,

    /// Optional scrape settings; `None` when absent.
    #[serde(default)]
    pub scrape_options: Option<ScrapeOptions>,
}
495
/// Scrape settings nested inside a [`SearchRequest`].
///
/// Deserialized with camelCase keys; every field has a serde default.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct ScrapeOptions {
    /// Defaults to `["markdown"]` via `default_formats`.
    #[serde(default = "default_formats")]
    pub formats: Vec<String>,

    /// Defaults to `true` via `default_true`.
    #[serde(default = "default_true")]
    pub only_main_content: bool,

    /// Defaults to `10000` via `default_scrape_timeout`; note this differs
    /// from the `30000` default used by `ScrapeRequest::timeout`.
    // NOTE(review): unit is presumably milliseconds — confirm.
    #[serde(default = "default_scrape_timeout")]
    pub timeout: u64,
}
512
/// Response envelope for a search request (serialize-only).
///
/// `success` is always emitted; each `Option` field is omitted when `None`.
#[derive(Debug, Clone, Serialize)]
pub struct SearchResponse {
    /// Whether the request succeeded.
    pub success: bool,
    /// Search results, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Vec<SearchResult>>,
    /// Error text, when present.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
522
/// A single search hit (serialize-only).
#[derive(Debug, Clone, Serialize)]
pub struct SearchResult {
    /// Result title.
    pub title: String,
    /// Result URL.
    pub url: String,
    /// Result snippet text.
    pub snippet: String,
    /// Scraped document for this result; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<Document>,
}
536
/// Serde default for `SearchRequest::limit`: `10`.
fn default_search_limit() -> u32 {
    10
}
540
/// Serde default for `ScrapeOptions::timeout`: `10000`.
// NOTE(review): unit is presumably milliseconds — confirm against the consumer.
fn default_scrape_timeout() -> u64 {
    10_000
}
544
545impl SearchResponse {
546 pub fn success(data: Vec<SearchResult>) -> Self {
547 Self {
548 success: true,
549 data: Some(data),
550 error: None,
551 }
552 }
553
554 pub fn error(error: String) -> Self {
555 Self {
556 success: false,
557 data: None,
558 error: Some(error),
559 }
560 }
561}
562
/// Event emitted while a crawl is running (serialize-only).
///
/// Internally tagged: each event serializes as an object with a `"type"`
/// field holding the lowercased variant name (`status`, `document`, `error`,
/// `complete`) alongside the variant's own fields.
// NOTE(review): `rename_all` on an enum renames variants only, so the fields
// below keep their snake_case names (e.g. `pages_crawled`), while the nested
// `Metadata` uses camelCase — confirm consumers expect this mix.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum CrawlEvent {
    /// Progress snapshot.
    Status {
        pages_crawled: usize,
        queue_size: usize,
        current_url: Option<String>,
    },
    /// A crawled page. `metadata` is boxed, which keeps this variant (and so
    /// the whole enum) smaller than it would be with an inline `Metadata`.
    Document {
        url: String,
        title: Option<String>,
        markdown: Option<String>,
        metadata: Box<Metadata>,
    },
    /// A failure associated with `url`.
    Error {
        url: String,
        error: String,
    },
    /// Final summary counters.
    Complete {
        total_pages: usize,
        success: usize,
        errors: usize,
    },
}