1use std::fmt;
32use std::time::{Duration, Instant};
33
34use async_trait::async_trait;
35use serde_json::{Value, json};
36
37use crate::domain::error::{Result, ServiceError, StygianError};
38use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
39
/// Condition the browser adapter waits for when loading a page.
///
/// Values are built from request parameters by `WaitStrategy::from_params` and rendered
/// into response metadata through the type's `Display` implementation.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum WaitStrategy {
    /// Default strategy; requested with `"wait_strategy": "dom_content_loaded"`.
    #[default]
    DomContentLoaded,
    /// Requested with `"wait_strategy": "network_idle"`.
    NetworkIdle,
    /// Wait for the given selector; requested with `"wait_strategy": "selector:<selector>"`.
    SelectorAppears(String),
    /// Navigate as for [`WaitStrategy::DomContentLoaded`], then sleep for the given duration;
    /// requested with a numeric `"wait_ms"` parameter.
    Fixed(Duration),
}
53
54impl WaitStrategy {
55 fn from_params(params: &Value) -> Self {
57 match params.get("wait_strategy").and_then(Value::as_str) {
58 Some("network_idle") => Self::NetworkIdle,
59 Some("dom_content_loaded") => Self::DomContentLoaded,
60 Some(s) if s.starts_with("selector:") => {
61 Self::SelectorAppears(s.trim_start_matches("selector:").to_string())
62 }
63 _ => params
64 .get("wait_ms")
65 .and_then(Value::as_u64)
66 .map_or(Self::DomContentLoaded, |ms| {
67 Self::Fixed(Duration::from_millis(ms))
68 }),
69 }
70 }
71}
72
73impl fmt::Display for WaitStrategy {
74 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
75 match self {
76 Self::DomContentLoaded => write!(f, "dom_content_loaded"),
77 Self::NetworkIdle => write!(f, "network_idle"),
78 Self::SelectorAppears(selector) => write!(f, "selector_appears({selector})"),
79 Self::Fixed(duration) => write!(f, "fixed_{}ms", duration.as_millis()),
80 }
81 }
82}
83
/// Stealth level associated with a browser session.
///
/// Parsed from the `stealth_level` request parameter by `StealthLevel::from_params` and
/// exposed as a lowercase label through `StealthLevel::as_str`.
// NOTE(review): what each level actually enables is decided outside this file — confirm
// against the browser crate before documenting the variants in more detail.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StealthLevel {
    /// Label `"none"`.
    None,
    /// Label `"basic"`; the default level.
    #[default]
    Basic,
    /// Label `"advanced"`.
    Advanced,
}
95
96impl StealthLevel {
97 fn from_params(params: &Value) -> Self {
98 match params.get("stealth_level").and_then(Value::as_str) {
99 Some("advanced") => Self::Advanced,
100 Some("none") => Self::None,
101 _ => Self::Basic,
102 }
103 }
104
105 #[must_use]
107 pub const fn as_str(&self) -> &'static str {
108 match self {
109 Self::None => "none",
110 Self::Basic => "basic",
111 Self::Advanced => "advanced",
112 }
113 }
114}
115
116#[derive(Debug, Clone)]
118pub struct BrowserAdapterConfig {
119 pub timeout: Duration,
121 pub max_concurrent: usize,
123 pub default_wait: WaitStrategy,
125 pub default_stealth: StealthLevel,
127 pub block_resources: bool,
129 pub headless: bool,
131 pub user_agent: Option<String>,
133 pub viewport_width: u32,
135 pub viewport_height: u32,
137}
138
139impl Default for BrowserAdapterConfig {
140 fn default() -> Self {
141 Self {
142 timeout: Duration::from_secs(30),
143 max_concurrent: 5,
144 default_wait: WaitStrategy::DomContentLoaded,
145 default_stealth: StealthLevel::Basic,
146 block_resources: true,
147 headless: true,
148 user_agent: None,
149 viewport_width: 1920,
150 viewport_height: 1080,
151 }
152 }
153}
154
155#[derive(Clone)]
167pub struct BrowserAdapter {
168 config: BrowserAdapterConfig,
169}
170
171impl BrowserAdapter {
172 #[must_use]
184 pub fn new() -> Self {
185 Self {
186 config: BrowserAdapterConfig::default(),
187 }
188 }
189
190 #[must_use]
206 pub const fn with_config(config: BrowserAdapterConfig) -> Self {
207 Self { config }
208 }
209
210 fn resolve_timeout(&self, params: &Value) -> Duration {
212 params
213 .get("timeout_ms")
214 .and_then(Value::as_u64)
215 .map_or(self.config.timeout, Duration::from_millis)
216 }
217
218 #[allow(clippy::option_if_let_else)]
224 #[cfg(feature = "browser")]
225 async fn navigate_with_browser(
226 &self,
227 url: &str,
228 wait: &WaitStrategy,
229 timeout: Duration,
230 ) -> Result<(String, Value)> {
231 use stygian_browser::page::WaitUntil;
232 use stygian_browser::{BrowserConfig, BrowserPool};
233
234 let start = Instant::now();
235
236 let browser_config = BrowserConfig {
238 headless: self.config.headless,
239 ..BrowserConfig::default()
240 };
241
242 let pool = BrowserPool::new(browser_config)
244 .await
245 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
246
247 let handle = match tokio::time::timeout(timeout, pool.acquire()).await {
249 Ok(Ok(h)) => h,
250 Ok(Err(e)) => {
251 return Err(StygianError::Service(ServiceError::Unavailable(format!(
252 "Browser pool exhausted or unavailable: {e}"
253 ))));
254 }
255 Err(_) => {
256 return Err(StygianError::Service(ServiceError::Unavailable(format!(
257 "Browser acquisition timeout after {timeout:?}"
258 ))));
259 }
260 };
261
262 let Some(instance) = handle.browser() else {
264 return Err(StygianError::Service(ServiceError::Unavailable(
265 "Failed to get browser instance after acquisition".to_string(),
266 )));
267 };
268
269 let mut page = instance
270 .new_page()
271 .await
272 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
273
274 let wait_condition = match wait {
276 WaitStrategy::DomContentLoaded => WaitUntil::DomContentLoaded,
277 WaitStrategy::NetworkIdle => WaitUntil::NetworkIdle,
278 WaitStrategy::SelectorAppears(selector) => WaitUntil::Selector(selector.clone()),
279 WaitStrategy::Fixed(_duration) => WaitUntil::DomContentLoaded, };
281
282 if let Err(e) = page.navigate(url, wait_condition, timeout).await {
284 return Err(StygianError::Service(ServiceError::Unavailable(format!(
285 "Browser navigation failed: {e}"
286 ))));
287 }
288
289 if let WaitStrategy::Fixed(duration) = wait {
291 tokio::time::sleep(*duration).await;
292 }
293
294 let html = page
296 .content()
297 .await
298 .map_err(|e| StygianError::Service(ServiceError::Unavailable(e.to_string())))?;
299
300 let elapsed = start.elapsed();
301
302 Ok((
305 html,
306 json!({
307 "url": url,
308 "navigation_time_ms": elapsed.as_millis(),
309 "wait_strategy": wait.to_string(),
310 "stealth_level": self.config.default_stealth.as_str(),
311 "viewport": {
312 "width": self.config.viewport_width,
313 "height": self.config.viewport_height
314 },
315 "rendered": true,
316 }),
317 ))
318 }
319
320 #[cfg(not(feature = "browser"))]
322 async fn navigate_with_browser(
323 &self,
324 url: &str,
325 _wait: &WaitStrategy,
326 _timeout: Duration,
327 ) -> Result<(String, Value)> {
328 Err(StygianError::Service(ServiceError::Unavailable(format!(
329 "stygian-graph was compiled without the 'browser' feature; \
330 cannot render JavaScript for URL: {url}"
331 ))))
332 }
333}
334
335impl Default for BrowserAdapter {
336 fn default() -> Self {
337 Self::new()
338 }
339}
340
341#[async_trait]
342impl ScrapingService for BrowserAdapter {
343 async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
367 let wait = WaitStrategy::from_params(&input.params);
368 let _stealth = StealthLevel::from_params(&input.params);
369 let timeout = self.resolve_timeout(&input.params);
370
371 let (html, metadata) = tokio::time::timeout(
372 timeout + Duration::from_secs(5), self.navigate_with_browser(&input.url, &wait, timeout),
374 )
375 .await
376 .map_err(|_| {
377 StygianError::Service(ServiceError::Timeout(
378 u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX),
379 ))
380 })??;
381
382 Ok(ServiceOutput {
383 data: html,
384 metadata,
385 })
386 }
387
388 fn name(&self) -> &'static str {
389 "browser"
390 }
391}
392
/// Unit tests for parameter parsing and configuration, plus ignored integration tests
/// that need a real Chrome binary (and, for some, network access).
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::redundant_closure_for_method_calls
)]
mod tests {
    use super::*;

    #[test]
    fn test_adapter_default_name() {
        let adapter = BrowserAdapter::new();
        assert_eq!(adapter.name(), "browser");
    }

    #[test]
    fn test_wait_strategy_from_params_dom() {
        let params = json!({ "wait_strategy": "dom_content_loaded" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::DomContentLoaded
        );
    }

    #[test]
    fn test_wait_strategy_from_params_network_idle() {
        let params = json!({ "wait_strategy": "network_idle" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::NetworkIdle
        );
    }

    #[test]
    fn test_wait_strategy_from_params_selector() {
        let params = json!({ "wait_strategy": "selector:#main-content" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::SelectorAppears("#main-content".to_string())
        );
    }

    #[test]
    fn test_wait_strategy_from_params_fixed_ms() {
        let params = json!({ "wait_ms": 500u64 });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::Fixed(Duration::from_millis(500))
        );
    }

    #[test]
    fn test_stealth_level_from_params() {
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "advanced" })),
            StealthLevel::Advanced
        );
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "none" })),
            StealthLevel::None
        );
        assert_eq!(StealthLevel::from_params(&json!({})), StealthLevel::Basic);
    }

    #[test]
    fn test_resolve_timeout_override() {
        let adapter = BrowserAdapter::new();
        let params = json!({ "timeout_ms": 5000u64 });
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(5));
    }

    #[test]
    fn test_resolve_timeout_default() {
        let adapter = BrowserAdapter::new();
        let params = json!({});
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(30));
    }

    #[test]
    fn test_config_builder() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_mins(1),
            max_concurrent: 3,
            block_resources: false,
            ..BrowserAdapterConfig::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        assert_eq!(adapter.config.timeout, Duration::from_mins(1));
        assert_eq!(adapter.config.max_concurrent, 3);
    }

    #[allow(clippy::panic)]
    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn test_execute_returns_service_output_or_unavailable() {
        let adapter = BrowserAdapter::new();
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({ "wait_strategy": "dom_content_loaded" }),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert!(!output.data.is_empty(), "output data should not be empty");
                assert!(output.metadata.is_object());
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported that no browser could be used.
            }
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_navigates_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "dom_content_loaded",
                "timeout_ms": 30000
            }),
        };

        let result = adapter.execute(input).await;

        match result {
            Ok(output) => {
                assert!(!output.data.is_empty());
                assert!(
                    output
                        .metadata
                        .get("rendered")
                        .and_then(|v| v.as_bool())
                        .unwrap_or(false)
                );
                assert!(output.metadata.get("navigation_time_ms").is_some());
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported that no browser could be used.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "Requires Chrome installed and network access; may panic if browser unavailable"]
    async fn browser_adapter_respects_timeout() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(2),
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://httpbin.org/delay/10".to_string(),
            params: json!({"timeout_ms": 2000}),
        };

        let result = adapter.execute(input).await;

        match result {
            Err(StygianError::Service(ServiceError::Unavailable(msg))) => {
                assert!(
                    msg.contains("timeout")
                        || msg.contains("unavailable")
                        || msg.contains("Chrome")
                        || msg.contains("exhausted")
                );
            }
            Err(StygianError::Service(ServiceError::Timeout(_))) => {
                // Accepted: the outer timeout guard fired.
            }
            Ok(_) => {
                panic!("Expected timeout or unavailable, got success");
            }
            Err(e) => {
                eprintln!("Got acceptable error: {e}");
            }
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn browser_adapter_invalid_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "not-a-valid-url".to_string(),
            params: json!({}),
        };

        let result = adapter.execute(input).await;

        assert!(result.is_err());
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_wait_strategy_selector() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "selector:body"
            }),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output
                        .metadata
                        .get("wait_strategy")
                        .and_then(|v| v.as_str()),
                    Some("selector_appears(body)")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported that no browser could be used.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_metadata_complete() {
        let config = BrowserAdapterConfig {
            default_stealth: StealthLevel::Advanced,
            user_agent: Some("Mozilla/5.0".to_string()),
            viewport_width: 1440,
            viewport_height: 900,
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);

        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({}),
        };

        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
                assert_eq!(
                    output
                        .metadata
                        .get("stealth_level")
                        .and_then(|v| v.as_str()),
                    Some("advanced")
                );
                assert!(output.metadata.get("viewport").is_some());
                assert!(output.metadata.get("navigation_time_ms").is_some());
                let viewport = output.metadata.get("viewport").expect("viewport exists");
                assert_eq!(viewport.get("width").and_then(|v| v.as_u64()), Some(1440));
                assert_eq!(viewport.get("height").and_then(|v| v.as_u64()), Some(900));
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted: the adapter reported that no browser could be used.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }
}