use std::fmt;
use std::time::{Duration, Instant};
use async_trait::async_trait;
use serde_json::{Value, json};
use crate::domain::error::{Result, ServiceError, StygianError};
use crate::ports::{ScrapingService, ServiceInput, ServiceOutput};
/// Condition the adapter waits for before it reads a page's HTML.
///
/// Built per request by `WaitStrategy::from_params` from the `wait_strategy`
/// and `wait_ms` parameters. The derived `Default` is
/// [`WaitStrategy::DomContentLoaded`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum WaitStrategy {
/// Wait for the DOM-content-loaded condition
/// (request value `"dom_content_loaded"`).
#[default]
DomContentLoaded,
/// Wait for the network-idle condition (request value `"network_idle"`).
NetworkIdle,
/// Wait until the contained selector appears
/// (request value `"selector:<selector>"`).
SelectorAppears(String),
/// Navigate with the DOM-content-loaded condition, then sleep for the
/// contained duration (request key `wait_ms`, in milliseconds).
Fixed(Duration),
}
impl WaitStrategy {
    /// Derive the wait strategy for a request from its JSON parameters.
    ///
    /// Resolution order:
    /// 1. `"wait_strategy": "network_idle"` or `"dom_content_loaded"` selects
    ///    the matching variant.
    /// 2. `"wait_strategy": "selector:<selector>"` yields
    ///    [`WaitStrategy::SelectorAppears`] holding everything after the
    ///    first `selector:` prefix.
    /// 3. Otherwise, an unsigned-integer `"wait_ms"` yields
    ///    [`WaitStrategy::Fixed`] with that many milliseconds.
    /// 4. Otherwise the result is [`WaitStrategy::DomContentLoaded`].
    fn from_params(params: &Value) -> Self {
        let requested = params.get("wait_strategy").and_then(Value::as_str);

        match requested {
            Some("network_idle") => return Self::NetworkIdle,
            Some("dom_content_loaded") => return Self::DomContentLoaded,
            _ => {}
        }

        // `strip_prefix` removes exactly one leading `selector:`.
        // `trim_start_matches` would keep stripping repeated prefixes and
        // corrupt a selector that itself starts with `selector:` (for
        // example the element selector `selector:hover`).
        if let Some(selector) = requested.and_then(|raw| raw.strip_prefix("selector:")) {
            return Self::SelectorAppears(selector.to_string());
        }

        params
            .get("wait_ms")
            .and_then(Value::as_u64)
            .map_or(Self::DomContentLoaded, |ms| {
                Self::Fixed(Duration::from_millis(ms))
            })
    }
}
impl fmt::Display for WaitStrategy {
    /// Write the label used for this strategy in response metadata.
    ///
    /// The two unit variants print a fixed label; the selector variant embeds
    /// its selector and the fixed variant embeds its length in whole
    /// milliseconds.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::NetworkIdle => f.write_str("network_idle"),
            Self::DomContentLoaded => f.write_str("dom_content_loaded"),
            Self::Fixed(pause) => {
                let millis = pause.as_millis();
                write!(f, "fixed_{millis}ms")
            }
            Self::SelectorAppears(css) => write!(f, "selector_appears({css})"),
        }
    }
}
/// Stealth level label associated with a scrape.
///
/// Parsed from the `stealth_level` request parameter by
/// `StealthLevel::from_params`; the derived `Default` is
/// [`StealthLevel::Basic`].
///
/// NOTE(review): the visible code only reports a level in response metadata;
/// what each level changes in the browser is not shown here — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StealthLevel {
/// Request value `"none"`.
None,
/// Default level; also chosen for missing or unrecognised request values.
#[default]
Basic,
/// Request value `"advanced"`.
Advanced,
}
impl StealthLevel {
    /// Read the stealth level from a request's JSON parameters.
    ///
    /// Only the string values `"none"` and `"advanced"` are recognised for the
    /// `stealth_level` key; a missing key, a non-string value, or any other
    /// string yields [`StealthLevel::Basic`].
    fn from_params(params: &Value) -> Self {
        let requested = params.get("stealth_level").and_then(Value::as_str);
        match requested {
            Some("none") => Self::None,
            Some("advanced") => Self::Advanced,
            Some(_) | None => Self::Basic,
        }
    }

    /// Lower-case label for this level, as emitted in response metadata.
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Self::Advanced => "advanced",
            Self::Basic => "basic",
            Self::None => "none",
        }
    }
}
/// Adapter-wide settings for [`BrowserAdapter`].
///
/// Construct with `BrowserAdapterConfig::default()` and override individual
/// fields with struct-update syntax.
#[derive(Debug, Clone)]
pub struct BrowserAdapterConfig {
/// Timeout applied when a request carries no `timeout_ms` parameter.
pub timeout: Duration,
/// Intended cap on concurrent browser work.
/// NOTE(review): not read anywhere in the visible code — confirm where it
/// is enforced.
pub max_concurrent: usize,
/// Wait strategy intended as the adapter-level default.
pub default_wait: WaitStrategy,
/// Stealth level reported as `stealth_level` in response metadata.
pub default_stealth: StealthLevel,
/// Resource-blocking switch.
/// NOTE(review): not read anywhere in the visible code — confirm where it
/// is applied.
pub block_resources: bool,
/// Forwarded to the browser configuration's `headless` flag.
pub headless: bool,
/// Optional user-agent override.
/// NOTE(review): not read anywhere in the visible code — confirm where it
/// is applied.
pub user_agent: Option<String>,
/// Viewport width reported in response metadata.
/// NOTE(review): presumably pixels; the visible code does not pass it to
/// the browser — confirm.
pub viewport_width: u32,
/// Viewport height reported in response metadata.
/// NOTE(review): presumably pixels; the visible code does not pass it to
/// the browser — confirm.
pub viewport_height: u32,
}
impl Default for BrowserAdapterConfig {
    /// Baseline configuration: 30-second timeout, up to 5 concurrent jobs,
    /// DOM-content-loaded wait, basic stealth, resource blocking on, headless
    /// on, no user-agent override, and a 1920x1080 viewport.
    fn default() -> Self {
        let (viewport_width, viewport_height) = (1920, 1080);
        let timeout = Duration::from_secs(30);

        Self {
            headless: true,
            block_resources: true,
            user_agent: None,
            viewport_width,
            viewport_height,
            max_concurrent: 5,
            timeout,
            default_stealth: StealthLevel::Basic,
            default_wait: WaitStrategy::DomContentLoaded,
        }
    }
}
/// Scraping adapter that loads a URL through a browser and returns the page
/// HTML together with JSON metadata.
///
/// Implements `ScrapingService` under the service name `"browser"`.
#[derive(Clone)]
pub struct BrowserAdapter {
/// Adapter-wide settings supplied at construction time.
config: BrowserAdapterConfig,
}
impl BrowserAdapter {
    /// Create an adapter that uses `BrowserAdapterConfig::default()`.
    pub fn new() -> Self {
        Self::with_config(BrowserAdapterConfig::default())
    }

    /// Create an adapter from an explicit configuration.
    pub const fn with_config(config: BrowserAdapterConfig) -> Self {
        Self { config }
    }

    /// Pick the timeout for one request.
    ///
    /// An unsigned-integer `timeout_ms` parameter (milliseconds) wins;
    /// otherwise the configured `timeout` is used.
    fn resolve_timeout(&self, params: &Value) -> Duration {
        let requested_ms = params.get("timeout_ms").and_then(Value::as_u64);
        requested_ms.map_or(self.config.timeout, Duration::from_millis)
    }

    /// Load `url` in a browser and return `(html, metadata)`.
    ///
    /// Steps: build a browser pool, acquire a browser within `timeout`, open a
    /// page, navigate with the condition derived from `wait` (also bounded by
    /// `timeout`), sleep afterwards for a `Fixed` wait, then read the page
    /// content.
    ///
    /// # Errors
    ///
    /// Every failure along the way is reported as
    /// `StygianError::Service(ServiceError::Unavailable(..))`.
    #[allow(clippy::option_if_let_else)]
    #[cfg(feature = "browser")]
    async fn navigate_with_browser(
        &self,
        url: &str,
        wait: &WaitStrategy,
        timeout: Duration,
    ) -> Result<(String, Value)> {
        use stygian_browser::page::WaitUntil;
        use stygian_browser::{BrowserConfig, BrowserPool};

        let started = Instant::now();
        // All failures in this function share the same error shape.
        let unavailable =
            |reason: String| StygianError::Service(ServiceError::Unavailable(reason));

        let pool = BrowserPool::new(BrowserConfig {
            headless: self.config.headless,
            ..BrowserConfig::default()
        })
        .await
        .map_err(|e| unavailable(e.to_string()))?;

        // Outer error: the acquisition deadline elapsed.
        // Inner error: the pool itself refused the request.
        let handle = tokio::time::timeout(timeout, pool.acquire())
            .await
            .map_err(|_| unavailable(format!("Browser acquisition timeout after {timeout:?}")))?
            .map_err(|e| unavailable(format!("Browser pool exhausted or unavailable: {e}")))?;

        let instance = handle.browser().ok_or_else(|| {
            unavailable("Failed to get browser instance after acquisition".to_string())
        })?;

        let mut page = instance
            .new_page()
            .await
            .map_err(|e| unavailable(e.to_string()))?;

        // A fixed delay has no browser-side condition of its own: navigate
        // with DOM-content-loaded and sleep once navigation has finished.
        let wait_condition = match wait {
            WaitStrategy::NetworkIdle => WaitUntil::NetworkIdle,
            WaitStrategy::SelectorAppears(selector) => WaitUntil::Selector(selector.clone()),
            WaitStrategy::DomContentLoaded | WaitStrategy::Fixed(_) => {
                WaitUntil::DomContentLoaded
            }
        };

        if let Err(e) = page.navigate(url, wait_condition, timeout).await {
            return Err(unavailable(format!("Browser navigation failed: {e}")));
        }

        if let WaitStrategy::Fixed(pause) = wait {
            tokio::time::sleep(*pause).await;
        }

        let html = page
            .content()
            .await
            .map_err(|e| unavailable(e.to_string()))?;

        let elapsed_ms = started.elapsed().as_millis();
        let metadata = json!({
            "url": url,
            "navigation_time_ms": elapsed_ms,
            "wait_strategy": wait.to_string(),
            "stealth_level": self.config.default_stealth.as_str(),
            "viewport": {
                "width": self.config.viewport_width,
                "height": self.config.viewport_height
            },
            "rendered": true,
        });

        Ok((html, metadata))
    }

    /// Fallback used when the crate is built without the `browser` feature.
    ///
    /// # Errors
    ///
    /// Always returns `ServiceError::Unavailable` naming the requested URL.
    #[cfg(not(feature = "browser"))]
    async fn navigate_with_browser(
        &self,
        url: &str,
        _wait: &WaitStrategy,
        _timeout: Duration,
    ) -> Result<(String, Value)> {
        let reason = format!(
            "stygian-graph was compiled without the 'browser' feature; \
             cannot render JavaScript for URL: {url}"
        );
        Err(StygianError::Service(ServiceError::Unavailable(reason)))
    }
}
impl Default for BrowserAdapter {
fn default() -> Self {
Self::new()
}
}
#[async_trait]
impl ScrapingService for BrowserAdapter {
    /// Render `input.url` in a browser and return its HTML plus metadata.
    ///
    /// Recognised `input.params` keys:
    /// - `wait_strategy` / `wait_ms`: choose the wait strategy. When neither
    ///   key is present the adapter's configured `default_wait` is used.
    /// - `timeout_ms`: per-request timeout; falls back to the configured
    ///   `timeout`.
    /// - `stealth_level`: parsed but not applied (see the comment below).
    ///
    /// # Errors
    ///
    /// - `ServiceError::Timeout(ms)` when the whole operation exceeds the
    ///   resolved timeout plus a 5-second grace period; `ms` is the resolved
    ///   timeout in milliseconds.
    /// - Any error returned by the navigation step is passed through.
    async fn execute(&self, input: ServiceInput) -> Result<ServiceOutput> {
        let params = &input.params;

        // Honour the configured default when the request says nothing about
        // waiting, mirroring how `timeout_ms` falls back to `config.timeout`.
        let wait_requested =
            params.get("wait_strategy").is_some() || params.get("wait_ms").is_some();
        let wait = if wait_requested {
            WaitStrategy::from_params(params)
        } else {
            self.config.default_wait.clone()
        };

        // Parsed but currently unused: navigation takes no stealth argument
        // and the metadata reports the configured `default_stealth`.
        let _stealth = StealthLevel::from_params(params);

        let timeout = self.resolve_timeout(params);
        // Grace period on top of the inner timeouts. Saturating so that an
        // effectively unbounded configured timeout cannot panic on overflow.
        let deadline = timeout.saturating_add(Duration::from_secs(5));

        let (html, metadata) = tokio::time::timeout(
            deadline,
            self.navigate_with_browser(&input.url, &wait, timeout),
        )
        .await
        .map_err(|_| {
            StygianError::Service(ServiceError::Timeout(
                u64::try_from(timeout.as_millis()).unwrap_or(u64::MAX),
            ))
        })??;

        Ok(ServiceOutput {
            data: html,
            metadata,
        })
    }

    /// Service name under which this adapter is identified.
    fn name(&self) -> &'static str {
        "browser"
    }
}
/// Unit tests for parameter parsing and configuration, plus ignored
/// integration tests that need a real browser (and, for most, network access).
#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::panic,
    clippy::redundant_closure_for_method_calls
)]
mod tests {
    use super::*;

    /// The adapter reports the service name `"browser"`.
    #[test]
    fn test_adapter_default_name() {
        let adapter = BrowserAdapter::new();
        assert_eq!(adapter.name(), "browser");
    }

    /// `"dom_content_loaded"` selects the DOM-content-loaded strategy.
    #[test]
    fn test_wait_strategy_from_params_dom() {
        let params = json!({ "wait_strategy": "dom_content_loaded" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::DomContentLoaded
        );
    }

    /// `"network_idle"` selects the network-idle strategy.
    #[test]
    fn test_wait_strategy_from_params_network_idle() {
        let params = json!({ "wait_strategy": "network_idle" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::NetworkIdle
        );
    }

    /// A `selector:` prefix yields a selector strategy without the prefix.
    #[test]
    fn test_wait_strategy_from_params_selector() {
        let params = json!({ "wait_strategy": "selector:#main-content" });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::SelectorAppears("#main-content".to_string())
        );
    }

    /// `wait_ms` alone yields a fixed delay of that many milliseconds.
    #[test]
    fn test_wait_strategy_from_params_fixed_ms() {
        let params = json!({ "wait_ms": 500u64 });
        assert_eq!(
            WaitStrategy::from_params(&params),
            WaitStrategy::Fixed(Duration::from_millis(500))
        );
    }

    /// Known stealth labels map to their variants; a missing key maps to basic.
    #[test]
    fn test_stealth_level_from_params() {
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "advanced" })),
            StealthLevel::Advanced
        );
        assert_eq!(
            StealthLevel::from_params(&json!({ "stealth_level": "none" })),
            StealthLevel::None
        );
        assert_eq!(StealthLevel::from_params(&json!({})), StealthLevel::Basic);
    }

    /// A `timeout_ms` parameter overrides the configured timeout.
    #[test]
    fn test_resolve_timeout_override() {
        let adapter = BrowserAdapter::new();
        let params = json!({ "timeout_ms": 5000u64 });
        assert_eq!(
            adapter.resolve_timeout(&params),
            Duration::from_millis(5000)
        );
    }

    /// Without `timeout_ms` the configured default (30 s) is used.
    #[test]
    fn test_resolve_timeout_default() {
        let adapter = BrowserAdapter::new();
        let params = json!({});
        assert_eq!(adapter.resolve_timeout(&params), Duration::from_secs(30));
    }

    /// Struct-update syntax overrides only the named configuration fields.
    #[test]
    fn test_config_builder() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(60),
            max_concurrent: 3,
            block_resources: false,
            ..BrowserAdapterConfig::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        assert_eq!(adapter.config.timeout, Duration::from_secs(60));
        assert_eq!(adapter.config.max_concurrent, 3);
    }

    /// `execute` either succeeds with non-empty output or reports
    /// `Unavailable`; any other error fails the test.
    #[allow(clippy::panic)]
    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn test_execute_returns_service_output_or_unavailable() {
        let adapter = BrowserAdapter::new();
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({ "wait_strategy": "dom_content_loaded" }),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert!(!output.data.is_empty(), "output data should not be empty");
                assert!(output.metadata.is_object());
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted outcome: the browser could not be used here.
            }
            Err(e) => panic!("unexpected error: {e}"),
        }
    }

    /// A successful navigation reports `rendered`, a navigation time and the URL.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_navigates_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "dom_content_loaded",
                "timeout_ms": 30000
            }),
        };
        let result = adapter.execute(input).await;
        match result {
            Ok(output) => {
                assert!(!output.data.is_empty());
                assert!(
                    output
                        .metadata
                        .get("rendered")
                        .and_then(|v| v.as_bool())
                        .unwrap_or(false)
                );
                assert!(output.metadata.get("navigation_time_ms").is_some());
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted outcome: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    /// A 2-second timeout against a 10-second endpoint must not succeed.
    #[tokio::test]
    #[ignore = "Requires Chrome installed and network access; may panic if browser unavailable"]
    async fn browser_adapter_respects_timeout() {
        let config = BrowserAdapterConfig {
            timeout: Duration::from_secs(2),
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        let input = ServiceInput {
            url: "https://httpbin.org/delay/10".to_string(),
            params: json!({"timeout_ms": 2000}),
        };
        let result = adapter.execute(input).await;
        match result {
            Err(StygianError::Service(ServiceError::Unavailable(msg))) => {
                assert!(
                    msg.contains("timeout")
                        || msg.contains("unavailable")
                        || msg.contains("Chrome")
                        || msg.contains("exhausted")
                );
            }
            Err(StygianError::Service(ServiceError::Timeout(_))) => {
                // Expected: the outer deadline fired.
            }
            Ok(_) => {
                panic!("Expected timeout or unavailable, got success");
            }
            Err(e) => {
                eprintln!("Got acceptable error: {e}");
            }
        }
    }

    /// A string that is not a URL produces an error.
    #[tokio::test]
    #[ignore = "requires real Chrome binary"]
    async fn browser_adapter_invalid_url() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);
        let input = ServiceInput {
            url: "not-a-valid-url".to_string(),
            params: json!({}),
        };
        let result = adapter.execute(input).await;
        assert!(result.is_err());
    }

    /// A selector wait is echoed back in the `wait_strategy` metadata field.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_wait_strategy_selector() {
        let config = BrowserAdapterConfig::default();
        let adapter = BrowserAdapter::with_config(config);
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({
                "wait_strategy": "selector:body"
            }),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output
                        .metadata
                        .get("wait_strategy")
                        .and_then(|v| v.as_str()),
                    Some("selector_appears(body)")
                );
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted outcome: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }

    /// Metadata reflects the configured stealth level and viewport size.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn browser_adapter_metadata_complete() {
        let config = BrowserAdapterConfig {
            default_stealth: StealthLevel::Advanced,
            user_agent: Some("Mozilla/5.0".to_string()),
            viewport_width: 1440,
            viewport_height: 900,
            ..Default::default()
        };
        let adapter = BrowserAdapter::with_config(config);
        let input = ServiceInput {
            url: "https://example.com".to_string(),
            params: json!({}),
        };
        match adapter.execute(input).await {
            Ok(output) => {
                assert_eq!(
                    output.metadata.get("url").and_then(|v| v.as_str()),
                    Some("https://example.com")
                );
                assert_eq!(
                    output
                        .metadata
                        .get("stealth_level")
                        .and_then(|v| v.as_str()),
                    Some("advanced")
                );
                assert!(output.metadata.get("viewport").is_some());
                assert!(output.metadata.get("navigation_time_ms").is_some());
                let viewport = output.metadata.get("viewport").expect("viewport exists");
                assert_eq!(viewport.get("width").and_then(|v| v.as_u64()), Some(1440));
                assert_eq!(viewport.get("height").and_then(|v| v.as_u64()), Some(900));
            }
            Err(StygianError::Service(ServiceError::Unavailable(_))) => {
                // Accepted outcome: the browser could not be used here.
            }
            Err(e) => panic!("Unexpected error: {e}"),
        }
    }
}