use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use stygian_browser::config::PoolConfig;
use stygian_browser::page::ResourceFilter;
use stygian_browser::{BrowserConfig, BrowserInstance, BrowserPool, WaitUntil};
/// Builds a fresh path under the system temp directory for a browser profile.
///
/// The final path component is `stygian-itest-<pid>-<n>`, where `<pid>` is the
/// current process id and `<n>` comes from a process-wide counter that is
/// incremented on every call, so repeated calls within one process never
/// return the same path. The directory itself is not created here.
fn unique_user_data_dir() -> PathBuf {
    static NEXT_ID: AtomicU64 = AtomicU64::new(0);

    let seq = NEXT_ID.fetch_add(1, Ordering::Relaxed);
    let dir_name = format!("stygian-itest-{}-{}", std::process::id(), seq);

    let mut dir = std::env::temp_dir();
    dir.push(dir_name);
    dir
}
/// Returns the browser configuration shared by the single-instance tests.
///
/// The configuration is headless, uses a 30 s launch timeout and a 15 s CDP
/// timeout, and points at a profile directory from `unique_user_data_dir`.
/// When the `STYGIAN_CHROME_PATH` environment variable is set (and valid
/// Unicode), its value is used as the Chrome binary path.
fn test_config() -> BrowserConfig {
    let mut config = BrowserConfig::builder().headless(true).build();

    config.user_data_dir = Some(unique_user_data_dir());
    config.launch_timeout = Duration::from_secs(30);
    config.cdp_timeout = Duration::from_secs(15);

    // Optional override so CI or local runs can select a specific binary.
    if let Some(chrome) = std::env::var("STYGIAN_CHROME_PATH").ok().map(PathBuf::from) {
        config.chrome_path = Some(chrome);
    }

    config
}
/// Launches a browser with the shared test configuration, checks both the
/// cached and the awaited health probes, and then shuts the browser down.
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn browser_launch_and_shutdown() -> Result<(), Box<dyn std::error::Error>> {
    let cfg = test_config();
    let mut browser = BrowserInstance::launch(cfg).await?;

    let cached_ok = browser.is_healthy_cached();
    assert!(cached_ok, "freshly launched browser should be healthy");

    let live_ok = browser.is_healthy().await;
    assert!(live_ok, "async health check should pass");

    browser.shutdown().await?;
    Ok(())
}
/// Navigates to `https://example.com` and checks that the page title contains
/// "example" (case-insensitively) and that the page content contains `<html`.
///
/// If the content assertion fails, the message includes the first 200
/// characters of the returned content to aid debugging.
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn browser_navigate_and_read_title() -> Result<(), Box<dyn std::error::Error>> {
    let instance = BrowserInstance::launch(test_config()).await?;
    let mut page = instance.new_page().await?;

    page.navigate(
        "https://example.com",
        WaitUntil::Selector("body".to_string()),
        Duration::from_secs(30),
    )
    .await?;

    let title = page.title().await?;
    assert!(
        title.to_lowercase().contains("example"),
        "expected title to contain 'example', got: {title:?}"
    );

    let html = page.content().await?;
    assert!(
        html.contains("<html"),
        "content should include <html>, got snippet: {}",
        // Truncate by characters rather than bytes: a byte-range slice that
        // ends inside a multi-byte character would yield no snippet at all.
        html.chars().take(200).collect::<String>()
    );

    page.close().await?;
    instance.shutdown().await?;
    Ok(())
}
/// Evaluates two JavaScript expressions on `about:blank` and checks that the
/// results come back as the requested Rust types (`f64` and `String`).
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn page_eval_returns_typed_value() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("about:blank", ready, Duration::from_secs(10))
        .await?;

    // Numeric result: compare against 3.0 with a float tolerance.
    let result: f64 = tab.eval("1 + 2").await?;
    let delta = (result - 3.0).abs();
    assert!(delta < f64::EPSILON, "1+2 should be 3, got {result}");

    // String result: the JS source is a quoted string literal.
    let greeting: String = tab.eval(r#""hello""#).await?;
    assert_eq!(greeting, "hello");

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Checks on `about:blank` that `navigator.webdriver` is either undefined or
/// `false` when read from page JavaScript.
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn fingerprint_injection_webdriver_hidden() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("about:blank", ready, Duration::from_secs(10))
        .await?;

    // The expression itself yields a boolean, so a JSON `true` is expected.
    let probe = "typeof navigator.webdriver === 'undefined' || navigator.webdriver === false";
    let wd: serde_json::Value = tab.eval(probe).await?;
    assert_eq!(
        wd,
        serde_json::Value::Bool(true),
        "navigator.webdriver should be hidden; got {wd}"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Reads `navigator.hardwareConcurrency` and `navigator.deviceMemory` on
/// `about:blank` and checks that they fall in the ranges this test accepts
/// (1..=64 cores; 4, 8 or 16 for device memory).
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn fingerprint_injection_hardware_values_sensible() -> Result<(), Box<dyn std::error::Error>>
{
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("about:blank", ready, Duration::from_secs(10))
        .await?;

    let concurrency: u32 = tab.eval("navigator.hardwareConcurrency").await?;
    let concurrency_ok = matches!(concurrency, 1..=64);
    assert!(
        concurrency_ok,
        "hardwareConcurrency {concurrency} out of sane range"
    );

    let memory: u32 = tab.eval("navigator.deviceMemory").await?;
    let memory_ok = matches!(memory, 4 | 8 | 16);
    assert!(
        memory_ok,
        "deviceMemory {memory} not in valid set {{4, 8, 16}}"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Smoke test: installs the `ResourceFilter::block_media()` filter on a page,
/// navigates to `about:blank`, and reads the title. Passing only requires that
/// none of these calls return an error.
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn resource_filter_api_does_not_error() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    // The filter is installed before navigation.
    let filter = ResourceFilter::block_media();
    tab.set_resource_filter(filter).await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("about:blank", ready, Duration::from_secs(10))
        .await?;

    // The value is irrelevant; only the absence of an error matters.
    let _ = tab.title().await?;

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn pool_acquire_release_reuse() -> Result<(), Box<dyn std::error::Error>> {
let mut config = BrowserConfig::builder()
.headless(true)
.pool(PoolConfig {
min_size: 1,
max_size: 2,
..PoolConfig::default()
})
.build();
config.launch_timeout = Duration::from_secs(30);
config.cdp_timeout = Duration::from_secs(15);
config.user_data_dir = Some(unique_user_data_dir());
let pool = BrowserPool::new(config).await?;
let handle1 = pool.acquire().await?;
let id1 = handle1
.browser()
.ok_or("handle1 has no valid browser")?
.id()
.to_string();
handle1.release().await;
let handle2 = pool.acquire().await?;
let id2 = handle2
.browser()
.ok_or("handle2 has no valid browser")?
.id()
.to_string();
assert_eq!(
id1, id2,
"pool should reuse the released browser; got {id1} then {id2}"
);
handle2.release().await;
Ok(())
}
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn pool_max_concurrency_enforced() -> Result<(), Box<dyn std::error::Error>> {
let mut config = BrowserConfig::builder()
.headless(true)
.pool(PoolConfig {
min_size: 0,
max_size: 1,
acquire_timeout: Duration::from_millis(500),
..PoolConfig::default()
})
.build();
config.launch_timeout = Duration::from_secs(30);
config.cdp_timeout = Duration::from_secs(15);
config.user_data_dir = Some(unique_user_data_dir());
let pool = BrowserPool::new(config).await?;
let _handle = pool.acquire().await?;
let result = pool.acquire().await;
assert!(
result.is_err(),
"expected error when pool is exhausted, got Ok"
);
Ok(())
}
#[tokio::test]
#[ignore = "requires real Chrome binary and external network access"]
async fn pool_stats_track_active_handles() -> Result<(), Box<dyn std::error::Error>> {
let mut config = BrowserConfig::builder()
.headless(true)
.pool(PoolConfig {
min_size: 0,
max_size: 3,
..PoolConfig::default()
})
.build();
config.launch_timeout = Duration::from_secs(30);
config.cdp_timeout = Duration::from_secs(15);
config.user_data_dir = Some(unique_user_data_dir());
let pool = BrowserPool::new(config).await?;
let stats_before = pool.stats();
assert_eq!(stats_before.active, 0);
let h1 = pool.acquire().await?;
assert_eq!(pool.stats().active, 1, "one handle acquired");
h1.release().await;
let stats_idle = pool.stats();
assert_eq!(stats_idle.active, 1, "browser still managed after release");
let h2 = pool.acquire().await?;
assert_eq!(pool.stats().active, 1, "still just one managed browser");
h2.release().await;
assert_eq!(pool.stats().active, 1, "browser back in idle pool");
Ok(())
}
/// Queries `a[href]` on example.com and checks that at least one node is
/// returned and that the first one reports an `href` attribute.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn query_selector_all_returns_nodes() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("https://example.com", ready, Duration::from_secs(30))
        .await?;

    let anchors = tab.query_selector_all("a[href]").await?;
    let found_any = !anchors.is_empty();
    assert!(found_any, "expected at least one <a href> on example.com");

    let Some(anchor) = anchors.first() else {
        return Err(std::io::Error::other("expected first <a href> node").into());
    };

    let link_target = anchor.attr("href").await?;
    assert!(
        link_target.is_some(),
        "first <a href> should have an href attribute"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Fetches the attribute map of the first `a[href]` node on example.com and
/// checks that it contains the `href` key.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn attr_map_is_exhaustive() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("https://example.com", ready, Duration::from_secs(30))
        .await?;

    let anchors = tab.query_selector_all("a[href]").await?;
    let found_any = !anchors.is_empty();
    assert!(found_any, "expected <a href> on example.com");

    let Some(anchor) = anchors.first() else {
        return Err(std::io::Error::other("expected first <a href> node").into());
    };

    let attrs = anchor.attr_map().await?;
    assert!(
        attrs.contains_key("href"),
        "attr_map should include 'href'; got keys: {:?}",
        attrs.keys().collect::<Vec<_>>()
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Takes the first `<p>` on example.com and checks that the last entry of its
/// `ancestors()` list is `"html"`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn ancestors_returns_parent_chain() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("https://example.com", ready, Duration::from_secs(30))
        .await?;

    let paragraphs = tab.query_selector_all("p").await?;
    let found_any = !paragraphs.is_empty();
    assert!(found_any, "expected at least one <p> on example.com");

    let Some(paragraph) = paragraphs.first() else {
        return Err(std::io::Error::other("expected first <p> node").into());
    };

    let chain = paragraph.ancestors().await?;
    let ends_at_root = matches!(chain.last().map(String::as_str), Some("html"));
    assert!(
        ends_at_root,
        "ancestor chain should end at 'html'; got: {chain:?}"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Takes the first `<div>` on example.com and checks that
/// `children_matching("p")` returns at least one node for it.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn children_matching_returns_nested_nodes() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("body"));
    tab.navigate("https://example.com", ready, Duration::from_secs(30))
        .await?;

    let containers = tab.query_selector_all("div").await?;
    let found_any = !containers.is_empty();
    assert!(found_any, "expected at least one <div> on example.com");

    let Some(container) = containers.first() else {
        return Err(std::io::Error::other("expected first <div> node").into());
    };

    let nested = container.children_matching("p").await?;
    let has_nested = !nested.is_empty();
    assert!(
        has_nested,
        "expected at least one <p> inside the first <div> on example.com"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Tests for `extract_all` with `#[derive(Extract)]` types; compiled only
/// when the `extract` feature is enabled.
#[cfg(feature = "extract")]
mod extract_tests {
    use super::*;
    use stygian_browser::extract::Extract;

    // Extraction target used by the tests below: one optional field filled
    // from the `href` attribute of an `a` element.
    #[derive(Extract)]
    struct Link {
        #[selector("a", attr = "href")]
        href: Option<String>,
    }

    /// Extracts a `Link` per `<p>` on example.com and checks that the result
    /// is non-empty. The href count check is a sanity bound only.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn extract_all_returns_typed_vec() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let links: Vec<Link> = tab.extract_all::<Link>("p").await?;
        let got_items = !links.is_empty();
        assert!(got_items, "expected at least one <p> on example.com");

        let with_href = links
            .iter()
            .filter_map(|link| link.href.as_ref())
            .count();
        assert!(
            with_href <= links.len(),
            "sanity check for extracted href count"
        );

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }

    /// Checks that a root selector matching nothing yields an empty `Vec`
    /// rather than an error.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn extract_all_empty_on_no_match() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let links: Vec<Link> = tab.extract_all::<Link>("div.nonexistent-xyz-9999").await?;
        assert!(
            links.is_empty(),
            "unmatched selector should yield empty Vec"
        );

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }

    /// Checks that an `Option` field whose selector matches nothing is `None`
    /// on every extracted item.
    #[tokio::test]
    #[ignore = "requires real Chrome binary and external network access"]
    async fn extract_all_optional_field_none_when_absent() -> Result<(), Box<dyn std::error::Error>>
    {
        // Local extraction target whose field selector is not expected to
        // match anything on the page.
        #[derive(Extract)]
        struct TextItem {
            #[selector("nonexistent-element-xyz-9999")]
            label: Option<String>,
        }

        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let extracted: Vec<TextItem> = tab.extract_all::<TextItem>("p").await?;
        let got_items = !extracted.is_empty();
        assert!(got_items, "expected <p> elements on example.com");

        let all_absent = extracted.iter().all(|entry| entry.label.is_none());
        assert!(
            all_absent,
            "optional field with unmatched selector should be None"
        );

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }
}
/// Wraps an HTML snippet in a `data:text/html,` URL.
///
/// ASCII letters and digits, plus a small set of punctuation and the space
/// character, are copied through unchanged so the URL stays readable. Every
/// other byte of the UTF-8 encoding of `html` is percent-encoded as `%XX`
/// (uppercase hex), so non-ASCII text becomes valid percent-encoded UTF-8.
///
/// `#`, tab, carriage return and line feed are always encoded: in a URL, `#`
/// starts the fragment, and URL parsers strip raw tabs and newlines, so
/// leaving them unescaped would truncate or alter the document.
fn data_url(html: &str) -> String {
    use std::fmt::Write as _;

    const PREFIX: &str = "data:text/html,";
    // Punctuation that is copied through unescaped.
    const PASSTHROUGH: &[u8] = b"<>/=\"' ;:.{}[]()!-_";

    let mut url = String::with_capacity(PREFIX.len() + html.len());
    url.push_str(PREFIX);

    for &byte in html.as_bytes() {
        if byte.is_ascii_alphanumeric() || PASSTHROUGH.contains(&byte) {
            url.push(char::from(byte));
        } else {
            // Writing into a `String` cannot fail; the result is ignored.
            let _ = write!(url, "%{byte:02X}");
        }
    }

    url
}
/// Loads a small list document from a data URL and checks that `parent()` of
/// the `#first` `<li>` is present and that its outer HTML contains `<ul`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn parent_returns_node() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let markup = r#"<html><body><ul><li id="first">A</li><li>B</li></ul></body></html>"#;
    let url = data_url(markup);
    let ready = WaitUntil::Selector(String::from("#first"));
    tab.navigate(&url, ready, Duration::from_secs(15)).await?;

    let matches = tab.query_selector_all("#first").await?;
    let found_any = !matches.is_empty();
    assert!(found_any, "expected #first element");

    let Some(item) = matches.first() else {
        return Err(std::io::Error::other("expected #first node").into());
    };

    let maybe_parent = item.parent().await?;
    assert!(maybe_parent.is_some(), "parent() of <li> should be Some");
    let Some(list) = maybe_parent else {
        return Err(std::io::Error::other("expected parent node").into());
    };

    let outer = list.outer_html().await?;
    // Accept either tag case in the serialized markup.
    let is_list = outer.contains("<ul") || outer.contains("<UL");
    assert!(is_list, "parent of <li> should be <ul>; got: {outer}");

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Loads a two-item list from a data URL and checks that `next_sibling()` of
/// `#a` is present and has the text `B`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn next_sibling_advances() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let markup = r#"<html><body><ul><li id="a">A</li><li id="b">B</li></ul></body></html>"#;
    let url = data_url(markup);
    let ready = WaitUntil::Selector(String::from("#a"));
    tab.navigate(&url, ready, Duration::from_secs(15)).await?;

    let matches = tab.query_selector_all("#a").await?;
    let found_any = !matches.is_empty();
    assert!(found_any, "expected #a element");

    let Some(item) = matches.first() else {
        return Err(std::io::Error::other("expected #a node").into());
    };

    let maybe_next = item.next_sibling().await?;
    assert!(
        maybe_next.is_some(),
        "next_sibling() of first <li> should be Some"
    );
    let Some(sibling) = maybe_next else {
        return Err(std::io::Error::other("expected next sibling").into());
    };

    let sibling_text = sibling.text_content().await?;
    assert_eq!(sibling_text.trim(), "B", "next sibling should have text 'B'");

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Loads a two-item list from a data URL and checks that `previous_sibling()`
/// of the first `<li>` is `None`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn previous_sibling_of_first_is_none() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let markup = r#"<html><body><ul><li id="first">A</li><li>B</li></ul></body></html>"#;
    let url = data_url(markup);
    let ready = WaitUntil::Selector(String::from("#first"));
    tab.navigate(&url, ready, Duration::from_secs(15)).await?;

    let matches = tab.query_selector_all("#first").await?;
    let found_any = !matches.is_empty();
    assert!(found_any, "expected #first element");

    let Some(item) = matches.first() else {
        return Err(std::io::Error::other("expected #first node").into());
    };

    let before = item.previous_sibling().await?;
    assert!(
        before.is_none(),
        "previous_sibling() of first child should be None"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// On `about:blank`, checks that `parent()` of the `<html>` element is `None`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn parent_of_root_is_none() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let ready = WaitUntil::Selector(String::from("html"));
    tab.navigate("about:blank", ready, Duration::from_secs(10))
        .await?;

    let roots = tab.query_selector_all("html").await?;
    let found_any = !roots.is_empty();
    assert!(found_any, "expected <html> element");

    let Some(root) = roots.first() else {
        return Err(std::io::Error::other("expected <html> node").into());
    };

    let above_root = root.parent().await?;
    assert!(
        above_root.is_none(),
        "parent() of <html> should be None (no parentElement)"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Loads a one-row table from a data URL, starts at the `#label` cell, and
/// checks that its next sibling cell has the text `$9.99`.
#[tokio::test]
#[ignore = "requires Chrome"]
async fn sibling_chain_lateral_extraction() -> Result<(), Box<dyn std::error::Error>> {
    let browser = BrowserInstance::launch(test_config()).await?;
    let mut tab = browser.new_page().await?;

    let markup = concat!(
        "<html><body>",
        "<table><tr>",
        "<td id='label'>Price</td>",
        "<td id='value'>$9.99</td>",
        "</tr></table>",
        "</body></html>"
    );
    let url = data_url(markup);
    let ready = WaitUntil::Selector(String::from("#label"));
    tab.navigate(&url, ready, Duration::from_secs(15)).await?;

    let labels = tab.query_selector_all("#label").await?;
    let found_any = !labels.is_empty();
    assert!(found_any, "expected #label <td>");

    let Some(label_cell) = labels.first() else {
        return Err(std::io::Error::other("expected #label node").into());
    };

    let maybe_value = label_cell.next_sibling().await?;
    assert!(
        maybe_value.is_some(),
        "next sibling of label cell should be Some"
    );
    let Some(value_cell) = maybe_value else {
        return Err(std::io::Error::other("expected value sibling cell").into());
    };

    let cell_text = value_cell.text_content().await?;
    assert_eq!(
        cell_text.trim(),
        "$9.99",
        "lateral extraction should yield the price cell's text"
    );

    tab.close().await?;
    browser.shutdown().await?;
    Ok(())
}
/// Tests for `find_similar` and element fingerprints; compiled only when the
/// `similarity` feature is enabled.
#[cfg(feature = "similarity")]
mod similarity_tests {
    use super::*;
    use stygian_browser::similarity::SimilarityConfig;

    /// With a threshold of 0.0, `find_similar` on the first `<p>` of
    /// example.com must return at least one match.
    #[tokio::test]
    #[ignore = "requires Chrome"]
    async fn find_similar_returns_same_element() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let paragraphs = tab.query_selector_all("p").await?;
        let found_any = !paragraphs.is_empty();
        assert!(found_any, "expected at least one <p> on example.com");

        let Some(reference) = paragraphs.first() else {
            return Err(std::io::Error::other("expected reference <p> node").into());
        };
        let options = SimilarityConfig {
            threshold: 0.0,
            max_results: 50,
        };
        let similar = tab.find_similar(reference, options).await?;

        let has_match = !similar.is_empty();
        assert!(
            has_match,
            "find_similar with threshold 0.0 should return at least one match"
        );

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }

    /// With a threshold above 1.0 (1.1), `find_similar` must return no
    /// results.
    #[tokio::test]
    #[ignore = "requires Chrome"]
    async fn find_similar_threshold_filters() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let paragraphs = tab.query_selector_all("p").await?;
        let found_any = !paragraphs.is_empty();
        assert!(found_any, "expected at least one <p> on example.com");

        let Some(reference) = paragraphs.first() else {
            return Err(std::io::Error::other("expected reference <p> node").into());
        };
        let options = SimilarityConfig {
            threshold: 1.1,
            max_results: 10,
        };
        let similar = tab.find_similar(reference, options).await?;

        assert!(
            similar.is_empty(),
            "threshold > 1.0 should filter all candidates; got {} results",
            similar.len()
        );

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }

    /// With a threshold of 0.5, `find_similar` must return at least one
    /// result, and the results must be ordered by non-increasing score.
    #[tokio::test]
    #[ignore = "requires Chrome"]
    async fn find_similar_finds_peer_elements() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let paragraphs = tab.query_selector_all("p").await?;
        let found_any = !paragraphs.is_empty();
        assert!(found_any, "expected at least one <p> on example.com");

        let Some(reference) = paragraphs.first() else {
            return Err(std::io::Error::other("expected reference <p> node").into());
        };
        let options = SimilarityConfig {
            threshold: 0.5,
            max_results: 20,
        };
        let similar = tab.find_similar(reference, options).await?;

        let has_match = !similar.is_empty();
        assert!(
            has_match,
            "expected at least one similar element above threshold 0.5"
        );

        // Compare each result with its successor to verify the ordering.
        for (earlier, later) in similar.iter().zip(similar.iter().skip(1)) {
            assert!(
                earlier.score >= later.score,
                "results must be ordered by score descending; got {:.3} then {:.3}",
                earlier.score,
                later.score
            );
        }

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }

    /// Fingerprints the first `<p>` on example.com and checks that the tag is
    /// `p` and that both `classes` and `attr_names` are already sorted.
    #[tokio::test]
    #[ignore = "requires Chrome"]
    async fn fingerprint_captures_tag_and_classes() -> Result<(), Box<dyn std::error::Error>> {
        let browser = BrowserInstance::launch(test_config()).await?;
        let mut tab = browser.new_page().await?;

        let ready = WaitUntil::Selector(String::from("body"));
        tab.navigate("https://example.com", ready, Duration::from_secs(30))
            .await?;

        let paragraphs = tab.query_selector_all("p").await?;
        let found_any = !paragraphs.is_empty();
        assert!(found_any, "expected at least one <p> on example.com");

        let Some(paragraph) = paragraphs.first() else {
            return Err(std::io::Error::other("expected first <p> node").into());
        };

        let fp = paragraph.fingerprint().await?;
        assert_eq!(fp.tag, "p", "fingerprint tag should be 'p'");

        // Sorting a copy and comparing shows the original was already sorted.
        let expected_classes = {
            let mut copy = fp.classes.clone();
            copy.sort();
            copy
        };
        assert_eq!(fp.classes, expected_classes, "classes must be sorted");

        let expected_attrs = {
            let mut copy = fp.attr_names.clone();
            copy.sort();
            copy
        };
        assert_eq!(fp.attr_names, expected_attrs, "attr_names must be sorted");

        tab.close().await?;
        browser.shutdown().await?;
        Ok(())
    }
}