use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use serde_json::{Value, json};
use stygian_browser::config::{PoolConfig, StealthLevel};
use stygian_browser::{BrowserConfig, BrowserPool, WaitUntil};
/// Current Unix time in whole seconds.
///
/// Returns 0 if the system clock reports a time before the Unix epoch
/// (i.e. `duration_since` fails), so the caller never has to handle an error.
fn epoch_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
/// Example CLI: scrape a single URL with a pooled stealth browser and print a
/// JSON summary (final URL, status, title, meta description, headings, links,
/// text excerpt, timing) to stdout.
///
/// Diagnostics go to stderr so stdout stays cleanly pipeable (e.g. to `jq`).
/// Exits with an error if the URL argument is missing, the pool cannot start,
/// or navigation fails; per-field extraction is best-effort and falls back to
/// empty defaults instead of aborting the whole scrape.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // First positional argument is the target URL.
    let url = std::env::args().nth(1).ok_or(
        "Usage: scraper_cli <URL>\n e.g. cargo run --example scraper_cli -p stygian-browser -- https://example.com",
    )?;
    eprintln!("[scraper] target : {url}");

    // Small pool: this example only ever uses one page, but min_size 1 keeps
    // a warm browser ready and max_size 2 leaves headroom.
    let config = BrowserConfig::builder()
        .headless(true)
        .stealth_level(StealthLevel::Advanced)
        .pool(PoolConfig {
            min_size: 1,
            max_size: 2,
            idle_timeout: Duration::from_secs(60),
            acquire_timeout: Duration::from_secs(30),
        })
        .build();

    eprintln!("[scraper] warming browser pool...");
    let pool = BrowserPool::new(config).await?;
    let handle = pool.acquire().await?;
    let browser = handle
        .browser()
        .ok_or("browser pool returned an expired handle")?;
    let mut page = browser.new_page().await?;

    eprintln!("[scraper] navigating...");
    let t0 = Instant::now();
    page.navigate(&url, WaitUntil::NetworkIdle, Duration::from_secs(45))
        .await?;
    // u128 -> u64 conversion: saturate instead of panicking on an
    // (implausibly) long load time.
    let load_time_ms = u64::try_from(t0.elapsed().as_millis()).unwrap_or(u64::MAX);
    eprintln!("[scraper] loaded in {load_time_ms}ms");

    // Best-effort metadata: any individual failure degrades to a default.
    let final_url = page.url().await.unwrap_or_else(|_| url.clone());
    let title = page.title().await.unwrap_or_default();
    // status_code() yields a Result<Option<_>> ("unknown" is a valid success);
    // flatten both layers to 0 — clearer than `unwrap_or(None).unwrap_or(0)`.
    let status_code = page.status_code().ok().flatten().unwrap_or(0);

    // Prefer <meta name="description">, then OpenGraph, then empty.
    let description: String = page
        .eval(
            "document.querySelector('meta[name=\"description\"]')?.content \
             || document.querySelector('meta[property=\"og:description\"]')?.content \
             || ''",
        )
        .await
        .unwrap_or_default();

    // First 10 h1-h3 headings with their level.
    let headings: Value = page
        .eval(
            "Array.from(document.querySelectorAll('h1,h2,h3')).slice(0, 10)\
             .map(h => ({ level: h.tagName.toLowerCase(), text: h.textContent.trim() }))",
        )
        .await
        .unwrap_or(json!([]));

    // Sample 40 anchors, keep only absolute http(s) links, cap at 20.
    let links: Value = page
        .eval(
            "Array.from(document.querySelectorAll('a[href]')).slice(0, 40)\
             .map(a => ({ href: a.href, text: a.textContent.trim().slice(0, 120) }))\
             .filter(l => l.href.startsWith('http'))\
             .slice(0, 20)",
        )
        .await
        .unwrap_or(json!([]));

    // Whitespace-collapsed excerpt of the visible body text.
    let text_excerpt: String = page
        .eval("(document.body?.innerText || '').trim().replace(/\\s+/g, ' ').slice(0, 800)")
        .await
        .unwrap_or_default();

    let result = json!({
        "url": url,
        "final_url": final_url,
        "status_code": status_code,
        "title": title,
        "description": description,
        "headings": headings,
        "links": links,
        "text_excerpt": text_excerpt,
        "load_time_ms": load_time_ms,
        "scraped_at": epoch_secs(),
    });
    println!("{}", serde_json::to_string_pretty(&result)?);

    // Cleanup is best-effort: the process is exiting either way.
    page.close().await.ok();
    handle.release().await;
    eprintln!("[scraper] done.");
    Ok(())
}