use anyhow::{Context, Result};
#[cfg(feature = "browser")]
use futures::StreamExt;
use std::time::Duration;
#[cfg(not(feature = "browser"))]
use tracing::debug;
#[cfg(feature = "browser")]
use tracing::{debug, info, warn};
#[cfg(feature = "browser")]
use crate::auth::Credential;
/// Handle to a Chrome instance driven over the DevTools protocol.
///
/// Built by [`BrowserLogin::connect`]; only compiled when the `browser`
/// feature is enabled.
#[cfg(feature = "browser")]
pub struct BrowserLogin {
/// The `chromiumoxide` browser connection every page in this type is opened on.
browser: chromiumoxide::Browser,
}
#[cfg(feature = "browser")]
impl BrowserLogin {
/// Attaches to an already-running Chrome via its remote-debugging endpoint.
///
/// `port` defaults to 9222 when `None`. A background task is spawned to
/// drive the CDP handler stream; handler errors are logged and the task
/// keeps polling until the stream ends.
///
/// # Errors
///
/// Returns an error when the connection to `http://localhost:{port}` fails.
/// The message names the port that was actually tried, so the hint stays
/// accurate for non-default ports.
pub async fn connect(port: Option<u16>) -> Result<Self> {
    const DEFAULT_DEBUG_PORT: u16 = 9222;

    let port = port.unwrap_or(DEFAULT_DEBUG_PORT);
    debug!("Connecting to Chrome on port {}", port);

    let (browser, mut handler) =
        chromiumoxide::Browser::connect(format!("http://localhost:{port}"))
            .await
            .with_context(|| {
                format!(
                    "Failed to connect to Chrome. Make sure Chrome is running with --remote-debugging-port={port}"
                )
            })?;

    // The handler stream must be polled for the connection to make progress.
    tokio::spawn(async move {
        while let Some(event) = handler.next().await {
            if let Err(e) = event {
                warn!("CDP handler error: {}", e);
            }
        }
    });

    info!("Connected to Chrome on port {}", port);
    Ok(Self { browser })
}
/// Opens `url` in a new tab, optionally auto-fills the login form, and
/// returns the cookies visible to that tab afterwards.
///
/// When a CAPTCHA widget is detected the call pauses for one minute so it
/// can be solved by hand in the browser window. The tab is left open.
///
/// # Errors
///
/// Fails when the tab cannot be created or navigated, when form filling
/// reports an error, or when the cookies cannot be read.
pub async fn login(&self, url: &str, credential: Option<&Credential>) -> Result<Vec<Cookie>> {
    info!("Starting browser login for {}", url);

    let tab = self
        .browser
        .new_page(url)
        .await
        .context("Failed to create new browser page")?;
    tab.wait_for_navigation()
        .await
        .context("Failed to navigate to login page")?;
    debug!("Page loaded: {}", url);

    // Credentials are optional: without them the user logs in manually.
    if let Some(cred) = credential {
        self.fill_login_form(&tab, cred).await?;
    }

    if self.detect_captcha(&tab).await? {
        warn!("⚠️ CAPTCHA detected - please solve it in the browser window");
        warn!(" Waiting 60 seconds for manual intervention...");
        tokio::time::sleep(Duration::from_mins(1)).await;
    }

    let harvested = self.extract_cookies(&tab).await?;
    info!(
        "Browser login complete, extracted {} cookies",
        harvested.len()
    );
    Ok(harvested)
}
async fn fill_login_form(
&self,
page: &chromiumoxide::Page,
credential: &Credential,
) -> Result<()> {
debug!("Attempting to fill login form");
let username_selectors = [
"input[name='username']",
"input[name='email']",
"input[name='user']",
"input[type='email']",
"input[id='username']",
"input[id='email']",
];
let password_selectors = [
"input[name='password']",
"input[type='password']",
"input[id='password']",
];
if let Some(ref username) = credential.username {
for selector in username_selectors {
if let Ok(element) = page.find_element(selector).await {
debug!("Found username field: {}", selector);
element.click().await?;
element
.type_str(username)
.await
.context("Failed to type username")?;
break;
}
}
}
if let Some(ref password) = credential.password {
for selector in password_selectors {
if let Ok(element) = page.find_element(selector).await {
debug!("Found password field: {}", selector);
element.click().await?;
element
.type_str(password)
.await
.context("Failed to type password")?;
break;
}
}
}
let submit_selectors = [
"button[type='submit']",
"input[type='submit']",
"button:has-text('Sign in')",
"button:has-text('Log in')",
"button:has-text('Login')",
];
for selector in submit_selectors {
if let Ok(element) = page.find_element(selector).await {
debug!("Found submit button: {}", selector);
element.click().await?;
tokio::time::sleep(Duration::from_secs(2)).await;
break;
}
}
Ok(())
}
/// Reports whether `page` contains an element matching any known CAPTCHA marker.
///
/// Each marker is looked up in order and the first one that resolves wins.
/// A failed lookup is treated as "not present", so this never returns `Err`.
async fn detect_captcha(&self, page: &chromiumoxide::Page) -> Result<bool> {
    const MARKERS: [&str; 5] = [
        ".g-recaptcha",
        ".h-captcha",
        ".cf-turnstile",
        "iframe[src*='recaptcha']",
        "iframe[src*='hcaptcha']",
    ];

    let mut found = false;
    for marker in MARKERS {
        if page.find_element(marker).await.is_err() {
            continue;
        }
        debug!("CAPTCHA detected: {}", marker);
        found = true;
        break;
    }
    Ok(found)
}
/// Reads the cookies available to `page` and converts them to [`Cookie`] values.
///
/// Only name, value, domain, path and the `secure` / `http_only` flags are
/// carried over from the browser's cookie records.
///
/// # Errors
///
/// Fails when the browser cannot return the page's cookies.
pub async fn extract_cookies(&self, page: &chromiumoxide::Page) -> Result<Vec<Cookie>> {
    let raw = page
        .get_cookies()
        .await
        .context("Failed to get cookies from browser")?;

    let mut jar = Vec::with_capacity(raw.len());
    for entry in raw {
        jar.push(Cookie {
            name: entry.name,
            value: entry.value,
            domain: entry.domain,
            path: entry.path,
            secure: entry.secure,
            http_only: entry.http_only,
        });
    }
    Ok(jar)
}
/// Serialises `cookies` as `name=value` pairs joined by `"; "`.
///
/// An empty slice produces an empty string. Names and values are emitted
/// verbatim, with no escaping.
pub fn cookies_to_header(cookies: &[Cookie]) -> String {
    let mut header = String::new();
    for (index, cookie) in cookies.iter().enumerate() {
        if index > 0 {
            header.push_str("; ");
        }
        header.push_str(&cookie.name);
        header.push('=');
        header.push_str(&cookie.value);
    }
    header
}
pub async fn render_markdown(&self, url: &str) -> Result<String> {
let page = self
.browser
.new_page(url)
.await
.context("failed to open browser page for rung-3 render")?;
page.wait_for_navigation()
.await
.context("failed to navigate the browser page")?;
tokio::time::sleep(Duration::from_millis(800)).await;
let html = page
.content()
.await
.context("failed to read rendered DOM from the browser")?;
let _ = page.close().await;
Self::dom_to_markdown(&html, url)
}
pub async fn render_with_cookies(&self, url: &str, cookies: &[Cookie]) -> Result<String> {
use chromiumoxide::cdp::browser_protocol::network::{CookieParam, SetCookiesParams};
let page = self
.browser
.new_page("about:blank")
.await
.context("failed to open blank browser page for authed render")?;
if !cookies.is_empty() {
let params: Vec<CookieParam> = cookies
.iter()
.map(|c| {
let mut p = CookieParam::new(c.name.clone(), c.value.clone());
p.url = Some(url.to_string());
p.domain = Some(c.domain.clone());
p.path = Some(c.path.clone());
p.secure = Some(c.secure);
p.http_only = Some(c.http_only);
p
})
.collect();
debug!(
"injecting {} session cookie(s) before navigation",
params.len()
);
page.execute(SetCookiesParams::new(params))
.await
.context("failed to inject session cookies into the browser context")?;
}
page.goto(url)
.await
.context("failed to navigate to the authed render target")?;
page.wait_for_navigation()
.await
.context("failed waiting for navigation on the authed render target")?;
tokio::time::sleep(COOKIE_RENDER_SETTLE).await;
let html = page
.content()
.await
.context("failed to read rendered DOM from the authed page")?;
let _ = page.close().await;
Self::dom_to_markdown(&html, url)
}
/// Converts rendered `html` to markdown and passes it through the fetch-output guard.
///
/// `url` is handed to the converter and to the guard, which is invoked with
/// the `"task_browser"` label.
///
/// # Errors
///
/// Returns the guard's error (described in its context as a YARA screen
/// rejection) when the markdown is refused.
fn dom_to_markdown(html: &str, url: &str) -> Result<String> {
    let converted = crate::content::html::html_to_markdown_with_url(html, Some(url));
    crate::security::guard_fetch_output(&converted, "task_browser", url)
        .context("YARA screen rejected the rendered page")
}
}
/// Pause applied by `BrowserLogin::render_with_cookies` between navigation
/// completing and the DOM being read (2.5 seconds).
#[cfg(feature = "browser")]
const COOKIE_RENDER_SETTLE: Duration = Duration::from_millis(2_500);
/// A browser cookie reduced to the fields this module reads and writes.
///
/// Unlike `BrowserLogin`, this type is available without the `browser` feature.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Cookie {
/// Cookie name, used as the left-hand side of `name=value`.
pub name: String,
/// Cookie value, emitted verbatim.
pub value: String,
/// Domain the cookie is scoped to; `cookies_for_host` fills this with a
/// leading-dot parent domain.
pub domain: String,
/// Path the cookie is scoped to.
pub path: String,
/// Value of the cookie's `Secure` flag.
pub secure: bool,
/// Value of the cookie's `HttpOnly` flag.
pub http_only: bool,
}
/// Builds one [`Cookie`] per entry of a flat name/value map, scoped to the
/// host of `target_url`.
///
/// Every cookie gets the domain computed by `scope_domain` from the URL's
/// host, path `/`, `secure = true` and `http_only = false`. Output order
/// follows the map's iteration order.
#[must_use]
pub fn cookies_for_host<S: std::hash::BuildHasher>(
    target_url: &str,
    flat: &std::collections::HashMap<String, String, S>,
) -> Vec<Cookie> {
    let host = crate::util::extract_domain(target_url);
    let scoped = scope_domain(&host);

    let mut jar = Vec::with_capacity(flat.len());
    for (name, value) in flat {
        jar.push(Cookie {
            name: name.clone(),
            value: value.clone(),
            domain: scoped.clone(),
            path: "/".to_string(),
            secure: true,
            http_only: false,
        });
    }
    jar
}
/// Derives the cookie domain to use for `host`.
///
/// Host names are collapsed to their last two labels and prefixed with a
/// dot (`www.x.com` -> `.x.com`, `x.com` -> `.x.com`), so the cookie also
/// applies to sibling subdomains. Empty labels (for example from a trailing
/// dot) are ignored.
///
/// IP literals, including bracketed IPv6 such as `[::1]`, are returned
/// unchanged: keeping only the last two "labels" of an address would yield a
/// domain that matches nothing.
///
/// A host with no usable labels (such as the empty string) is returned as-is.
///
/// Known limitation: no public-suffix list is consulted, so `a.co.uk`
/// becomes `.co.uk`.
fn scope_domain(host: &str) -> String {
    // Strip IPv6 brackets only for the parse; the original text is returned.
    let bare = host
        .strip_prefix('[')
        .and_then(|rest| rest.strip_suffix(']'))
        .unwrap_or(host);
    if bare.parse::<std::net::IpAddr>().is_ok() {
        return host.to_string();
    }

    let labels: Vec<&str> = host.split('.').filter(|l| !l.is_empty()).collect();
    // Keep at most the last two labels.
    let parent = labels[labels.len().saturating_sub(2)..].join(".");
    if parent.is_empty() {
        host.to_string()
    } else {
        format!(".{parent}")
    }
}
/// Opens `url` in the default browser and blocks the calling thread for up to
/// `duration`.
///
/// When `cookie_probe` is supplied it is sampled once right after launch and
/// then after every sleep; the wait ends early as soon as a sample differs
/// from the first one. Without a probe the full `duration` elapses.
///
/// The probe is polled about once per second. The final sleep is shortened
/// to the time remaining, so the call does not run past `duration` by a
/// whole polling interval.
///
/// # Errors
///
/// Fails only when the browser cannot be launched.
pub fn open_and_wait(
    url: &str,
    duration: Duration,
    cookie_probe: Option<&dyn Fn() -> String>,
) -> Result<()> {
    launch_default_browser(url).context("failed to launch default browser")?;

    let initial = cookie_probe.map(|probe| probe());
    let start = std::time::Instant::now();
    let tick = Duration::from_secs(1);

    loop {
        let remaining = duration.saturating_sub(start.elapsed());
        if remaining.is_zero() {
            return Ok(());
        }
        // Never sleep past the deadline.
        std::thread::sleep(tick.min(remaining));

        if let (Some(before), Some(probe)) = (initial.as_ref(), cookie_probe) {
            if probe() != *before {
                debug!("cookie store changed — aborting wait early");
                return Ok(());
            }
        }
    }
}
/// Opens `url` with the platform's default browser.
///
/// With the `browser-launcher` feature this delegates to `webbrowser::open`.
/// Without it, a platform command is run (`open` on macOS, `xdg-open` on
/// other Unix systems, `cmd /c start "" <url>` on Windows) and its exit
/// status is checked.
///
/// # Errors
///
/// Fails when the launcher cannot be spawned, when it exits unsuccessfully,
/// or when no launcher exists for the target platform.
fn launch_default_browser(url: &str) -> Result<()> {
#[cfg(feature = "browser-launcher")]
{
webbrowser::open(url).context("webbrowser::open failed")?;
}
#[cfg(not(feature = "browser-launcher"))]
{
// Exactly one of the following `cmd` bindings survives cfg-stripping on a
// supported platform.
#[cfg(target_os = "macos")]
let cmd = {
let mut c = std::process::Command::new("open");
c.arg(url);
c
};
#[cfg(all(unix, not(target_os = "macos")))]
let cmd = {
let mut c = std::process::Command::new("xdg-open");
c.arg(url);
c
};
// NOTE(review): cmd.exe parses its command line with its own rules; a URL
// containing `&` may be split at that character — confirm and harden.
// The empty "" argument occupies `start`'s window-title slot.
#[cfg(target_os = "windows")]
let cmd = {
let mut c = std::process::Command::new("cmd");
c.arg("/c").arg("start").arg("").arg(url);
c
};
#[cfg(not(any(target_os = "macos", unix, target_os = "windows")))]
anyhow::bail!("no default-browser launcher available on this platform");
#[cfg(any(target_os = "macos", unix, target_os = "windows"))]
{
// Rebind mutably: `status()` needs `&mut Command`.
let mut cmd = cmd;
// `status()` blocks until the launcher process itself exits.
let status = cmd.status().context("failed to spawn browser launcher")?;
if !status.success() {
anyhow::bail!("browser launcher exited with {status}");
}
}
}
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Builds an `example.com` cookie on path `/` with the given name, value and flags.
    fn cookie(name: &str, value: &str, secure: bool, http_only: bool) -> Cookie {
        Cookie {
            name: name.to_owned(),
            value: value.to_owned(),
            domain: "example.com".to_owned(),
            path: "/".to_owned(),
            secure,
            http_only,
        }
    }

    /// Subdomains and bare domains both map to the dot-prefixed parent domain.
    #[test]
    fn scope_domain_collapses_subdomain_to_dot_parent() {
        for host in ["www.x.com", "mobile.x.com", "x.com"] {
            assert_eq!(super::scope_domain(host), ".x.com");
        }
    }

    /// A single flat entry becomes one secure cookie scoped to the parent domain.
    #[test]
    fn cookies_for_host_synthesizes_scoped_cookies() {
        let flat = HashMap::from([("auth_token".to_string(), "secret".to_string())]);

        let cookies = cookies_for_host("https://x.com/i/article/123", &flat);

        assert_eq!(cookies.len(), 1);
        let only = cookies.first().expect("exactly one cookie");
        assert_eq!(only.name, "auth_token");
        assert_eq!(only.value, "secret");
        assert_eq!(only.domain, ".x.com");
        assert_eq!(only.path, "/");
        assert!(only.secure);
    }

    /// An empty map produces no cookies.
    #[test]
    fn cookies_for_host_empty_map_yields_no_cookies() {
        let flat: HashMap<String, String> = HashMap::new();
        assert!(cookies_for_host("https://x.com/i/article/1", &flat).is_empty());
    }

    /// Multiple cookies are joined with `"; "` in slice order.
    #[cfg(feature = "browser")]
    #[test]
    fn test_cookies_to_header() {
        let cookies = [
            cookie("session", "abc123", true, true),
            cookie("token", "xyz789", true, false),
        ];
        assert_eq!(
            BrowserLogin::cookies_to_header(&cookies),
            "session=abc123; token=xyz789"
        );
    }

    /// No cookies means an empty header string.
    #[cfg(feature = "browser")]
    #[test]
    fn test_empty_cookies() {
        assert_eq!(BrowserLogin::cookies_to_header(&[]), "");
    }

    /// A lone cookie is rendered without any separator.
    #[cfg(feature = "browser")]
    #[test]
    fn test_single_cookie() {
        let cookies = [cookie("auth", "token123", true, true)];
        assert_eq!(BrowserLogin::cookies_to_header(&cookies), "auth=token123");
    }

    /// A clone compares equal to its source.
    #[test]
    fn test_cookie_equality() {
        let original = cookie("test", "value", true, true);
        let duplicate = original.clone();
        assert_eq!(original, duplicate);
    }

    /// The `Debug` rendering includes the name and the value.
    #[test]
    fn test_cookie_debug() {
        let rendered = format!("{:?}", cookie("test", "value", true, true));
        for needle in ["test", "value"] {
            assert!(rendered.contains(needle));
        }
    }

    /// Cloning preserves name, value and domain.
    #[test]
    fn test_cookie_clone() {
        let original = cookie("session", "abc", false, false);
        let duplicate = original.clone();
        assert_eq!(original.name, duplicate.name);
        assert_eq!(original.value, duplicate.value);
        assert_eq!(original.domain, duplicate.domain);
    }

    /// Exercises the polling pattern used by `open_and_wait` against a probe
    /// whose value changes after its first call. This does not call
    /// `open_and_wait` itself, so no browser is launched.
    #[test]
    fn test_open_and_wait_aborts_early_on_cookie_change() {
        use std::sync::atomic::{AtomicUsize, Ordering};

        let calls = AtomicUsize::new(0);
        let probe = || -> String {
            match calls.fetch_add(1, Ordering::SeqCst) {
                0 => "a".into(),
                _ => "changed".into(),
            }
        };

        let baseline = probe();
        let deadline = std::time::Instant::now() + Duration::from_secs(3);
        let mut changed = false;
        while std::time::Instant::now() < deadline {
            std::thread::sleep(Duration::from_millis(5));
            if probe() != baseline {
                changed = true;
                break;
            }
        }

        assert!(changed, "probe must detect change within timeout");
    }

    /// Live end-to-end check; ignored by default because it needs a real Chrome.
    /// Cookies are read from `NAB_TEST_X_COOKIES` as a `k=v; k2=v2` header.
    #[cfg(feature = "browser")]
    #[tokio::test]
    #[ignore = "requires a running Chrome on :9222 with a logged-in x.com session"]
    async fn render_with_cookies_live_x_article() {
        let url = "https://x.com/i/article/123";

        let flat: HashMap<String, String> = std::env::var("NAB_TEST_X_COOKIES")
            .map(|header| {
                header
                    .split(';')
                    .filter_map(|pair| pair.trim().split_once('='))
                    .map(|(k, v)| (k.to_string(), v.to_string()))
                    .collect()
            })
            .unwrap_or_default();
        let cookies = cookies_for_host(url, &flat);

        let browser = BrowserLogin::connect(Some(9222))
            .await
            .expect("connect to Chrome on :9222");
        let markdown = browser
            .render_with_cookies(url, &cookies)
            .await
            .expect("authed DOM render should succeed");

        assert!(
            !markdown.trim().is_empty(),
            "rendered article markdown must not be empty"
        );
    }
}