use std::time::Duration;
use reqwest::Client;
use super::clean::to_plain_text;
/// User-Agent header value sent by [`fetch_static`]; it mimics a desktop
/// Chrome browser on macOS.
const BROWSER_UA: &str = concat!(
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ",
    "AppleWebKit/537.36 (KHTML, like Gecko) ",
    "Chrome/124.0.0.0 Safari/537.36",
);

/// Number of non-whitespace visible characters below which [`is_js_shell`]
/// treats a script-bearing page as a JavaScript shell.
const JS_SHELL_TEXT_THRESHOLD: usize = 200;
/// Downloads `url` with a single HTTP GET and returns the response body as text.
///
/// A fresh client is built for every call, configured with the
/// [`BROWSER_UA`] user agent, an overall timeout of `timeout_secs` seconds,
/// and a redirect policy limited to 10 hops.
///
/// # Errors
///
/// Returns a human-readable message when:
/// - the HTTP client cannot be constructed,
/// - sending the request fails (timeouts and connection failures get their
///   own wording),
/// - the response status is not a success status, or
/// - the response body cannot be read as text.
pub async fn fetch_static(url: &str, timeout_secs: u64) -> Result<String, String> {
    let builder = Client::builder()
        .user_agent(BROWSER_UA)
        .redirect(reqwest::redirect::Policy::limited(10))
        .timeout(Duration::from_secs(timeout_secs));
    let client = match builder.build() {
        Ok(client) => client,
        Err(e) => return Err(format!("Failed to build HTTP client: {e}")),
    };

    // Classify transport failures so callers can tell a timeout from a
    // refused/unreachable connection from everything else.
    let resp = match client.get(url).send().await {
        Ok(resp) => resp,
        Err(e) if e.is_timeout() => {
            return Err(format!("Request timed out after {timeout_secs}s"));
        }
        Err(e) if e.is_connect() => return Err(format!("Connection failed: {e}")),
        Err(e) => return Err(format!("Request failed: {e}")),
    };

    let status = resp.status();
    if status.is_success() {
        return match resp.text().await {
            Ok(body) => Ok(body),
            Err(e) => Err(format!("Failed to read response body: {e}")),
        };
    }

    // Non-success status: report the numeric code and its canonical phrase.
    Err(format!(
        "HTTP {} {} for {url}",
        status.as_u16(),
        status.canonical_reason().unwrap_or("Unknown")
    ))
}
/// Heuristically reports whether `html` looks like a JavaScript shell page.
///
/// Returns `true` only when both conditions hold:
/// 1. the markup contains `<script` (matched ASCII case-insensitively), and
/// 2. the text produced by `to_plain_text` has fewer than
///    [`JS_SHELL_TEXT_THRESHOLD`] non-whitespace characters.
///
/// Markup without any `<script` occurrence is never reported as a shell,
/// however little text it contains.
pub fn is_js_shell(html: &str) -> bool {
    const SCRIPT_TAG: &[u8] = b"<script";

    // Case-insensitive scan over the raw bytes: avoids allocating a lowercased
    // copy of the whole document. Byte-wise matching is safe because the
    // needle is pure ASCII and ASCII bytes never occur inside a multi-byte
    // UTF-8 sequence.
    let has_script = html
        .as_bytes()
        .windows(SCRIPT_TAG.len())
        .any(|window| window.eq_ignore_ascii_case(SCRIPT_TAG));
    if !has_script {
        return false;
    }

    let visible = to_plain_text(html);
    // Only "fewer than the threshold?" matters, so stop counting once the
    // threshold is reached instead of walking the entire text.
    let visible_len = visible
        .chars()
        .filter(|c| !c.is_whitespace())
        .take(JS_SHELL_TEXT_THRESHOLD)
        .count();
    visible_len < JS_SHELL_TEXT_THRESHOLD
}
/// Loads `url` in the browser page tied to `session_id` and returns the
/// page's HTML content after navigation.
///
/// After navigating, the function waits up to 3 seconds for the network to
/// become almost idle. A failure of that wait is only logged at debug level;
/// the page content is returned regardless.
///
/// # Errors
///
/// Returns a human-readable message when the session page cannot be obtained,
/// when navigation fails, or when the page content cannot be retrieved.
#[cfg(feature = "browser")]
pub async fn fetch_rendered(
    manager: &crate::brain::tools::browser::BrowserManager,
    session_id: uuid::Uuid,
    url: &str,
) -> Result<String, String> {
    let page = match manager.get_or_create_session_page(session_id).await {
        Ok(page) => page,
        Err(e) => return Err(format!("Browser error: {e}")),
    };

    if let Err(e) = page.goto(url).await {
        return Err(format!("Navigation failed: {e}"));
    }

    // Best-effort settle period: a failed wait is not fatal, we still try to
    // read whatever the page holds at this point.
    match page
        .wait_for_network_almost_idle_with_timeout(Duration::from_secs(3))
        .await
    {
        Ok(_) => {}
        Err(e) => {
            tracing::debug!("web_scrape: network-idle wait timed out for {url} (proceeding): {e}");
        }
    }

    match page.content().await {
        Ok(html) => Ok(html),
        Err(e) => Err(format!("Failed to get rendered HTML: {e}")),
    }
}