use {
crate::error::{LlmWebError, Result},
headless_chrome::{Browser, LaunchOptions, LaunchOptionsBuilder, Tab},
std::{ffi::OsStr, sync::Arc},
};
/// Wrapper around a `headless_chrome` [`Browser`] used to open pages in new tabs.
///
/// Construct it with `LlmWebBrower::new`, then call `open` for each URL.
pub struct LlmWebBrower {
/// The underlying browser handle; every call to `open` creates its tab from it.
pub browser: Browser,
}
impl LlmWebBrower {
pub async fn new() -> Result<LlmWebBrower> {
Ok(Self {
browser: stealthy_browser().await?,
})
}
pub async fn open(&self, url: &str) -> Result<Arc<Tab>> {
let tab = self
.browser
.new_tab()
.map_err(|e| LlmWebError::Browser(format!("new_tab: {e}")))?;
tab.navigate_to(url)
.map_err(|e| LlmWebError::Browser(format!("navigate_to: {e}")))?;
tab.wait_until_navigated()
.map_err(|e| LlmWebError::Browser(format!("wait_until_navigated: {e}")))?;
let html = tab
.get_content()
.map_err(|e| LlmWebError::Browser(format!("get_content: {e}")))?;
if is_js_blocked(&html) {
return Err(LlmWebError::JsBlocked);
}
Ok(tab)
}
}
/// Evaluates a JavaScript expression in `tab` and parses its result as JSON.
///
/// The expression is wrapped in an async arrow function that awaits it and
/// passes the result through `JSON.stringify`, so the page hands back a single
/// string, which is then parsed with `serde_json`.
///
/// # Errors
/// - `LlmWebError::Browser` when the evaluation itself fails, or when it yields
///   no string value.
/// - `LlmWebError::SerdeJson` when the returned string is not valid JSON.
pub fn evaluate_json(tab: &Arc<Tab>, expression: &str) -> Result<serde_json::Value> {
    let script = format!(
        "(async () => {{ return JSON.stringify(await ({expression})); }})()"
    );

    let outcome = match tab.evaluate(&script, true) {
        Ok(outcome) => outcome,
        Err(e) => return Err(LlmWebError::Browser(format!("evaluate: {e}"))),
    };

    // Only a string result is usable; a missing or non-string value is an error.
    let json_text = match outcome.value.as_ref().and_then(|value| value.as_str()) {
        Some(text) => text,
        None => {
            return Err(LlmWebError::Browser(
                "evaluate returned no string value".into(),
            ))
        }
    };

    serde_json::from_str(json_text).map_err(LlmWebError::SerdeJson)
}
/// Reports whether `html` contains one of the known "JavaScript required" notices.
///
/// Matching is a plain, case-sensitive substring search against a fixed list of
/// marker strings.
pub fn is_js_blocked(html: &str) -> bool {
    const BLOCK_MARKERS: [&str; 2] = [
        "<h1>JavaScript is not available.</h1>",
        "Please enable JavaScript",
    ];
    BLOCK_MARKERS.iter().any(|marker| html.contains(*marker))
}
async fn stealthy_browser() -> Result<Browser> {
let opts = browser_launch_options().await?;
Browser::new(opts).map_err(|e| LlmWebError::Browser(format!("Init Browser error: {e}")))
}
/// Builds the Chrome launch options: headless, 1200x800 window, a fixed set of
/// command-line switches, and an optional `--proxy-server` switch.
///
/// The proxy is taken from the first of `HTTPS_PROXY`, `HTTP_PROXY`,
/// `ALL_PROXY`, `https_proxy`, `http_proxy`, `all_proxy` that holds a non-blank
/// value. A variable that is set but empty is skipped, so it neither produces a
/// useless `--proxy-server=` switch nor hides a valid lower-priority variable.
///
/// # Errors
/// Returns `LlmWebError::Browser` when the options builder rejects the
/// configuration.
async fn browser_launch_options<'a>() -> Result<LaunchOptions<'a>> {
    let mut args: Vec<&OsStr> = vec![
        OsStr::new("--disable-blink-features=AutomationControlled"),
        OsStr::new("--no-sandbox"),
        OsStr::new("--disable-web-security"),
        OsStr::new(
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
        ),
        OsStr::new("--lang=en-US,en;q=0.9"),
        OsStr::new("--disable-dev-shm-usage"),
        OsStr::new("--disable-gpu"),
        OsStr::new("--disable-infobars"),
        OsStr::new("--no-first-run"),
    ];

    if let Some(proxy) = first_proxy_from_env() {
        // The options only borrow their arguments, and nothing owned by this
        // function survives the return, so the formatted switch is leaked on
        // purpose to obtain a `'static` reference. This costs one small
        // allocation per call that is never freed.
        let arg: &'static str = Box::leak(format!("--proxy-server={proxy}").into_boxed_str());
        args.push(OsStr::new(arg));
    }

    LaunchOptionsBuilder::default()
        .headless(true)
        .window_size(Some((1200, 800)))
        .args(args)
        .build()
        .map_err(|e| LlmWebError::Browser(e.to_string()))
}

/// Returns the first non-blank proxy setting found in the environment,
/// trimmed of surrounding whitespace, or `None` when no variable provides one.
///
/// Variables that are unset, not valid Unicode, or blank are skipped.
fn first_proxy_from_env() -> Option<String> {
    // Listed in priority order; upper-case names win over lower-case ones.
    const PROXY_VARS: [&str; 6] = [
        "HTTPS_PROXY",
        "HTTP_PROXY",
        "ALL_PROXY",
        "https_proxy",
        "http_proxy",
        "all_proxy",
    ];

    PROXY_VARS
        .iter()
        .filter_map(|name| std::env::var(*name).ok())
        .map(|value| value.trim().to_owned())
        .find(|value| !value.is_empty())
}