use crate::error::{Result, Web2llmError};
use chromiumoxide::browser::{Browser, BrowserConfig};
use futures::StreamExt;
use tempfile::tempdir;
use tokio::sync::OnceCell;
use url::Url;
/// Loads `url` in a headless browser tab and returns the page's HTML.
///
/// The browser is launched at most once per `browser_cell`: the first
/// successful call initializes the cell and later calls reuse the same
/// browser, opening a fresh page for each request.
///
/// The page is closed before returning on both the success and the failure
/// path, so a failed navigation does not leave a tab open in the shared
/// browser. Errors from closing the page are ignored.
///
/// # Errors
///
/// Returns [`Web2llmError::Http`] with a descriptive message if the temporary
/// profile directory cannot be created, the browser configuration cannot be
/// built, the browser fails to launch, the page cannot be created, navigation
/// fails, or the page content cannot be read.
pub(crate) async fn get_html(url: &Url, browser_cell: &OnceCell<Browser>) -> Result<String> {
    let browser = browser_cell
        .get_or_try_init(|| async {
            let tmp_dir = tempdir()
                .map_err(|e| Web2llmError::Http(format!("Failed to create temp dir: {}", e)))?;

            // `keep()` hands back the path and detaches it from the `TempDir`
            // guard, so the profile directory is not deleted when this closure
            // returns while the browser is still using it.
            let config = BrowserConfig::builder()
                .no_sandbox()
                .user_data_dir(tmp_dir.keep())
                .build()
                .map_err(|e| {
                    Web2llmError::Http(format!("Failed to build browser config: {}", e))
                })?;

            let (browser, mut handler) = Browser::launch(config)
                .await
                .map_err(|e| Web2llmError::Http(format!("Failed to launch browser: {}", e)))?;

            // Drive the browser's event handler on a background task; the task
            // ends when the stream finishes or yields its first error.
            tokio::spawn(async move {
                while let Some(event) = handler.next().await {
                    if event.is_err() {
                        break;
                    }
                }
            });

            Ok::<Browser, Web2llmError>(browser)
        })
        .await?;

    let page = browser
        .new_page(url.as_str())
        .await
        .map_err(|e| Web2llmError::Http(format!("Failed to create page: {}", e)))?;

    // Capture the outcome instead of returning early with `?`, so the page is
    // closed below even when navigation or content extraction fails.
    let html: Result<String> = async {
        page.wait_for_navigation()
            .await
            .map_err(|e| Web2llmError::Http(format!("Navigation failed: {}", e)))?;
        page.content()
            .await
            .map_err(|e| Web2llmError::Http(format!("Failed to get content: {}", e)))
    }
    .await;

    // Best-effort cleanup: a failure to close must not mask the real result.
    page.close().await.ok();

    html
}