use std::collections::BTreeMap;
use std::time::{Duration, Instant};
use async_trait::async_trait;
use chromiumoxide::browser::{Browser, BrowserConfig};
use chromiumoxide::cdp::browser_protocol::network::{Headers, SetExtraHttpHeadersParams};
use futures::StreamExt as _;
use serde_json::Value as JsonValue;
use tokio::task::JoinHandle;
use url::Url;
use super::{BrowserBackend, RenderedPage};
use crate::{Error, Result};
/// Launch-time settings for [`LocalBackend`].
///
/// The `Default` value configures no proxy.
#[derive(Clone, Debug, Default)]
pub struct LocalConfig {
    /// Optional proxy address. When set, `LocalBackend::launch` passes it to
    /// the browser verbatim as `--proxy-server=<value>`.
    pub proxy_url: Option<String>,
}
/// Browser backend that owns a browser instance started via `Browser::launch`.
///
/// Constructed with `LocalBackend::launch`. Dropping the value aborts the
/// background handler task (see the `Drop` impl).
pub struct LocalBackend {
// Browser handle returned by `Browser::launch`; used to open a new page per fetch.
browser: Browser,
// Tokio task that polls the browser's handler stream until it yields an
// error or ends. Aborted when the backend is dropped.
handler: JoinHandle<()>,
}
impl LocalBackend {
pub async fn launch(cfg: LocalConfig) -> Result<Self> {
let mut builder = BrowserConfig::builder();
if let Some(proxy) = cfg.proxy_url.as_deref() {
builder = builder.arg(format!("--proxy-server={proxy}"));
}
let config = builder
.build()
.map_err(|e| Error::BrowserSetup { message: e })?;
let (browser, mut handler) =
Browser::launch(config)
.await
.map_err(|e| Error::BrowserSetup {
message: format!("launch chrome: {e}"),
})?;
let handler_task = tokio::spawn(async move {
while let Some(res) = handler.next().await {
if res.is_err() {
break;
}
}
});
Ok(Self {
browser,
handler: handler_task,
})
}
}
/// Cleanup for [`LocalBackend`].
impl Drop for LocalBackend {
/// Aborts the background handler task spawned in `LocalBackend::launch`
/// when the backend is dropped.
fn drop(&mut self) {
self.handler.abort();
}
}
#[async_trait]
impl BrowserBackend for LocalBackend {
async fn fetch(
&self,
url: &Url,
headers: &BTreeMap<String, String>,
timeout: Duration,
) -> Result<RenderedPage> {
let start = Instant::now();
let url_str = url.as_str().to_owned();
let work =
async {
let page = self.browser.new_page("about:blank").await.map_err(|e| {
Error::BrowserSetup {
message: format!("new_page: {e}"),
}
})?;
if !headers.is_empty() {
let mut ua: Option<&str> = None;
let mut extras = serde_json::Map::new();
for (k, v) in headers {
if k.eq_ignore_ascii_case("user-agent") {
ua = Some(v.as_str());
} else {
extras.insert(k.clone(), JsonValue::String(v.clone()));
}
}
if let Some(ua) = ua {
page.set_user_agent(ua)
.await
.map_err(|e| Error::BrowserSetup {
message: format!("set_user_agent: {e}"),
})?;
}
if !extras.is_empty() {
page.execute(SetExtraHttpHeadersParams::new(Headers::new(
JsonValue::Object(extras),
)))
.await
.map_err(|e| Error::BrowserSetup {
message: format!("setExtraHTTPHeaders: {e}"),
})?;
}
}
page.goto(&url_str).await.map_err(|e| Error::BrowserSetup {
message: format!("goto {url_str}: {e}"),
})?;
let nav =
page.wait_for_navigation_response()
.await
.map_err(|e| Error::BrowserSetup {
message: format!("wait_for_navigation: {e}"),
})?;
let (status, final_url) = nav.as_ref().map_or_else(
|| (0_u16, url.clone()),
|req| {
let st = req
.response
.as_ref()
.and_then(|r| u16::try_from(r.status).ok())
.unwrap_or(0);
let fu = req
.url
.as_deref()
.and_then(|s| Url::parse(s).ok())
.unwrap_or_else(|| url.clone());
(st, fu)
},
);
let body = page.content().await.map_err(|e| Error::BrowserSetup {
message: format!("content: {e}"),
})?;
let _ = page.close().await;
Ok::<_, Error>(RenderedPage {
status,
final_url,
body,
elapsed_ms: u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
})
};
tokio::time::timeout(timeout, work)
.await
.map_err(|_| Error::BrowserSetup {
message: format!("browser fetch timeout after {}s", timeout.as_secs()),
})?
}
}