// adler_core/browser/local.rs

//! Local headless Chrome / Chromium backend.
//!
//! Launches a long-lived browser process via [`chromiumoxide`] and drives
//! it through the Chrome `DevTools` Protocol. Free to use; requires that
//! Chrome / Chromium is installed on the host. The user can pass a
//! [`LocalConfig::proxy_url`] which is forwarded to the child process as
//! `--proxy-server=<url>` so the browser inherits Adler's `--proxy` flag.
8
9use std::collections::BTreeMap;
10use std::time::{Duration, Instant};
11
12use async_trait::async_trait;
13use chromiumoxide::browser::{Browser, BrowserConfig};
14use chromiumoxide::cdp::browser_protocol::network::{Headers, SetExtraHttpHeadersParams};
15use futures::StreamExt as _;
16use serde_json::Value as JsonValue;
17use tokio::task::JoinHandle;
18use url::Url;
19
20use super::{BrowserBackend, RenderedPage};
21use crate::{Error, Result};
22
/// Options consumed by [`LocalBackend::launch`].
///
/// The default value carries no proxy, in which case no extra argument
/// is passed to the browser.
#[derive(Clone, Debug, Default)]
pub struct LocalConfig {
    /// Proxy to route browser traffic through. When set, it is passed to
    /// Chrome verbatim as `--proxy-server=<url>`. Intended for
    /// `http://…`, `https://…`, or `socks5://…` URLs (with embedded
    /// credentials).
    ///
    /// NOTE(review): whether Chrome honours credentials embedded in a
    /// `--proxy-server` URL is not established by this file — confirm.
    pub proxy_url: Option<String>,
}
30
31/// A headless Chrome instance driven over CDP. Reused across many
32/// [`fetch`](Self::fetch) calls until dropped.
33pub struct LocalBackend {
34    browser: Browser,
35    // Kept alive for the lifetime of the backend — chromiumoxide commands
36    // deadlock if this stream isn't drained.
37    handler: JoinHandle<()>,
38}
39
40impl LocalBackend {
41    /// Launch a fresh headless Chrome process.
42    ///
43    /// # Errors
44    /// Returns [`Error::BrowserSetup`] if Chrome can't be located or the
45    /// process fails to start.
46    pub async fn launch(cfg: LocalConfig) -> Result<Self> {
47        // Default builder is already headless; just add proxy if present.
48        let mut builder = BrowserConfig::builder();
49        if let Some(proxy) = cfg.proxy_url.as_deref() {
50            builder = builder.arg(format!("--proxy-server={proxy}"));
51        }
52        let config = builder
53            .build()
54            .map_err(|e| Error::BrowserSetup { message: e })?;
55        let (browser, mut handler) =
56            Browser::launch(config)
57                .await
58                .map_err(|e| Error::BrowserSetup {
59                    message: format!("launch chrome: {e}"),
60                })?;
61        // Drain handler events for the lifetime of the backend; without
62        // this, CDP commands made via `Page` block forever.
63        let handler_task = tokio::spawn(async move {
64            while let Some(res) = handler.next().await {
65                if res.is_err() {
66                    break;
67                }
68            }
69        });
70        Ok(Self {
71            browser,
72            handler: handler_task,
73        })
74    }
75}
76
77impl Drop for LocalBackend {
78    fn drop(&mut self) {
79        self.handler.abort();
80    }
81}
82
83#[async_trait]
84impl BrowserBackend for LocalBackend {
85    async fn fetch(
86        &self,
87        url: &Url,
88        headers: &BTreeMap<String, String>,
89        timeout: Duration,
90    ) -> Result<RenderedPage> {
91        let start = Instant::now();
92        let url_str = url.as_str().to_owned();
93
94        let work =
95            async {
96                let page = self.browser.new_page("about:blank").await.map_err(|e| {
97                    Error::BrowserSetup {
98                        message: format!("new_page: {e}"),
99                    }
100                })?;
101
102                // Per-site overrides (e.g. Instagram's `X-IG-App-ID` +
103                // matching `User-Agent`). UA goes through the dedicated
104                // override command; the rest via Network.setExtraHTTPHeaders.
105                if !headers.is_empty() {
106                    let mut ua: Option<&str> = None;
107                    let mut extras = serde_json::Map::new();
108                    for (k, v) in headers {
109                        if k.eq_ignore_ascii_case("user-agent") {
110                            ua = Some(v.as_str());
111                        } else {
112                            extras.insert(k.clone(), JsonValue::String(v.clone()));
113                        }
114                    }
115                    if let Some(ua) = ua {
116                        page.set_user_agent(ua)
117                            .await
118                            .map_err(|e| Error::BrowserSetup {
119                                message: format!("set_user_agent: {e}"),
120                            })?;
121                    }
122                    if !extras.is_empty() {
123                        page.execute(SetExtraHttpHeadersParams::new(Headers::new(
124                            JsonValue::Object(extras),
125                        )))
126                        .await
127                        .map_err(|e| Error::BrowserSetup {
128                            message: format!("setExtraHTTPHeaders: {e}"),
129                        })?;
130                    }
131                }
132
133                page.goto(&url_str).await.map_err(|e| Error::BrowserSetup {
134                    message: format!("goto {url_str}: {e}"),
135                })?;
136
137                // Pull the response for the navigation — gives us the real HTTP
138                // status code and the final URL after redirects.
139                let nav =
140                    page.wait_for_navigation_response()
141                        .await
142                        .map_err(|e| Error::BrowserSetup {
143                            message: format!("wait_for_navigation: {e}"),
144                        })?;
145
146                let (status, final_url) = nav.as_ref().map_or_else(
147                    || (0_u16, url.clone()),
148                    |req| {
149                        let st = req
150                            .response
151                            .as_ref()
152                            .and_then(|r| u16::try_from(r.status).ok())
153                            .unwrap_or(0);
154                        let fu = req
155                            .url
156                            .as_deref()
157                            .and_then(|s| Url::parse(s).ok())
158                            .unwrap_or_else(|| url.clone());
159                        (st, fu)
160                    },
161                );
162
163                let body = page.content().await.map_err(|e| Error::BrowserSetup {
164                    message: format!("content: {e}"),
165                })?;
166
167                // Best-effort close — even on failure we already have what we need.
168                let _ = page.close().await;
169
170                Ok::<_, Error>(RenderedPage {
171                    status,
172                    final_url,
173                    body,
174                    elapsed_ms: u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX),
175                })
176            };
177
178        tokio::time::timeout(timeout, work)
179            .await
180            .map_err(|_| Error::BrowserSetup {
181                message: format!("browser fetch timeout after {}s", timeout.as_secs()),
182            })?
183    }
184}