Skip to main content

adler_core/browser/
flaresolverr.rs

1//! [FlareSolverr][1] backend — a self-hosted HTTP service that runs
2//! Chrome internally and exposes a REST API for fetching
3//! Cloudflare-protected pages.
4//!
5//! Trade-off vs. the other two backends:
6//!
7//! - [`LocalBackend`](super::local::LocalBackend): local Chrome
8//!   process you maintain. Free, but each scan boots Chrome which
9//!   adds ~1 s setup latency and the local IP is fingerprintable
10//!   so big CF sites can still block it.
11//! - [`BrowserbaseBackend`](super::browserbase::BrowserbaseBackend):
12//!   cloud sessions with residential IPs. Reliable but pays per
13//!   session-minute; cost matters when probing 200+ CF-tagged
14//!   sites in one scan.
15//! - **`FlareSolverrBackend`** *(this module)*: long-running
16//!   `FlareSolverr` instance — typically in Docker — that
17//!   maintains warm browser sessions and answers HTTP requests in
18//!   seconds. Self-hosted, free, no residential IP. Suitable for
19//!   the Cloudflare-WAF subset (`protection: ["cloudflare"]`)
20//!   where the challenge is JS-only; for CF Firewall /
21//!   TLS-fingerprint sites you still want the residential backend.
22//!
23//! ## Setup
24//!
25//! Run the official image: `docker run -d -p 8191:8191
26//! ghcr.io/flaresolverr/flaresolverr:latest`. Then point Adler at
27//! the service:
28//!
29//! ```bash
30//! adler --flaresolverr http://localhost:8191 alice
31//! ```
32//!
33//! [1]: https://github.com/FlareSolverr/FlareSolverr
34
35use std::collections::BTreeMap;
36use std::time::{Duration, Instant};
37
38use async_trait::async_trait;
39use serde::{Deserialize, Serialize};
40use url::Url;
41
42use super::{BrowserBackend, RenderedPage};
43use crate::error::{Error, Result};
44
45/// A [`FlareSolverr`][1] backend pointed at a running instance.
46///
47/// Cheap to clone — the underlying [`reqwest::Client`] is
48/// reference-counted internally.
49///
50/// [1]: https://github.com/FlareSolverr/FlareSolverr
51#[derive(Clone)]
52pub struct FlareSolverrBackend {
53    endpoint: Url,
54    client: reqwest::Client,
55}
56
57impl std::fmt::Debug for FlareSolverrBackend {
58    // reqwest::Client isn't Debug-friendly; we expose only the
59    // endpoint, which is the operationally interesting field.
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        f.debug_struct("FlareSolverrBackend")
62            .field("endpoint", &self.endpoint.as_str())
63            .finish_non_exhaustive()
64    }
65}
66
67impl FlareSolverrBackend {
68    /// Build a backend that POSTs to `<endpoint>/v1` for each
69    /// fetch. The endpoint should be the *base* URL of the
70    /// `FlareSolverr` service — e.g. `http://localhost:8191` —
71    /// without the `/v1` suffix; this method appends it.
72    ///
73    /// # Errors
74    /// Returns [`Error::BrowserSetup`] when `endpoint` is not a
75    /// valid `http(s)` URL or when the inner reqwest client can't
76    /// be built.
77    pub fn new(endpoint: &str) -> Result<Self> {
78        let original = endpoint.to_owned();
79        let mut parsed = Url::parse(endpoint).map_err(|e| Error::BrowserSetup {
80            message: format!("flaresolverr endpoint {original:?}: {e}"),
81        })?;
82        if !matches!(parsed.scheme(), "http" | "https") {
83            return Err(Error::BrowserSetup {
84                message: format!("flaresolverr endpoint {original:?}: must be http(s)"),
85            });
86        }
87        // Always POST to /v1 — ensure the base path ends with a
88        // slash so URL composition lands at `<endpoint>/v1`.
89        if !parsed.path().ends_with('/') {
90            let new_path = format!("{}/", parsed.path());
91            parsed.set_path(&new_path);
92        }
93        let client = reqwest::Client::builder()
94            // FlareSolverr already enforces its own maxTimeout;
95            // we add a small ceiling so a hung service doesn't
96            // wedge the whole scan.
97            .timeout(Duration::from_secs(120))
98            .build()
99            .map_err(|e| Error::BrowserSetup {
100                message: format!("flaresolverr reqwest client: {e}"),
101            })?;
102        Ok(Self {
103            endpoint: parsed,
104            client,
105        })
106    }
107
108    fn v1_endpoint(&self) -> Result<Url> {
109        self.endpoint.join("v1").map_err(|e| Error::BrowserSetup {
110            message: format!("flaresolverr v1 URL join failed: {e}"),
111        })
112    }
113}
114
115#[async_trait]
116impl BrowserBackend for FlareSolverrBackend {
117    async fn fetch(
118        &self,
119        url: &Url,
120        // FlareSolverr v1's request.get accepts a `headers` field
121        // but it's tied to a session-id, not the one-shot request
122        // form Adler uses. Custom headers therefore go *unused* in
123        // this backend — sites that need them (Instagram's
124        // X-IG-App-ID etc.) should keep using LocalBackend /
125        // Browserbase. CF-WAF sites — the main use case — don't
126        // need custom headers.
127        _headers: &BTreeMap<String, String>,
128        timeout: Duration,
129    ) -> Result<RenderedPage> {
130        let started = Instant::now();
131        let request = FlareRequest {
132            cmd: "request.get",
133            url: url.as_str(),
134            // FlareSolverr expects milliseconds; honor the caller's
135            // budget but clamp to at least 5 s (less is pointless
136            // since Chrome boot alone takes a second).
137            max_timeout: u64::try_from(timeout.as_millis())
138                .unwrap_or(u64::MAX)
139                .max(5_000),
140        };
141        let resp = self
142            .client
143            .post(self.v1_endpoint()?)
144            .json(&request)
145            .send()
146            .await
147            .map_err(|e| Error::BrowserSetup {
148                message: format!("flaresolverr POST: {e}"),
149            })?;
150        if !resp.status().is_success() {
151            return Err(Error::BrowserSetup {
152                message: format!("flaresolverr returned HTTP {}", resp.status().as_u16()),
153            });
154        }
155        let body: FlareResponse = resp.json().await.map_err(|e| Error::BrowserSetup {
156            message: format!("flaresolverr body parse: {e}"),
157        })?;
158        if body.status != "ok" {
159            return Err(Error::BrowserSetup {
160                message: format!(
161                    "flaresolverr non-ok status: {} ({})",
162                    body.status, body.message
163                ),
164            });
165        }
166        let solution = body.solution.ok_or_else(|| Error::BrowserSetup {
167            message: "flaresolverr ok status with no `solution` field".into(),
168        })?;
169        let final_url = Url::parse(&solution.url).map_err(|e| Error::BrowserSetup {
170            message: format!("flaresolverr solution.url parse: {e}"),
171        })?;
172        Ok(RenderedPage {
173            status: solution.status,
174            final_url,
175            body: solution.response,
176            elapsed_ms: u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX),
177        })
178    }
179}
180
181#[derive(Serialize)]
182struct FlareRequest<'a> {
183    cmd: &'a str,
184    url: &'a str,
185    #[serde(rename = "maxTimeout")]
186    max_timeout: u64,
187}
188
189#[derive(Deserialize)]
190struct FlareResponse {
191    status: String,
192    #[serde(default)]
193    message: String,
194    #[serde(default)]
195    solution: Option<FlareSolution>,
196}
197
198#[derive(Deserialize)]
199struct FlareSolution {
200    url: String,
201    status: u16,
202    response: String,
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    /// Start a mock FlareSolverr that answers `POST /v1` with HTTP
    /// 200 and the given JSON body.
    async fn mock_service(reply: serde_json::Value) -> MockServer {
        let server = MockServer::start().await;
        Mock::given(method("POST"))
            .and(path("/v1"))
            .respond_with(ResponseTemplate::new(200).set_body_json(reply))
            .mount(&server)
            .await;
        server
    }

    /// A well-formed `ok` reply is mapped field-by-field onto the
    /// rendered page.
    #[tokio::test]
    async fn fetch_parses_ok_solution_into_rendered_page() {
        let server = mock_service(serde_json::json!({
            "status": "ok",
            "message": "",
            "solution": {
                "url": "https://example.com/u/alice",
                "status": 200,
                "response": "<html>profile of alice</html>",
            },
            "startTimestamp": 0,
            "endTimestamp": 0,
            "version": "test"
        }))
        .await;

        let target = Url::parse("https://example.com/u/alice").unwrap();
        let no_headers = BTreeMap::new();
        let backend = FlareSolverrBackend::new(&server.uri()).unwrap();
        let page = backend
            .fetch(&target, &no_headers, Duration::from_secs(10))
            .await
            .unwrap();

        assert_eq!(page.status, 200);
        assert_eq!(page.final_url.as_str(), "https://example.com/u/alice");
        assert!(page.body.contains("profile of alice"));
    }

    /// An envelope whose `status` is not `ok` becomes a setup error
    /// that carries the service's message.
    #[tokio::test]
    async fn fetch_surfaces_non_ok_status_as_error() {
        let server = mock_service(serde_json::json!({
            "status": "error",
            "message": "Could not solve the challenge",
            "solution": null
        }))
        .await;

        let target = Url::parse("https://example.com").unwrap();
        let no_headers = BTreeMap::new();
        let backend = FlareSolverrBackend::new(&server.uri()).unwrap();
        let err = backend
            .fetch(&target, &no_headers, Duration::from_secs(10))
            .await
            .unwrap_err();

        if let Error::BrowserSetup { message } = &err {
            assert!(message.contains("non-ok"), "got: {message}");
            assert!(message.contains("Could not solve"), "got: {message}");
        } else {
            panic!("expected Error::BrowserSetup, got {err:?}");
        }
    }

    /// Schemes other than http(s) are refused at construction time.
    #[test]
    fn rejects_non_http_endpoint() {
        let outcome = FlareSolverrBackend::new("ftp://localhost");
        assert!(matches!(outcome.unwrap_err(), Error::BrowserSetup { .. }));
    }
}