// scrapling_fetch/fingerprint.rs

//! Browser fingerprint generation for stealth HTTP requests.
//!
//! Bot-detection systems inspect HTTP headers such as `User-Agent`, `Sec-Ch-Ua`, and
//! `Sec-Fetch-*` to distinguish real browsers from automated tools. This module
//! generates realistic header sets modelled on what Chrome, Firefox, and Edge actually
//! send, including platform-specific details derived from the OS this code was compiled for.
//!
//! The key entry points are:
//!
//! - [`generate_headers`] -- produces a full set of browser-like headers (User-Agent,
//!   Accept, Sec-Ch-Ua, Sec-Fetch-*, etc.) for a randomly or explicitly chosen browser.
//! - [`default_user_agent`] -- returns a Chrome User-Agent string for the current OS.
//!   Used as a fallback when no impersonation or stealth headers are configured.
//!
//! Browser version constants (`CHROME_VERSION`, `FIREFOX_VERSION`, `EDGE_VERSION`) are
//! defined near the top of this file and should be updated periodically to stay current.
17
18use std::collections::HashMap;
19
/// Operating systems that fingerprints can be generated for.
///
/// The variant selects the platform token embedded in User-Agent strings, for example
/// `"Windows NT 10.0; Win64; x64"` or `"Macintosh; Intel Mac OS X 10_15_7"`.
/// [`detect_os`] picks the variant that matches the compilation target.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OsName {
    /// Linux; also what [`detect_os`] reports for targets that are neither macOS nor Windows.
    Linux,
    /// Apple macOS.
    MacOs,
    /// Microsoft Windows.
    Windows,
}
34
35/// Detects the current operating system at compile time using `cfg!(target_os)`.
36/// Returns [`OsName::Linux`] as a fallback for any platform that is not macOS or Windows.
37pub fn detect_os() -> OsName {
38    if cfg!(target_os = "macos") {
39        OsName::MacOs
40    } else if cfg!(target_os = "windows") {
41        OsName::Windows
42    } else {
43        OsName::Linux
44    }
45}
46
/// Major version of Google Chrome used when building Chrome-flavoured headers.
const CHROME_VERSION: u32 = 145;
/// Major version of Mozilla Firefox used when building Firefox-flavoured headers.
const FIREFOX_VERSION: u32 = 142;
/// Major version of Microsoft Edge used when building Edge-flavoured headers.
const EDGE_VERSION: u32 = 140;
50
51fn platform_string(os: OsName, include_rv: bool) -> &'static str {
52    match (os, include_rv) {
53        (OsName::Windows, false) => "Windows NT 10.0; Win64; x64",
54        (OsName::MacOs, false) => "Macintosh; Intel Mac OS X 10_15_7",
55        (OsName::Linux, false) => "X11; Linux x86_64",
56        (OsName::Windows, true) => "Windows NT 10.0; Win64; x64; rv:142.0",
57        (OsName::MacOs, true) => "Macintosh; Intel Mac OS X 10.15; rv:142.0",
58        (OsName::Linux, true) => "X11; Linux x86_64; rv:142.0",
59    }
60}
61
62fn sec_ch_platform(os: OsName) -> &'static str {
63    match os {
64        OsName::Windows => "\"Windows\"",
65        OsName::MacOs => "\"macOS\"",
66        OsName::Linux => "\"Linux\"",
67    }
68}
69
70#[derive(Debug, Clone, Copy, PartialEq, Eq)]
71enum BrowserKind {
72    Chrome,
73    Firefox,
74    Edge,
75}
76
77impl BrowserKind {
78    fn user_agent(self, os: OsName) -> String {
79        match self {
80            Self::Chrome => format!(
81                "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{CHROME_VERSION}.0.0.0 Safari/537.36",
82                platform_string(os, false)
83            ),
84            Self::Firefox => format!(
85                "Mozilla/5.0 ({}) Gecko/20100101 Firefox/{FIREFOX_VERSION}.0",
86                platform_string(os, true)
87            ),
88            Self::Edge => format!(
89                "Mozilla/5.0 ({}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{CHROME_VERSION}.0.0.0 Safari/537.36 Edg/{EDGE_VERSION}.0.0.0",
90                platform_string(os, false)
91            ),
92        }
93    }
94
95    fn sec_ch_ua(self) -> String {
96        match self {
97            Self::Edge => format!(
98                "\"Microsoft Edge\";v=\"{EDGE_VERSION}\", \"Chromium\";v=\"{CHROME_VERSION}\", \"Not-A.Brand\";v=\"99\""
99            ),
100            Self::Chrome => format!(
101                "\"Google Chrome\";v=\"{CHROME_VERSION}\", \"Chromium\";v=\"{CHROME_VERSION}\", \"Not-A.Brand\";v=\"99\""
102            ),
103            Self::Firefox => String::new(),
104        }
105    }
106
107    fn random() -> Self {
108        use rand::Rng;
109        const CHOICES: [BrowserKind; 3] =
110            [BrowserKind::Chrome, BrowserKind::Firefox, BrowserKind::Edge];
111        CHOICES[rand::thread_rng().gen_range(0..CHOICES.len())]
112    }
113}
114
115/// Returns a Chrome user-agent string for the given OS. This includes the full
116/// `Mozilla/5.0 (...) AppleWebKit/537.36 ... Chrome/VERSION ...` format that real
117/// Chrome browsers send.
118pub fn chrome_user_agent(os: OsName) -> String {
119    BrowserKind::Chrome.user_agent(os)
120}
121
122/// Generates a full set of realistic browser headers for bypass of bot detection.
123///
124/// When `browser_mode` is `true`, the headers are always Chrome-based (used when wreq
125/// browser impersonation is active, since the TLS fingerprint is already Chrome).
126/// When `false`, a browser is randomly chosen from Chrome, Firefox, and Edge to add
127/// diversity across requests. Chromium-based browsers include `Sec-Ch-Ua` and
128/// `Sec-Fetch-*` headers; Firefox does not.
129pub fn generate_headers(browser_mode: bool) -> HashMap<String, String> {
130    let os = detect_os();
131    let browser = if browser_mode {
132        BrowserKind::Chrome
133    } else {
134        BrowserKind::random()
135    };
136
137    let mut headers = HashMap::from([
138        ("User-Agent".into(), browser.user_agent(os)),
139        ("Accept".into(), "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8".into()),
140        ("Accept-Language".into(), "en-US,en;q=0.9".into()),
141        ("Accept-Encoding".into(), "gzip, deflate, br".into()),
142        ("Upgrade-Insecure-Requests".into(), "1".into()),
143    ]);
144
145    if matches!(browser, BrowserKind::Chrome | BrowserKind::Edge) {
146        headers.insert("Sec-Ch-Ua-Platform".into(), sec_ch_platform(os).into());
147        headers.insert("Sec-Fetch-Site".into(), "none".into());
148        headers.insert("Sec-Fetch-Mode".into(), "navigate".into());
149        headers.insert("Sec-Fetch-User".into(), "?1".into());
150        headers.insert("Sec-Fetch-Dest".into(), "document".into());
151        headers.insert("Sec-Ch-Ua".into(), browser.sec_ch_ua());
152        headers.insert("Sec-Ch-Ua-Mobile".into(), "?0".into());
153    }
154
155    headers
156}
157
158/// Returns the default user-agent string (Chrome on the detected OS). This is used
159/// as a last-resort fallback when neither stealth headers nor browser impersonation
160/// are enabled and no custom User-Agent header has been set.
161pub fn default_user_agent() -> String {
162    chrome_user_agent(detect_os())
163}