markdown_harvest/
user_agent.rs

1use rand::prelude::*;
2
/// Identifies a browser/operating-system combination whose user agent string
/// can be sent with HTTP requests while scraping.
///
/// Each variant maps to one fixed user agent string. Rotating between variants
/// (see `UserAgent::random`) makes successive requests look like they come from
/// different browsers, which can help with sites that restrict automated access.
///
/// The enum is a plain field-less value type: it is `Copy`, can be compared
/// with `==`, and can be used as a key in hashed collections.
///
/// # Examples
///
/// ```rust
/// use markdown_harvest::UserAgent;
///
/// // Pick a specific user agent
/// let chrome_windows = UserAgent::WindowsChrome;
/// println!("User-Agent: {}", chrome_windows.to_string());
///
/// // Variants can be compared directly
/// assert_eq!(chrome_windows, UserAgent::WindowsChrome);
/// assert_ne!(chrome_windows, UserAgent::LinuxFirefox);
///
/// // Pick a random user agent for better diversity
/// let random_agent = UserAgent::random();
/// println!("Random User-Agent: {}", random_agent.to_string());
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UserAgent {
    // Windows
    /// Google Chrome browser on Windows 10/11
    WindowsChrome,
    /// Mozilla Firefox browser on Windows 10/11
    WindowsFirefox,
    /// Microsoft Edge browser on Windows 10/11
    WindowsEdge,

    // macOS
    /// Google Chrome browser on macOS
    MacOSChrome,
    /// Safari browser on macOS
    MacOSSafari,
    /// Mozilla Firefox browser on macOS
    MacOSFirefox,

    // Linux
    /// Google Chrome browser on Linux
    LinuxChrome,
    /// Mozilla Firefox browser on Linux
    LinuxFirefox,

    // Mobile Android
    /// Google Chrome browser on Android devices
    AndroidChrome,
    /// Mozilla Firefox browser on Android devices
    AndroidFirefox,

    // Mobile iOS
    /// Safari browser on iOS devices (iPhone/iPad)
    IOSSafari,
    /// Google Chrome browser on iOS devices (iPhone/iPad)
    IOSChrome,
}
58
59impl UserAgent {
60    /// Converts the UserAgent enum variant to its corresponding user agent string.
61    ///
62    /// Each variant returns a realistic, up-to-date user agent string that mimics
63    /// real browsers. These strings include browser version numbers, operating system
64    /// details, and rendering engine information.
65    ///
66    /// # Returns
67    ///
68    /// A `String` containing the complete user agent string for HTTP headers.
69    ///
70    /// # Examples
71    ///
72    /// ```rust
73    /// use markdown_harvest::UserAgent;
74    ///
75    /// let chrome = UserAgent::WindowsChrome;
76    /// let user_agent_string = chrome.to_string();
77    /// assert!(user_agent_string.contains("Chrome"));
78    /// assert!(user_agent_string.contains("Windows"));
79    ///
80    /// let firefox = UserAgent::LinuxFirefox;
81    /// let user_agent_string = firefox.to_string();
82    /// assert!(user_agent_string.contains("Firefox"));
83    /// assert!(user_agent_string.contains("Linux"));
84    /// ```
85    pub fn to_string(&self) -> String {
86        match self {
87            // Windows User Agents
88            UserAgent::WindowsChrome => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
89            UserAgent::WindowsFirefox => "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
90            UserAgent::WindowsEdge => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0".to_string(),
91            // macOS User Agents
92            UserAgent::MacOSChrome => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
93            UserAgent::MacOSSafari => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15".to_string(),
94            UserAgent::MacOSFirefox => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
95            // Linux User Agents
96            UserAgent::LinuxChrome => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
97            UserAgent::LinuxFirefox => "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
98            // Android User Agents
99            UserAgent::AndroidChrome => "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36".to_string(),
100            UserAgent::AndroidFirefox => "Mozilla/5.0 (Mobile; rv:121.0) Gecko/121.0 Firefox/121.0".to_string(),
101            // iOS User Agents
102            UserAgent::IOSSafari => "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1".to_string(),
103            UserAgent::IOSChrome => "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/120.0.0.0 Mobile/15E148 Safari/604.1".to_string(),
104        }
105    }
106
107    /// Returns a random user agent for better web scraping diversity.
108    ///
109    /// This method selects a random user agent from all available variants to help
110    /// avoid detection and blocking by websites. Different user agents simulate
111    /// requests from various browsers and operating systems.
112    ///
113    /// # Returns
114    ///
115    /// A randomly selected `UserAgent` variant. If random selection fails
116    /// (which should never happen), defaults to `UserAgent::LinuxFirefox`.
117    ///
118    /// # Examples
119    ///
120    /// ```rust
121    /// use markdown_harvest::UserAgent;
122    ///
123    /// // Get different random user agents
124    /// let agent1 = UserAgent::random();
125    /// let agent2 = UserAgent::random();
126    ///
127    /// // They might be different (but could be the same due to randomness)
128    /// println!("First random agent: {}", agent1.to_string());
129    /// println!("Second random agent: {}", agent2.to_string());
130    ///
131    /// // Use in HTTP request
132    /// let random_agent = UserAgent::random();
133    /// let user_agent_header = random_agent.to_string();
134    /// // Use user_agent_header in your HTTP client...
135    /// ```
136    pub fn random() -> UserAgent {
137        let agents = [
138            UserAgent::WindowsChrome,
139            UserAgent::WindowsFirefox,
140            UserAgent::WindowsEdge,
141            UserAgent::MacOSChrome,
142            UserAgent::MacOSSafari,
143            UserAgent::MacOSFirefox,
144            UserAgent::LinuxChrome,
145            UserAgent::LinuxFirefox,
146            UserAgent::AndroidChrome,
147            UserAgent::AndroidFirefox,
148            UserAgent::IOSSafari,
149            UserAgent::IOSChrome,
150        ];
151
152        *agents
153            .choose(&mut rand::rng())
154            .unwrap_or(&UserAgent::LinuxFirefox)
155    }
156}