markdown_harvest/user_agent.rs
1use rand::prelude::*;
2
/// A browser identity that can be presented in the HTTP `User-Agent` header.
///
/// Each variant names one browser/platform combination. Rotating between
/// several of them helps avoid being blocked by websites that restrict
/// automated access.
///
/// The enum is a plain, field-less value type: it is `Copy`, can be compared
/// with `==`, and can be used as a key in hashed collections.
///
/// # Examples
///
/// ```rust
/// use markdown_harvest::UserAgent;
///
/// // Get a specific user agent
/// let chrome_windows = UserAgent::WindowsChrome;
/// println!("User-Agent: {}", chrome_windows.to_string());
///
/// // Get a random user agent for better diversity
/// let random_agent = UserAgent::random();
/// println!("Random User-Agent: {}", random_agent.to_string());
///
/// // Variants can be compared directly
/// assert_eq!(UserAgent::LinuxFirefox, UserAgent::LinuxFirefox);
/// assert_ne!(UserAgent::LinuxFirefox, UserAgent::LinuxChrome);
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UserAgent {
    // Windows
    /// Google Chrome browser on Windows 10/11
    WindowsChrome,
    /// Mozilla Firefox browser on Windows 10/11
    WindowsFirefox,
    /// Microsoft Edge browser on Windows 10/11
    WindowsEdge,

    // macOS
    /// Google Chrome browser on macOS
    MacOSChrome,
    /// Safari browser on macOS
    MacOSSafari,
    /// Mozilla Firefox browser on macOS
    MacOSFirefox,

    // Linux
    /// Google Chrome browser on Linux
    LinuxChrome,
    /// Mozilla Firefox browser on Linux
    LinuxFirefox,

    // Mobile Android
    /// Google Chrome browser on Android devices
    AndroidChrome,
    /// Mozilla Firefox browser on Android devices
    AndroidFirefox,

    // Mobile iOS
    /// Safari browser on iOS devices (iPhone/iPad)
    IOSSafari,
    /// Google Chrome browser on iOS devices (iPhone/iPad)
    IOSChrome,
}
58
59impl UserAgent {
60 /// Converts the UserAgent enum variant to its corresponding user agent string.
61 ///
62 /// Each variant returns a realistic, up-to-date user agent string that mimics
63 /// real browsers. These strings include browser version numbers, operating system
64 /// details, and rendering engine information.
65 ///
66 /// # Returns
67 ///
68 /// A `String` containing the complete user agent string for HTTP headers.
69 ///
70 /// # Examples
71 ///
72 /// ```rust
73 /// use markdown_harvest::UserAgent;
74 ///
75 /// let chrome = UserAgent::WindowsChrome;
76 /// let user_agent_string = chrome.to_string();
77 /// assert!(user_agent_string.contains("Chrome"));
78 /// assert!(user_agent_string.contains("Windows"));
79 ///
80 /// let firefox = UserAgent::LinuxFirefox;
81 /// let user_agent_string = firefox.to_string();
82 /// assert!(user_agent_string.contains("Firefox"));
83 /// assert!(user_agent_string.contains("Linux"));
84 /// ```
85 pub fn to_string(&self) -> String {
86 match self {
87 // Windows User Agents
88 UserAgent::WindowsChrome => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
89 UserAgent::WindowsFirefox => "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
90 UserAgent::WindowsEdge => "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0".to_string(),
91 // macOS User Agents
92 UserAgent::MacOSChrome => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
93 UserAgent::MacOSSafari => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15".to_string(),
94 UserAgent::MacOSFirefox => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
95 // Linux User Agents
96 UserAgent::LinuxChrome => "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36".to_string(),
97 UserAgent::LinuxFirefox => "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0".to_string(),
98 // Android User Agents
99 UserAgent::AndroidChrome => "Mozilla/5.0 (Linux; Android 14; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36".to_string(),
100 UserAgent::AndroidFirefox => "Mozilla/5.0 (Mobile; rv:121.0) Gecko/121.0 Firefox/121.0".to_string(),
101 // iOS User Agents
102 UserAgent::IOSSafari => "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1".to_string(),
103 UserAgent::IOSChrome => "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/120.0.0.0 Mobile/15E148 Safari/604.1".to_string(),
104 }
105 }
106
107 /// Returns a random user agent for better web scraping diversity.
108 ///
109 /// This method selects a random user agent from all available variants to help
110 /// avoid detection and blocking by websites. Different user agents simulate
111 /// requests from various browsers and operating systems.
112 ///
113 /// # Returns
114 ///
115 /// A randomly selected `UserAgent` variant. If random selection fails
116 /// (which should never happen), defaults to `UserAgent::LinuxFirefox`.
117 ///
118 /// # Examples
119 ///
120 /// ```rust
121 /// use markdown_harvest::UserAgent;
122 ///
123 /// // Get different random user agents
124 /// let agent1 = UserAgent::random();
125 /// let agent2 = UserAgent::random();
126 ///
127 /// // They might be different (but could be the same due to randomness)
128 /// println!("First random agent: {}", agent1.to_string());
129 /// println!("Second random agent: {}", agent2.to_string());
130 ///
131 /// // Use in HTTP request
132 /// let random_agent = UserAgent::random();
133 /// let user_agent_header = random_agent.to_string();
134 /// // Use user_agent_header in your HTTP client...
135 /// ```
136 pub fn random() -> UserAgent {
137 let agents = [
138 UserAgent::WindowsChrome,
139 UserAgent::WindowsFirefox,
140 UserAgent::WindowsEdge,
141 UserAgent::MacOSChrome,
142 UserAgent::MacOSSafari,
143 UserAgent::MacOSFirefox,
144 UserAgent::LinuxChrome,
145 UserAgent::LinuxFirefox,
146 UserAgent::AndroidChrome,
147 UserAgent::AndroidFirefox,
148 UserAgent::IOSSafari,
149 UserAgent::IOSChrome,
150 ];
151
152 *agents
153 .choose(&mut rand::rng())
154 .unwrap_or(&UserAgent::LinuxFirefox)
155 }
156}