Skip to main content

pylon_auth/
device.rs

1//! User-agent parsing → friendly device labels.
2//!
3//! Real UA parsers (uap-rs, ua_parser) ship 2k+ regex pairs. For the
4//! `/api/auth/sessions` UI we don't need that — we want a single
5//! readable label per session ("Chrome on macOS", "Safari on iOS",
6//! "Yubikey CLI"). Substring matching covers >95% of real traffic
7//! at <1% the binary size.
8//!
9//! What this DOESN'T do:
10//! - Parse exact browser/OS versions (apps that need that should pull
11//!   the UA string verbatim from `Session.device` and parse it themselves)
12//! - Detect WebViews vs native (the UA isn't reliable for that anyway)
13//! - Identify bots (use a separate IP-reputation feed)
14//!
15//! Bounded output — the friendly label caps at 80 chars so a
16//! pathological UA can't blow up the session row.
17
18const MAX_LABEL_LEN: usize = 80;
19
20/// Turn a User-Agent header value into a short friendly label.
21/// Falls back to "Unknown" for empty / unrecognized strings.
22pub fn parse_user_agent(ua: &str) -> String {
23    let ua = ua.trim();
24    if ua.is_empty() {
25        return "Unknown".into();
26    }
27    // SDK / native client clients tend to use simple identifiers.
28    // Match those FIRST so a SDK UA that happens to mention "Mozilla"
29    // (some HTTP clients do) doesn't get bucketed as a browser.
30    if let Some(label) = match_sdk(ua) {
31        return cap(label.to_string());
32    }
33    let browser = match_browser(ua);
34    let os = match_os(ua);
35    let label = match (browser, os) {
36        (Some(b), Some(o)) => format!("{b} on {o}"),
37        (Some(b), None) => b.to_string(),
38        (None, Some(o)) => o.to_string(),
39        (None, None) => "Unknown".into(),
40    };
41    cap(label)
42}
43
44fn cap(s: String) -> String {
45    if s.chars().count() <= MAX_LABEL_LEN {
46        s
47    } else {
48        s.chars().take(MAX_LABEL_LEN).collect()
49    }
50}
51
/// Recognize the Pylon SDK family and common API-client identifiers.
///
/// A client is identified by the *prefix* of the UA, compared
/// ASCII-case-insensitively. Returns the friendly label for the first
/// matching prefix, or `None` when the UA does not start with any known
/// client token.
///
/// The comparison is done on the leading bytes only, so no lowercase
/// copy of the (caller-supplied, arbitrarily long) UA is allocated.
fn match_sdk(ua: &str) -> Option<&'static str> {
    // (lowercase prefix, label). Entries are tried top to bottom and the
    // first hit wins.
    const CLIENTS: &[(&str, &str)] = &[
        ("pylonclient/", "Pylon SDK"),
        ("pylonsdk/", "Pylon SDK"),
        ("pylon-cli/", "Pylon CLI"),
        ("pylon/", "Pylon CLI"),
        ("curl/", "curl"),
        // HTTPie shares a label with python-requests.
        ("httpie/", "Python (requests)"),
        ("python-requests/", "Python (requests)"),
        ("go-http-client/", "Go HTTP client"),
        ("postmanruntime/", "Postman"),
    ];

    let bytes = ua.as_bytes();
    CLIENTS
        .iter()
        .find(|(prefix, _)| {
            // Byte slicing (not `str` slicing) so a multi-byte character
            // straddling the prefix length cannot cause a panic; `get`
            // covers UAs shorter than the prefix.
            bytes
                .get(..prefix.len())
                .map_or(false, |head| head.eq_ignore_ascii_case(prefix.as_bytes()))
        })
        .map(|&(_, label)| label)
}
76
/// Identify the browser family from a User-Agent string.
///
/// Matching is by case-sensitive substring and the order of the checks
/// is significant:
///
/// - Edge, Opera, Brave and Vivaldi are tested before Chrome, because
///   Chromium-based browsers also carry a `Chrome/` token.
/// - Chrome is tested before Safari, because Chromium-based UAs also
///   carry a trailing `Safari/` token.
/// - The mobile-specific tokens (`EdgA/`, `EdgiOS/`, `FxiOS/`, `CriOS/`)
///   are tested alongside their desktop counterparts. On iOS these
///   browsers send no `Chrome/` or `Firefox/` token but do send
///   `Safari/`, so without them they would be labelled "Safari".
///
/// Returns `None` when no known browser token is present.
fn match_browser(ua: &str) -> Option<&'static str> {
    let has_any = |tokens: &[&str]| tokens.iter().any(|&token| ua.contains(token));

    // "Edg/" = Chromium Edge, "Edge/" = legacy Edge,
    // "EdgA/" = Edge on Android, "EdgiOS/" = Edge on iOS.
    if has_any(&["Edg/", "Edge/", "EdgA/", "EdgiOS/"]) {
        return Some("Edge");
    }
    if has_any(&["OPR/", "Opera"]) {
        return Some("Opera");
    }
    if ua.contains("Brave") {
        return Some("Brave");
    }
    if ua.contains("Vivaldi") {
        return Some("Vivaldi");
    }
    // "FxiOS/" is what Firefox sends on iOS in place of "Firefox/".
    if has_any(&["Firefox/", "FxiOS/"]) {
        return Some("Firefox");
    }
    // "CriOS/" is what Chrome sends on iOS in place of "Chrome/".
    // Must precede the Safari check below.
    if has_any(&["Chrome/", "CriOS/"]) {
        return Some("Chrome");
    }
    if ua.contains("Safari/") {
        return Some("Safari");
    }
    None
}
107
/// Identify the operating system from a User-Agent string.
///
/// Matching is by case-sensitive substring against an ordered table; the
/// first token found decides the label. The order encodes these
/// priorities:
///
/// - `iPhone` / `iPad` precede the macOS tokens, because iOS UAs also
///   contain "Mac OS" (as in "like Mac OS X").
/// - `Android` and `CrOS` precede `Linux`, so a UA that carries "Linux"
///   alongside either of them is not labelled plain Linux.
///
/// Returns `None` when no known OS token is present.
fn match_os(ua: &str) -> Option<&'static str> {
    // (substring, label), highest priority first.
    const OS_TOKENS: &[(&str, &str)] = &[
        ("iPhone", "iOS"),
        ("iPad", "iPadOS"),
        ("Android", "Android"),
        // macOS marker varies: "Mac OS X 10_15_7", "Macintosh", "Mac OS".
        ("Macintosh", "macOS"),
        ("Mac OS", "macOS"),
        ("Windows NT", "Windows"),
        ("Win64", "Windows"),
        ("Win32", "Windows"),
        ("CrOS", "ChromeOS"),
        // Generic fallback; deliberately last.
        ("Linux", "Linux"),
    ];

    OS_TOKENS
        .iter()
        .find(|&&(token, _)| ua.contains(token))
        .map(|&(_, label)| label)
}
138
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_returns_unknown() {
        assert_eq!(parse_user_agent(""), "Unknown");
        assert_eq!(parse_user_agent("   "), "Unknown");
    }

    #[test]
    fn chrome_macos() {
        let ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";
        assert_eq!(parse_user_agent(ua), "Chrome on macOS");
    }

    #[test]
    fn safari_ios() {
        let ua = "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1";
        // iPhone token wins over the substring "Mac OS" in iOS UAs.
        assert_eq!(parse_user_agent(ua), "Safari on iOS");
    }

    #[test]
    fn firefox_linux() {
        let ua = "Mozilla/5.0 (X11; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0";
        assert_eq!(parse_user_agent(ua), "Firefox on Linux");
    }

    #[test]
    fn edge_classified_before_chrome() {
        // Edge ships "Chrome/" in its UA; the browser detector must
        // pick Edge first or every Edge user shows as Chrome.
        let ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0";
        assert_eq!(parse_user_agent(ua), "Edge on Windows");
    }

    #[test]
    fn opera_classified_before_chrome() {
        let ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 OPR/115.0.0.0";
        assert_eq!(parse_user_agent(ua), "Opera on Windows");
    }

    #[test]
    fn android_classified_before_linux() {
        // Android UAs include "Linux" as a substring; OS detector
        // must pick Android first.
        let ua = "Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Mobile Safari/537.36";
        assert_eq!(parse_user_agent(ua), "Chrome on Android");
    }

    #[test]
    fn ipad_classified_before_macos() {
        // Newer iPad UAs say "Macintosh" with "request desktop site".
        // We want iPadOS not macOS — but if the UA strictly says
        // "Macintosh" with no iPad token, macOS is the right answer.
        // This test pins the "real iPad UA" case.
        let ua = "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1";
        assert_eq!(parse_user_agent(ua), "Safari on iPadOS");
    }

    #[test]
    fn pylon_sdk_recognized() {
        assert_eq!(parse_user_agent("PylonClient/0.3.21 ts"), "Pylon SDK");
        assert_eq!(parse_user_agent("PylonSDK/swift 0.3.21"), "Pylon SDK");
        assert_eq!(parse_user_agent("pylon/0.3.21"), "Pylon CLI");
    }

    #[test]
    fn curl_recognized() {
        assert_eq!(parse_user_agent("curl/8.4.0"), "curl");
        // Surrounding whitespace is trimmed before the prefix check.
        assert_eq!(parse_user_agent("  curl/8.4.0\r\n"), "curl");
    }

    #[test]
    fn capped_at_80_chars() {
        // End-to-end: an over-long, unrecognized UA still yields a
        // bounded label.
        let label = parse_user_agent(&("X".repeat(500)));
        assert!(label.chars().count() <= MAX_LABEL_LEN);

        // The end-to-end label above is just "Unknown", so exercise the
        // truncation path directly. Multi-byte characters confirm the
        // cap counts characters rather than bytes.
        let over = cap("\u{e9}".repeat(MAX_LABEL_LEN + 20));
        assert_eq!(over, "\u{e9}".repeat(MAX_LABEL_LEN));

        // At or under the limit the input passes through untouched.
        let exact = "x".repeat(MAX_LABEL_LEN);
        assert_eq!(cap(exact.clone()), exact);
        assert_eq!(cap(String::from("short")), "short");
        assert_eq!(cap(String::new()), "");
    }

    #[test]
    fn unknown_browser_known_os() {
        // A weird browser on a known OS — we still get the OS half.
        let ua = "WeirdBrowser/1.0 (Windows NT 10.0)";
        assert_eq!(parse_user_agent(ua), "Windows");
    }

    #[test]
    fn unknown_browser_unknown_os() {
        assert_eq!(parse_user_agent("totally-bogus-junk"), "Unknown");
    }

    /// Defense against UA-fingerprinting probes that send huge or
    /// malformed UAs: parsing must not panic, and because none of these
    /// inputs contains a known token the label is the "Unknown"
    /// fallback rather than any echo of the input.
    #[test]
    fn does_not_panic_on_pathological_input() {
        // emoji
        assert_eq!(parse_user_agent(&"\u{1F600}".repeat(10000)), "Unknown");
        // nulls (not whitespace, so they survive `trim`)
        assert_eq!(parse_user_agent(&"\0\0\0".repeat(1000)), "Unknown");
        // whitespace only
        assert_eq!(parse_user_agent("\n\n\n\n"), "Unknown");
    }
}