Skip to main content

synapse_pingora/fingerprint/
integrity.rs

1//! Client Integrity Analysis
2//!
3//! Detects inconsistencies between:
4//! - User-Agent and Client Hints (Sec-CH-UA)
5//! - Fetch Metadata (Sec-Fetch-*) and request context
6//! - Accept headers and stated browser capabilities
7//! - JA4 TLS fingerprint and User-Agent claims
8//!
9//! This module helps detect "lie" fingerprints where bots pretend to be browsers.
10//!
11//! ## Security
12//! - Input length validation prevents ReDoS and memory exhaustion
13//! - All header values are bounded before processing
14//! - JA4 fingerprint validation prevents spoofing attacks
15
16use super::ja4::{HttpHeaders, Ja4Fingerprint, Ja4Protocol};
17use std::borrow::Cow;
18
/// Maximum allowed length for User-Agent header (512 bytes)
///
/// Longer values are cut down with `truncate_header` before any matching is
/// done, and the truncation itself is reported as an inconsistency.
pub const MAX_USER_AGENT_LENGTH: usize = 512;

/// Maximum allowed length for other headers (256 bytes)
///
/// Applied by `analyze_integrity` to `sec-fetch-site`, `sec-fetch-mode`,
/// `referer` and `host`.
pub const MAX_HEADER_LENGTH: usize = 256;

/// Maximum allowed length for Sec-CH-UA header (1024 bytes - multiple brands)
pub const MAX_SEC_CH_UA_LENGTH: usize = 1024;
27
/// Integrity analysis result
///
/// Returned by `analyze_integrity` and `analyze_integrity_with_ja4`. The
/// derived `Default` is the "nothing found" state: score 0, no
/// inconsistencies, every flag `false`.
#[derive(Debug, Clone, Default)]
pub struct IntegrityAnalysis {
    /// Overall suspicious score (0-100, saturating)
    ///
    /// Only ever increased through `saturating_add_score`, which caps it at 100.
    pub suspicion_score: u8,
    /// List of detected inconsistencies (uses Cow for zero-copy known messages)
    ///
    /// Fixed messages are `Cow::Borrowed`; messages built with `format!` are
    /// `Cow::Owned`.
    pub inconsistencies: Vec<Cow<'static, str>>,
    /// Whether Client Hints were present
    ///
    /// Set when a readable `sec-ch-ua` header is seen.
    pub has_client_hints: bool,
    /// Whether Fetch Metadata was present
    ///
    /// Set when a readable `sec-fetch-site` header is seen; `sec-fetch-mode`
    /// on its own does not set it.
    pub has_fetch_metadata: bool,
    /// Whether input was truncated due to length limits
    pub input_truncated: bool,
}
42
/// Raise `score` by `delta`, clamping the result to the 0-100 scale.
///
/// Any sum that reaches 100 or would overflow `u8` is stored as exactly 100.
#[inline]
fn saturating_add_score(score: &mut u8, delta: u8) {
    *score = match score.checked_add(delta) {
        Some(total) if total < 100 => total,
        // Either the sum is >= 100 or it overflowed u8: pin to the ceiling.
        _ => 100,
    };
}
48
/// Truncate string to max length, returning whether truncation occurred
///
/// `max_len` is a byte count. When `value` is longer, the returned slice is
/// the longest prefix of at most `max_len` bytes that ends on a UTF-8
/// character boundary, so it may be up to three bytes shorter than `max_len`.
/// The flag is `true` exactly when `value.len() > max_len`.
///
/// The boundary is located with `str::is_char_boundary` rather than
/// `str::floor_char_boundary` so the function also builds on toolchains that
/// predate the latter's stabilisation.
#[inline]
fn truncate_header(value: &str, max_len: usize) -> (&str, bool) {
    if value.len() <= max_len {
        return (value, false);
    }

    // Here `max_len < value.len()`, so `cut` is always a valid index. Index 0
    // is always a char boundary, which guarantees the loop terminates without
    // underflowing.
    let mut cut = max_len;
    while !value.is_char_boundary(cut) {
        cut -= 1;
    }

    (&value[..cut], true)
}
60
61/// Analyze request headers for integrity violations
62pub fn analyze_integrity(request: &HttpHeaders<'_>) -> IntegrityAnalysis {
63    let mut result = IntegrityAnalysis::default();
64
65    // Extract key headers with length validation
66    let mut ua = "";
67    let mut sec_ch_ua = "";
68    let mut sec_fetch_site = "";
69    let mut sec_fetch_mode = "";
70    let mut referer = "";
71    let mut host = "";
72    let mut any_truncated = false;
73
74    for (name, value) in request.headers {
75        let Ok(value_str) = value.to_str() else {
76            continue;
77        };
78        match name.as_str() {
79            "user-agent" => {
80                let (truncated, was_truncated) = truncate_header(value_str, MAX_USER_AGENT_LENGTH);
81                ua = truncated;
82                any_truncated |= was_truncated;
83            }
84            "sec-ch-ua" => {
85                let (truncated, was_truncated) = truncate_header(value_str, MAX_SEC_CH_UA_LENGTH);
86                sec_ch_ua = truncated;
87                any_truncated |= was_truncated;
88                result.has_client_hints = true;
89            }
90            "sec-fetch-site" => {
91                let (truncated, was_truncated) = truncate_header(value_str, MAX_HEADER_LENGTH);
92                sec_fetch_site = truncated;
93                any_truncated |= was_truncated;
94                result.has_fetch_metadata = true;
95            }
96            "sec-fetch-mode" => {
97                let (truncated, was_truncated) = truncate_header(value_str, MAX_HEADER_LENGTH);
98                sec_fetch_mode = truncated;
99                any_truncated |= was_truncated;
100            }
101            "referer" => {
102                let (truncated, was_truncated) = truncate_header(value_str, MAX_HEADER_LENGTH);
103                referer = truncated;
104                any_truncated |= was_truncated;
105            }
106            "host" => {
107                let (truncated, was_truncated) = truncate_header(value_str, MAX_HEADER_LENGTH);
108                host = truncated;
109                any_truncated |= was_truncated;
110            }
111            _ => {}
112        }
113    }
114
115    result.input_truncated = any_truncated;
116
117    // Oversized headers are suspicious (potential attack or malformed client)
118    if any_truncated {
119        result
120            .inconsistencies
121            .push(Cow::Borrowed("Header exceeds maximum allowed length"));
122        saturating_add_score(&mut result.suspicion_score, 20);
123    }
124
125    // 1. Check User-Agent vs Client Hints
126    // Modern browsers (Chrome 84+, Edge) send Sec-CH-UA.
127    // If User-Agent says "Chrome/120" but Sec-CH-UA is missing, that's suspicious.
128    if !result.has_client_hints && (ua.contains("Chrome/") || ua.contains("Edg/")) {
129        // Exclude older versions or non-Chromium based on heuristics if needed,
130        // but generally modern Chrome should have it.
131        // For safety, we only flag if it claims to be a very recent version.
132        if ua.contains("Chrome/12") || ua.contains("Chrome/13") {
133            result
134                .inconsistencies
135                .push(Cow::Borrowed("Missing Client Hints for modern Chrome/Edge"));
136            saturating_add_score(&mut result.suspicion_score, 30);
137        }
138    }
139
140    if result.has_client_hints {
141        // If Client Hints present, verify consistency
142        // Format: "Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"
143        if ua.contains("Firefox") && !ua.contains("Seamonkey") {
144            // Firefox typically doesn't send Sec-CH-UA yet (experimental)
145            // If it does, it shouldn't claim to be Chromium unless it is.
146            if sec_ch_ua.contains("Chromium") {
147                result.inconsistencies.push(Cow::Borrowed(
148                    "Firefox User-Agent sent Chromium Client Hints",
149                ));
150                saturating_add_score(&mut result.suspicion_score, 50);
151            }
152        }
153    }
154
155    // 2. Check Fetch Metadata consistency
156    if result.has_fetch_metadata {
157        // "same-origin" requests should generally have matching Referer/Host (if Referer present)
158        if sec_fetch_site == "same-origin" && !referer.is_empty() {
159            // Simple check: referer should contain host
160            // (Note: This is a loose check, proper URL parsing is expensive)
161            if !referer.contains(host) && !host.is_empty() {
162                result.inconsistencies.push(Cow::Borrowed(
163                    "Sec-Fetch-Site: same-origin but Referer mismatch",
164                ));
165                saturating_add_score(&mut result.suspicion_score, 40);
166            }
167        }
168
169        // "navigate" mode usually implies a document request
170        if sec_fetch_mode == "navigate"
171            && request.headers.iter().any(|(name, value)| {
172                name.as_str() == "sec-fetch-dest"
173                    && value
174                        .to_str()
175                        .ok()
176                        .map(|v| v != "document")
177                        .unwrap_or(false)
178            })
179        {
180            // Not always true (e.g. frames), but worth noting for correlation
181        }
182    }
183
184    result
185}
186
187// ============================================================================
188// JA4 Fingerprint Behavioral Validation
189// ============================================================================
190
/// Known browser JA4 characteristic ranges
///
/// These are behavioral signatures based on typical browser TLS configurations.
/// Browsers have predictable patterns that bots/scripts often fail to replicate.
///
/// `validate_against_profile` currently enforces only the lower bounds
/// (`min_tls_version`, `min_ciphers`, `min_extensions`) and the ALPN list; the
/// `max_*` values are used when formatting the inconsistency messages.
#[derive(Debug)]
pub struct BrowserJa4Profile {
    /// Minimum TLS version (10=1.0, 11=1.1, 12=1.2, 13=1.3)
    pub min_tls_version: u8,
    /// Maximum TLS version
    ///
    /// Same encoding as `min_tls_version`.
    pub max_tls_version: u8,
    /// Minimum cipher suite count
    pub min_ciphers: u8,
    /// Maximum cipher suite count
    pub max_ciphers: u8,
    /// Minimum extension count
    pub min_extensions: u8,
    /// Maximum extension count
    pub max_extensions: u8,
    /// Expected ALPN protocols (h1, h2, h3)
    ///
    /// An observed ALPN passes when it contains any entry of this list.
    pub expected_alpn: &'static [&'static str],
}
212
213/// Modern Chrome profile (Chrome 90+)
214const CHROME_PROFILE: BrowserJa4Profile = BrowserJa4Profile {
215    min_tls_version: 12,
216    max_tls_version: 13,
217    min_ciphers: 10,
218    max_ciphers: 25,
219    min_extensions: 12,
220    max_extensions: 25,
221    expected_alpn: &["h2", "h3"],
222};
223
224/// Modern Firefox profile (Firefox 90+)
225const FIREFOX_PROFILE: BrowserJa4Profile = BrowserJa4Profile {
226    min_tls_version: 12,
227    max_tls_version: 13,
228    min_ciphers: 8,
229    max_ciphers: 20,
230    min_extensions: 10,
231    max_extensions: 22,
232    expected_alpn: &["h2", "h3", "http/1.1"],
233};
234
235/// Modern Safari profile (Safari 14+)
236const SAFARI_PROFILE: BrowserJa4Profile = BrowserJa4Profile {
237    min_tls_version: 12,
238    max_tls_version: 13,
239    min_ciphers: 8,
240    max_ciphers: 20,
241    min_extensions: 8,
242    max_extensions: 18,
243    expected_alpn: &["h2", "http/1.1"],
244};
245
246/// Modern Edge profile (Edge 90+)
247const EDGE_PROFILE: BrowserJa4Profile = BrowserJa4Profile {
248    min_tls_version: 12,
249    max_tls_version: 13,
250    min_ciphers: 10,
251    max_ciphers: 25,
252    min_extensions: 12,
253    max_extensions: 25,
254    expected_alpn: &["h2", "h3"],
255};
256
/// JA4 spoofing detection result
///
/// Produced by `analyze_ja4_spoofing`. The derived `Default` is the empty
/// state: confidence 0, not spoofed, no inconsistencies, empty strings.
#[derive(Debug, Clone, Default)]
pub struct Ja4SpoofingAnalysis {
    /// Overall spoofing confidence (0-100)
    ///
    /// Accumulated with `saturating_add_score`, so it never exceeds 100.
    pub spoofing_confidence: u8,
    /// Whether the fingerprint is likely spoofed
    ///
    /// Set to `spoofing_confidence >= 50` at the end of the analysis.
    pub likely_spoofed: bool,
    /// Detected inconsistencies
    pub inconsistencies: Vec<Cow<'static, str>>,
    /// Claimed browser from User-Agent
    ///
    /// One of the labels returned by `detect_browser_from_ua`: `edge`,
    /// `chrome`, `firefox`, `safari`, `cli-tool`, `python`, `golang` or
    /// `unknown`.
    pub claimed_browser: String,
    /// Estimated actual client type based on JA4
    ///
    /// One of the labels returned by `estimate_actual_client`:
    /// `minimal-client`, `legacy-client`, `modern-browser`, `modern-client`,
    /// `api-client` or `unknown`.
    pub estimated_actual: String,
}
271
272/// Analyze JA4 fingerprint for spoofing attempts
273///
274/// SECURITY: This function detects when a client's JA4 TLS fingerprint
275/// doesn't match its claimed User-Agent. This is a common bot detection
276/// technique because:
277///
278/// 1. TLS fingerprints are harder to spoof than User-Agent strings
279/// 2. Real browsers have predictable TLS configurations
280/// 3. Bots often have minimal TLS stacks that don't match browser claims
281///
282/// # Arguments
283/// * `ja4` - JA4 fingerprint from TLS handshake (via X-JA4-Fingerprint header)
284/// * `user_agent` - User-Agent header value
285///
286/// # Returns
287/// Analysis result with spoofing confidence and detected inconsistencies
288pub fn analyze_ja4_spoofing(ja4: &Ja4Fingerprint, user_agent: &str) -> Ja4SpoofingAnalysis {
289    let mut result = Ja4SpoofingAnalysis::default();
290
291    // Truncate oversized User-Agent
292    let (ua, truncated) = truncate_header(user_agent, MAX_USER_AGENT_LENGTH);
293    if truncated {
294        result
295            .inconsistencies
296            .push(Cow::Borrowed("User-Agent exceeds maximum length"));
297        saturating_add_score(&mut result.spoofing_confidence, 10);
298    }
299
300    // Detect claimed browser from User-Agent
301    let claimed_browser = detect_browser_from_ua(ua);
302    result.claimed_browser = claimed_browser.clone();
303
304    // Get expected profile based on claimed browser
305    let profile = match claimed_browser.as_str() {
306        "chrome" => Some(&CHROME_PROFILE),
307        "firefox" => Some(&FIREFOX_PROFILE),
308        "safari" => Some(&SAFARI_PROFILE),
309        "edge" => Some(&EDGE_PROFILE),
310        _ => None,
311    };
312
313    // If claiming to be a known browser, validate against profile
314    if let Some(profile) = profile {
315        validate_against_profile(ja4, profile, &claimed_browser, &mut result);
316    } else {
317        // Unknown or generic User-Agent - check for bot indicators
318        validate_generic_client(ja4, &mut result);
319    }
320
321    // Estimate actual client type based on JA4 characteristics
322    result.estimated_actual = estimate_actual_client(ja4);
323
324    // If claimed browser doesn't match estimated actual, that's suspicious
325    if !claimed_browser.is_empty()
326        && claimed_browser != "unknown"
327        && result.estimated_actual != "unknown"
328        && !result.estimated_actual.contains(&claimed_browser)
329        && claimed_browser != result.estimated_actual
330    {
331        result.inconsistencies.push(Cow::Owned(format!(
332            "Claimed {} but JA4 indicates {}",
333            claimed_browser, result.estimated_actual
334        )));
335        saturating_add_score(&mut result.spoofing_confidence, 25);
336    }
337
338    // Set likely_spoofed threshold
339    result.likely_spoofed = result.spoofing_confidence >= 50;
340
341    result
342}
343
/// Classify a User-Agent string into a coarse client label.
///
/// Matching is case-insensitive and the first rule that applies wins, so the
/// more specific tokens are tested before the generic ones. Possible results:
/// `edge`, `chrome`, `firefox`, `safari`, `cli-tool`, `python`, `golang`, and
/// `unknown` when nothing matches.
fn detect_browser_from_ua(ua: &str) -> String {
    let lowered = ua.to_lowercase();
    let has = |needle: &str| lowered.contains(needle);

    // Rule order is significant: Edge UAs also carry a Chrome token, and
    // Chrome UAs also carry a Safari token.
    let label = if has("edg/") || has("edge/") {
        "edge"
    } else if has("chrome/") && !has("chromium") {
        "chrome"
    } else if has("firefox/") {
        "firefox"
    } else if has("safari/") && !has("chrome") {
        "safari"
    } else if has("curl/") || has("wget/") {
        "cli-tool"
    } else if has("python") || has("requests/") {
        "python"
    } else if has("go-http-client") || has("golang") {
        "golang"
    } else {
        "unknown"
    };

    label.to_owned()
}
373
374/// Validate JA4 fingerprint against expected browser profile
375fn validate_against_profile(
376    ja4: &Ja4Fingerprint,
377    profile: &BrowserJa4Profile,
378    browser_name: &str,
379    result: &mut Ja4SpoofingAnalysis,
380) {
381    // Check TLS version
382    if ja4.tls_version < profile.min_tls_version {
383        result.inconsistencies.push(Cow::Owned(format!(
384            "TLS 1.{} too old for modern {} (expected 1.{}-1.{})",
385            ja4.tls_version - 10,
386            browser_name,
387            profile.min_tls_version - 10,
388            profile.max_tls_version - 10
389        )));
390        saturating_add_score(&mut result.spoofing_confidence, 30);
391    }
392
393    // Check cipher suite count
394    if ja4.cipher_count < profile.min_ciphers {
395        result.inconsistencies.push(Cow::Owned(format!(
396            "Only {} ciphers offered, {} typically offers {}-{}",
397            ja4.cipher_count, browser_name, profile.min_ciphers, profile.max_ciphers
398        )));
399        saturating_add_score(&mut result.spoofing_confidence, 25);
400    }
401
402    // Check extension count
403    if ja4.ext_count < profile.min_extensions {
404        result.inconsistencies.push(Cow::Owned(format!(
405            "Only {} extensions offered, {} typically offers {}-{}",
406            ja4.ext_count, browser_name, profile.min_extensions, profile.max_extensions
407        )));
408        saturating_add_score(&mut result.spoofing_confidence, 25);
409    }
410
411    // Check ALPN
412    let alpn_matches = profile
413        .expected_alpn
414        .iter()
415        .any(|&a| ja4.alpn.contains(a) || a == ja4.alpn);
416    if !alpn_matches && ja4.alpn != "unknown" {
417        result.inconsistencies.push(Cow::Owned(format!(
418            "ALPN '{}' unexpected for {} (expected {:?})",
419            ja4.alpn, browser_name, profile.expected_alpn
420        )));
421        saturating_add_score(&mut result.spoofing_confidence, 15);
422    }
423
424    // Check for QUIC with non-H3 claim (Chrome/Edge with QUIC should be doing H3)
425    if ja4.protocol == Ja4Protocol::QUIC
426        && (browser_name == "chrome" || browser_name == "edge")
427        && ja4.alpn != "h3"
428    {
429        result.inconsistencies.push(Cow::Borrowed(
430            "QUIC connection without H3 ALPN for Chromium browser",
431        ));
432        saturating_add_score(&mut result.spoofing_confidence, 20);
433    }
434}
435
436/// Validate generic/unknown client for bot indicators
437fn validate_generic_client(ja4: &Ja4Fingerprint, result: &mut Ja4SpoofingAnalysis) {
438    // Very minimal TLS configuration suggests automated tool
439    if ja4.cipher_count < 3 {
440        result.inconsistencies.push(Cow::Borrowed(
441            "Extremely low cipher count (<3) indicates minimal TLS client",
442        ));
443        saturating_add_score(&mut result.spoofing_confidence, 40);
444    }
445
446    if ja4.ext_count < 3 {
447        result.inconsistencies.push(Cow::Borrowed(
448            "Extremely low extension count (<3) indicates minimal TLS client",
449        ));
450        saturating_add_score(&mut result.spoofing_confidence, 40);
451    }
452
453    // Old TLS version
454    if ja4.tls_version < 12 {
455        result.inconsistencies.push(Cow::Owned(format!(
456            "TLS 1.{} is deprecated and insecure",
457            ja4.tls_version - 10
458        )));
459        saturating_add_score(&mut result.spoofing_confidence, 30);
460    }
461}
462
463/// Estimate actual client type based on JA4 characteristics
464fn estimate_actual_client(ja4: &Ja4Fingerprint) -> String {
465    // Very minimal stack
466    if ja4.cipher_count < 5 && ja4.ext_count < 5 {
467        return "minimal-client".to_string();
468    }
469
470    // Old TLS with minimal features
471    if ja4.tls_version < 12 {
472        return "legacy-client".to_string();
473    }
474
475    // Modern browser-like characteristics
476    if ja4.tls_version >= 12 && ja4.cipher_count >= 10 && ja4.ext_count >= 10 {
477        if ja4.alpn == "h2" || ja4.alpn == "h3" {
478            return "modern-browser".to_string();
479        }
480        return "modern-client".to_string();
481    }
482
483    // Moderate stack
484    if ja4.cipher_count >= 5 && ja4.ext_count >= 5 {
485        return "api-client".to_string();
486    }
487
488    "unknown".to_string()
489}
490
491/// Extended integrity analysis including JA4 validation
492///
493/// This combines header-based integrity checks with JA4 fingerprint validation
494/// for comprehensive spoofing detection.
495pub fn analyze_integrity_with_ja4(
496    request: &HttpHeaders<'_>,
497    ja4: Option<&Ja4Fingerprint>,
498) -> IntegrityAnalysis {
499    // Start with standard header integrity analysis
500    let mut result = analyze_integrity(request);
501
502    // If JA4 fingerprint is available, perform spoofing analysis
503    if let Some(ja4) = ja4 {
504        // Extract User-Agent for comparison
505        let user_agent = request
506            .headers
507            .iter()
508            .find(|(name, _)| name.as_str() == "user-agent")
509            .and_then(|(_, value)| value.to_str().ok())
510            .unwrap_or("");
511
512        let ja4_analysis = analyze_ja4_spoofing(ja4, user_agent);
513
514        // Merge JA4 spoofing results
515        for inconsistency in ja4_analysis.inconsistencies {
516            result.inconsistencies.push(inconsistency);
517        }
518
519        // Add JA4 spoofing score to overall suspicion
520        saturating_add_score(
521            &mut result.suspicion_score,
522            ja4_analysis.spoofing_confidence / 2,
523        );
524
525        // If JA4 analysis shows likely spoofing, ensure high suspicion score
526        if ja4_analysis.likely_spoofed {
527            saturating_add_score(&mut result.suspicion_score, 30);
528        }
529    }
530
531    result
532}
533
// Unit tests for the header integrity checks and the JA4 spoofing analysis.
#[cfg(test)]
mod tests {
    use super::*;
    use http::header::{HeaderName, HeaderValue};

    /// Build one `(name, value)` header pair, panicking on invalid input.
    ///
    /// Tests pass mixed-case names such as `User-Agent` while the analysis
    /// matches lower-case names, so this presumably relies on
    /// `HeaderName::from_bytes` normalising case - confirm against the `http`
    /// crate.
    fn header(name: &str, value: &str) -> (HeaderName, HeaderValue) {
        let header_name = HeaderName::from_bytes(name.as_bytes()).expect("valid header name");
        let header_value = HeaderValue::from_str(value).expect("valid header value");
        (header_name, header_value)
    }

    /// A Chrome 120 User-Agent without Sec-CH-UA must be reported.
    #[test]
    fn test_chrome_missing_hints() {
        let headers = vec![
            header(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            ),
        ];
        let req = HttpHeaders {
            headers: &headers,
            method: "GET",
            http_version: "1.1",
        };

        let result = analyze_integrity(&req);
        assert!(result.suspicion_score > 0);
        // Concatenate all messages so a single substring search covers them.
        let all_inconsistencies: String =
            result.inconsistencies.iter().map(|c| c.as_ref()).collect();
        assert!(all_inconsistencies.contains("Missing Client Hints"));
    }

    /// Chrome User-Agent plus matching Client Hints must score zero.
    #[test]
    fn test_consistent_chrome() {
        let headers = vec![
            header(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            ),
            header("Sec-CH-UA", "\"Chromium\";v=\"120\", \"Google Chrome\";v=\"120\""),
        ];
        let req = HttpHeaders {
            headers: &headers,
            method: "GET",
            http_version: "1.1",
        };

        let result = analyze_integrity(&req);
        assert_eq!(result.suspicion_score, 0);
        assert!(result.has_client_hints);
    }

    /// An over-long User-Agent is truncated, flagged and scored.
    #[test]
    fn test_oversized_user_agent_truncated() {
        // Create a User-Agent longer than MAX_USER_AGENT_LENGTH
        let oversized_ua = "A".repeat(MAX_USER_AGENT_LENGTH + 100);
        let headers = vec![header("User-Agent", &oversized_ua)];
        let req = HttpHeaders {
            headers: &headers,
            method: "GET",
            http_version: "1.1",
        };

        let result = analyze_integrity(&req);
        assert!(result.input_truncated);
        assert!(result.suspicion_score >= 20);
        let all_inconsistencies: String =
            result.inconsistencies.iter().map(|c| c.as_ref()).collect();
        assert!(all_inconsistencies.contains("exceeds maximum"));
    }

    /// The score helper must clamp at 100 instead of going beyond it.
    #[test]
    fn test_suspicion_score_saturates_at_100() {
        let mut score: u8 = 90;
        saturating_add_score(&mut score, 50);
        assert_eq!(score, 100);
    }

    // ==================== JA4 Spoofing Detection Tests ====================

    /// Create a test JA4 fingerprint with specified parameters
    ///
    /// The transport is always TCP, the SNI type is always `Domain`, and the
    /// two hashes are fixed placeholder strings.
    fn make_test_ja4(
        tls_version: u8,
        cipher_count: u8,
        ext_count: u8,
        alpn: &str,
    ) -> Ja4Fingerprint {
        Ja4Fingerprint {
            raw: format!(
                "t{}d{:02x}{:02x}{}_{}_{}",
                tls_version, cipher_count, ext_count, alpn, "aabbccddeeff", "112233445566"
            ),
            protocol: Ja4Protocol::TCP,
            tls_version,
            sni_type: super::super::ja4::Ja4SniType::Domain,
            cipher_count,
            ext_count,
            alpn: alpn.to_string(),
            cipher_hash: "aabbccddeeff".to_string(),
            ext_hash: "112233445566".to_string(),
        }
    }

    /// SECURITY TEST: Verify Chrome User-Agent with minimal TLS is detected as spoofed
    #[test]
    fn test_ja4_spoofing_chrome_with_minimal_tls() {
        let ja4 = make_test_ja4(12, 3, 3, "h1"); // Minimal TLS stack
        let chrome_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

        let result = analyze_ja4_spoofing(&ja4, chrome_ua);

        assert!(result.likely_spoofed, "Should detect spoofing");
        assert!(
            result.spoofing_confidence >= 50,
            "Confidence should be >= 50: {}",
            result.spoofing_confidence
        );
        assert_eq!(result.claimed_browser, "chrome");
        assert!(
            !result.inconsistencies.is_empty(),
            "Should have inconsistencies"
        );
    }

    /// SECURITY TEST: Verify legitimate Chrome fingerprint is not flagged
    #[test]
    fn test_ja4_legitimate_chrome() {
        let ja4 = make_test_ja4(13, 16, 18, "h2"); // Modern Chrome-like
        let chrome_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

        let result = analyze_ja4_spoofing(&ja4, chrome_ua);

        assert!(
            !result.likely_spoofed,
            "Should not flag legitimate Chrome: {:?}",
            result.inconsistencies
        );
        assert!(
            result.spoofing_confidence < 50,
            "Confidence should be < 50: {}",
            result.spoofing_confidence
        );
        assert_eq!(result.claimed_browser, "chrome");
    }

    /// SECURITY TEST: Verify Firefox User-Agent with Chrome-like fingerprint is suspicious
    ///
    /// Only the browser classification is asserted here; the score is not
    /// checked because the fingerprint falls inside the Firefox profile.
    #[test]
    fn test_ja4_firefox_with_chromium_fingerprint() {
        // This simulates a bot claiming to be Firefox but using a Chromium TLS stack
        let ja4 = make_test_ja4(13, 20, 22, "h2");
        let firefox_ua =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0";

        let result = analyze_ja4_spoofing(&ja4, firefox_ua);

        assert_eq!(result.claimed_browser, "firefox");
        // Firefox and the JA4 might be compatible, so we check for specific issues
        // The key is that we're validating against Firefox profile
    }

    /// SECURITY TEST: Verify old TLS version is flagged for modern browser claims
    #[test]
    fn test_ja4_old_tls_for_modern_browser() {
        let ja4 = make_test_ja4(10, 15, 15, "h1"); // TLS 1.0
        let chrome_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

        let result = analyze_ja4_spoofing(&ja4, chrome_ua);

        assert!(result.likely_spoofed, "Should detect old TLS as spoofing");
        assert!(
            result
                .inconsistencies
                .iter()
                .any(|i| i.as_ref().contains("too old")),
            "Should mention old TLS: {:?}",
            result.inconsistencies
        );
    }

    /// SECURITY TEST: Verify CLI tool User-Agent with browser fingerprint
    #[test]
    fn test_ja4_cli_tool_with_browser_fingerprint() {
        let ja4 = make_test_ja4(13, 16, 18, "h2"); // Browser-like
        let curl_ua = "curl/8.4.0";

        let result = analyze_ja4_spoofing(&ja4, curl_ua);

        assert_eq!(result.claimed_browser, "cli-tool");
        // CLI tools don't have expected profiles, so we estimate actual client
        assert_eq!(result.estimated_actual, "modern-browser");
    }

    /// SECURITY TEST: Verify Python requests with minimal TLS
    #[test]
    fn test_ja4_python_minimal_tls() {
        let ja4 = make_test_ja4(12, 4, 4, "h1"); // Minimal
        let python_ua = "python-requests/2.31.0";

        let result = analyze_ja4_spoofing(&ja4, python_ua);

        assert_eq!(result.claimed_browser, "python");
        // Python is an "unknown" browser type, so we estimate actual client
        // A minimal TLS stack should be flagged in the generic validation
        assert_eq!(result.estimated_actual, "minimal-client");
    }

    /// Test browser detection from User-Agent
    #[test]
    fn test_detect_browser_from_ua() {
        assert_eq!(
            detect_browser_from_ua("Mozilla/5.0 Chrome/120.0.0.0"),
            "chrome"
        );
        assert_eq!(
            detect_browser_from_ua("Mozilla/5.0 Firefox/121.0"),
            "firefox"
        );
        assert_eq!(
            detect_browser_from_ua("Mozilla/5.0 Safari/537.36"),
            "safari"
        );
        assert_eq!(detect_browser_from_ua("Mozilla/5.0 Edg/120.0.0.0"), "edge");
        assert_eq!(detect_browser_from_ua("curl/8.4.0"), "cli-tool");
        assert_eq!(detect_browser_from_ua("python-requests/2.31.0"), "python");
        assert_eq!(detect_browser_from_ua("Go-http-client/1.1"), "golang");
        assert_eq!(detect_browser_from_ua("SomeRandomBot/1.0"), "unknown");
    }

    /// Test estimate actual client from JA4
    #[test]
    fn test_estimate_actual_client() {
        // Modern browser
        let modern = make_test_ja4(13, 16, 18, "h2");
        assert_eq!(estimate_actual_client(&modern), "modern-browser");

        // Minimal client
        let minimal = make_test_ja4(12, 2, 2, "h1");
        assert_eq!(estimate_actual_client(&minimal), "minimal-client");

        // Legacy client
        let legacy = make_test_ja4(10, 10, 10, "h1");
        assert_eq!(estimate_actual_client(&legacy), "legacy-client");

        // API client
        let api = make_test_ja4(12, 8, 8, "h1");
        assert_eq!(estimate_actual_client(&api), "api-client");
    }

    /// Test extended integrity analysis with JA4
    ///
    /// The same consistent Chrome request is scored twice: once with a
    /// browser-like fingerprint and once with a TLS 1.0, two-cipher one.
    #[test]
    fn test_analyze_integrity_with_ja4() {
        let headers = vec![
            header(
                "User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            ),
            header("Sec-CH-UA", "\"Chromium\";v=\"120\""),
        ];
        let req = HttpHeaders {
            headers: &headers,
            method: "GET",
            http_version: "1.1",
        };

        // Test with legitimate JA4
        let legitimate_ja4 = make_test_ja4(13, 16, 18, "h2");
        let result = analyze_integrity_with_ja4(&req, Some(&legitimate_ja4));
        assert!(
            result.suspicion_score < 30,
            "Legitimate request should have low score: {}",
            result.suspicion_score
        );

        // Test with suspicious JA4
        let spoofed_ja4 = make_test_ja4(10, 2, 2, "h1");
        let result = analyze_integrity_with_ja4(&req, Some(&spoofed_ja4));
        assert!(
            result.suspicion_score >= 30,
            "Spoofed request should have high score: {}",
            result.suspicion_score
        );
    }
}