crawlex 1.0.4

Stealth crawler with Chrome-perfect TLS/H2 fingerprint, render pool, hooks, persistent queue
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
//! Startup-time coherence check on an `IdentityBundle`.
//!
//! Mismatches between UA ↔ sec-ch-ua ↔ major, locale ↔ languages, or
//! timezone ↔ offset are classic "bot-builder forgot to align one field"
//! tells. We refuse to start a crawl with an inconsistent bundle.
//!
//! Cross-layer checks enforce FP-Inconsistent (arxiv.org/abs/2406.07647)
//! mitigation: every attribute that can be correlated by a detector is
//! cross-verified here. A single inconsistency = reject bundle.

use crate::identity::IdentityBundle;

/// Reasons an `IdentityBundle` is rejected by `IdentityValidator::check`.
///
/// Each variant corresponds to one coherence rule; the `#[error]` message
/// carries the offending values so the failure can be diagnosed from the
/// log line alone.
#[derive(Debug, thiserror::Error)]
pub enum ValidationError {
    /// The user-agent string does not contain the declared major version.
    #[error("UA string does not contain major version {major}: {ua}")]
    UaMajorMismatch { major: u32, ua: String },
    /// `sec-ch-ua` has no `v="<major>"` entry for the declared major.
    #[error("sec-ch-ua does not reference major version {major}: {sec}")]
    SecChUaMissingMajor { major: u32, sec: String },
    /// The locale is not one of the entries of the languages JSON array.
    #[error("locale {locale} absent from languages list {langs}")]
    LocaleNotInLanguages { locale: String, langs: String },
    /// The `accept-language` value does not begin with the locale.
    #[error("accept-language {al} does not start with locale {locale}")]
    AcceptLanguageMismatch { locale: String, al: String },
    /// The declared offset is more than 60 minutes away from the offset
    /// guessed for a known timezone name.
    #[error("timezone {tz} has offset_min {declared}, expected roughly {guessed}")]
    TimezoneOffsetMismatch {
        tz: String,
        declared: i32,
        guessed: i32,
    },
    /// The available screen size exceeds the screen size on `axis`
    /// (`"w"` or `"h"`).
    #[error("avail_screen_{axis} ({avail}) > screen_{axis} ({screen})")]
    AvailExceedsScreen {
        axis: &'static str,
        avail: u32,
        screen: u32,
    },
    /// The viewport exceeds the screen size on `axis` (`"w"` or `"h"`).
    #[error("viewport_{axis} ({view}) > screen_{axis} ({screen})")]
    ViewportExceedsScreen {
        axis: &'static str,
        view: u32,
        screen: u32,
    },
    /// The OS family detected in the UA differs from the one detected in
    /// the platform string. `ua_token` is the detected OS label.
    #[error("UA platform token {ua_token:?} does not match platform {platform:?}")]
    UaPlatformMismatch { ua_token: String, platform: String },
    /// The OS family in `sec-ch-ua-platform` differs from the one detected
    /// in the platform string.
    #[error("sec-ch-ua-platform {ch:?} does not match platform {platform:?}")]
    ChPlatformMismatch { ch: String, platform: String },
    /// The WebGL renderer names a graphics backend (Metal / Direct3D) that
    /// the validator does not accept for the detected OS.
    #[error("WebGL renderer {renderer:?} is inconsistent with platform {platform:?}")]
    WebglPlatformMismatch { renderer: String, platform: String },
    /// No recognised GPU vendor keyword was found in a WebGL renderer
    /// string (masked or unmasked).
    #[error(
        "WebGL renderer {renderer:?} must mention a recognised GPU vendor keyword \
         (intel/nvidia/amd/apple)"
    )]
    WebglVendorUnrecognised { renderer: String },
    /// The masked and unmasked WebGL renderers name different GPU vendors.
    #[error(
        "WebGL unmasked vendor keyword {unmasked:?} must equal masked keyword {masked:?} \
         (renderers: {renderer:?} vs {unmasked_renderer:?})"
    )]
    WebglVendorKeywordMismatch {
        masked: String,
        unmasked: String,
        renderer: String,
        unmasked_renderer: String,
    },
    /// The GPU vendor is not accepted on the detected OS.
    #[error("WebGL vendor keyword {keyword:?} is not valid for platform {platform:?}")]
    WebglVendorPlatformMismatch { keyword: String, platform: String },
    /// The WebGPU adapter description does not name the same GPU vendor as
    /// the WebGL renderer (or names none at all).
    #[error(
        "WebGPU adapter description {description:?} must mention the same GPU vendor \
         keyword {keyword:?} as webgl_renderer {renderer:?}"
    )]
    WebgpuVendorMismatch {
        description: String,
        keyword: String,
        renderer: String,
    },
    /// The UA brands field failed to parse as JSON; `err` is the parser
    /// message.
    #[error("ua_brands_json invalid JSON: {err}")]
    UaBrandsInvalidJson { err: String },
    /// The UA brands JSON is not an array containing an entry whose
    /// `version` string equals the declared major.
    #[error("ua_brands_json missing major version {major}")]
    UaBrandsMissingMajor { major: u32 },
    /// The full-version list failed to parse as JSON.
    #[error("ua_full_version_list invalid JSON: {err}")]
    UaFullVersionListInvalidJson { err: String },
    /// The full version does not start with `"<major>."`.
    #[error("ua_full_version {full} does not start with major {major}")]
    UaFullVersionMajorMismatch { full: String, major: u32 },
    /// The languages field failed to parse as JSON.
    #[error("languages_json invalid JSON: {err}")]
    LanguagesInvalidJson { err: String },
    /// `device_memory` is not one of the accepted values. The validator
    /// accepts only the integer buckets 1, 2, 4 and 8.
    #[error("device_memory {0} not in the standard Chrome bucket {{0.25,0.5,1,2,4,8}}")]
    DeviceMemoryInvalid(u32),
    /// `hardware_concurrency` is outside `2..=32`.
    #[error("hardware_concurrency {0} outside plausible desktop range [2,32]")]
    HardwareConcurrencyInvalid(u32),
    /// `color_depth` is not 24, 30 or 48.
    #[error("color_depth {0} not in {{24,30,48}}")]
    ColorDepthInvalid(u32),
    /// `device_pixel_ratio` is outside `1.0..=3.0`.
    #[error("device_pixel_ratio {0} outside [1.0, 3.0]")]
    DprInvalid(f32),
    /// The viewport exceeds the available screen size on `axis`
    /// (`"w"` or `"h"`).
    #[error("viewport_{axis} ({view}) > avail_screen_{axis} ({avail})")]
    ViewportExceedsAvail {
        axis: &'static str,
        view: u32,
        avail: u32,
    },
    /// The major version of the bundle-derived TLS profile differs from
    /// the bundle's `ua_major`.
    #[error("TLS profile major {tls_major} disagrees with bundle.ua_major {ua_major}")]
    TlsProfileMajorMismatch { tls_major: u32, ua_major: u32 },
    /// `scrollbar_width` is greater than 24.
    #[error("scrollbar_width {0} outside plausible desktop/mobile range [0,24]")]
    ScrollbarWidthInvalid(u32),
    /// `heap_size_limit` is below 256 MiB.
    #[error("heap_size_limit {0} below 256 MiB (mobile floor)")]
    HeapSizeTooSmall(u64),
    /// `heap_size_limit` is above 8 GiB.
    #[error("heap_size_limit {0} above 8 GiB (not a Chrome value)")]
    HeapSizeTooLarge(u64),
    /// `max_texture_size` is outside `4096..=32768`.
    #[error("max_texture_size {0} not in plausible GPU range [4096,32768]")]
    MaxTextureSizeInvalid(u32),
    /// At least one maximum viewport dimension is smaller than
    /// `max_texture_size`.
    #[error(
        "max_viewport_dims ({w},{h}) incoherent with max_texture_size {mts}; \
         viewport dim must be >= max_texture_size on real drivers"
    )]
    MaxViewportDimsIncoherent { w: u32, h: u32, mts: u32 },
    /// `audio_sample_rate` is not 22050, 44100, 48000 or 96000.
    #[error("audio_sample_rate {0} not in standard Chrome set {{22050,44100,48000,96000}}")]
    AudioSampleRateInvalid(u32),
    /// The fonts field failed to parse as JSON.
    #[error("fonts_json invalid JSON: {err}")]
    FontsInvalidJson { err: String },
    /// The font list contains a name matching a keyword excluded for the
    /// detected OS. `font` holds the matched exclusion keyword.
    #[error("fonts list contains {font:?} which is incoherent with platform {platform:?}")]
    FontsPlatformMismatch { font: String, platform: String },
}

/// Stateless entry point for the startup-time coherence check on an
/// [`IdentityBundle`]. All functionality lives in [`IdentityValidator::check`].
pub struct IdentityValidator;

impl IdentityValidator {
    /// Cross-checks every correlated attribute of `b` and stops at the first
    /// inconsistency.
    ///
    /// Checks run in this order: UA / client-hint version fields, locale and
    /// languages, timezone offset, OS family across UA / platform /
    /// `sec-ch-ua-platform`, WebGL and WebGPU vendor strings, screen
    /// geometry, device capability scalars, shim-injection scalars, font
    /// list, and finally the TLS profile major version.
    ///
    /// # Errors
    ///
    /// Returns the [`ValidationError`] describing the first rule that `b`
    /// violates. Because evaluation short-circuits, a bundle with several
    /// problems only reports the earliest one.
    pub fn check(b: &IdentityBundle) -> Result<(), ValidationError> {
        // UA must contain the declared major version as a standalone number
        // (not as digits buried inside some other version component).
        let major_str = b.ua_major.to_string();
        if !Self::ua_mentions_major(&b.ua, &major_str) {
            return Err(ValidationError::UaMajorMismatch {
                major: b.ua_major,
                ua: b.ua.clone(),
            });
        }
        if !b.sec_ch_ua.contains(&format!("v=\"{}\"", b.ua_major)) {
            return Err(ValidationError::SecChUaMissingMajor {
                major: b.ua_major,
                sec: b.sec_ch_ua.clone(),
            });
        }

        // ua_full_version must start with the major number, dot-separated.
        if !b.ua_full_version.starts_with(&format!("{}.", b.ua_major)) {
            return Err(ValidationError::UaFullVersionMajorMismatch {
                full: b.ua_full_version.clone(),
                major: b.ua_major,
            });
        }

        // ua_brands JSON parses and references the declared major.
        let ua_brands: serde_json::Value = serde_json::from_str(&b.ua_brands)
            .map_err(|e| ValidationError::UaBrandsInvalidJson { err: e.to_string() })?;
        let has_major = ua_brands
            .as_array()
            .map(|arr| {
                arr.iter().any(|e| {
                    e.get("version")
                        .and_then(|v| v.as_str())
                        .is_some_and(|s| s == major_str)
                })
            })
            .unwrap_or(false);
        if !has_major {
            return Err(ValidationError::UaBrandsMissingMajor { major: b.ua_major });
        }

        // ua_full_version_list JSON parses.
        serde_json::from_str::<serde_json::Value>(&b.ua_full_version_list)
            .map_err(|e| ValidationError::UaFullVersionListInvalidJson { err: e.to_string() })?;

        // languages_json parses.
        let langs: serde_json::Value = serde_json::from_str(&b.languages_json)
            .map_err(|e| ValidationError::LanguagesInvalidJson { err: e.to_string() })?;

        // Locale should appear in the languages list. A non-array value
        // counts as "locale absent".
        let locale_in_langs = langs
            .as_array()
            .map(|arr| {
                arr.iter()
                    .any(|v| v.as_str().is_some_and(|s| s == b.locale))
            })
            .unwrap_or(false);
        if !locale_in_langs {
            return Err(ValidationError::LocaleNotInLanguages {
                locale: b.locale.clone(),
                langs: b.languages_json.clone(),
            });
        }

        // accept-language header should lead with the locale.
        if !b.accept_language.starts_with(&b.locale) {
            return Err(ValidationError::AcceptLanguageMismatch {
                locale: b.locale.clone(),
                al: b.accept_language.clone(),
            });
        }

        // Timezone offset sanity — avoid off-by-an-hour. Unknown
        // timezones return None here: we can't guess, so we can't
        // reject. A bundle that picks an obscure tz with a plausible
        // offset should not be flagged just because our table is
        // short. `abs_diff` keeps the comparison overflow-free even for
        // a nonsensical declared offset.
        if let Some(guessed) = guess_tz_offset_min(&b.timezone) {
            if guessed.abs_diff(b.tz_offset_min) > 60 {
                return Err(ValidationError::TimezoneOffsetMismatch {
                    tz: b.timezone.clone(),
                    declared: b.tz_offset_min,
                    guessed,
                });
            }
        }

        // Platform ↔ UA token ↔ sec-ch-ua-platform coherence.
        let ua_os = detect_ua_os(&b.ua);
        let declared_os = detect_platform_os(&b.platform);
        if ua_os != declared_os {
            return Err(ValidationError::UaPlatformMismatch {
                ua_token: ua_os.as_str().into(),
                platform: b.platform.clone(),
            });
        }
        let ch_os = detect_ch_platform_os(&b.ua_platform);
        if ch_os != declared_os {
            return Err(ValidationError::ChPlatformMismatch {
                ch: b.ua_platform.clone(),
                platform: b.platform.clone(),
            });
        }

        // WebGL renderer family must be consistent with the OS.
        // Linux uses GL/Vulkan, not Direct3D/Metal; macOS uses Metal, not D3D;
        // Windows uses ANGLE/Direct3D. A mismatch is a one-line giveaway.
        check_webgl_platform(&b.webgl_renderer, declared_os)?;
        check_webgl_platform(&b.webgl_unmasked_renderer, declared_os)?;

        // WebGL vendor keyword (intel/nvidia/amd/apple) must appear in
        // `webgl_renderer` and match the unmasked pair. Detectors correlate
        // the vendor string across masked/unmasked and across WebGL/WebGPU;
        // any skew is a free FP-Inconsistent flag.
        let masked_kw = detect_gpu_vendor(&b.webgl_renderer).ok_or_else(|| {
            ValidationError::WebglVendorUnrecognised {
                renderer: b.webgl_renderer.clone(),
            }
        })?;
        let unmasked_kw = detect_gpu_vendor(&b.webgl_unmasked_renderer).ok_or_else(|| {
            ValidationError::WebglVendorUnrecognised {
                renderer: b.webgl_unmasked_renderer.clone(),
            }
        })?;
        if masked_kw != unmasked_kw {
            tracing::warn!(
                masked = masked_kw.as_str(),
                unmasked = unmasked_kw.as_str(),
                "webgl masked vs unmasked GPU vendor keyword disagree",
            );
            return Err(ValidationError::WebglVendorKeywordMismatch {
                masked: masked_kw.as_str().into(),
                unmasked: unmasked_kw.as_str().into(),
                renderer: b.webgl_renderer.clone(),
                unmasked_renderer: b.webgl_unmasked_renderer.clone(),
            });
        }
        // Apple silicon only appears on macOS. NVIDIA/AMD/Intel can appear
        // on Linux or Windows, but Apple on Linux/Windows is an instant tell.
        if !masked_kw.is_valid_on(declared_os) {
            return Err(ValidationError::WebglVendorPlatformMismatch {
                keyword: masked_kw.as_str().into(),
                platform: b.platform.clone(),
            });
        }

        // WebGPU adapter description must share the same vendor keyword as
        // WebGL — otherwise "WebGL says Intel, WebGPU says NVIDIA" trips
        // cross-API checks in fingerprinters like CreepJS.
        let webgpu_kw = detect_gpu_vendor(&b.webgpu_adapter_description);
        if webgpu_kw != Some(masked_kw) {
            return Err(ValidationError::WebgpuVendorMismatch {
                description: b.webgpu_adapter_description.clone(),
                keyword: masked_kw.as_str().into(),
                renderer: b.webgl_renderer.clone(),
            });
        }

        // Screen geometry invariants: avail ≤ screen, viewport ≤ avail.
        if b.avail_screen_w > b.screen_w {
            return Err(ValidationError::AvailExceedsScreen {
                axis: "w",
                avail: b.avail_screen_w,
                screen: b.screen_w,
            });
        }
        if b.avail_screen_h > b.screen_h {
            return Err(ValidationError::AvailExceedsScreen {
                axis: "h",
                avail: b.avail_screen_h,
                screen: b.screen_h,
            });
        }
        if b.viewport_w > b.screen_w {
            return Err(ValidationError::ViewportExceedsScreen {
                axis: "w",
                view: b.viewport_w,
                screen: b.screen_w,
            });
        }
        if b.viewport_h > b.screen_h {
            return Err(ValidationError::ViewportExceedsScreen {
                axis: "h",
                view: b.viewport_h,
                screen: b.screen_h,
            });
        }
        // Viewport cannot exceed available screen (browser window fits inside
        // the work area once OS chrome is subtracted).
        if b.viewport_w > b.avail_screen_w {
            return Err(ValidationError::ViewportExceedsAvail {
                axis: "w",
                view: b.viewport_w,
                avail: b.avail_screen_w,
            });
        }
        if b.viewport_h > b.avail_screen_h {
            return Err(ValidationError::ViewportExceedsAvail {
                axis: "h",
                view: b.viewport_h,
                avail: b.avail_screen_h,
            });
        }

        // Device capability sanity — Chrome clamps navigator.deviceMemory to
        // the set {0.25,0.5,1,2,4,8}. The field is a u32, so the fractional
        // buckets are not representable; only the integer subset is accepted.
        if !matches!(b.device_memory, 1 | 2 | 4 | 8) {
            return Err(ValidationError::DeviceMemoryInvalid(b.device_memory));
        }
        if !(2..=32).contains(&b.hardware_concurrency) {
            return Err(ValidationError::HardwareConcurrencyInvalid(
                b.hardware_concurrency,
            ));
        }
        if !matches!(b.color_depth, 24 | 30 | 48) {
            return Err(ValidationError::ColorDepthInvalid(b.color_depth));
        }
        if !(1.0..=3.0).contains(&b.device_pixel_ratio) {
            return Err(ValidationError::DprInvalid(b.device_pixel_ratio));
        }

        // Shim-injection scalars (wave1). Reject absurd values so a bad
        // fixture can't silently ship a 64 MiB heap limit or a 128 px
        // scrollbar into the rendered shim.
        if b.scrollbar_width > 24 {
            return Err(ValidationError::ScrollbarWidthInvalid(b.scrollbar_width));
        }
        // 256 MiB floor = low-end mobile; 8 GiB ceiling = V8 allocates
        // 4-8 GiB on 64-bit desktop and never reports more.
        if b.heap_size_limit < 268_435_456 {
            return Err(ValidationError::HeapSizeTooSmall(b.heap_size_limit));
        }
        if b.heap_size_limit > 8_589_934_592 {
            return Err(ValidationError::HeapSizeTooLarge(b.heap_size_limit));
        }
        if !(4096..=32768).contains(&b.max_texture_size) {
            return Err(ValidationError::MaxTextureSizeInvalid(b.max_texture_size));
        }
        // Real GL drivers report MAX_VIEWPORT_DIMS >= MAX_TEXTURE_SIZE.
        // Catch forged personas where someone typed 2048 for both just to
        // fill the field.
        if b.max_viewport_w < b.max_texture_size || b.max_viewport_h < b.max_texture_size {
            return Err(ValidationError::MaxViewportDimsIncoherent {
                w: b.max_viewport_w,
                h: b.max_viewport_h,
                mts: b.max_texture_size,
            });
        }
        if !matches!(b.audio_sample_rate, 22050 | 44100 | 48000 | 96000) {
            return Err(ValidationError::AudioSampleRateInvalid(b.audio_sample_rate));
        }
        // Font list must parse and match the OS. Liberation on macOS, SF
        // Pro on Linux, Segoe UI on mac — all free one-line tells.
        // Non-string array entries are ignored; a non-array JSON value skips
        // the platform check entirely.
        let fonts_parsed: serde_json::Value = serde_json::from_str(&b.fonts_json)
            .map_err(|e| ValidationError::FontsInvalidJson { err: e.to_string() })?;
        if let Some(arr) = fonts_parsed.as_array() {
            let names: Vec<String> = arr
                .iter()
                .filter_map(|v| v.as_str().map(|s| s.to_string()))
                .collect();
            check_fonts_platform(&names, declared_os, &b.platform)?;
        }

        // TLS profile coherence — the bundle-derived Profile must map to
        // the same major version the bundle claims. Guards against a caller
        // mutating ua_major without rebuilding the bundle.
        let tls_major = b.profile().major_version();
        if tls_major != b.ua_major {
            return Err(ValidationError::TlsProfileMajorMismatch {
                tls_major,
                ua_major: b.ua_major,
            });
        }

        Ok(())
    }

    /// Returns `true` when `major` occurs in `ua` as a standalone number,
    /// i.e. with no ASCII digit directly before or after the occurrence.
    ///
    /// A plain substring test would accept major `131` for a UA such as
    /// `Chrome/130.0.6131.0`, because the digits show up inside an unrelated
    /// build number. `major` is expected to consist of ASCII digits only, so
    /// skipping overlapping occurrences cannot hide a valid one: any
    /// overlapping occurrence is necessarily preceded by a digit.
    fn ua_mentions_major(ua: &str, major: &str) -> bool {
        let bytes = ua.as_bytes();
        ua.match_indices(major).any(|(start, hit)| {
            let end = start + hit.len();
            let digit_before = start > 0 && bytes[start - 1].is_ascii_digit();
            let digit_after = bytes.get(end).is_some_and(u8::is_ascii_digit);
            !digit_before && !digit_after
        })
    }
}

/// Looks up the expected UTC offset for a timezone name, in minutes west of
/// UTC (the sign convention of JS `getTimezoneOffset`).
///
/// Only the names in the table below are recognised; every other input
/// yields `None`, which callers treat as "cannot verify" instead of
/// assuming a default zone and spuriously rejecting a bundle. Matching is
/// exact and case-sensitive.
///
/// Duplicate of the helper in `render/pool.rs` — keep in sync; merging is
/// phase 3's refactor work.
fn guess_tz_offset_min(tz: &str) -> Option<i32> {
    const KNOWN_OFFSETS: &[(&str, i32)] = &[
        ("UTC", 0),
        ("Etc/UTC", 0),
        ("America/Sao_Paulo", 180),
        ("America/Buenos_Aires", 180),
        ("America/Santiago", 180),
        ("America/New_York", 300),
        ("America/Chicago", 360),
        ("America/Denver", 420),
        ("America/Los_Angeles", 480),
        ("Europe/London", 0),
        ("Europe/Lisbon", 0),
        ("Europe/Berlin", -60),
        ("Europe/Paris", -60),
        ("Europe/Madrid", -60),
        ("Europe/Rome", -60),
        ("Europe/Moscow", -180),
        ("Asia/Tokyo", -540),
        ("Asia/Seoul", -540),
        ("Asia/Shanghai", -480),
        ("Asia/Taipei", -480),
        ("Asia/Singapore", -480),
        ("Asia/Hong_Kong", -480),
        ("Asia/Kolkata", -330),
        ("Australia/Sydney", -600),
    ];

    KNOWN_OFFSETS
        .iter()
        .find(|(name, _)| *name == tz)
        .map(|&(_, minutes_west)| minutes_west)
}

/// Operating-system family used to compare the platform-related fields of a
/// bundle against each other. `Unknown` is the fallback when a string
/// matches none of the recognised markers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Os {
    Linux,
    Windows,
    MacOs,
    Unknown,
}

impl Os {
    /// Display label for this OS family, as embedded in error payloads.
    fn as_str(self) -> &'static str {
        match self {
            Os::Unknown => "Unknown",
            Os::MacOs => "macOS",
            Os::Windows => "Windows",
            Os::Linux => "Linux",
        }
    }
}

/// Classifies a user-agent string by the OS marker it contains.
///
/// Chrome UA tokens:
///   Linux   -> "X11; Linux x86_64"
///   Windows -> "Windows NT 10.0; Win64; x64"
///   macOS   -> "Macintosh; Intel Mac OS X 10_15_7"
///
/// Markers are tried in table order, so a Linux marker wins over a Windows
/// marker, which wins over a macOS marker. No marker at all gives
/// `Os::Unknown`. Matching is case-sensitive.
fn detect_ua_os(ua: &str) -> Os {
    const MARKERS: [(&str, Os); 5] = [
        ("X11", Os::Linux),
        ("Linux", Os::Linux),
        ("Windows NT", Os::Windows),
        ("Macintosh", Os::MacOs),
        ("Mac OS X", Os::MacOs),
    ];

    MARKERS
        .iter()
        .find(|(marker, _)| ua.contains(*marker))
        .map_or(Os::Unknown, |&(_, family)| family)
}

/// Classifies a `navigator.platform` value by OS family.
///
/// navigator.platform values:
///   "Linux x86_64" / "Linux armv7l" etc
///   "Win32"
///   "MacIntel"
///
/// Linux and Windows are recognised by prefix; macOS by the substring
/// "Mac" anywhere in the value (which covers "MacIntel"). Anything else is
/// `Os::Unknown`. Matching is case-sensitive.
fn detect_platform_os(platform: &str) -> Os {
    match platform {
        p if p.starts_with("Linux") => Os::Linux,
        p if p.starts_with("Win") => Os::Windows,
        p if p.contains("Mac") => Os::MacOs,
        _ => Os::Unknown,
    }
}

/// Classifies a `sec-ch-ua-platform` header value by OS family.
///
/// The header value is quoted ("\"Linux\"" / "\"Windows\"" / "\"macOS\""),
/// so every leading and trailing double quote is stripped before an exact,
/// case-sensitive comparison. Unrecognised names map to `Os::Unknown`.
fn detect_ch_platform_os(ch: &str) -> Os {
    let name = ch.trim_matches('"');
    if name == "Linux" {
        Os::Linux
    } else if name == "Windows" {
        Os::Windows
    } else if name == "macOS" || name == "Mac OS X" {
        Os::MacOs
    } else {
        Os::Unknown
    }
}

/// GPU vendor keyword recognised by the validator. Restricted to the four
/// majors a desktop Chrome bundle can plausibly ship — adding more would
/// also mean expanding `is_valid_on` below.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GpuVendor {
    Intel,
    Nvidia,
    Amd,
    Apple,
}

impl GpuVendor {
    /// Lower-case keyword for this vendor, as searched for in renderer
    /// strings and reported in error payloads.
    fn as_str(self) -> &'static str {
        match self {
            Self::Intel => "intel",
            Self::Nvidia => "nvidia",
            Self::Amd => "amd",
            Self::Apple => "apple",
        }
    }

    /// Whether a GPU from this vendor is accepted on `os`.
    ///
    /// Apple GPUs are accepted on macOS only. Intel, NVIDIA and AMD are
    /// accepted on Linux, Windows and macOS. `Os::Unknown` leaves the gate
    /// open for every vendor, Apple included: with no OS to compare
    /// against, this rule has nothing to contradict.
    ///
    /// The `Unknown` arm must stay first; placed after the Apple arms it
    /// would be shadowed and Apple-on-Unknown would be rejected.
    fn is_valid_on(self, os: Os) -> bool {
        match (self, os) {
            (_, Os::Unknown) => true,
            (Self::Apple, Os::MacOs) => true,
            (Self::Apple, Os::Linux | Os::Windows) => false,
            (Self::Intel | Self::Nvidia | Self::Amd, Os::Linux | Os::Windows | Os::MacOs) => true,
        }
    }
}

/// Extracts the GPU vendor from a WebGL / WebGPU renderer string.
///
/// The input is ASCII-lowercased and scanned for each vendor keyword via
/// `contains_word`, so "intelligent" does not count as Intel. Keywords are
/// tried in the order intel, nvidia, amd, apple; the first one present
/// wins. Returns `None` when no keyword is found.
fn detect_gpu_vendor(s: &str) -> Option<GpuVendor> {
    const KEYWORDS: [(&str, GpuVendor); 4] = [
        ("intel", GpuVendor::Intel),
        ("nvidia", GpuVendor::Nvidia),
        ("amd", GpuVendor::Amd),
        ("apple", GpuVendor::Apple),
    ];

    let haystack = s.to_ascii_lowercase();
    KEYWORDS
        .iter()
        .find(|(keyword, _)| contains_word(&haystack, keyword))
        .map(|&(_, vendor)| vendor)
}

/// Reports whether `needle` occurs in `hay` as a stand-alone word.
///
/// An occurrence counts only if the byte before it and the byte after it
/// are each either absent (string edge) or not an ASCII letter. Digits and
/// punctuation therefore act as word boundaries. Every start offset is
/// examined, including ones that overlap an earlier rejected occurrence.
/// The comparison is byte-wise and case-sensitive.
fn contains_word(hay: &str, needle: &str) -> bool {
    let text = hay.as_bytes();
    let word = needle.as_bytes();

    // A needle longer than the haystack cannot occur at all.
    let Some(last_start) = text.len().checked_sub(word.len()) else {
        return false;
    };

    let is_boundary = |neighbour: Option<&u8>| neighbour.map_or(true, |b| !b.is_ascii_alphabetic());

    (0..=last_start).any(|start| {
        text[start..].starts_with(word)
            && is_boundary(text[..start].last())
            && is_boundary(text.get(start + word.len()))
    })
}

/// Verifies that a font list contains nothing that flatly contradicts the
/// OS family.
///
/// This is a negative rule only: each OS has a short list of keywords for
/// fonts that do not ship on it, and any font whose lower-cased name
/// contains one of those keywords fails the check. Full membership checking
/// would be fragile because users install extra fonts. `Os::Unknown` has no
/// exclusions and always passes.
///
/// # Errors
///
/// Returns `ValidationError::FontsPlatformMismatch` for the first exclusion
/// keyword (in list order) matched by any font. `font` carries the keyword,
/// not the full font name, and `platform` echoes the `platform` argument.
fn check_fonts_platform(fonts: &[String], os: Os, platform: &str) -> Result<(), ValidationError> {
    // Per-OS exclusion keywords. On Linux, "Arial"/"Times New Roman" via
    // msttcorefonts are common enough to stay off the list.
    let excluded: &[&str] = match os {
        Os::Linux => &["sf pro", "helvetica neue", "segoe ui", "calibri"],
        Os::MacOs => &["segoe ui", "calibri", "liberation mono", "dejavu sans mono"],
        Os::Windows => &[
            "sf pro",
            "helvetica neue",
            "liberation mono",
            "dejavu sans mono",
        ],
        Os::Unknown => &[],
    };

    // Case-insensitive substring matching, so variants such as
    // "SF Pro Display" are caught by the "sf pro" keyword.
    let normalized: Vec<String> = fonts
        .iter()
        .map(|name| name.to_ascii_lowercase())
        .collect();
    let offender = excluded
        .iter()
        .copied()
        .find(|keyword| normalized.iter().any(|name| name.contains(*keyword)));

    match offender {
        Some(keyword) => Err(ValidationError::FontsPlatformMismatch {
            font: keyword.into(),
            platform: platform.into(),
        }),
        None => Ok(()),
    }
}

/// Rejects a WebGL renderer string that names a graphics backend which
/// cannot exist on the claimed operating system.
///
/// The renderer is lowercased (ASCII) and scanned for two families of
/// keywords:
///
/// * Metal / Apple Silicon ("metal", or "apple m" immediately followed by a
///   digit, e.g. "Apple M1", "Apple M3 Pro") — rejected on Linux and Windows.
/// * Direct3D ("direct3d", "d3d11", "d3d9") — rejected on Linux and macOS.
///
/// `Os::Unknown` is never rejected.
///
/// # Errors
///
/// Returns [`ValidationError::WebglPlatformMismatch`] carrying the original
/// (non-lowercased) renderer string and a fixed platform label for `os`
/// ("Linux x86_64", "MacIntel" or "Win32").
fn check_webgl_platform(renderer: &str, os: Os) -> Result<(), ValidationError> {
    let lowered = renderer.to_ascii_lowercase();

    // Apple Silicon parts are named "Apple M<generation>". Matching on the
    // digit rather than enumerating "m1"/"m2" keeps newer generations
    // (M3, M4, ...) covered, while strings such as "Apple GPU" stay
    // unmatched and are left to other checks.
    let names_apple_silicon = lowered.match_indices("apple m").any(|(start, keyword)| {
        lowered[start + keyword.len()..].starts_with(|c: char| c.is_ascii_digit())
    });
    // Metal and Apple Silicon only exist on macOS.
    let mentions_metal = lowered.contains("metal") || names_apple_silicon;
    // Direct3D/D3D11 is Windows (ANGLE on Win). Chrome on Linux with ANGLE
    // uses OpenGL/Vulkan backends; Direct3D never appears there.
    let mentions_d3d =
        lowered.contains("direct3d") || lowered.contains("d3d11") || lowered.contains("d3d9");

    // Pick the platform label of the violated rule, or bail out early when
    // the renderer is plausible for this OS.
    let platform = match os {
        Os::Linux if mentions_metal || mentions_d3d => "Linux x86_64",
        Os::MacOs if mentions_d3d => "MacIntel",
        Os::Windows if mentions_metal => "Win32",
        Os::Linux | Os::MacOs | Os::Windows | Os::Unknown => return Ok(()),
    };

    Err(ValidationError::WebglPlatformMismatch {
        renderer: renderer.into(),
        platform: platform.into(),
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Starting point for every test: the bundle produced by
    /// `IdentityBundle::from_chromium(131, 0xdead_beef)`. Tests mutate their
    /// own copy to forge a single inconsistency.
    fn baseline_bundle() -> IdentityBundle {
        IdentityBundle::from_chromium(131, 0xdead_beef)
    }

    #[test]
    fn defaults_are_coherent() {
        // The untouched baseline must pass validation.
        let bundle = baseline_bundle();
        IdentityValidator::check(&bundle).expect("from_chromium must validate");
    }

    #[test]
    fn ua_platform_mismatch_rejected() {
        let mut bundle = baseline_bundle();
        bundle.platform = "Win32".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::UaPlatformMismatch { .. })));
    }

    #[test]
    fn ch_platform_mismatch_rejected() {
        let mut bundle = baseline_bundle();
        bundle.ua_platform = "\"Windows\"".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::ChPlatformMismatch { .. })));
    }

    #[test]
    fn webgl_d3d_on_linux_rejected() {
        let renderer = "ANGLE (Intel, Intel(R) UHD Graphics 630 Direct3D11 vs_5_0 ps_5_0, D3D11)";
        let verdict = check_webgl_platform(renderer, Os::Linux);
        assert!(matches!(verdict, Err(ValidationError::WebglPlatformMismatch { .. })));
    }

    #[test]
    fn webgl_metal_on_windows_rejected() {
        let renderer = "ANGLE (Apple M1, Metal)";
        let verdict = check_webgl_platform(renderer, Os::Windows);
        assert!(matches!(verdict, Err(ValidationError::WebglPlatformMismatch { .. })));
    }

    #[test]
    fn webgl_opengl_on_linux_ok() {
        let renderer = "ANGLE (Intel, Mesa Intel(R) UHD Graphics 630 (CFL GT2), OpenGL 4.6)";
        let verdict = check_webgl_platform(renderer, Os::Linux);
        assert!(verdict.is_ok());
    }

    #[test]
    fn accept_language_mismatch_rejected() {
        let mut bundle = baseline_bundle();
        bundle.accept_language = "fr-FR,fr;q=0.9".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::AcceptLanguageMismatch { .. })));
    }

    #[test]
    fn device_memory_out_of_bucket_rejected() {
        let mut bundle = baseline_bundle();
        bundle.device_memory = 6;
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::DeviceMemoryInvalid(6))));
    }

    #[test]
    fn hardware_concurrency_out_of_range_rejected() {
        let mut bundle = baseline_bundle();
        bundle.hardware_concurrency = 64;
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::HardwareConcurrencyInvalid(64))));
    }

    #[test]
    fn viewport_exceeds_avail_rejected() {
        // One pixel taller than the available screen height.
        let mut bundle = baseline_bundle();
        bundle.viewport_h = bundle.avail_screen_h + 1;
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(
            verdict,
            Err(ValidationError::ViewportExceedsAvail { axis: "h", .. })
        ));
    }

    #[test]
    fn full_version_major_mismatch_rejected() {
        let mut bundle = baseline_bundle();
        bundle.ua_full_version = "132.0.0.0".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::UaFullVersionMajorMismatch { .. })));
    }

    #[test]
    fn unknown_timezone_does_not_spuriously_reject() {
        // Regression guard for the nit #8 fix. Previously an unknown tz
        // fell back to São Paulo's +180 and could reject valid bundles;
        // now unknown → None → the offset check is skipped.
        let mut bundle = baseline_bundle();
        bundle.timezone = "Pacific/Auckland".into();
        bundle.tz_offset_min = -720;
        IdentityValidator::check(&bundle).expect("unknown tz with sane offset must pass");
    }

    #[test]
    fn known_timezone_still_catches_off_by_hour() {
        let mut bundle = baseline_bundle();
        bundle.timezone = "America/Sao_Paulo".into();
        // Deliberately wrong: São Paulo is +180 min west.
        bundle.tz_offset_min = 0;
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::TimezoneOffsetMismatch { .. })));
    }

    #[test]
    #[ignore = "obsolete: catalog-driven Profile derives major 1:1 from \
                ua_major; the float-up that the old closed enum did is \
                gone, so tls_major == ua_major holds by construction"]
    fn tls_profile_major_mismatch_rejected() {
        // Historical context: before the catalog,
        // `Profile::from_detected_major(130)` floated up to
        // `Chrome131Stable` (closest-not-newer), which let the validator
        // detect a stale-vs-claimed-version mismatch. The catalog-driven
        // `Profile::Chrome { major, os }` form maps
        // `from_detected_major(N)` to `Chrome { major: N }` 1:1, so
        // `bundle.profile().major_version() == bundle.ua_major` always
        // holds and the corresponding validator check is trivially true.
        // The test stays as #[ignore] to document the old invariant.
        let mut bundle = baseline_bundle();
        bundle.ua_major = 130;
        bundle.ua = bundle.ua.replace("131", "130");
        bundle.sec_ch_ua = bundle.sec_ch_ua.replace("131", "130");
        bundle.ua_brands = bundle.ua_brands.replace("131", "130");
        bundle.ua_full_version = "130.0.0.0".into();
        bundle.ua_full_version_list = bundle.ua_full_version_list.replace("131", "130");
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(
            verdict,
            Err(ValidationError::TlsProfileMajorMismatch {
                tls_major: 131,
                ua_major: 130
            })
        ));
    }

    #[test]
    fn ua_brands_missing_major_rejected() {
        let mut bundle = baseline_bundle();
        bundle.ua_brands = r#"[{"brand":"Google Chrome","version":"130"}]"#.into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(
            verdict,
            Err(ValidationError::UaBrandsMissingMajor { major: 131 })
        ));
    }

    #[test]
    fn webgl_vendor_keyword_detected_for_all_majors() {
        // Renderer string → expected vendor. Covers ANGLE-wrapped and bare
        // strings, plus the substring trap: "intelligent" must not count
        // as Intel.
        let expectations = [
            (
                "ANGLE (Intel, Mesa Intel(R) UHD Graphics)",
                Some(GpuVendor::Intel),
            ),
            ("NVIDIA GeForce RTX 3080", Some(GpuVendor::Nvidia)),
            ("ANGLE (AMD, AMD Radeon RX 6800)", Some(GpuVendor::Amd)),
            ("Apple M1", Some(GpuVendor::Apple)),
            ("intelligent system", None),
            ("WebKit WebGL", None),
        ];
        for (renderer, expected) in expectations {
            assert_eq!(detect_gpu_vendor(renderer), expected);
        }
    }

    #[test]
    fn webgl_vendor_mismatch_between_masked_and_unmasked_rejected() {
        // Masked slot keeps the baseline (Intel) while the unmasked slot
        // claims NVIDIA — the inconsistency fingerprinters hash across the
        // two slots.
        let mut bundle = baseline_bundle();
        bundle.webgl_unmasked_renderer = "NVIDIA GeForce RTX 3080".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::WebglVendorKeywordMismatch { .. })));
    }

    #[test]
    fn webgl_vendor_unrecognised_rejected() {
        let vendorless = "WebKit WebGL";
        let mut bundle = baseline_bundle();
        bundle.webgl_renderer = vendorless.into();
        bundle.webgl_unmasked_renderer = vendorless.into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::WebglVendorUnrecognised { .. })));
    }

    #[test]
    fn webgl_apple_on_linux_rejected() {
        // Apple Silicon GPUs only exist on macOS, so Linux + Apple must be
        // rejected. The string is OpenGL-flavoured on purpose: it keeps
        // the earlier OS/renderer check (metal → macOS) from firing first.
        let apple_opengl = "ANGLE (Apple, Apple GPU, OpenGL 4.1)";
        let mut bundle = baseline_bundle();
        bundle.webgl_renderer = apple_opengl.into();
        bundle.webgl_unmasked_renderer = apple_opengl.into();
        bundle.webgpu_adapter_description = apple_opengl.into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::WebglVendorPlatformMismatch { .. })));
    }

    #[test]
    fn webgpu_vendor_mismatch_rejected() {
        // WebGL says Intel but the WebGPU adapter description says NVIDIA —
        // the one-line tell P1.6 was filed to close.
        let mut bundle = baseline_bundle();
        bundle.webgpu_adapter_description = "NVIDIA GeForce RTX 3080".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::WebgpuVendorMismatch { .. })));
    }

    #[test]
    fn webgpu_vendor_missing_keyword_rejected() {
        // A description without any vendor keyword also fails the WebGPU
        // check: there has to be *some* keyword to compare against WebGL.
        let mut bundle = baseline_bundle();
        bundle.webgpu_adapter_description = "Generic Renderer".into();
        let verdict = IdentityValidator::check(&bundle);
        assert!(matches!(verdict, Err(ValidationError::WebgpuVendorMismatch { .. })));
    }
}