Skip to main content

hpx_browser/
challenge.rs

1//! Challenge detection and solving.
2
3use async_trait::async_trait;
4
5// ── Size gates ──────────────────────────────────────────────────────────
6
/// Phrase and small-body markers are only consulted for bodies strictly
/// smaller than this (30 KiB).
pub const INTERSTITIAL_MAX_BYTES: usize = 30 << 10;
/// The bare word "blocked" only counts for bodies strictly smaller than
/// this (5 KiB).
pub const BLOCKED_WORD_MAX_BYTES: usize = 5 << 10;
/// Bodies with no marker hit that are strictly smaller than this are tagged
/// `THIN-BODY`.
pub const THIN_BODY_MAX_BYTES: usize = 1000;
/// `L3-RENDERED` bodies strictly smaller than this (15 KiB) get the
/// `ThinShell` verdict instead of `Pass`.
pub const THIN_SHELL_MAX_BYTES: usize = 15 << 10;
/// Challenge-tagged bodies below this size (50 KiB) map to `EdgeBlock`; at or
/// above it they map to `SensorFail` (or `ChallengeIncomplete` for the
/// managed-challenge tag).
pub const SENSOR_SPLIT_BYTES: usize = 50 << 10;
12
13// ── Marker tables ───────────────────────────────────────────────────────
14
/// `(needle, tag)` rows matched against the lowercased body regardless of its
/// size. Rows are tried in order and the first hit decides the tag, so needles
/// must be written in lowercase.
const UNAMBIGUOUS: &[(&str, &str)] = &[
    // Managed-challenge markers.
    ("cf-browser-verification", "ManagedChallenge-CHL"),
    ("_cf_chl_opt", "ManagedChallenge-CHL"),
    // Challenge endpoint path.
    ("/_sec/cp_challenge", "SecCpt-CHL"),
    // Interstitial captcha payload variable (lowercased form).
    ("ddcaptchaencoded", "Interstitial-CHL"),
];
22
/// AWS-WAF envelope needles (lowercase). On their own they do not indicate a
/// challenge: the classifier additionally requires one of
/// `AWSWAF_ACTIVE_LOADER` to be present in the same body.
const AWSWAF_MARKERS: &[&str] = &["gokuprops", "awswafcookiedomainlist"];

/// Lowercase substrings of the active challenge loader. At least one must
/// co-occur with an `AWSWAF_MARKERS` needle for the `AWS-WAF-CHL` tag.
const AWSWAF_ACTIVE_LOADER: &[&str] = &[
    "token.awswaf.com",
    "awswafintegration",
    "checkforcerefresh",
];
29
/// Human-readable phrase needles (lowercase) with the tag each one yields.
/// Only consulted when the body is smaller than `INTERSTITIAL_MAX_BYTES`,
/// because the same phrases can appear as ordinary copy on large pages.
const PHRASE: &[(&str, &str)] = &[
    ("just a moment", "ManagedChallenge-CHL"),
    ("checking your browser", "ManagedChallenge-CHL"),
    ("captcha-delivery.com", "Interstitial-CHL"),
    // Matches the HTML-escaped ampersand form only.
    ("press &amp; hold", "HoldChallenge-PaH"),
    ("pardon our interruption", "SensorChallenge-CHL"),
];
38
/// Needles (lowercase) that only count on small bodies, i.e. those below
/// `INTERSTITIAL_MAX_BYTES`. Rows are tried in order. The `akam/13` and
/// `captcha` rows additionally need a co-signal, as decided by
/// `small_body_row_qualifies`; every other row matches on its own.
const SMALL_BODY: &[(&str, &str)] = &[
    // Requires a SENSOR_CHALLENGE_COSIGNAL entry.
    ("akam/13", "SensorChallenge-CHL"),
    ("_abck", "SensorChallenge-CHL"),
    ("_kpsdk", "ScriptChallenge-CHL"),
    ("ips.js", "ScriptChallenge-CHL"),
    ("_pxhd", "BehaviorChallenge-CHL"),
    ("px-captcha", "BehaviorChallenge-CHL"),
    // Requires an INTERACTIVE_CAPTCHA_COSIGNAL entry.
    ("captcha", "captcha-CHL"),
    ("403 forbidden", "BLOCKED"),
    ("access denied", "BLOCKED"),
];
51
/// Lowercase substrings, at least one of which must accompany `akam/13` before
/// that needle is treated as a challenge marker.
///
/// Within `engine_classify` the last two entries are matched earlier by the
/// `UNAMBIGUOUS` and `PHRASE` tables, so they never decide the outcome there.
const SENSOR_CHALLENGE_COSIGNAL: &[&str] = &[
    "sensor_data",
    "bm-verify",
    "sec-if-cpt-container",
    "sec-cpt-if",
    "/_sec/cp_challenge",
    "pardon our interruption",
];
61
/// Lowercase substrings, at least one of which must accompany the generic
/// `captcha` needle. They distinguish a visible, interactive captcha from a
/// page that merely embeds an invisible captcha script.
const INTERACTIVE_CAPTCHA_COSIGNAL: &[&str] = &[
    // Widget frame / script endpoints.
    "api2/bframe",
    "api2/anchor",
    "hcaptcha.com",
    "cf-turnstile",
    "challenges.cloudflare.com/turnstile",
    // Visible prompt text (straight and typographic apostrophe variants).
    "i'm not a robot",
    "i\u{2019}m not a robot",
    "verify you are human",
    "are you a robot",
    "select all images",
    "recaptcha challenge",
];
76
77// ── Verdict ─────────────────────────────────────────────────────────────
78
/// Verdict derived from a classification tag and the body length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChallengeVerdict {
    /// Rendered body of at least `THIN_SHELL_MAX_BYTES`.
    Pass,
    /// Body tagged `THIN-BODY`.
    RenderIncomplete,
    /// Challenge-tagged body smaller than `SENSOR_SPLIT_BYTES`.
    EdgeBlock,
    /// Challenge-tagged body of at least `SENSOR_SPLIT_BYTES` (other than a
    /// managed challenge).
    SensorFail,
    /// Rendered body smaller than `THIN_SHELL_MAX_BYTES`.
    ThinShell,
    /// Managed-challenge body of at least `SENSOR_SPLIT_BYTES`.
    ChallengeIncomplete,
}

impl ChallengeVerdict {
    /// Returns `true` for the three verdicts that mean a challenge or block
    /// was detected, and `false` for `Pass`, `RenderIncomplete` and
    /// `ThinShell`.
    pub const fn is_challenge(self) -> bool {
        // Exhaustive on purpose: a new variant must be sorted explicitly.
        match self {
            Self::EdgeBlock | Self::SensorFail | Self::ChallengeIncomplete => true,
            Self::Pass | Self::RenderIncomplete | Self::ThinShell => false,
        }
    }
}
97
98// ── Classification result ───────────────────────────────────────────────
99
100#[derive(Debug, Clone, Copy, PartialEq, Eq)]
101pub struct EngineClass {
102    pub tag: &'static str,
103    pub verdict: ChallengeVerdict,
104    pub len: usize,
105}
106
107// ── ChallengeKind / SolveOutcome / trait ────────────────────────────────
108
/// Identifies a challenge by a vendor label and a vendor-specific sub-kind.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ChallengeKind {
    /// Vendor label.
    pub vendor: &'static str,
    /// Variant of the challenge within that vendor.
    pub sub_kind: &'static str,
}

impl ChallengeKind {
    /// Builds a `ChallengeKind` from its two labels; usable in `const`
    /// contexts.
    pub const fn new(vendor: &'static str, sub_kind: &'static str) -> Self {
        ChallengeKind { vendor, sub_kind }
    }
}
120
/// Result reported by a [`ChallengeSolver`] after a solve attempt.
///
/// NOTE(review): no solver implementation is visible in this file, so the
/// per-variant descriptions below are inferred from the names; confirm
/// against the actual solvers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SolveOutcome {
    /// Presumably: the solver does not apply to this challenge.
    NotApplicable,
    /// Presumably: solving has started but is not finished.
    InProgress,
    /// Presumably: the challenge was cleared.
    Solved,
    /// Presumably: the solver gave up on this challenge.
    Unsolvable,
}
128
129#[async_trait]
130pub trait ChallengeSolver: Send + Sync {
131    fn can_handle(&self, kind: &ChallengeKind) -> bool;
132    async fn solve(&self, kind: &ChallengeKind, page: &mut crate::page::Page) -> SolveOutcome;
133}
134
135// ── Internal helpers ────────────────────────────────────────────────────
136
137fn small_body_row_qualifies(needle: &str, lower: &str) -> bool {
138    match needle {
139        "akam/13" => SENSOR_CHALLENGE_COSIGNAL.iter().any(|c| lower.contains(c)),
140        "captcha" => INTERACTIVE_CAPTCHA_COSIGNAL
141            .iter()
142            .any(|c| lower.contains(c)),
143        _ => true,
144    }
145}
146
147fn verdict_for(tag: &str, len: usize) -> ChallengeVerdict {
148    match tag {
149        "L3-RENDERED" if len < THIN_SHELL_MAX_BYTES => ChallengeVerdict::ThinShell,
150        "L3-RENDERED" => ChallengeVerdict::Pass,
151        "THIN-BODY" => ChallengeVerdict::RenderIncomplete,
152        "ManagedChallenge-CHL" if len >= SENSOR_SPLIT_BYTES => {
153            ChallengeVerdict::ChallengeIncomplete
154        }
155        _ if len < SENSOR_SPLIT_BYTES => ChallengeVerdict::EdgeBlock,
156        _ => ChallengeVerdict::SensorFail,
157    }
158}
159
160// ── Public API ──────────────────────────────────────────────────────────
161
162pub fn engine_classify(body: &str) -> EngineClass {
163    let lower = body.to_lowercase();
164    let len = body.len();
165
166    let tag: &'static str = 'tag: {
167        for (n, t) in UNAMBIGUOUS {
168            if lower.contains(n) {
169                break 'tag t;
170            }
171        }
172        if AWSWAF_MARKERS.iter().any(|n| lower.contains(n))
173            && AWSWAF_ACTIVE_LOADER.iter().any(|n| lower.contains(n))
174        {
175            break 'tag "AWS-WAF-CHL";
176        }
177        if len < INTERSTITIAL_MAX_BYTES {
178            for (n, t) in PHRASE {
179                if lower.contains(n) {
180                    break 'tag t;
181                }
182            }
183            for (n, t) in SMALL_BODY {
184                if lower.contains(n) && small_body_row_qualifies(n, &lower) {
185                    break 'tag t;
186                }
187            }
188        }
189        if len < BLOCKED_WORD_MAX_BYTES && lower.contains("blocked") {
190            break 'tag "BLOCKED";
191        }
192        if len < THIN_BODY_MAX_BYTES {
193            break 'tag "THIN-BODY";
194        }
195        "L3-RENDERED"
196    };
197
198    EngineClass {
199        tag,
200        verdict: verdict_for(tag, len),
201        len,
202    }
203}
204
/// Returns `true` when `body` contains any managed-challenge marker.
///
/// The check is a case-sensitive substring search on the body as given (no
/// lowercasing), and it is not size-gated.
pub fn is_managed_challenge_doc(body: &str) -> bool {
    const MARKERS: [&str; 3] = [
        "_cf_chl_opt",
        "/cdn-cgi/challenge-platform/",
        "cf-browser-verification",
    ];
    MARKERS.iter().any(|marker| body.contains(*marker))
}
210
211// ── Tests ───────────────────────────────────────────────────────────────
212
213#[cfg(test)]
214mod tests {
215    use super::*;
216
217    fn page_is_challenge(body: &str) -> bool {
218        engine_classify(body).verdict.is_challenge()
219    }
220    fn holistic_tag(body: &str) -> &'static str {
221        engine_classify(body).tag
222    }
223
224    /// Helper: build a large rendered body embedding `seed`.
225    fn big(seed: &str) -> String {
226        let mut h = String::from("<html><body>");
227        h.push_str(seed);
228        for _ in 0..30000 {
229            h.push_str("<div>actual rendered content paragraph</div>");
230        }
231        h.push_str("</body></html>");
232        h
233    }
234
235    // ── 1. all_call_sites_agree ─────────────────────────────────────────
236
237    #[test]
238    fn all_call_sites_agree() {
239        struct Case {
240            name: &'static str,
241            body: String,
242            tag: &'static str,
243            challenge: bool,
244        }
245        let cases = vec![
246            Case {
247                name: "empty",
248                body: "<html></html>".into(),
249                tag: "THIN-BODY",
250                challenge: false,
251            },
252            Case {
253                name: "cf small",
254                body: "<html><body>Just a moment...</body></html>".into(),
255                tag: "ManagedChallenge-CHL",
256                challenge: true,
257            },
258            Case {
259                name: "dd small",
260                body: r#"<script src="https://geo.captcha-delivery.com/c"></script>"#.into(),
261                tag: "Interstitial-CHL",
262                challenge: true,
263            },
264            Case {
265                name: "akam small",
266                body: r#"<script src="/akam/13/abc"></script><form id="bm-verify"></form>"#.into(),
267                tag: "SensorChallenge-CHL",
268                challenge: true,
269            },
270            Case {
271                name: "pxhd large benign",
272                body: big(r#"<script>window._pxhd="sdk"</script>"#),
273                tag: "L3-RENDERED",
274                challenge: false,
275            },
276            Case {
277                name: "just-a-moment large benign",
278                body: big("<p>give us just a moment to load</p>"),
279                tag: "L3-RENDERED",
280                challenge: false,
281            },
282            Case {
283                name: "grecaptcha config large",
284                body: big(r#"<script>window.C={"googleRecaptcha":1}</script>"#),
285                tag: "L3-RENDERED",
286                challenge: false,
287            },
288        ];
289        for c in cases {
290            let ec = engine_classify(&c.body);
291            assert_eq!(ec.tag, c.tag, "tag mismatch [{}]", c.name);
292            assert_eq!(
293                holistic_tag(&c.body),
294                c.tag,
295                "holistic disagrees [{}]",
296                c.name
297            );
298            assert_eq!(
299                page_is_challenge(&c.body),
300                c.challenge,
301                "page/holistic challenge-verdict disagree [{}] tag={}",
302                c.name,
303                ec.tag
304            );
305        }
306    }
307
308    // ── 2. FP-B2: literal strong markers size-gated ─────────────────────
309
310    #[test]
311    fn fp_b2_literal_strong_markers_size_gated() {
312        let wf = big(
313            r#"<script>window.__CONSENT={"_px3":"NECESSARY","px-captcha":"NECESSARY"};</script>"#,
314        );
315        assert_eq!(engine_classify(&wf).tag, "L3-RENDERED");
316        assert_eq!(engine_classify(&wf).verdict, ChallengeVerdict::Pass);
317
318        let dd = big(r#"<img src="https://x.captcha-delivery.com/pixel.gif">"#);
319        assert_eq!(engine_classify(&dd).tag, "L3-RENDERED");
320        assert_eq!(engine_classify(&dd).verdict, ChallengeVerdict::Pass);
321
322        let px_chl = r#"<html><body><div id="px-captcha"></div><p>verifying</p></body></html>"#;
323        assert_eq!(engine_classify(px_chl).tag, "BehaviorChallenge-CHL");
324        assert!(engine_classify(px_chl).verdict.is_challenge());
325
326        let pah = r#"<html><body><p>Press &amp; Hold to confirm</p></body></html>"#;
327        assert_eq!(engine_classify(pah).tag, "HoldChallenge-PaH");
328        assert!(engine_classify(pah).verdict.is_challenge());
329    }
330
331    // ── 3. FP-B5: AWS-WAF loader co-signed ──────────────────────────────
332
333    #[test]
334    fn fp_b5_awswaf_markers_loader_cosigned() {
335        let mut solved = String::from(
336            r#"<html><head><script>window.awsWafCookieDomainList=["redfin.com"];</script></head><body>
337               <script>window.REDFIN_APP_NAME="customer-pages-personalization";</script>"#,
338        );
339        for _ in 0..6000 {
340            solved.push_str("<div class='HomeCard'>real listing content here</div>");
341        }
342        solved.push_str("</body></html>");
343        assert_eq!(engine_classify(&solved).tag, "L3-RENDERED");
344        assert_eq!(engine_classify(&solved).verdict, ChallengeVerdict::Pass);
345
346        let small_leftover = r#"<html><body><script>window.awsWafCookieDomainList=["redfin.com"];</script>
347            <p>real content</p></body></html>"#;
348        assert_ne!(engine_classify(small_leftover).tag, "AWS-WAF-CHL");
349        assert!(!engine_classify(small_leftover).verdict.is_challenge());
350
351        let stub = r#"<html><head><script src="https://x.token.awswaf.com/challenge.js"></script>
352            <script>window.gokuProps={key:'a',context:'b',iv:'c'};</script></head>
353            <body><div id="challenge-container"></div></body></html>"#;
354        assert_eq!(engine_classify(stub).tag, "AWS-WAF-CHL");
355        assert!(engine_classify(stub).verdict.is_challenge());
356
357        let stub2 = r#"<html><body><script>var awsWafCookieDomainList=["a.com"];
358            AwsWafIntegration.checkForceRefresh().then(()=>{});</script></body></html>"#;
359        assert_eq!(engine_classify(stub2).tag, "AWS-WAF-CHL");
360        assert!(engine_classify(stub2).verdict.is_challenge());
361    }
362
363    // ── 4. FP-B4: ManagedChallenge split from SensorFail ────────────────
364
365    #[test]
366    fn fp_b4_managed_incomplete_split_from_sensorfail() {
367        let mut shell = String::from(
368            r#"<html><head><title>Just a moment...</title></head><body>
369               <script>window._cf_chl_opt={cvId:'3',cType:'managed'};</script>"#,
370        );
371        for _ in 0..2000 {
372            shell.push_str("<div>cf challenge orchestrator shell padding</div>");
373        }
374        shell.push_str("</body></html>");
375        assert!(shell.len() >= SENSOR_SPLIT_BYTES);
376        let ec = engine_classify(&shell);
377        assert_eq!(ec.tag, "ManagedChallenge-CHL");
378        assert_eq!(ec.verdict, ChallengeVerdict::ChallengeIncomplete);
379        assert_ne!(ec.verdict, ChallengeVerdict::SensorFail);
380        assert_ne!(ec.verdict, ChallengeVerdict::Pass);
381        assert!(ec.verdict.is_challenge());
382
383        let stub =
384            "<html><body><script>window._cf_chl_opt={}</script>Just a moment...</body></html>";
385        assert_eq!(engine_classify(stub).verdict, ChallengeVerdict::EdgeBlock);
386
387        let mut passed = String::from(
388            r#"<html><body><script src="/cdn-cgi/challenge-platform/h/b/jsd"></script>"#,
389        );
390        for _ in 0..3000 {
391            passed.push_str("<article>real rendered course catalog content</article>");
392        }
393        passed.push_str("</body></html>");
394        assert!(passed.len() >= SENSOR_SPLIT_BYTES);
395        assert_eq!(engine_classify(&passed).tag, "L3-RENDERED");
396        assert_eq!(engine_classify(&passed).verdict, ChallengeVerdict::Pass);
397    }
398
399    // ── 5. FP-C2: managed challenge doc predicate ───────────────────────
400
401    #[test]
402    fn fp_c2_managed_challenge_doc_predicate() {
403        assert!(is_managed_challenge_doc(
404            r#"<script>window._cf_chl_opt={cvId:'3'};</script>"#
405        ));
406        assert!(is_managed_challenge_doc(
407            r#"<script src="/cdn-cgi/challenge-platform/h/b/jsd/r/x"></script>"#
408        ));
409        assert!(is_managed_challenge_doc(
410            r#"<html class="cf-browser-verification"><body>...</body></html>"#
411        ));
412        assert!(!is_managed_challenge_doc(
413            "<html><body>fully rendered course catalog, no CF challenge</body></html>"
414        ));
415    }
416
417    // ── 6. FP-B3: thin shell band ───────────────────────────────────────
418
419    #[test]
420    fn fp_b3_thin_shell_band() {
421        let mut shell = String::from("<html><body>");
422        for _ in 0..60 {
423            shell.push_str("<div>spa hydration placeholder</div>");
424        }
425        shell.push_str("</body></html>");
426        assert!(shell.len() > THIN_BODY_MAX_BYTES && shell.len() < THIN_SHELL_MAX_BYTES);
427        let ec = engine_classify(&shell);
428        assert_eq!(ec.tag, "L3-RENDERED");
429        assert_eq!(ec.verdict, ChallengeVerdict::ThinShell);
430        assert!(!ec.verdict.is_challenge());
431
432        let mut full = String::from("<html><body>");
433        for _ in 0..1000 {
434            full.push_str("<article>real rendered content paragraph here</article>");
435        }
436        full.push_str("</body></html>");
437        assert!(full.len() >= THIN_SHELL_MAX_BYTES);
438        let fc = engine_classify(&full);
439        assert_eq!(fc.tag, "L3-RENDERED");
440        assert_eq!(fc.verdict, ChallengeVerdict::Pass);
441
442        assert_eq!(
443            engine_classify("<html></html>").verdict,
444            ChallengeVerdict::RenderIncomplete
445        );
446    }
447
448    // ── 7. FP-D2: unsolved managed challenge never passes ───────────────
449
450    #[test]
451    fn fp_d2_managed_unsolved_never_passes() {
452        let stub =
453            "<html><body>Just a moment...<script>window._cf_chl_opt={}</script></body></html>";
454        let s = engine_classify(stub);
455        assert!(s.verdict.is_challenge());
456        assert_ne!(s.verdict, ChallengeVerdict::Pass);
457
458        let mut shell = String::from(r#"<script>window._cf_chl_opt={cvId:'3'}</script>"#);
459        for _ in 0..2500 {
460            shell.push_str("<div>cf shell padding padding padding</div>");
461        }
462        assert!(shell.len() >= SENSOR_SPLIT_BYTES);
463        let l = engine_classify(&shell);
464        assert!(l.verdict.is_challenge());
465        assert_ne!(l.verdict, ChallengeVerdict::Pass);
466        assert_eq!(l.verdict, ChallengeVerdict::ChallengeIncomplete);
467    }
468
469    // ── 8. FP-Tier1: invisible recaptcha + akam/13 co-signal ────────────
470
471    #[test]
472    fn fp_t1_invisible_recaptcha_and_akam13_cosignal() {
473        let mut spotify = String::from(
474            r#"<html><head><style>.grecaptcha-badge { display: none !important }</style></head><body><textarea id="g-recaptcha-response-100000" name="g-recaptcha-response"></textarea><script src="https://www.gstatic.com/recaptcha/releases/abc/recaptcha__en.js"></script>"#,
475        );
476        for _ in 0..120 {
477            spotify
478                .push_str("<div class=\"sp-shell\">spotify web player hydration placeholder</div>");
479        }
480        spotify.push_str("</body></html>");
481        assert!(spotify.len() > THIN_BODY_MAX_BYTES && spotify.len() < THIN_SHELL_MAX_BYTES);
482        let s = engine_classify(&spotify);
483        assert_eq!(s.tag, "L3-RENDERED");
484        assert!(!s.verdict.is_challenge());
485        assert_eq!(s.verdict, ChallengeVerdict::ThinShell);
486
487        let real_cap = r#"<html><body><iframe src="https://www.google.com/recaptcha/api2/bframe?k=x"></iframe><p>Select all images with a bus — verify you are human</p></body></html>"#;
488        assert_eq!(engine_classify(real_cap).tag, "captcha-CHL");
489        assert!(engine_classify(real_cap).verdict.is_challenge());
490
491        let mut bestbuy = String::from(
492            r#"<html><head><script type="text/javascript" src="https://www.bestbuy.com/akam/13/62321f80" defer=""></script></head><body><h1>Choose a country</h1>"#,
493        );
494        for _ in 0..100 {
495            bestbuy.push_str(
496                "<a class=\"country\" href=\"/intl\">United States / Canada region selector</a>",
497            );
498        }
499        bestbuy.push_str("</body></html>");
500        assert!(bestbuy.len() > THIN_BODY_MAX_BYTES && bestbuy.len() < THIN_SHELL_MAX_BYTES);
501        let b = engine_classify(&bestbuy);
502        assert_eq!(b.tag, "L3-RENDERED");
503        assert!(!b.verdict.is_challenge());
504
505        let akam_chl = r#"<html><head><script src="/akam/13/abc"></script></head><body><form id="bm-verify"></form></body></html>"#;
506        assert_eq!(engine_classify(akam_chl).tag, "SensorChallenge-CHL");
507        assert!(engine_classify(akam_chl).verdict.is_challenge());
508    }
509
510    // ── 9. verdict_mapping_is_consistent ─────────────────────────────────
511
512    #[test]
513    fn verdict_mapping_is_consistent() {
514        assert_eq!(
515            engine_classify("<html></html>").verdict,
516            ChallengeVerdict::RenderIncomplete
517        );
518        assert_eq!(
519            engine_classify("<html><body>Just a moment...</body></html>").verdict,
520            ChallengeVerdict::EdgeBlock
521        );
522        let mut big_dd = String::from(r#"<script>var ddcaptchaEncoded="z";</script>"#);
523        for _ in 0..3000 {
524            big_dd.push_str("<p>padding padding padding padding</p>");
525        }
526        assert!(big_dd.len() >= SENSOR_SPLIT_BYTES);
527        assert_eq!(
528            engine_classify(&big_dd).verdict,
529            ChallengeVerdict::SensorFail
530        );
531    }
532
533    // ── 10. AWS-WAF never passes unsolved ───────────────────────────────
534
535    #[test]
536    fn inverse_chl_awswaf_never_passes_unsolved() {
537        let stub = r#"<html><head><script type="text/javascript">
538            window.awsWafCookieDomainList = [];
539            window.gokuProps = {"key":"AQ==","iv":"A6==","context":"gl=="};
540            </script><script src="https://x.token.awswaf.com/x/challenge.js"></script></head>
541            <body><script>AwsWafIntegration.checkForceRefresh().then(()=>{});</script></body></html>"#;
542        let s = engine_classify(stub);
543        assert_eq!(s.tag, "AWS-WAF-CHL");
544        assert!(s.verdict.is_challenge());
545        assert_ne!(s.verdict, ChallengeVerdict::Pass);
546
547        let mut grown = String::from(
548            r#"<script>window.gokuProps={"key":"AQ=="};window.awsWafCookieDomainList=[];
549            AwsWafIntegration.checkForceRefresh();</script>
550            <script src="https://x.token.awswaf.com/x/challenge.js"></script>"#,
551        );
552        for _ in 0..3000 {
553            grown.push_str("<div>partially rendered challenge shell padding here</div>");
554        }
555        assert!(grown.len() >= SENSOR_SPLIT_BYTES);
556        let g = engine_classify(&grown);
557        assert_eq!(g.tag, "AWS-WAF-CHL");
558        assert!(g.verdict.is_challenge());
559        assert_ne!(g.verdict, ChallengeVerdict::Pass);
560
561        let mut solved = String::from("<html><body>");
562        for _ in 0..2000 {
563            solved.push_str("<div class=\"product-card\">real amazon product listing</div>");
564        }
565        solved.push_str("</body></html>");
566        assert!(solved.len() >= THIN_SHELL_MAX_BYTES);
567        let v = engine_classify(&solved);
568        assert_eq!(v.tag, "L3-RENDERED");
569        assert_eq!(v.verdict, ChallengeVerdict::Pass);
570    }
571
572    // ── 11. tail-pin: known thin shells stay ThinShell ──────────────────
573
574    #[test]
575    fn tail_pin_known_thin_shells_stay_thinshell() {
576        let mut duo = String::from(
577            r#"<html><head><style>.grecaptcha-badge{display:none}</style></head><body>"#,
578        );
579        while duo.len() < 13_000 {
580            duo.push_str("<div class=\"_2it2\">duolingo unsupported-browser shell</div>");
581        }
582        duo.push_str("</body></html>");
583        assert!(
584            duo.len() > THIN_BODY_MAX_BYTES && duo.len() < THIN_SHELL_MAX_BYTES,
585            "duolingo shell must sit under the 15 KB ThinShell floor (len={})",
586            duo.len()
587        );
588        let d = engine_classify(&duo);
589        assert_eq!(d.tag, "L3-RENDERED");
590        assert_eq!(d.verdict, ChallengeVerdict::ThinShell);
591        assert!(!d.verdict.is_challenge());
592    }
593
594    // ── 12. DataDome captcha detection ──────────────────────────────────
595
596    #[test]
597    fn detect_datadome_captcha() {
598        let body = r#"<script src="https://geo.captcha-delivery.com/captcha.js"></script><div id="ddcaptchaencoded">encoded_payload</div>"#;
599        let ec = engine_classify(body);
600        assert_eq!(ec.tag, "Interstitial-CHL");
601        assert!(ec.verdict.is_challenge());
602    }
603
604    // ── 13. SecCpt challenge ────────────────────────────────────────────
605
606    #[test]
607    fn detect_sec_cpt() {
608        let body = r#"<html><body><div>loading...</div><script src="/_sec/cp_challenge/verify"></script></body></html>"#;
609        let ec = engine_classify(body);
610        assert_eq!(ec.tag, "SecCpt-CHL");
611        assert!(ec.verdict.is_challenge());
612    }
613
614    // ── 14. Kasada script challenge ─────────────────────────────────────
615
616    #[test]
617    fn detect_kasada_script() {
618        let mut body = String::from(
619            r#"<html><body><script>window._kpsdk={p:"abc"};</script><script src="/ips.js"></script>"#,
620        );
621        while body.len() < 2000 {
622            body.push_str("<div>padding for threshold</div>");
623        }
624        body.push_str("</body></html>");
625        let ec = engine_classify(&body);
626        assert_eq!(ec.tag, "ScriptChallenge-CHL");
627        assert!(ec.verdict.is_challenge());
628    }
629
630    // ── 15. PerimeterX challenge ────────────────────────────────────────
631
632    #[test]
633    fn detect_perimeterx_challenge() {
634        let body = r#"<html><body><div id="px-captcha"></div><script>window._pxhd="abc";</script></body></html>"#;
635        let ec = engine_classify(body);
636        assert_eq!(ec.tag, "BehaviorChallenge-CHL");
637        assert!(ec.verdict.is_challenge());
638    }
639
640    // ── 16. Akamai sensor with co-signal ────────────────────────────────
641
642    #[test]
643    fn detect_akamai_sensor_with_cosignal() {
644        let body = r#"<html><body><script src="/akam/13/pixel"></script><div id="sensor_data">payload</div></body></html>"#;
645        let ec = engine_classify(body);
646        assert_eq!(ec.tag, "SensorChallenge-CHL");
647        assert!(ec.verdict.is_challenge());
648    }
649
650    // ── 17. Akamai without co-signal is NOT a challenge ─────────────────
651
652    #[test]
653    fn akamai_without_cosignal_not_challenge() {
654        let body =
655            r#"<html><body><script src="/akam/13/pixel"></script><p>Welcome</p></body></html>"#;
656        let ec = engine_classify(body);
657        assert_ne!(ec.tag, "SensorChallenge-CHL");
658    }
659
660    // ── 18. Blocked word detection ──────────────────────────────────────
661
662    #[test]
663    fn detect_blocked_small_body() {
664        let body = "<html><body><h1>403 Forbidden</h1><p>Access Denied</p></body></html>";
665        let ec = engine_classify(body);
666        assert_eq!(ec.tag, "BLOCKED");
667        assert!(ec.verdict.is_challenge());
668    }
669
670    // ── 19. Blocked word size-gated ─────────────────────────────────────
671
672    #[test]
673    fn blocked_word_size_gated() {
674        let body = big("Access Denied");
675        let ec = engine_classify(&body);
676        assert_eq!(ec.tag, "L3-RENDERED");
677        assert_eq!(ec.verdict, ChallengeVerdict::Pass);
678    }
679
680    // ── 20. cf-browser-verification ─────────────────────────────────────
681
682    #[test]
683    fn detect_cf_browser_verification() {
684        let body =
685            r#"<html class="cf-browser-verification"><body>Checking your browser...</body></html>"#;
686        let ec = engine_classify(body);
687        assert_eq!(ec.tag, "ManagedChallenge-CHL");
688        assert!(ec.verdict.is_challenge());
689    }
690
691    // ── 21. checking your browser phrase ────────────────────────────────
692
693    #[test]
694    fn detect_checking_your_browser() {
695        let body =
696            "<html><body><p>Checking your browser before accessing the site...</p></body></html>";
697        let ec = engine_classify(body);
698        assert_eq!(ec.tag, "ManagedChallenge-CHL");
699        assert!(ec.verdict.is_challenge());
700    }
701
702    // ── 22. hcaptcha detection ──────────────────────────────────────────
703
704    #[test]
705    fn detect_hcaptcha() {
706        let body = r#"<html><body><iframe src="https://hcaptcha.com/captcha/v1/challenge"></iframe></body></html>"#;
707        let ec = engine_classify(body);
708        assert_eq!(ec.tag, "captcha-CHL");
709        assert!(ec.verdict.is_challenge());
710    }
711
712    // ── 23. cf-turnstile detection ──────────────────────────────────────
713
714    #[test]
715    fn detect_cf_turnstile() {
716        let mut body = String::from(
717            r#"<html><body><div class="cf-turnstile" data-sitekey="x"></div><p>captcha verification</p>"#,
718        );
719        while body.len() < 2000 {
720            body.push_str("<p>Verify you are human to continue browsing this site</p>");
721        }
722        body.push_str("</body></html>");
723        let ec = engine_classify(&body);
724        assert_eq!(ec.tag, "captcha-CHL");
725        assert!(ec.verdict.is_challenge());
726    }
727
728    // ── 24. pardon our interruption ─────────────────────────────────────
729
730    #[test]
731    fn detect_pardon_interruption() {
732        let body = "<html><body><p>Pardon our interruption, verifying access</p></body></html>";
733        let ec = engine_classify(body);
734        assert_eq!(ec.tag, "SensorChallenge-CHL");
735        assert!(ec.verdict.is_challenge());
736    }
737
738    // ── 25. normal HTML is L3-RENDERED ──────────────────────────────────
739
740    #[test]
741    fn normal_html_passes() {
742        let mut body = String::from("<html><body>");
743        for _ in 0..400 {
744            body.push_str("<p>Normal rendered content paragraph with enough text to fill.</p>");
745        }
746        body.push_str("</body></html>");
747        assert!(body.len() >= THIN_SHELL_MAX_BYTES, "body must be >= 15KB");
748        let ec = engine_classify(&body);
749        assert_eq!(ec.tag, "L3-RENDERED");
750        assert_eq!(ec.verdict, ChallengeVerdict::Pass);
751    }
752
753    // ── 26. BDD: CF managed challenge ───────────────────────────────────
754
755    #[test]
756    fn bdd_cf_managed_challenge() {
757        let body = r#"<html><body><script>window._cf_chl_opt={cvId:'3'};</script>
758            <script src="/cdn-cgi/challenge-platform/h/b/jsd"></script></body></html>"#;
759        let ec = engine_classify(body);
760        assert_eq!(ec.tag, "ManagedChallenge-CHL");
761        assert_eq!(ec.verdict, ChallengeVerdict::EdgeBlock);
762    }
763
764    // ── 27. BDD: AWS-WAF challenge ──────────────────────────────────────
765
766    #[test]
767    fn bdd_aws_waf_challenge() {
768        let body = r#"<html><body>
769            <script>window.gokuProps={key:'a'};window.awsWafCookieDomainList=["x.com"];</script>
770            <script src="https://x.token.awswaf.com/challenge.js"></script>
771            <script>AwsWafIntegration.checkForceRefresh();</script>
772        </body></html>"#;
773        let ec = engine_classify(body);
774        assert_eq!(ec.tag, "AWS-WAF-CHL");
775        assert_eq!(ec.verdict, ChallengeVerdict::EdgeBlock);
776    }
777
778    // ── 28. BDD: clean response ─────────────────────────────────────────
779
780    #[test]
781    fn bdd_clean_response() {
782        let mut body = String::from("<html><body>");
783        for _ in 0..400 {
784            body.push_str("<p>Normal content with enough text to exceed the 15KB threshold.</p>");
785        }
786        body.push_str("</body></html>");
787        assert!(body.len() >= THIN_SHELL_MAX_BYTES);
788        let ec = engine_classify(&body);
789        assert_eq!(ec.tag, "L3-RENDERED");
790        assert_eq!(ec.verdict, ChallengeVerdict::Pass);
791    }
792
793    // ── 29. BDD: thin body ──────────────────────────────────────────────
794
795    #[test]
796    fn bdd_thin_body() {
797        let body = "<html><body>tiny</body></html>";
798        let ec = engine_classify(body);
799        assert_eq!(ec.tag, "THIN-BODY");
800        assert_eq!(ec.verdict, ChallengeVerdict::RenderIncomplete);
801    }
802
803    // ── 30. ChallengeVerdict::is_challenge coverage ─────────────────────
804
805    #[test]
806    fn is_challenge_coverage() {
807        assert!(!ChallengeVerdict::Pass.is_challenge());
808        assert!(!ChallengeVerdict::RenderIncomplete.is_challenge());
809        assert!(!ChallengeVerdict::ThinShell.is_challenge());
810        assert!(ChallengeVerdict::EdgeBlock.is_challenge());
811        assert!(ChallengeVerdict::SensorFail.is_challenge());
812        assert!(ChallengeVerdict::ChallengeIncomplete.is_challenge());
813    }
814
    // ── 31. EngineClass len field ───────────────────────────────────────
816
817    #[test]
818    fn engine_class_len_matches_body() {
819        let body = "hello";
820        let ec = engine_classify(body);
821        assert_eq!(ec.len, 5);
822    }
823
824    // ── proptest: engine_classify never panics on arbitrary input ────────
825
826    #[cfg(feature = "proptest")]
827    mod proptests {
828        use proptest::prelude::*;
829
830        use super::*;
831
832        proptest! {
833            #[test]
834            fn engine_classify_never_panics(body in ".*") {
835                let ec = engine_classify(&body);
836                // Must always return a valid tag
837                let _ = ec.tag;
838                let _ = ec.verdict;
839                let _ = ec.len;
840            }
841        }
842    }
843}