cloudscraper_rs/challenges/core/
analysis.rs

1//! Challenge page parsing helpers and utilities.
2//!
3//! Provides the building blocks needed to inspect Cloudflare challenge HTML and
4//! extract the parameters required by the solvers.
5
6use http::header::SERVER;
7use once_cell::sync::Lazy;
8use regex::{Regex, RegexBuilder};
9use thiserror::Error;
10use url::Url;
11
12use super::types::{ChallengeResponse, ChallengeSubmission};
13
/// Action target and hidden inputs captured from an IUAM challenge form.
///
/// A blueprint holds only owned strings and can be turned into a submission
/// with [`IuamChallengeBlueprint::to_submission`].
#[derive(Clone, Debug)]
pub struct IuamChallengeBlueprint {
    /// Value of the form's `action` attribute; it is resolved against a base
    /// URL when the submission is built, so it may be relative.
    pub action: String,
    /// `(name, value)` pairs of the hidden inputs that accompany the form.
    pub hidden_fields: Vec<(String, String)>,
}
20
21impl IuamChallengeBlueprint {
22    pub fn to_submission(
23        self,
24        base_url: &Url,
25        mut payload: Vec<(String, String)>,
26    ) -> Result<ChallengeSubmission, ChallengeParseError> {
27        payload.extend(self.hidden_fields);
28        let form_fields = payload
29            .into_iter()
30            .collect::<std::collections::HashMap<_, _>>();
31
32        let submit_url = base_url
33            .join(&self.action)
34            .map_err(ChallengeParseError::InvalidAction)?;
35
36        Ok(ChallengeSubmission::new(
37            http::Method::POST,
38            submit_url,
39            form_fields,
40            Default::default(),
41            std::time::Duration::from_secs(0),
42        ))
43    }
44}
45
46/// Outcomes when parsing a Cloudflare challenge fails.
47#[derive(Debug, Error)]
48pub enum ChallengeParseError {
49    #[error("response is not a Cloudflare challenge")]
50    NotCloudflare,
51    #[error("unable to locate challenge form")]
52    FormNotFound,
53    #[error("missing required hidden field: {0}")]
54    MissingField(&'static str),
55    #[error("invalid challenge action: {0}")]
56    InvalidAction(url::ParseError),
57}
58
59/// Extract IUAM challenge blueprint (action + hidden fields) from HTML body.
60pub fn parse_iuam_challenge(
61    response: &ChallengeResponse<'_>,
62) -> Result<IuamChallengeBlueprint, ChallengeParseError> {
63    if !is_cloudflare_response(response) {
64        return Err(ChallengeParseError::NotCloudflare);
65    }
66
67    let captures = IUAM_FORM_RE
68        .captures(response.body)
69        .ok_or(ChallengeParseError::FormNotFound)?;
70
71    let action = captures
72        .name("action")
73        .map(|m| html_escape::decode_html_entities(m.as_str()).to_string())
74        .ok_or(ChallengeParseError::FormNotFound)?;
75
76    let inputs = captures.name("inputs").map(|m| m.as_str()).unwrap_or("");
77    let hidden_fields = extract_hidden_fields(inputs)?;
78
79    Ok(IuamChallengeBlueprint {
80        action,
81        hidden_fields,
82    })
83}
84
85fn extract_hidden_fields(fragment: &str) -> Result<Vec<(String, String)>, ChallengeParseError> {
86    static INPUT_RE: Lazy<Regex> = Lazy::new(|| {
87        RegexBuilder::new(r#"(?si)<input\s+([^>]+?)/?>"#)
88            .case_insensitive(true)
89            .dot_matches_new_line(true)
90            .build()
91            .unwrap()
92    });
93    static ATTR_RE: Lazy<Regex> = Lazy::new(|| {
94        RegexBuilder::new(r#"(?si)(?P<name>[^\s=]+)=['"](?P<value>[^'"]*)['"]"#)
95            .case_insensitive(true)
96            .build()
97            .unwrap()
98    });
99
100    let mut payload = Vec::new();
101
102    for caps in INPUT_RE.captures_iter(fragment) {
103        let attributes = caps.get(1).map(|m| m.as_str()).unwrap_or("");
104        let mut field_name: Option<String> = None;
105        let mut field_value: Option<String> = None;
106
107        for attr_caps in ATTR_RE.captures_iter(attributes) {
108            if let (Some(name), Some(value)) = (attr_caps.name("name"), attr_caps.name("value")) {
109                match name.as_str().to_ascii_lowercase().as_str() {
110                    "name" => field_name = Some(value.as_str().to_string()),
111                    "value" => field_value = Some(value.as_str().to_string()),
112                    _ => {}
113                }
114            }
115        }
116
117        if let (Some(name), Some(value)) = (field_name, field_value)
118            && matches!(name.as_str(), "r" | "jschl_vc" | "pass")
119        {
120            payload.push((name, value));
121        }
122    }
123
124    for key in ["r", "jschl_vc", "pass"] {
125        if !payload.iter().any(|(name, _)| name == key) {
126            return Err(ChallengeParseError::MissingField(key));
127        }
128    }
129
130    Ok(payload)
131}
132
133/// Detect whether the response is served by Cloudflare.
134pub fn is_cloudflare_response(response: &ChallengeResponse<'_>) -> bool {
135    response
136        .headers
137        .get(SERVER)
138        .and_then(|value| value.to_str().ok())
139        .map(|value| value.to_ascii_lowercase().starts_with("cloudflare"))
140        .unwrap_or(false)
141}
142
143/// Build origin header value from URL (`scheme://host[:port]`).
144pub fn origin_from_url(url: &Url) -> String {
145    let mut origin = format!("{}://{}", url.scheme(), url.host_str().unwrap_or(""));
146    if let Some(port) = url.port() {
147        origin.push(':');
148        origin.push_str(&port.to_string());
149    }
150    origin
151}
152
153static IUAM_FORM_RE: Lazy<Regex> = Lazy::new(|| {
154    RegexBuilder::new(
155        r#"(?si)<form[^>]*id=['"]challenge-form['"][^>]*action=['"](?P<action>[^"']*__cf_chl_f_tk=[^"']+)['"][^>]*>(?P<inputs>.*?)</form>"#,
156    )
157    .case_insensitive(true)
158    .dot_matches_new_line(true)
159    .build()
160    .unwrap()
161});