1use async_trait::async_trait;
11use px_core::{CookieJarDelta, NamedCookie};
12use px_errors::AppError;
13use px_harvester::{HarvestRequest, Harvester};
14use px_pipeline::{ChallengeHandler, HandlerMetrics, HandlerOutcome, PageHtml};
15use std::sync::Arc;
16use std::time::Instant;
17
18mod cookie_extractor;
19pub use cookie_extractor::{extract_session_cookies, is_session_cookie};
20
21pub struct CloudflareHandler {
22 harvester: Option<Arc<dyn Harvester>>,
23}
24
25impl CloudflareHandler {
26 pub fn new() -> Self {
27 Self { harvester: None }
28 }
29
30 pub fn with_harvester(harvester: Arc<dyn Harvester>) -> Self {
31 Self {
32 harvester: Some(harvester),
33 }
34 }
35}
36
37impl Default for CloudflareHandler {
38 fn default() -> Self {
39 Self::new()
40 }
41}
42
43#[async_trait]
44impl ChallengeHandler for CloudflareHandler {
45 fn name(&self) -> &'static str {
46 "cloudflare"
47 }
48
49 async fn detects(&self, page: &PageHtml) -> Result<bool, AppError> {
50 let h = &page.html;
51 Ok(h.contains("cdn-cgi/challenge-platform")
52 || h.contains("cf-mitigated")
53 || h.contains("cf_clearance"))
54 }
55
56 async fn solve(&self, page: &PageHtml) -> Result<HandlerOutcome, AppError> {
57 let Some(harvester) = self.harvester.as_ref() else {
58 return Ok(HandlerOutcome::not_implemented(self.name()));
59 };
60 let start = Instant::now();
61 let result = harvester.harvest(HarvestRequest::new(&page.url)).await?;
62 let session_cookies: Vec<NamedCookie> = extract_session_cookies(&result.cookies)
63 .into_iter()
64 .map(|c| NamedCookie {
65 name: c.name,
66 value: c.value,
67 domain: c.domain,
68 path: c.path,
69 })
70 .collect();
71 let delta = CookieJarDelta {
72 set: session_cookies,
73 removed: Vec::new(),
74 };
75 let metrics = HandlerMetrics {
76 detect_us: 0,
77 solve_ms: start.elapsed().as_millis() as u64,
78 bytes_read: result.html.len() as u64,
79 };
80 Ok(HandlerOutcome::solved_with_ua(
81 self.name(),
82 delta,
83 Vec::new(),
84 metrics,
85 result.user_agent,
86 ))
87 }
88}
89
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;
    use px_harvester::{HarvestResult, HarvestedCookie};
    use px_pipeline::HandlerStatus;

    /// Harvester double that answers every request with the same canned data.
    struct FakeHarvester {
        ua: String,
        cookies: Vec<HarvestedCookie>,
        html: String,
    }

    #[async_trait]
    impl Harvester for FakeHarvester {
        async fn harvest(&self, _req: HarvestRequest) -> Result<HarvestResult, AppError> {
            let canned = HarvestResult {
                html: self.html.clone(),
                user_agent: self.ua.clone(),
                cookies: self.cookies.clone(),
            };
            Ok(canned)
        }
    }

    /// Builds a cookie called `name` with fixed value, domain and path.
    fn cookie(name: &str) -> HarvestedCookie {
        HarvestedCookie {
            name: name.into(),
            value: "v".into(),
            domain: "x.com".into(),
            path: "/".into(),
        }
    }

    #[tokio::test]
    async fn solve_without_harvester_is_not_implemented() {
        let handler = CloudflareHandler::new();
        let page = PageHtml::new("https://x.com", "");

        let outcome = handler.solve(&page).await.expect("solve");

        assert_eq!(outcome.status, HandlerStatus::NotImplemented);
    }

    #[tokio::test]
    async fn solve_with_harvester_returns_session_cookies_and_ua() {
        // Four cookies the handler is expected to keep, then one it must drop.
        let harvested_names = [
            "cf_clearance",
            "__cf_bm",
            "_px3",
            "_pxhd",
            "unrelated_session",
        ];
        let fake = Arc::new(FakeHarvester {
            ua: "Mozilla/5.0 Camoufox".into(),
            cookies: harvested_names.iter().copied().map(cookie).collect(),
            html: "real page".into(),
        });
        let handler = CloudflareHandler::with_harvester(fake);
        let page = PageHtml::new("https://x.com", "<challenge>");

        let outcome = handler.solve(&page).await.expect("solve");

        assert_eq!(outcome.status, HandlerStatus::Solved);
        assert_eq!(outcome.user_agent.as_deref(), Some("Mozilla/5.0 Camoufox"));

        let names: Vec<&str> = outcome
            .cookies
            .set
            .iter()
            .map(|c| c.name.as_str())
            .collect();
        for expected in ["cf_clearance", "__cf_bm", "_px3", "_pxhd"] {
            assert!(names.contains(&expected));
        }
        assert!(!names.contains(&"unrelated_session"));
    }
}