1use std::sync::Arc;
2use std::time::Duration;
3
4use scraper::{Html, Selector};
5use wreq::Client;
6use wreq::cookie::Jar;
7use wreq_util::Emulation;
8
9use crate::error::{self, FlightError};
10
/// Returns the current Unix time in whole seconds as a decimal string.
///
/// `fetch_html` appends the result as the `cx` query parameter, so the value
/// changes at most once per second.
///
/// If the system clock reports a time before the Unix epoch, `"0"` is returned
/// rather than panicking: a wrong clock should not abort a fetch.
fn cache_buster() -> String {
    use std::time::{SystemTime, UNIX_EPOCH};

    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        // `Err` here means "now" is earlier than 1970; fall back to zero.
        .map_or(0, |elapsed| elapsed.as_secs())
        .to_string()
}
19
/// Google Flights endpoint; `fetch_html` appends `?` and the encoded query
/// string to it to form the first request URL.
const BASE_URL: &str = "https://www.google.com/travel/flights";
/// Upper bound on request rounds in `follow_redirects`. Redirect hops and
/// consent-form submissions both count against it.
const MAX_REDIRECTS: u8 = 10;
22
/// Per-call settings accepted by `fetch_html`.
///
/// The `Debug` output prints the proxy URL verbatim, so take care when logging
/// it if that URL embeds credentials.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FetchOptions {
    /// Proxy URL handed to `wreq::Proxy::all`. `None` leaves the client's
    /// proxy configuration untouched.
    pub proxy: Option<String>,
    /// Timeout in seconds, passed to the client builder's `timeout`.
    pub timeout: u64,
}

impl Default for FetchOptions {
    /// No proxy and a 30-second timeout.
    fn default() -> Self {
        Self {
            proxy: None,
            timeout: 30,
        }
    }
}
37
/// Reports whether `status` is one of the redirect codes this module follows
/// by hand: 301, 302, 303, 307 or 308.
fn is_redirect(status: u16) -> bool {
    const REDIRECT_CODES: [u16; 5] = [301, 302, 303, 307, 308];
    REDIRECT_CODES.contains(&status)
}
41
42fn extract_location(response: &wreq::Response) -> Option<String> {
43 response
44 .headers()
45 .get("location")
46 .and_then(|v| v.to_str().ok())
47 .map(String::from)
48}
49
50fn extract_consent_form(html: &str) -> Option<String> {
51 let document = Html::parse_document(html);
52 let form_sel = Selector::parse("form[action=\"https://consent.google.com/save\"]").ok()?;
53 let input_sel = Selector::parse("input[type=\"hidden\"]").ok()?;
54
55 let form = document.select(&form_sel).next()?;
56
57 let mut fields: Vec<(String, String)> = Vec::new();
58 for input in form.select(&input_sel) {
59 if let (Some(name), Some(value)) = (input.attr("name"), input.attr("value")) {
60 fields.push((name.to_string(), value.to_string()));
61 }
62 }
63
64 if fields.is_empty() {
65 return None;
66 }
67
68 Some(
69 fields
70 .iter()
71 .map(|(k, v)| format!("{}={}", urlencoding::encode(k), urlencoding::encode(v)))
72 .collect::<Vec<_>>()
73 .join("&"),
74 )
75}
76
77async fn follow_redirects(client: &Client, start_url: &str) -> Result<String, FlightError> {
78 let mut url = start_url.to_string();
79
80 for _ in 0..MAX_REDIRECTS {
81 let response = client
82 .get(&url)
83 .send()
84 .await
85 .map_err(error::from_http_error)?;
86
87 let status = response.status().as_u16();
88
89 if is_redirect(status) {
90 url = extract_location(&response)
91 .ok_or_else(|| FlightError::JsParse("redirect without location".into()))?;
92 continue;
93 }
94
95 match status {
96 200 => {}
97 429 => return Err(FlightError::RateLimited),
98 403 | 503 => return Err(FlightError::Blocked(status)),
99 s if s >= 400 => return Err(FlightError::HttpStatus(s)),
100 _ => {}
101 }
102
103 let html = response.text().await.map_err(error::from_http_error)?;
104
105 if let Some(form_body) = extract_consent_form(&html) {
106 let save_resp = client
107 .post("https://consent.google.com/save")
108 .header("content-type", "application/x-www-form-urlencoded")
109 .body(form_body)
110 .send()
111 .await
112 .map_err(error::from_http_error)?;
113
114 if is_redirect(save_resp.status().as_u16()) {
115 url = extract_location(&save_resp)
116 .ok_or_else(|| FlightError::JsParse("consent save: no redirect".into()))?;
117 continue;
118 }
119
120 return Err(FlightError::Blocked(save_resp.status().as_u16()));
121 }
122
123 return Ok(html);
124 }
125
126 Err(FlightError::Blocked(302))
127}
128
129pub async fn fetch_html(
130 params: &[(String, String)],
131 options: &FetchOptions,
132) -> Result<String, FlightError> {
133 let jar = Arc::new(Jar::default());
134
135 let mut builder = Client::builder()
136 .emulation(Emulation::Chrome137)
137 .cookie_provider(jar)
138 .timeout(Duration::from_secs(options.timeout));
139
140 if let Some(ref proxy) = options.proxy {
141 builder = builder.proxy(
142 wreq::Proxy::all(proxy).map_err(error::from_http_error)?,
143 );
144 }
145
146 let client = builder.build().map_err(error::from_http_error)?;
147
148 let mut params = params.to_vec();
149 params.push(("cx".to_string(), cache_buster()));
150
151 let mut start_url = format!("{BASE_URL}?");
152 for (i, (k, v)) in params.iter().enumerate() {
153 if i > 0 {
154 start_url.push('&');
155 }
156 start_url.push_str(&urlencoding::encode(k));
157 start_url.push('=');
158 start_url.push_str(&urlencoding::encode(v));
159 }
160
161 follow_redirects(&client, &start_url).await
162}