Skip to main content

flyr/
parse.rs

use std::convert::TryFrom;

use scraper::{Html, Selector};
use serde_json::Value;

use crate::error::FlightError;
use crate::model::*;
6
7fn get_val(val: &Value, idx: usize) -> Option<&Value> {
8    val.as_array().and_then(|arr| arr.get(idx))
9}
10
11fn get_str(val: &Value, idx: usize) -> Option<String> {
12    get_val(val, idx).and_then(|v| v.as_str()).map(String::from)
13}
14
15fn get_i64(val: &Value, idx: usize) -> Option<i64> {
16    get_val(val, idx).and_then(|v| v.as_i64())
17}
18
19fn get_u32(val: &Value, idx: usize) -> Option<u32> {
20    get_val(val, idx).and_then(|v| v.as_u64()).map(|v| v as u32)
21}
22
23pub fn extract_script(html: &str) -> Result<String, FlightError> {
24    let document = Html::parse_document(html);
25    let selector =
26        Selector::parse(r#"script[class="ds:1"]"#).expect("valid selector");
27
28    document
29        .select(&selector)
30        .next()
31        .map(|el| el.inner_html())
32        .ok_or(FlightError::ScriptTagNotFound)
33}
34
35pub fn parse_js(js: &str) -> Result<Value, FlightError> {
36    let data = js
37        .split_once("data:")
38        .map(|(_, rest)| rest)
39        .ok_or_else(|| FlightError::JsParse("no 'data:' marker found".into()))?;
40
41    let data = data
42        .rsplit_once(',')
43        .map(|(left, _)| left)
44        .ok_or_else(|| FlightError::JsParse("no trailing comma found".into()))?;
45
46    serde_json::from_str(data).map_err(|e| FlightError::JsParse(e.to_string()))
47}
48
49fn parse_datetime(date_val: &Value, time_val: &Value) -> Option<FlightDateTime> {
50    Some(FlightDateTime {
51        year: get_u32(date_val, 0)?,
52        month: get_u32(date_val, 1)?,
53        day: get_u32(date_val, 2)?,
54        hour: get_u32(time_val, 0)?,
55        minute: get_u32(time_val, 1).unwrap_or(0),
56    })
57}
58
59fn parse_segment(sf: &Value) -> Option<Segment> {
60    let from_airport = Airport {
61        code: get_str(sf, 3)?,
62        name: get_str(sf, 4).unwrap_or_default(),
63    };
64
65    let to_airport = Airport {
66        code: get_str(sf, 6)?,
67        name: get_str(sf, 5).unwrap_or_default(),
68    };
69
70    let departure_date = get_val(sf, 20)?;
71    let departure_time = get_val(sf, 8)?;
72    let departure = parse_datetime(departure_date, departure_time)?;
73
74    let arrival_date = get_val(sf, 21)?;
75    let arrival_time = get_val(sf, 10)?;
76    let arrival = parse_datetime(arrival_date, arrival_time)?;
77
78    let duration_minutes = get_u32(sf, 11).unwrap_or(0);
79    let aircraft = get_str(sf, 17);
80
81    Some(Segment {
82        from_airport,
83        to_airport,
84        departure,
85        arrival,
86        duration_minutes,
87        aircraft,
88    })
89}
90
91fn parse_flight(k: &Value) -> Option<FlightResult> {
92    let flight = get_val(k, 0)?;
93
94    let flight_type = get_str(flight, 0).unwrap_or_default();
95
96    let airlines: Vec<String> = get_val(flight, 1)
97        .and_then(|v| v.as_array())
98        .map(|arr| arr.iter().filter_map(|v| v.as_str().map(String::from)).collect())
99        .unwrap_or_default();
100
101    let segments_arr = get_val(flight, 2).and_then(|v| v.as_array());
102    let segments: Vec<Segment> = segments_arr
103        .map(|arr| arr.iter().filter_map(parse_segment).collect())
104        .unwrap_or_default();
105
106    let price = get_val(k, 1)
107        .and_then(|v| get_val(v, 0))
108        .and_then(|v| get_i64(v, 1));
109
110    let extras = get_val(flight, 22);
111    let carbon = CarbonEmission {
112        emission_grams: extras.and_then(|e| get_i64(e, 7)),
113        typical_grams: extras.and_then(|e| get_i64(e, 8)),
114    };
115
116    Some(FlightResult {
117        flight_type,
118        airlines,
119        segments,
120        price,
121        carbon,
122    })
123}
124
125fn parse_metadata(payload: &Value) -> SearchMetadata {
126    let mut alliances = Vec::new();
127    let mut airlines = Vec::new();
128
129    if let Some(meta_root) = get_val(payload, 7)
130        .and_then(|v| get_val(v, 1))
131    {
132        if let Some(alliances_data) = get_val(meta_root, 0).and_then(|v| v.as_array()) {
133            for item in alliances_data {
134                if let (Some(code), Some(name)) = (get_str(item, 0), get_str(item, 1)) {
135                    alliances.push(Alliance { code, name });
136                }
137            }
138        }
139
140        if let Some(airlines_data) = get_val(meta_root, 1).and_then(|v| v.as_array()) {
141            for item in airlines_data {
142                if let (Some(code), Some(name)) = (get_str(item, 0), get_str(item, 1)) {
143                    airlines.push(Airline { code, name });
144                }
145            }
146        }
147    }
148
149    SearchMetadata {
150        airlines,
151        alliances,
152    }
153}
154
155pub fn parse_payload(payload: &Value) -> Result<SearchResult, FlightError> {
156    let metadata = parse_metadata(payload);
157
158    let flights_root = get_val(payload, 3).and_then(|v| get_val(v, 0));
159
160    let flights = match flights_root {
161        Some(root) if !root.is_null() => {
162            let arr = root
163                .as_array()
164                .ok_or_else(|| FlightError::JsParse("payload[3][0] is not an array".into()))?;
165            arr.iter().filter_map(parse_flight).collect()
166        }
167        _ => Vec::new(),
168    };
169
170    Ok(SearchResult { flights, metadata })
171}
172
173pub fn parse_html(html: &str) -> Result<SearchResult, FlightError> {
174    let js = extract_script(html)?;
175    let payload = parse_js(&js)?;
176    parse_payload(&payload)
177}