eaip/parse/
mod.rs

1use std::collections::VecDeque;
2
3use airac::AIRAC;
4use async_trait::async_trait;
5use ego_tree::iter::{Edge, Traverse};
6use regex::Regex;
7use scraper::{Html, Node};
8
9use crate::prelude::*;
10
11/// Parsers for a list of radio navaids.
12pub mod navaids;
13
14/// Parsers for a list of intersections.
15pub mod intersections;
16
17/// Parsers for a list of airways.
18pub mod airways;
19
20/// Parsers for airports.
21pub mod airports;
22
23/// Fetch and parse some data from an eAIP
24#[async_trait]
25pub trait FromEAIP {
26    /// The type this parser will output when successful
27    type Output;
28
29    /// Fetch the data from the given eAIP for the given AIRAC.
30    async fn from_eaip(eaip: &EAIP, airac: AIRAC) -> Result<Self::Output>;
31
32    /// Fetch the data from the given eAIP for the current AIRAC.
33    async fn from_current_eaip(eaip: &EAIP) -> Result<Self::Output> {
34        Self::from_eaip(eaip, AIRAC::current()).await
35    }
36}
37
38/// The trait for all eAIP data parsers
39pub trait Parser<'a> {
40    /// The type this parser will output when successful
41    type Output;
42
43    /// Parse the given HTML data into the type given by `Self::Output`.
44    fn parse(data: &'a str) -> Result<Self::Output>;
45}
46
47/// Get just the text content of some HTML, removing elements that are hidden with display: none.
48pub(crate) fn get_clean_text(html_frag: String) -> String {
49    let frag = Html::parse_fragment(&html_frag);
50    get_clean_text_traverse(frag.root_element().traverse())
51        .trim()
52        .to_string()
53}
54
55fn get_clean_text_traverse(traverse: Traverse<'_, Node>) -> String {
56    let mut s = String::new();
57    let mut ignore_chain = VecDeque::new();
58    for edge in traverse {
59        match edge {
60            Edge::Open(node) => {
61                match node.value() {
62                    Node::Element(e) => {
63                        // if <br>, newline
64                        if e.name() == "br" || e.name() == "p" {
65                            if !s.ends_with('\n') {
66                                s += "\n";
67                            }
68                        } else if e.name() == "del" {
69                            ignore_chain.push_front(e.name());
70                        } else if let Some(attr) = e.attr("style") {
71                            // if <... style="display: none">, push_front to ignore chain
72                            if attr == "display: none;" {
73                                // TODO: Improve reliability
74                                ignore_chain.push_front(e.name());
75                            }
76                        }
77                    }
78                    Node::Text(t) => {
79                        if ignore_chain.is_empty() {
80                            s += &*t.trim();
81                        }
82                    }
83                    _ => (),
84                }
85            }
86            Edge::Close(node) => {
87                // if matches end of ignore chain, pop_front
88                if let Node::Element(e) = node.value() {
89                    if !s.ends_with('\n') {
90                        let inline_elems = vec!["span", "strong", "i", "em"];
91                        if !inline_elems.contains(&&*e.name().to_lowercase()) {
92                            s += "\n";
93                        }
94                    }
95
96                    if !ignore_chain.is_empty() && e.name() == ignore_chain[0] {
97                        ignore_chain.pop_front().unwrap();
98                    }
99                }
100            }
101        }
102    }
103    s
104}
105
106/// Parses a frequency - always returns kHz
107pub(crate) fn parse_frequency<S: Into<String>>(data: S) -> Result<usize> {
108    let re = Regex::new(r"([0-9.]{3,7})\s*([kM])Hz").unwrap();
109    let ch_re = Regex::new(r"CH(\d+[XY])").unwrap();
110    let data = data.into();
111
112    if let Some(caps) = re.captures(&data) {
113        let mut freq = caps[1].parse::<f32>().unwrap();
114        if &caps[2] == "M" {
115            freq *= 1000f32;
116        }
117        Ok(freq as usize)
118    } else if let Some(caps) = ch_re.captures(&data) {
119        let channel = &caps[1];
120        let typ = channel.chars().last().unwrap();
121        let f = &channel[0..channel.len() - 1];
122        let f = f.parse::<usize>().unwrap();
123        match typ {
124            'X' => {
125                if f <= 16 {
126                    Ok(134300 + (f * 100))
127                } else if f <= 59 {
128                    Ok(106300 + (f * 100))
129                } else if f <= 69 {
130                    Ok(127300 + (f * 100))
131                } else {
132                    Ok(105300 + (f * 100))
133                }
134            }
135            'Y' => {
136                if f < 70 {
137                    Ok(106350 + (f * 100))
138                } else {
139                    Ok(105350 + (f * 100))
140                }
141            }
142            _ => Err(Error::ParseError("channel type", data)),
143        }
144    } else {
145        Err(Error::ParseError("frequency", data))
146    }
147}
148
149/// Parses a latlong
150pub(crate) fn parse_latlong<S: Into<String>>(data: S) -> Result<(Option<f64>, Option<f64>)> {
151    let re = Regex::new(r"(?:([0-9.]{6,})([NnSs]))?\s*(?:([0-9.]{7,})([EeWw]))?").unwrap();
152    let dms_re = Regex::new(r#"(?:(\d+)°(\d+)'([\d.]+)"([NnSs]))?\s*(?:(\d+)°(\d+)'([\d.]+)"([EeWw]))?"#).unwrap();
153    let data = data.into();
154    let mut lat = None;
155    let mut lon = None;
156    if let Some(caps) = re.captures(&data) {
157        if let Some(raw_lat) = caps.get(1) {
158            lat = Some(raw_lat.as_str().parse::<f64>().unwrap() / 10000f64);
159            if caps[2].to_lowercase() == *"s" {
160                lat = Some(-lat.unwrap());
161            }
162        }
163        if let Some(raw_lon) = caps.get(3) {
164            lon = Some(raw_lon.as_str().parse::<f64>().unwrap() / 10000f64);
165            if caps[4].to_lowercase() == *"w" {
166                lon = Some(-lon.unwrap());
167            }
168        }
169    }
170    if let Some(caps) = dms_re.captures(&data) {
171        if let Some(deg_lat) = caps.get(1) {
172            let deg_lat = deg_lat.as_str().parse::<f64>().unwrap();
173            let min_lat = caps.get(2).unwrap().as_str().parse::<f64>().unwrap();
174            let sec_lat = caps.get(3).unwrap().as_str().parse::<f64>().unwrap();
175            let dir_lat = caps.get(4).unwrap().as_str();
176            lat = Some(deg_lat + (min_lat / 60f64) + (sec_lat / 3600f64));
177            if dir_lat.to_lowercase() == *"s" {
178                lat = Some(-lat.unwrap());
179            }
180        }
181
182        if let Some(deg_lon) = caps.get(5) {
183            let deg_lon = deg_lon.as_str().parse::<f64>().unwrap();
184            let min_lon = caps.get(6).unwrap().as_str().parse::<f64>().unwrap();
185            let sec_lon = caps.get(7).unwrap().as_str().parse::<f64>().unwrap();
186            let dir_lon = caps.get(8).unwrap().as_str();
187            lon = Some(deg_lon + (min_lon / 60f64) + (sec_lon / 3600f64));
188            if dir_lon.to_lowercase() == *"w" {
189                lon = Some(-lon.unwrap());
190            }
191        }
192    }
193
194    if lat == None && lon == None {
195        Err(Error::ParseError("latlong", data))
196    } else {
197        Ok((lat, lon))
198    }
199}
200
201/// Parses an elevation, always returning ft
202pub(crate) fn parse_elevation<S: Into<String>>(data: S) -> Result<usize> {
203    let re = Regex::new(r"([0-9]+)\s*(?:ft|FT)").unwrap();
204    let data = data.into();
205
206    if let Some(caps) = re.captures(&data) {
207        Ok(caps[1].parse::<usize>().unwrap())
208    } else {
209        Err(Error::ParseError("elevation", data))
210    }
211}
212
#[cfg(test)]
mod tests {
    use super::{get_clean_text, parse_elevation, parse_frequency, parse_latlong};

    /// Plain text without markup is returned unchanged.
    #[test]
    fn test_already_clean_string() {
        assert_eq!("CLN", get_clean_text("CLN".to_string()));
    }

    /// A span hidden with `display: none;` contributes no text.
    #[test]
    fn test_clean_string() {
        assert_eq!("ADN", get_clean_text(r#"<span id="ID_10161034" class="SD">ADN</span><span class="sdParams" style="display: none;">TVOR;CODE_ID;111</span>"#.to_string()));
    }

    /// `<br />` becomes a newline between visible spans.
    #[test]
    fn test_clean_multiline_string() {
        assert_eq!("ADN\nL2", get_clean_text(r#"<span id="ID_10161034" class="SD">ADN</span><span class="sdParams" style="display: none;">TVOR;CODE_ID;111</span><br /><span id="ID_10161034" class="SD">L2</span><span class="sdParams" style="display: none;">TVOR;CODE_ID;111</span>"#.to_string()));
    }

    /// A realistic table cell mixing inline, block and hidden elements.
    #[test]
    fn test_complex_string() {
        let str = r#"
        <strong>
        <span class="SD" id="ID_10161012">ABERDEEN</span><span class="sdParams" style="display: none;">TVOR;TXT_NAME;111</span>
      </strong>
    <p class="line">
      <span class="SD" id="ID_10161015">VOR</span><span class="sdParams" style="display: none;">TVOR;CODE_TYPE;111</span>/DME<br/><span class="SD" id="ID_10161019">0.95°W</span><span class="sdParams" style="display: none;">TVOR;VAL_MAG_VAR;111</span> (<span class="SD" id="ID_10161022">2022</span><span class="sdParams" style="display: none;">TVOR;DATE_MAG_VAR;111</span>)<br/><span class="SD" id="ID_10161026">2.00°W</span><span class="sdParams" style="display: none;">TVOR;VAL_DECLINATION;111</span> (<span class="SD" id="ID_10161029">2018</span><span class="sdParams" style="display: none;">TVOR;CUSTOM_ATT1;111</span>)</p></td><td id="ID_10161031">
        "#;

        let intended_result = "ABERDEEN\nVOR/DME\n0.95°W(2022)\n2.00°W(2018)";

        assert_eq!(intended_result, get_clean_text(str.to_string()));
    }

    /// kHz and MHz values, with and without whitespace, come back in kHz.
    #[test]
    fn test_parse_frequency() {
        assert_eq!(123, parse_frequency("123 kHz").unwrap());
        assert_eq!(123000, parse_frequency("123 MHz").unwrap());
        assert_eq!(123456, parse_frequency("123.456 MHz").unwrap());
        assert_eq!(123456, parse_frequency("123456 kHz").unwrap());
        assert_eq!(123000, parse_frequency("123MHz").unwrap());
    }

    /// Only values in feet are accepted.
    #[test]
    fn test_parse_elevation() {
        assert_eq!(10, parse_elevation("10ft").unwrap());
        assert_eq!(10, parse_elevation("10 ft").unwrap());
        assert_eq!(10, parse_elevation("10 FT").unwrap());
        assert!(parse_elevation("10M").is_err());
    }

    /// Compact and degrees/minutes/seconds coordinates, in every hemisphere.
    #[test]
    fn test_parse_latlong() {
        // Upper-case hemisphere letters.
        assert_eq!(
            (Some(57.1209), Some(2.1153)),
            parse_latlong("571209N 0021153E").unwrap()
        );
        assert_eq!(
            (Some(57.1209), Some(-2.1153)),
            parse_latlong("571209N 0021153W").unwrap()
        );
        assert_eq!(
            (Some(-57.1209), Some(2.1153)),
            parse_latlong("571209S 0021153E").unwrap()
        );
        assert_eq!(
            (Some(-57.1209), Some(-2.1153)),
            parse_latlong("571209S 0021153W").unwrap()
        );

        // Lower-case hemisphere letters.
        assert_eq!(
            (Some(57.1209), Some(2.1153)),
            parse_latlong("571209n 0021153e").unwrap()
        );
        assert_eq!(
            (Some(57.1209), Some(-2.1153)),
            parse_latlong("571209n 0021153w").unwrap()
        );
        assert_eq!(
            (Some(-57.1209), Some(2.1153)),
            parse_latlong("571209s 0021153e").unwrap()
        );
        assert_eq!(
            (Some(-57.1209), Some(-2.1153)),
            parse_latlong("571209s 0021153w").unwrap()
        );

        // Latitude or longitude on its own.
        assert_eq!((Some(57.1209), None), parse_latlong("571209N").unwrap());
        assert_eq!((Some(-57.1209), None), parse_latlong("571209S").unwrap());
        assert_eq!((None, Some(2.1153)), parse_latlong("0021153E").unwrap());
        assert_eq!((None, Some(-2.1153)), parse_latlong("0021153W").unwrap());

        // Fractional seconds.
        assert_eq!(
            (Some(57.120962), None),
            parse_latlong("571209.62N").unwrap()
        );
        assert_eq!(
            (None, Some(2.115312)),
            parse_latlong("0021153.12E").unwrap()
        );

        assert_eq!(
            (Some(57.12096), None),
            parse_latlong("571209.6N").unwrap()
        );
        assert_eq!(
            (None, Some(2.11531)),
            parse_latlong("0021153.1E").unwrap()
        );

        // Degrees/minutes/seconds: the latitude must be present, and the
        // comparison uses the absolute difference so that a value that is too
        // small fails as well.
        let (lat, lon) = parse_latlong(r#"50°50'13.60"N"#).unwrap();
        let lat = lat.expect("a DMS latitude should be parsed");
        assert!((lat - 50.83711).abs() < 0.00001);
        assert_eq!(None, lon);
    }
}