sounding_bufkit/
parse_util.rs

1//! Utilities for parsing a sounding.
2use std::error::Error;
3
4use crate::error::*;
5use chrono::{NaiveDate, NaiveDateTime};
6use optional::{none, some, Optioned};
7
8// Missing or no data values used in Bufkit files
// Integer sentinel; `check_missing_i32` maps this value to `None`.
9pub(crate) const MISSING_I32: i32 = -9999;
// Floating point sentinel; `check_missing` maps this value to `none()`.
10pub(crate) const MISSING_F64: f64 = -9999.0;
// NOTE(review): not referenced anywhere in the visible part of this file. The name suggests it is
// the missing-value sentinel for index fields - TODO confirm against the callers.
11pub(crate) const MISSING_F64_INDEX: f64 = 999.0;
12
13pub(crate) fn check_missing(val: f64) -> Optioned<f64> {
14    if val == MISSING_F64 {
15        none()
16    } else {
17        some(val)
18    }
19}
20
21pub(crate) fn check_missing_i32(val: i32) -> Option<i32> {
22    if val == MISSING_I32 {
23        None
24    } else {
25        Some(val)
26    }
27}
28
29/// Isolate a value into a sub-string for further parsing.
30///
31/// Given a string `src` with a sub-string of the form "KEY = VALUE" (with or without spaces), and
32/// closures that describe the first character you want to keep after the '=' and the last
33/// character in the sub-string you want to keep, return a tuple with the first value as the
34/// sub-string you were looking for and the second value the remainder of `src` after this
35/// sub-string has been parsed out.
36pub fn parse_kv<'a, 'b, FS, FE>(
37    src: &'a str,
38    key: &'b str,
39    start_val: FS,
40    end_val: FE,
41) -> Result<(&'a str, &'a str), BufkitFileError>
42where
43    FS: Fn(char) -> bool,
44    FE: Fn(char) -> bool,
45{
46    let mut idx = src.find(key).ok_or_else(BufkitFileError::new)?;
47    let head = &src[idx..];
48    let mut head = head.trim_start_matches(key);
49    idx = head.find(start_val).ok_or_else(BufkitFileError::new)?;
50    head = &head[idx..];
51    // When finding the end of the value, you may go all the way to the end of the slice.
52    // If so, find returns None, just convert that into the end of the slice.
53    let tail_idx = head.find(end_val).or_else(|| Some(head.len())).unwrap();
54    Ok((head[..tail_idx].trim(), &head[tail_idx..]))
55}
56
/// Exercise `parse_kv` on a station-info line: an empty value, plain integers, a value with an
/// embedded '/', and a value that ends at the end of the input.
#[test]
#[rustfmt::skip]
fn test_parse_kv() {
    let test_data =
        "STID = STNM = 727730 TIME = 170401/0000 \
         SLAT = 46.92 SLON = -114.08 SELV = 972.0 \
         STIM = 0";

    let is_digit = |c: char| c.is_digit(10);

    // STID has no value, so the first alphanumeric run after the key is the name of the next key.
    let (value_to_parse, head) =
        parse_kv(test_data, "STID", char::is_alphanumeric, |c| !c.is_alphanumeric())
            .expect("There was an error parsing.");
    assert_eq!(value_to_parse, "STNM");
    assert_eq!(head, " = 727730 TIME = 170401/0000 SLAT = 46.92 SLON = -114.08 SELV = 972.0 STIM = 0");

    let (value_to_parse, head) =
        parse_kv(test_data, "STNM", is_digit, |c| !is_digit(c))
            .expect("There was an error parsing.");
    assert_eq!(value_to_parse, "727730");
    assert_eq!(head, " TIME = 170401/0000 SLAT = 46.92 SLON = -114.08 SELV = 972.0 STIM = 0");

    // The '/' is allowed inside the value so the whole date-time string is captured.
    let (val_to_parse, head) =
        parse_kv(test_data, "TIME", is_digit, |c| !(is_digit(c) || c == '/'))
            .expect("There was an error parsing.");
    assert_eq!(val_to_parse, "170401/0000");
    assert_eq!(head, " SLAT = 46.92 SLON = -114.08 SELV = 972.0 STIM = 0");

    // The last value runs to the end of the input, leaving an empty remainder.
    let (val_to_parse, head) =
        parse_kv(test_data, "STIM", is_digit, |c| !is_digit(c))
            .expect("There was an error parsing the very last element.");
    assert_eq!(val_to_parse, "0");
    assert_eq!(head, "");
}
109
110/// Parse an f64 value.
111pub fn parse_f64<'a, 'b>(
112    src: &'a str,
113    key: &'b str,
114) -> Result<(Optioned<f64>, &'a str), Box<dyn Error>> {
115    use std::str::FromStr;
116
117    let (val_to_parse, head) = parse_kv(
118        src,
119        key,
120        |c| char::is_digit(c, 10) || c == '-',
121        |c| !(char::is_digit(c, 10) || c == '.' || c == '-'),
122    )?;
123    let val = check_missing(f64::from_str(val_to_parse)?);
124    Ok((val, head))
125}
126
/// Exercise `parse_f64` on a positive and a negative value from a station-info line.
#[test]
#[rustfmt::skip]
fn test_parse_f64() {
    let test_data =
        "STID = STNM = 727730 TIME = 170401/0000 \
         SLAT = 46.92 SLON = -114.08 SELV = 972.0 \
         STIM = 0";

    let (lat, head) = parse_f64(test_data, "SLAT").expect("There was an error parsing.");
    assert_eq!(lat, some(46.92));
    assert_eq!(head, " SLON = -114.08 SELV = 972.0 STIM = 0");

    let (lon, head) = parse_f64(test_data, "SLON").expect("There was an error parsing.");
    assert_eq!(lon, some(-114.08));
    assert_eq!(head, " SELV = 972.0 STIM = 0");
}
149
150/// Parse an i32 value.
151pub fn parse_i32<'a, 'b>(src: &'a str, key: &'b str) -> Result<(i32, &'a str), Box<dyn Error>> {
152    use std::str::FromStr;
153
154    let (val_to_parse, head) = parse_kv(
155        src,
156        key,
157        |c| char::is_digit(c, 10),
158        |c| !char::is_digit(c, 10),
159    )?;
160    let val = i32::from_str(val_to_parse)?;
161    Ok((val, head))
162}
163
/// Exercise `parse_i32` on a plain integer and on the leading digits of a date-time value.
#[test]
#[rustfmt::skip]
fn test_parse_i32() {
    let test_data =
        "STID = STNM = 727730 TIME = 170401/0000 \
         SLAT = 46.92 SLON = -114.08 SELV = 972.0 \
         STIM = 0";

    let (stnm, head) = parse_i32(test_data, "STNM").expect("There was an error parsing.");
    assert_eq!(stnm, 727730);
    assert_eq!(head, " TIME = 170401/0000 SLAT = 46.92 SLON = -114.08 SELV = 972.0 STIM = 0");

    // Parsing stops at the '/', so only the date part is consumed.
    let (ymd, head) = parse_i32(test_data, "TIME").expect("There was an error parsing.");
    assert_eq!(ymd, 170401);
    assert_eq!(head, "/0000 SLAT = 46.92 SLON = -114.08 SELV = 972.0 STIM = 0");
}
186
187/// Parse a string of the form "YYmmdd/hhMM" to a `NaiveDateTime`.
188pub fn parse_naive_date_time(src: &str) -> Result<NaiveDateTime, Box<dyn Error>> {
189    use std::str::FromStr;
190
191    let val_to_parse = src.trim();
192
193    let year = i32::from_str(&val_to_parse[..2])? + 2000;
194    let month = u32::from_str(&val_to_parse[2..4])?;
195    let day = u32::from_str(&val_to_parse[4..6])?;
196    let hour = u32::from_str(&val_to_parse[7..9])?;
197    let minute = u32::from_str(&val_to_parse[9..11])?;
198    Ok(NaiveDate::from_ymd(year, month, day).and_hms(hour, minute, 0))
199}
200
/// A date-time string padded with white space parses to midnight on 2017-04-01.
#[test]
fn test_parse_naive_date_time() {
    let parsed = parse_naive_date_time(" 170401/0000 ").unwrap();
    let expected = NaiveDate::from_ymd(2017, 4, 1).and_hms(0, 0, 0);

    assert_eq!(parsed, expected);
}
208
/// Find a blank line, i.e. two newlines with no letters or numbers between them.
///
/// Characters are classified with `char::is_alphanumeric`, so white space and punctuation
/// between the two newlines do not stop a line from counting as blank.
///
/// Returns the byte offset of the character just after the second newline. Returns `None` if
/// there is no blank line, or if the second newline is the last character of `src`.
pub fn find_blank_line(src: &str) -> Option<usize> {
    // True while a newline has been seen with no letter or number after it yet.
    let mut pending_newline = false;

    for (idx, ch) in src.char_indices() {
        match ch {
            '\n' if pending_newline => {
                // Second newline in a row: report what follows it, if anything does.
                let after = idx + ch.len_utf8();
                return if after < src.len() { Some(after) } else { None };
            }
            '\n' => pending_newline = true,
            _ if ch.is_alphanumeric() => pending_newline = false,
            _ => {}
        }
    }

    None
}
235
/// Split a sounding into its three blank-line separated sections and check each one.
#[test]
fn test_find_blank_line() {
    let test_string = "STID = STNM = 727730 TIME = 170401/0300
                       SLAT = 46.92 SLON = -114.08 SELV = 972.0
                       STIM = 3

                       SHOW = 9.67 LIFT = 9.84 SWET = 33.41 KINX = 3.88
                       LCLP = 822.95 PWAT = 9.52 TOTL = 37.25 CAPE = 0.00
                       LCLT = 273.49 CINS = 0.00 EQLV = -9999.00 LFCT = -9999.00
                       BRCH = 0.00

                       PRES TMPC TMWC DWPC THTE DRCT SKNT OMEG
                       HGHT
                       906.90 8.04 4.99 1.70 303.11 250.71 4.12 -2.00";

    let first_break = find_blank_line(test_string).unwrap();
    let (station_info, after_station) = test_string.split_at(first_break);

    let second_break = find_blank_line(after_station).unwrap();
    let (indexes, profile) = after_station.split_at(second_break);

    let station_info = station_info.trim();
    assert!(station_info.starts_with("STID = STNM = 727730"));
    assert!(station_info.ends_with("STIM = 3"));

    let indexes = indexes.trim();
    assert!(indexes.starts_with("SHOW = 9.67"));
    assert!(indexes.ends_with("BRCH = 0.00"));

    // The final section contains no further blank lines.
    assert!(profile.trim().starts_with("PRES TMPC TMWC"));
    assert!(find_blank_line(profile).is_none());
}
263
264/// In a list of white space delimited floating point values, find a string with `n` values.
265pub fn find_next_n_tokens(src: &str, n: usize) -> Result<Option<usize>, BufkitFileError> {
266    if src.trim().is_empty() {
267        return Ok(None);
268    }
269
270    let mut started = false;
271    let mut token_count = 0;
272    let mut in_white_space = src.starts_with(char::is_whitespace);
273
274    for (i, c) in src.char_indices() {
275        if !started && (c.is_numeric() || c == '-' || c == '.') {
276            started = true;
277        } else if !in_white_space && c.is_whitespace() {
278            // Just passed into white space, increase token count
279            token_count += 1;
280            in_white_space = true;
281        } else if in_white_space && !c.is_whitespace() {
282            // Just passed out of white space
283            in_white_space = false;
284        }
285
286        if token_count == n {
287            return Ok(Some(i));
288        }
289    }
290
291    // Special case for end of string
292    if !in_white_space && token_count == n - 1 {
293        return Ok(Some(src.len()));
294    }
295
296    // Invalid number of tokens
297    if token_count > 0 {
298        return Err(BufkitFileError::new());
299    }
300    // Out of tokens
301    Ok(None)
302}
303
/// Split three 33-token surface records apart one at a time, then check that nothing is left.
#[test]
fn test_find_next_n_tokens() {
    /// Split `text` at the end of its next 33 tokens.
    fn split_record(text: &str) -> (&str, &str) {
        let brk = find_next_n_tokens(text, 33).unwrap().unwrap();
        text.split_at(brk)
    }

    let test_data = "
        727730 170401/0700 1021.50 869.80 0.14 275.50 0.00 74.00
        0.00 0.00 277.40 0.00 0.00 0.00
        0.00 1.00 0.70 0.00 0.07 1.44
        3.73 0.00 0.00 0.00 0.00 -4.60
        -4.80 30.30 0.01 999.00 -9999.00 20.00
        -2.30
        727730 170401/0800 1022.00 869.70 -0.36 274.90 0.00 74.00
        0.00 0.00 277.20 0.00 0.00 0.00
        0.00 1.00 0.50 0.00 0.07 0.34
        3.60 0.00 0.00 0.00 0.00 -3.70
        -5.30 35.40 0.01 999.00 -9999.00 20.00
        -2.78
        727730 170401/0900 1022.80 869.80 -0.46 274.80 0.00 74.00
        0.00 0.00 277.10 0.00 0.00 0.00
        0.00 0.90 0.80 0.00 0.07 -0.56
        3.50 0.00 0.00 0.00 0.00 -2.70
        -6.70 31.90 0.01 999.00 -9999.00 20.00
        -3.15";

    let (first, remaining) = split_record(test_data);
    println!("First: {}", first);
    assert_eq!(
        first,
        "
        727730 170401/0700 1021.50 869.80 0.14 275.50 0.00 74.00
        0.00 0.00 277.40 0.00 0.00 0.00
        0.00 1.00 0.70 0.00 0.07 1.44
        3.73 0.00 0.00 0.00 0.00 -4.60
        -4.80 30.30 0.01 999.00 -9999.00 20.00
        -2.30"
    );

    let (second, remaining) = split_record(remaining);
    println!("Second: {}", second);
    assert_eq!(
        second,
        "
        727730 170401/0800 1022.00 869.70 -0.36 274.90 0.00 74.00
        0.00 0.00 277.20 0.00 0.00 0.00
        0.00 1.00 0.50 0.00 0.07 0.34
        3.60 0.00 0.00 0.00 0.00 -3.70
        -5.30 35.40 0.01 999.00 -9999.00 20.00
        -2.78"
    );

    // The third record ends at the end of the input rather than at white space.
    let (third, remaining) = split_record(remaining);
    println!("Third: {}", third.trim());
    assert_eq!(
        third,
        "
        727730 170401/0900 1022.80 869.80 -0.46 274.80 0.00 74.00
        0.00 0.00 277.10 0.00 0.00 0.00
        0.00 0.90 0.80 0.00 0.07 -0.56
        3.50 0.00 0.00 0.00 0.00 -2.70
        -6.70 31.90 0.01 999.00 -9999.00 20.00
        -3.15"
    );

    // Nothing is left once all three records have been split off.
    assert_eq!(find_next_n_tokens(remaining, 33).unwrap(), None);
}