gchemol_parser/
parsers.rs

1// [[file:../parser.note::*docs][docs:1]]
2//! Selected and extra winnow parser combinators
3// docs:1 ends here
4
5// [[file:../parser.note::273abf0b][273abf0b]]
6use crate::common::*;
7
8use winnow::error::StrContext;
9use winnow::error::{ContextError, ParserError};
10use winnow::stream::Stream;
11// 273abf0b ends here
12
13// [[file:../parser.note::0512156a][0512156a]]
14pub use winnow::ascii::{alpha0, alpha1, digit0, digit1, line_ending, space0, space1};
15pub use winnow::combinator::cut_err;
16pub use winnow::combinator::rest;
17pub use winnow::combinator::seq;
18pub use winnow::combinator::{delimited, preceded, repeat, repeat_till, separated, terminated};
19pub use winnow::prelude::*;
20pub use winnow::Parser;
21
22pub use line_ending as eol;
23pub use rest_line as read_until_eol;
24// 0512156a ends here
25
26// [[file:../parser.note::fb1326ab][fb1326ab]]
27/// Create context label
28pub fn label(s: &'static str) -> StrContext {
29    StrContext::Label(s)
30}
31
32/// Convert winnow error to anyhow Error with `input` context
33pub fn parse_error(e: winnow::error::ParseError<&str, winnow::error::ContextError>, input: &str) -> Error {
34    anyhow!("found parse error:\n{:}\ninput={input:?}", e.to_string())
35}
36
37/// Anything except whitespace, this parser will not consume "\n" character
38pub fn not_space<'a>(input: &mut &'a str) -> PResult<&'a str> {
39    winnow::token::take_till(1.., |c| " \t\r\n".contains(c))
40        .context(label("not_space"))
41        .parse_next(input)
42}
43
44/// Read a new line including eol (\n) or consume the rest if there is no eol
45/// char.
46pub fn read_line<'a>(s: &mut &'a str) -> PResult<&'a str> {
47    use winnow::ascii::till_line_ending;
48    use winnow::combinator::opt;
49
50    // if there is no newline in `s`, take the whole str
51    let o = (till_line_ending, opt(line_ending))
52        .recognize()
53        .context(label("read_line"))
54        .parse_next(s)?;
55    Ok(o)
56}
57
58/// Take the rest line. The line ending is not included.
59pub fn rest_line<'a>(input: &mut &'a str) -> PResult<&'a str> {
60    use winnow::ascii::till_line_ending;
61    terminated(till_line_ending, line_ending).context(label("rest_line")).parse_next(input)
62}
63
64/// Take and consuming to `literal`.
65pub fn jump_to<'a>(literal: &str) -> impl FnMut(&mut &str) -> PResult<()> + '_ {
66    use winnow::token::take_until;
67    move |input: &mut &str| {
68        let _: (&str, &str) = (take_until(0.., literal), literal).context(label("jump_to")).parse_next(input)?;
69        Ok(())
70    }
71}
72
73/// Take until found `literal`. The `literal` will not be consumed.
74pub fn jump_until<'a>(literal: &str) -> impl FnMut(&mut &str) -> PResult<()> + '_ {
75    use winnow::token::take_until;
76    move |input: &mut &str| {
77        let _: &str = take_until(0.., literal).context(label("jump_until")).parse_next(input)?;
78        Ok(())
79    }
80}
81
82/// A combinator that takes a parser `inner` and produces a parser
83/// that also consumes both leading and trailing whitespace, returning
84/// the output of `inner`.
85pub fn ws<'a, ParseInner, Output, Error>(inner: ParseInner) -> impl Parser<&'a str, Output, Error>
86where
87    ParseInner: Parser<&'a str, Output, Error>,
88    Error: ParserError<&'a str>,
89{
90    delimited(space0, inner, space0)
91}
92
93/// Keep reading lines until the innner parser produces a result
94pub fn skip_line_till<'a, O>(inner: impl Parser<&'a str, O, ContextError>) -> impl Parser<&'a str, (), ContextError> {
95    repeat_till(0.., rest_line, inner).map(|_: (Vec<_>, _)| ())
96}
97// fb1326ab ends here
98
99// [[file:../parser.note::3d14b516][3d14b516]]
100/// Recognize one or more decimal digits, optionally preceded by sign
101pub fn recognize_integer<'i>(input: &mut &'i str) -> PResult<&'i str> {
102    use winnow::combinator::opt;
103    use winnow::token::one_of;
104
105    let r = (opt(one_of(['+', '-'])), cut_err(digit1)).recognize().parse_next(input)?;
106    Ok(r)
107}
108
109/// Match one unsigned integer: 123
110pub fn unsigned_integer<'a>(input: &mut &'a str) -> PResult<usize> {
111    digit1.try_map(|x: &str| x.parse()).context(label("usize")).parse_next(input)
112}
113
114/// Match one signed integer: -123 or +123
115pub fn signed_integer(s: &mut &str) -> PResult<isize> {
116    recognize_integer.try_map(|x: &str| x.parse::<isize>()).parse_next(s)
117}
118
119/// Parse a line containing an unsigned integer number.
120pub fn read_usize(s: &mut &str) -> PResult<usize> {
121    // allow white spaces
122    let p = delimited(space0, unsigned_integer, space0);
123    terminated(p, line_ending).parse_next(s)
124}
125
126/// Parse a line containing many unsigned numbers
127pub fn read_usize_many(s: &mut &str) -> PResult<Vec<usize>> {
128    let x = seq! {
129        _: space0,
130        separated(1.., unsigned_integer, space1),
131        _: space0,
132        _: line_ending,
133    }
134    .parse_next(s)?;
135    Ok(x.0)
136}
137
138pub use self::signed_integer as signed_digit;
139pub use self::unsigned_integer as unsigned_digit;
140// 3d14b516 ends here
141
142// [[file:../parser.note::4ef79da3][4ef79da3]]
143/// Parse a f64 float number
144pub fn double(input: &mut &str) -> PResult<f64> {
145    use winnow::ascii::float;
146    float(input)
147}
148
149/// Parse a normal float number. The D format code for scientific
150/// (exponential) notation is also supported.
151pub fn sci_double<'i>(input: &mut &'i str) -> PResult<f64> {
152    use winnow::combinator::alt;
153    use winnow::combinator::opt;
154    use winnow::stream::Located;
155    use winnow::token::one_of;
156
157    // e.g. -1.34D+8
158    let pre_exponent = (
159        opt(one_of(['+', '-'])),
160        alt(((digit1, opt(('.', opt(digit1)))).map(|_| ()), ('.', digit1).map(|_| ()))),
161    )
162        .recognize()
163        .parse_next(input)?;
164
165    let f = if let Some(exponent) = opt(preceded(one_of(['e', 'E', 'D', 'd']), recognize_integer)).parse_next(input)? {
166        format!("{pre_exponent}E{exponent}")
167    } else {
168        format!("{pre_exponent}")
169    }
170    .parse()
171    .unwrap();
172    Ok(f)
173}
174
175/// Consume three float numbers separated by one or more spaces. Return xyz array.
176pub fn xyz_array(s: &mut &str) -> PResult<[f64; 3]> {
177    let x = seq! {double, _: space1, double, _: space1, double}.parse_next(s)?;
178    Ok([x.0, x.1, x.2])
179}
180
181/// Parse a line containing a float number possibly surrounded by spaces
182pub fn read_double(s: &mut &str) -> PResult<f64> {
183    // allow white spaces
184    let p = delimited(space0, double, space0);
185    terminated(p, line_ending).parse_next(s)
186}
187
188/// Parse a line containing many float numbers
189pub fn read_double_many(s: &mut &str) -> PResult<Vec<f64>> {
190    let x = seq! {
191        _: space0,
192        separated(1.., double, space1),
193        _: space0,
194        _: line_ending,
195    }
196    .parse_next(s)?;
197    Ok(x.0)
198}
199// 4ef79da3 ends here
200
201// [[file:../parser.note::838e8dea][838e8dea]]
/// Convert a string to a float.
///
/// This method performs certain checks that are specific to quantum
/// chemistry output:
///
/// - a non-empty string made only of asterisks (`*****`), which signals a
///   numerical problem in the output, is converted to NaN;
/// - a Fortran style exponent written with `D` or `d` instead of `E`
///   (`14.12D4`, `1.5d-3`) is accepted.
///
/// Returns `None` when the string cannot be understood as a float. This
/// includes the empty string.
pub fn parse_float(s: &str) -> Option<f64> {
    // `all` is vacuously true for an empty string, so guard against it:
    // an empty field is not an overflow marker.
    if !s.is_empty() && s.bytes().all(|b| b == b'*') {
        return Some(f64::NAN);
    }

    s.parse().ok().or_else(|| {
        // Retry with the first Fortran exponent marker replaced by `E`.
        let (mantissa, exponent) = s.split_once(|c: char| matches!(c, 'D' | 'd'))?;
        format!("{mantissa}E{exponent}").parse().ok()
    })
}
216
#[test]
fn test_fortran_float() {
    // plain number, `E` exponent and Fortran `D` exponent
    assert_eq!(parse_float("14"), Some(14.0));
    assert_eq!(parse_float("14.12E4"), Some(14.12E4));
    assert_eq!(parse_float("14.12D4"), Some(14.12E4));

    // a field filled with asterisks is reported as NaN
    assert!(matches!(parse_float("****"), Some(v) if v.is_nan()));
}
231// 838e8dea ends here
232
233// [[file:../parser.note::10e5dba2][10e5dba2]]
#[test]
fn test_sci_double() -> PResult<()> {
    // (input, expected value)
    let cases = [("-12.34d-1", -1.234), ("-12", -12.0), ("-12.3E-1", -1.23)];
    for (text, expected) in cases {
        let (_, value) = sci_double.parse_peek(text)?;
        assert_eq!(value, expected);
    }

    Ok(())
}
250
#[test]
fn test_ws() -> PResult<()> {
    // blanks on both sides, trailing blank only, trailing newline only
    for text in [" 123 ", "123 ", "123\n"] {
        let (_, digits) = ws(digit1).parse_peek(text)?;
        assert_eq!(digits, "123");
    }

    Ok(())
}
267
#[test]
fn test_jump() {
    // `jump_to` consumes the literal itself
    let (remaining, _) = jump_to("aa").parse_peek("xxbcc aa cc").unwrap();
    assert_eq!(remaining, " cc");

    // skip whole lines until the marker is found at the start of a line
    let log = " Leave Link  103 at Fri Apr 19 13:58:11 2019, MaxMem=    33554432 cpu:         0.0
 (Enter /home/ybyygu/gaussian/g09/l202.exe)
                          Input orientation:";
    let marker = (space1, "Input orientation:");
    let (remaining, _) = skip_line_till(marker).parse_peek(log).unwrap();
    assert!(remaining.is_empty());
}
281
#[test]
fn test_read_line() {
    // each call returns one line including its line ending
    let mut remaining = "first line\nsecond line\r\nthird line\n";
    for expected in ["first line\n", "second line\r\n", "third line\n"] {
        let (tail, line) = read_line.parse_peek(remaining).unwrap();
        assert_eq!(line, expected);
        remaining = tail;
    }
    assert_eq!(remaining, "");

    // when there is no newline
    let txt = "no newline at the end";
    let (tail, line) = read_line.parse_peek(txt).unwrap();
    assert_eq!(line, txt);
    assert_eq!(tail, "");

    // `not_space` stops in front of the first blank or line ending
    let cases = [("no", "no"), ("no ", "no"), ("no-a\n", "no-a"), ("no+b\t", "no+b")];
    for (text, expected) in cases {
        let (_, word) = not_space.parse_peek(text).unwrap();
        assert_eq!(word, expected);
    }

    // a leading blank is an error
    assert!(not_space.parse_peek(" no-a\n").is_err());
}
319
#[test]
fn test_read_many() {
    let (_, numbers) = read_usize_many.parse_peek("11 2 3 4 5\r\n\n").expect("usize parser");
    assert_eq!(5, numbers.len());

    // surrounding blanks and both kinds of line ending are accepted
    for padded in [" 11 2 3 4 5 \n", "11 2 3 4 5 \r\n"] {
        read_usize_many.parse_peek(padded).expect("usize parser");
    }

    let (_, floats) = read_double_many.parse_peek(" 1.2  3.4 -5.7 0.2 \n").expect("f64 parser");
    assert_eq!(4, floats.len());
}
331
#[test]
fn test_signed_digit() {
    // (input, expected value, message shown when parsing fails)
    let signed_cases: [(&str, isize, &str); 3] = [
        ("-123", -123, "signed digit, minus"),
        ("123", 123, "signed digit, normal"),
        ("+123", 123, "signed digit, plus"),
    ];
    for (text, expected, why) in signed_cases {
        let (_, value) = signed_digit.parse_peek(text).expect(why);
        assert_eq!(value, expected);
    }

    // only the leading digits are consumed
    let (rest, n) = unsigned_digit.parse_peek("12x").unwrap();
    assert_eq!(n, 12);
    assert_eq!(rest, "x");

    // a whole line with surrounding blanks
    let (rest, n) = read_usize.parse_peek(" 12 \n").unwrap();
    assert_eq!(n, 12);
    assert_eq!(rest, "");
}
352// 10e5dba2 ends here