python_parser/
helpers.rs

1#[cfg(test)]
2use std::fmt::Debug;
3
4use unicode_xid::UnicodeXID;
5
6use nom::types::CompleteStr;
7use nom::Slice;
8use nom_locate::LocatedSpan;
9pub(crate) type StrSpan<'a> = LocatedSpan<CompleteStr<'a>>;
10
/// Like `ws!()`, but does not allow newlines.
///
/// The wrapped parser is run through `sep!` with `spaces_nonl` as the
/// separator; whatever `spaces_nonl` accepts after it is then consumed too,
/// and the wrapped parser's output is returned.
macro_rules! ws_nonl {
    ($i:expr, $($args:tt)*) => {{
        use nom::Convert;
        use nom::Err;

        let parsed = sep!($i, $crate::helpers::spaces_nonl, $($args)*);
        match parsed {
            Err(e) => Err(e),
            Ok((after_parser, output)) => match $crate::helpers::spaces_nonl(after_parser) {
                Err(e) => Err(Err::convert(e)),
                Ok((rest, _)) => Ok((rest, output)),
            },
        }
    }};
}
30
/// Like `ws!()`, but ignores comments as well
///
/// The wrapped parser is run through `sep!` with `spaces_nl` as the
/// separator; whatever `spaces_nl` accepts after it is then consumed too,
/// and the wrapped parser's output is returned.
macro_rules! ws_comm {
    ($i:expr, $($args:tt)*) => {{
        use nom::Convert;
        use nom::Err;

        let parsed = sep!($i, $crate::helpers::spaces_nl, $($args)*);
        match parsed {
            Err(e) => Err(e),
            Ok((after_parser, output)) => match $crate::helpers::spaces_nl(after_parser) {
                Err(e) => Err(Err::convert(e)),
                Ok((rest, _)) => Ok((rest, output)),
            },
        }
    }};
}
50
51named!(escaped_newline<StrSpan, ()>,
52  map!(terminated!(char!('\\'), char!('\n')), |_| ())
53);
54
55named!(pub spaces_nl<StrSpan, ()>,
56  map!(many0!(alt!(one_of!(" \t\x0c") => { |_|() } | escaped_newline | newline)), |_| ())
57);
58
59// Bottleneck:
60// named!(pub spaces_nonl<StrSpan, ()>,
61//   map!(many0!(alt!(one_of!(" \t\x0c") => { |_| () }|escaped_newline)), |_| ())
62// );
63// Rewritten as:
64pub fn spaces_nonl(i: StrSpan) -> Result<(StrSpan, ()), ::nom::Err<StrSpan>> {
65    let mut it = i.fragment.chars().enumerate().peekable();
66    while let Some((index, c)) = it.next() {
67        let next_char = it.peek().map(|&(_, c)| c);
68        match c {
69            ' ' | '\t' | '\x0c' => (),
70            '\\' if next_char.unwrap_or(' ') == '\n' => {
71                it.next();
72            }
73            _ => {
74                if index == 0 {
75                    return Ok((i, ()));
76                } else {
77                    return Ok((i.slice(index..), ()));
78                }
79            }
80        }
81    }
82    Ok((i.slice(i.fragment.len()..), ()))
83}
84
85named!(pub space_sep_nl<StrSpan, ()>,
86  map!(many1!(alt!(one_of!(" \t\x0c") => { |_|() } | escaped_newline | newline)), |_| ())
87);
88
89named!(pub space_sep_nonl<StrSpan, ()>,
90  map!(many1!(alt!(one_of!(" \t\x0c") => { |_| () } | escaped_newline)), |_| ())
91);
92
// Let me explain this ugliness.
//
// We allow newlines in expressions if and only if the newline is
// wrapped in parentheses, square brackets, or curly brackets.
// As any given subparser can be used either inside or outside one of
// these pairs, we need either:
//
// 1. a boolean argument to the subparser telling whether it is wrapped
//    in one of these pairs or not
// 2. two versions of each subparser
//
// The first option has the downside of requiring run-time checks, whereas
// the second one resolves everything at compile-time.
//
// Since I do not want to write each subparser twice, I'm writing them
// in the impl{} of a polymorphic structure, which has a static boolean
// argument corresponding to newlines, so monomorphizing the structure
// generates the two subparsers. Then, a simple constant propagation
// is able to get rid of the runtime checks for this boolean.
/// Compile-time flag telling whether newlines are treated as spaces.
pub(crate) trait AreNewlinesSpaces {
    /// `true` when newlines are treated as spaces.
    const VALUE: bool;
}

/// Marker type whose `VALUE` is `true`.
pub(crate) struct NewlinesAreSpaces;

/// Marker type whose `VALUE` is `false`.
pub(crate) struct NewlinesAreNotSpaces;

impl AreNewlinesSpaces for NewlinesAreSpaces {
    const VALUE: bool = true;
}

impl AreNewlinesSpaces for NewlinesAreNotSpaces {
    const VALUE: bool = false;
}
123
// Optional whitespace: `spaces_nl` when `ANS::VALUE` is true, `spaces_nonl`
// otherwise. A type named `ANS` must be in scope at the call site.
macro_rules! spaces {
    ( $i:expr, $($args:tt)* ) => {
        if ANS::VALUE {
            call!($i, $crate::helpers::spaces_nl, $($args)*)
        } else {
            call!($i, $crate::helpers::spaces_nonl, $($args)*)
        }
    }
}
132
// Runs the given parser with `spaces!()` on both sides and keeps only the
// parser's own output.
macro_rules! ws_auto {
    ( $i:expr, $($args:tt)* ) => {
        preceded!($i, spaces!(), terminated!($($args)*, spaces!()))
    }
}
138
// Mandatory whitespace: `space_sep_nl` when `ANS::VALUE` is true,
// `space_sep_nonl` otherwise. A type named `ANS` must be in scope at the
// call site.
macro_rules! space_sep {
    ( $i:expr, $($args:tt)* ) => {
        if ANS::VALUE {
            call!($i, $crate::helpers::space_sep_nl, $($args)*)
        } else {
            call!($i, $crate::helpers::space_sep_nonl, $($args)*)
        }
    }
}
147
148const KEYWORDS: [&'static str; 2] = ["yield", "import"];
149named!(pub name<StrSpan, String>,
150  do_parse!(
151    name: map!(
152      tuple!(
153        alt!(char!('_') | verify!(call!(::nom::anychar), |c| UnicodeXID::is_xid_start(c))),
154        take_while!(call!(|c| UnicodeXID::is_xid_continue(c)))
155      ), |(c, s)| format!("{}{}", c, s.fragment)
156    ) >>
157    verify!(tag!(""), |_| !KEYWORDS.contains(&&name[..])) >>
158    (name)
159  )
160);
161
162named!(pub word_end<StrSpan, ()>,
163  not!(verify!(peek!(::nom::anychar), |c| UnicodeXID::is_xid_continue(c)))
164);
165
/// Parses the literal `$kw` and then requires `word_end`, so the keyword is
/// rejected when an identifier character follows it directly.
///
/// `word_end` is named through `$crate::helpers`, like the parsers used by
/// the other macros of this file, so call sites do not need to import it.
macro_rules! keyword {
    ($i:expr, $kw:expr) => {
        terminated!($i, tag!($kw), call!($crate::helpers::word_end))
    };
}
171
172named!(pub newline<StrSpan, ()>,
173  map!(
174    many1!(
175      tuple!(
176        spaces_nonl,
177        opt!(preceded!(char!('#'), many0!(none_of!("\n")))),
178        char!('\n')
179      )
180    ),
181    |_| ()
182  )
183);
184
185named!(pub semicolon<StrSpan, ()>,
186  map!(ws_nonl!(char!(';')), |_| ())
187);
188
189/// Helper to make an instance of `StrSpan`, that can be used as the argument
190/// to other parsers.
191pub fn make_strspan(s: &str) -> StrSpan {
192    StrSpan::new(CompleteStr(s))
193}
194
/// Test helper: asserts that two parse results are equal.
///
/// For two successes, and for two `Failure(Context::Code(..))` errors, only
/// the span's `fragment` is compared alongside the value or error code. Any
/// other combination is compared as a whole. Panics if either side is
/// `Incomplete`.
#[cfg(test)]
pub(crate) fn assert_parse_eq<T: Debug + PartialEq>(
    left: Result<(StrSpan, T), ::nom::Err<StrSpan>>,
    right: Result<(StrSpan, T), ::nom::Err<StrSpan>>,
) {
    use nom::Context::Code;
    use nom::Err::{Failure, Incomplete};

    match (left, right) {
        (Ok((l_rest, l_value)), Ok((r_rest, r_value))) => {
            assert_eq!((l_rest.fragment, l_value), (r_rest.fragment, r_value))
        }
        (Err(Failure(Code(l_at, l_code))), Err(Failure(Code(r_at, r_code)))) => {
            assert_eq!((l_at.fragment, l_code), (r_at.fragment, r_code))
        }
        (Err(Incomplete(_)), _) => unreachable!(),
        (_, Err(Incomplete(_))) => panic!("We're only using complete strings here!"),
        (l, r) => assert_eq!(l, r),
    }
}
218
219pub(crate) fn first_word(i: StrSpan) -> Result<(StrSpan, &str), ::nom::Err<StrSpan>> {
220    map!(i, terminated!(call!(::nom::alpha), word_end), |s| s
221        .fragment
222        .0)
223}
224
// https://github.com/Geal/nom/pull/800
//
// Applies a parser one or more times, folding every output into an
// accumulator: `acc = $f(acc, output)`, starting from `$init`.
//
// * If the first application fails with `Error` or `Failure`, the same kind
//   of error is returned with `ErrorKind::Many1` positioned at `$i`.
// * After the first success, an `Error` simply ends the repetition and the
//   accumulator is returned together with the remaining input.
// * A `Failure` in a later application is propagated; an `Incomplete` is
//   reported through `nom::need_more`.
// * If an application succeeds without consuming anything, the loop stops;
//   unless the input is at EOF this is reported as a `Many1` failure.
//
// The second arm accepts a plain function or path instead of a macro call
// and forwards it as `call!($f)`.
macro_rules! fold_many1_fixed(
  ($i:expr, $submac:ident!( $($args:tt)* ), $init:expr, $f:expr) => (
    {
      use nom;
      use nom::lib::std::result::Result::*;
      use nom::{Err,Needed,InputLength,Context,AtEof};

      match $submac!($i, $($args)*) {
        Err(Err::Error(_))      => Err(Err::Error(
          error_position!($i, nom::ErrorKind::Many1)
        )),
        Err(Err::Failure(_))      => Err(Err::Failure(
          error_position!($i, nom::ErrorKind::Many1)
        )),
        Err(Err::Incomplete(i)) => Err(Err::Incomplete(i)),
        Ok((i1,o1))   => {
          let f = $f;
          let mut acc = f($init, o1);
          let mut input  = i1;
          let mut incomplete: nom::lib::std::option::Option<Needed> =
            nom::lib::std::option::Option::None;
          let mut failure: nom::lib::std::option::Option<Context<_,_>> =
            nom::lib::std::option::Option::None;
          loop {
            match $submac!(input, $($args)*) {
              Err(Err::Error(_))                    => {
                break;
              },
              Err(Err::Incomplete(i)) => {
                incomplete = nom::lib::std::option::Option::Some(i);
                break;
              },
              Err(Err::Failure(e)) => {
                failure = nom::lib::std::option::Option::Some(e);
                break;
              },
              Ok((i, o)) => {
                // No progress: stop, otherwise the loop would never end.
                if i.input_len() == input.input_len() {
                  if !i.at_eof() {
                    failure = nom::lib::std::option::Option::Some(error_position!(i, nom::ErrorKind::Many1));
                  }
                  break;
                }
                acc = f(acc, o);
                input = i;
              }
            }
          }

          match failure {
            nom::lib::std::option::Option::Some(e) => Err(Err::Failure(e)),
            nom::lib::std::option::Option::None    => match incomplete {
              nom::lib::std::option::Option::Some(i) => nom::need_more($i, i),
              nom::lib::std::option::Option::None    => Ok((input, acc))
            }
          }
        }
      }
    }
  );
  // Was `fold_many_fixed1!`, a macro that does not exist, so this arm could
  // never compile. The trailing `;` is dropped too, since the expansion is
  // used as an expression.
  ($i:expr, $f:expr, $init:expr, $fold_f:expr) => (
    fold_many1_fixed!($i, call!($f), $init, $fold_f)
  );
);
290
// Consumes exactly `$nb_spaces` space characters, then requires that the
// next character is not another space; an extra space is reported with
// `PyParseError::UnexpectedIndent` through `return_error!`.
macro_rules! indent {
    ($i:expr, $nb_spaces:expr) => {{
        use nom::ErrorKind;
        use $crate::errors::PyParseError;
        match count!($i, char!(' '), $nb_spaces) {
            Err(e) => Err(e),
            // `return_error!` stays inside its own closure, exactly as it did
            // in the former `and_then` closure, so that any early `return` it
            // performs only leaves this closure and not the caller.
            Ok((after_indent, _)) => (|| {
                return_error!(
                    after_indent,
                    ErrorKind::Custom(PyParseError::UnexpectedIndent.into()),
                    not!(peek!(char!(' ')))
                )
            })(),
        }
    }};
}