duskphantom_frontend/parse/
parser.rs

1// Copyright 2024 Duskphantom Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15// SPDX-License-Identifier: Apache-2.0
16
17use hexf_parse::parse_hexf32;
18use winnow::error::{ErrMode, ErrorKind};
19
20use super::*;
21
22/// Parser of a word that begins with letter,
23/// and continues with letters or numbers.
24/// For example, `int`, `x114ee`
25pub fn word(input: &mut &str) -> PResult<String> {
26    let head = one_of(('A'..='Z', 'a'..='z', '_')).parse_next(input)?;
27    let rest = take_while(0.., ('A'..='Z', 'a'..='z', '0'..='9', '_')).parse_next(input)?;
28    Ok(format!("{}{}", head, rest))
29}
30
31/// List of all keywords.
32const KEYWORDS: [&str; 10] = [
33    "void", "int", "float", "break", "continue", "return", "if", "else", "do", "while",
34];
35
36/// Parser of an identifier, a word which is not a keyword.
37pub fn ident(input: &mut &str) -> PResult<String> {
38    word.verify(|x| !KEYWORDS.contains(&x)).parse_next(input)
39}
40
41/// Match decimal or hexadecimal numbers.
42pub fn match_numbers<'a>(input: &mut &'a str, is_hex: bool, min_count: usize) -> PResult<&'a str> {
43    if is_hex {
44        take_while(min_count.., ('0'..='9', 'a'..='f', 'A'..='F')).parse_next(input)
45    } else {
46        take_while(min_count.., '0'..='9').parse_next(input)
47    }
48}
49
50/// Parser of a usize.
51pub fn usize(input: &mut &str) -> PResult<usize> {
52    let is_oct_or_hex = opt("0").parse_next(input)?.is_some();
53    let is_hex = opt(alt(("x", "X"))).parse_next(input)?.is_some();
54    let radix = if is_oct_or_hex {
55        8
56    } else if is_hex {
57        16
58    } else {
59        10
60    };
61
62    // Octal number matches 8 and 9, but will error anyways
63    // Empty number throws recoverable error
64    let number = match_numbers(input, is_hex, 1)?;
65    usize::from_str_radix(number, radix)
66        .map_err(|_| ErrMode::from_error_kind(input, ErrorKind::Verify).cut())
67}
68
69/// Parser of a constant number.
70pub fn constant_number(input: &mut &str) -> PResult<Expr> {
71    let hex_prefix = opt(alt(("0x", "0X"))).parse_next(input)?.unwrap_or("");
72    let is_hex = !hex_prefix.is_empty();
73    let exponent_charset: (&str, &str) = if is_hex { ("p", "P") } else { ("e", "E") };
74    let before_point = match_numbers(input, is_hex, 0)?;
75    let point = opt(".").parse_next(input)?.unwrap_or("");
76    let after_point = match_numbers(input, is_hex, 0)?;
77    let exponent_indicator = opt(alt(exponent_charset)).parse_next(input)?.unwrap_or("");
78    if point.is_empty() && exponent_indicator.is_empty() {
79        // Number is empty, throw recoverable error
80        if before_point.is_empty() {
81            return Err(ErrMode::from_error_kind(input, ErrorKind::Verify));
82        }
83
84        // If number exists, but no point and exponent, parse `before_point` as int instead
85        let radix = if is_hex {
86            16
87        } else if before_point.starts_with('0') {
88            8
89        } else {
90            10
91        };
92        return i32::from_str_radix(before_point, radix)
93            .map_err(|_| ErrMode::from_error_kind(input, ErrorKind::Verify).cut())
94            .map(Expr::Int);
95    }
96
97    // Read exponent value only if there is exponent indicator ("e" | "E" | "p" | "P")
98    let number = if exponent_indicator.is_empty() {
99        format!("{}{}{}{}", hex_prefix, before_point, point, after_point)
100    } else {
101        let sign = opt(alt(("+", "-"))).parse_next(input)?.unwrap_or("");
102        let exponent = match_numbers(input, false, 1).map_err(|e| e.cut())?;
103        format!(
104            "{}{}{}{}{}{}{}",
105            hex_prefix, before_point, point, after_point, exponent_indicator, sign, exponent
106        )
107    };
108
109    // Parse the number as float
110    if is_hex {
111        parse_hexf32(&number, false)
112            .map_err(|_| ErrMode::from_error_kind(input, ErrorKind::Verify).cut())
113            .map(Expr::Float)
114    } else {
115        number
116            .parse()
117            .map_err(|_| ErrMode::from_error_kind(input, ErrorKind::Verify).cut())
118            .map(Expr::Float)
119    }
120}
121
122/// Parser of a string literal.
123pub fn string_lit(input: &mut &str) -> PResult<String> {
124    // TODO escape
125    let _ = '"'.parse_next(input)?;
126    let content = take_until(0.., '"').parse_next(input)?;
127    let _ = '"'.parse_next(input)?;
128    Ok(content.to_string())
129}
130
131/// Parser of a char literal.
132pub fn char_lit(input: &mut &str) -> PResult<char> {
133    // TODO escape
134    let _ = '\''.parse_next(input)?;
135    let content = any.parse_next(input)?;
136    let _ = '\''.parse_next(input)?;
137    Ok(content)
138}
139
140/// Parser of blank.
141pub fn blank(input: &mut &str) -> PResult<()> {
142    (multispace0, alt((line_comment, block_comment, empty)))
143        .value(())
144        .parse_next(input)
145}
146
147/// Parser of blank beginning with line comment.
148pub fn line_comment(input: &mut &str) -> PResult<()> {
149    ("//", opt(take_until(0.., '\n')), blank)
150        .value(())
151        .parse_next(input)
152}
153
154/// Parser of blank beginning with block comment.
155pub fn block_comment(input: &mut &str) -> PResult<()> {
156    ("/*", cut_err(take_until(0.., "*/")), "*/", blank)
157        .value(())
158        .parse_next(input)
159}
160
161/// Parser of something wrapped in `()`.
162pub fn paren<'s, Output, InnerParser>(
163    mut parser: InnerParser,
164) -> impl Parser<&'s str, Output, ContextError>
165where
166    InnerParser: Parser<&'s str, Output, ContextError>,
167{
168    trace("paren", move |input: &mut &'s str| {
169        let _ = token("(").parse_next(input)?;
170        let output = parser.parse_next(input)?;
171        let _ = token(")").parse_next(input)?;
172        Ok(output)
173    })
174}
175
176/// Parser of something wrapped in `[]`.
177pub fn bracket<'s, Output, InnerParser>(
178    mut parser: InnerParser,
179) -> impl Parser<&'s str, Output, ContextError>
180where
181    InnerParser: Parser<&'s str, Output, ContextError>,
182{
183    trace("bracket", move |input: &mut &'s str| {
184        let _ = token("[").parse_next(input)?;
185        let output = parser.parse_next(input)?;
186        let _ = token("]").parse_next(input)?;
187        Ok(output)
188    })
189}
190
191/// Parser of something wrapped in `{}`.
192pub fn curly<'s, Output, InnerParser>(
193    mut parser: InnerParser,
194) -> impl Parser<&'s str, Output, ContextError>
195where
196    InnerParser: Parser<&'s str, Output, ContextError>,
197{
198    trace("curly", move |input: &mut &'s str| {
199        let _ = token("{").parse_next(input)?;
200        let output = parser.parse_next(input)?;
201        let _ = token("}").parse_next(input)?;
202        Ok(output)
203    })
204}
205
206/// Parser of a token.
207pub fn token<'s>(mut parser: &'static str) -> impl Parser<&'s str, &'s str, ContextError> {
208    // Get the first character and length of the token
209    let head = parser.chars().next().unwrap();
210    let len = parser.chars().count();
211    trace("token", move |input: &mut &'s str| {
212        let output = parser.parse_next(input)?;
213
214        // The next character after a token can not connect with the token
215        if head.is_alphanum() {
216            let _ = alt((peek(any), empty.value(' ')))
217                .verify(|x: &char| !x.is_alphanum() && *x != '_')
218                .parse_next(input)?;
219        }
220
221        // The next character of '>' can not be '>' or '='
222        if head == '>' && len == 1 {
223            let _ = alt((peek(any), empty.value(' ')))
224                .verify(|x: &char| *x != '>' && *x != '=')
225                .parse_next(input)?;
226        }
227
228        // The next character of '<' can not be '<' or '='
229        if head == '<' && len == 1 {
230            let _ = alt((peek(any), empty.value(' ')))
231                .verify(|x: &char| *x != '<' && *x != '=')
232                .parse_next(input)?;
233        }
234
235        // The next character of '=' can not be '='
236        if head == '=' && len == 1 {
237            let _ = alt((peek(any), empty.value(' ')))
238                .verify(|x: &char| *x != '=')
239                .parse_next(input)?;
240        }
241
242        // The next character of '!' can not be '='
243        if head == '!' && len == 1 {
244            let _ = alt((peek(any), empty.value(' ')))
245                .verify(|x: &char| *x != '=')
246                .parse_next(input)?;
247        }
248
249        // The next character of '&' can not be '&'
250        if head == '&' && len == 1 {
251            let _ = alt((peek(any), empty.value(' ')))
252                .verify(|x: &char| *x != '&')
253                .parse_next(input)?;
254        }
255
256        // The next character of '|' can not be '|'
257        if head == '|' && len == 1 {
258            let _ = alt((peek(any), empty.value(' ')))
259                .verify(|x: &char| *x != '|')
260                .parse_next(input)?;
261        }
262
263        // Consume some optional blanks
264        blank(input)?;
265        Ok(output)
266    })
267}
268
269/// Parser of something ending with zero or more spaces.
270pub fn pad<'s, Output, InnerParser>(
271    mut parser: InnerParser,
272) -> impl Parser<&'s str, Output, ContextError>
273where
274    InnerParser: Parser<&'s str, Output, ContextError>,
275{
276    trace("pad", move |input: &mut &'s str| {
277        let output = parser.parse_next(input)?;
278        blank(input)?;
279        Ok(output)
280    })
281}
282
283/// Boxed one-time closure that converts Box<T> to T.
284pub struct BoxF<T>(Box<dyn FnOnce(Box<T>) -> T>);
285
286impl<T> BoxF<T> {
287    pub fn new<F>(f: F) -> Self
288    where
289        F: FnOnce(Box<T>) -> T + 'static,
290    {
291        BoxF(Box::new(f))
292    }
293
294    pub fn apply(self, t: T) -> T {
295        self.0(Box::new(t))
296    }
297}
298
299/// Left recursion, function depends on tail parser.
300///
301/// Different from the usual mutual-recursive implementation,
302/// this implementation generates all closures first,
303/// and then apply the result continuously.
304///
305/// Writing mutual-recursive function for each operator is too verbose,
306/// and for left-recurse currying is required, bringing lifetime problems,
307/// so I made a procedual version for simplicity.
308///
309/// Note that `lrec` has built-in memoization, so parsing would be O(n).
310/// Benchmark results show that lrec / rrec has slightly better performance
311/// than hand-written recursive version.
312///
313/// `head`: parser of the FIRST element of left-recursive chain.
314/// `tail`: parser of ALL later elements, returning MUTATION on `head`.
315pub fn lrec<I, OH, E, PH, PT>(head: PH, tail: PT) -> impl Parser<I, OH, E>
316where
317    I: Stream + StreamIsPartial + Compare<char>,
318    E: ParserError<I>,
319    PH: Parser<I, OH, E>,
320    PT: Parser<I, Vec<BoxF<OH>>, E>,
321{
322    (head, tail).map(move |(base, vec): (_, Vec<BoxF<OH>>)| {
323        let mut res = base;
324        for ix in vec {
325            res = ix.apply(res);
326        }
327        res
328    })
329}
330
331/// Right recursion, function depends on init parser.
332pub fn rrec<I, OL, E, PL, PI>(init: PI, last: PL) -> impl Parser<I, OL, E>
333where
334    I: Stream + StreamIsPartial + Compare<char>,
335    E: ParserError<I>,
336    PI: Parser<I, Vec<BoxF<OL>>, E>,
337    PL: Parser<I, OL, E>,
338{
339    (init, last).map(move |(vec, base): (Vec<BoxF<OL>>, _)| {
340        let mut res = base;
341        for ix in vec.into_iter().rev() {
342            res = ix.apply(res);
343        }
344        res
345    })
346}