py_ast/parse/
expr.rs

1use py_lex::{
2    ops::{OperatorAssociativity, OperatorTypes, Operators},
3    syntax::*,
4};
5
6use super::*;
7use crate::complex_pu;
8
/// A character literal whose text has already been decoded into a `char`.
#[derive(Debug, Clone)]
pub struct CharLiteral {
    /// The decoded character (escape sequences already resolved).
    pub parsed: char,
}
13
14fn escape(src: &Token, c: char) -> Result<char> {
15    Result::Ok(match c {
16        '_' => '_',
17        't' => '\t',
18        'n' => '\n',
19        's' => ' ',
20        _ => return src.throw(format!("Invalid or unsupported escape character: {}", c)),
21    })
22}
23
24impl ParseUnit<Token> for CharLiteral {
25    type Target = CharLiteral;
26
27    fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
28        p.r#match(Symbol::Char)?;
29        let unparsed = p.parse::<Token>()?;
30        if !(unparsed.len() == 1 || unparsed.len() == 2 && unparsed.starts_with('_')) {
31            return unparsed.throw(format!("Invalid CharLiteral {}", unparsed));
32        }
33        let parsed = if unparsed.len() == 1 {
34            unparsed.as_bytes()[0] as char
35        } else {
36            escape(&unparsed, unparsed.as_bytes()[1] as _)?
37        };
38
39        Ok(CharLiteral { parsed })
40    }
41}
42
/// A string literal whose `_`-escapes have already been resolved.
#[derive(Debug, Clone)]
pub struct StringLiteral {
    /// The decoded string contents.
    pub parsed: String,
}
47
48impl ParseUnit<Token> for StringLiteral {
49    type Target = StringLiteral;
50
51    fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
52        p.r#match(Symbol::String)?;
53        let unparsed = p.parse::<Token>()?;
54
55        let mut next_escape = false;
56        let mut parsed = String::new();
57        for c in unparsed.chars() {
58            if next_escape {
59                next_escape = false;
60                parsed.push(escape(&unparsed, c)?);
61            } else if c == '_' {
62                next_escape = true
63            } else {
64                parsed.push(c)
65            }
66        }
67        if next_escape {
68            return unparsed.throw("Invalid escape! maybe you losted a character");
69        }
70
71        Ok(StringLiteral { parsed })
72    }
73}
74
/// A numeric literal, either integral or fractional.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumberLiteral {
    /// A literal written with the `f` separator (e.g. `1919f810`, `114514f`).
    Float(f64),
    /// A literal written without the `f` separator.
    Digit(usize),
}
80
81impl ParseUnit<Token> for NumberLiteral {
82    type Target = NumberLiteral;
83
84    fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
85        let number = p.parse::<Token>()?; // digit
86        let mut int_dec = number.split('f');
87
88        let Some(int) = int_dec.next() else {
89            unreachable!()
90        };
91        let int = match int.parse::<usize>() {
92            Ok(int) => int,
93            Err(e) => return p.unmatch(e),
94        };
95
96        let dec = match int_dec.next() {
97            Some("") => 0.0,
98            Some(dec) => match dec.parse::<usize>() {
99                Ok(0) => 0.0,
100                Ok(dec) => dec as f64 / 10f64.powi(dec.ilog10() as _),
101                Err(e) => return p.unmatch(e),
102            },
103            None => return Ok(Self::Digit(int)),
104        };
105        // check that if anything left
106        if let Some(next) = int_dec.next() {
107            return p.unmatch(format!("unexpect {}", next));
108        }
109
110        Ok(Self::Float(int as f64 + dec))
111    }
112}
113
/// The argument list of a function call.
#[derive(Debug, Clone)]
pub struct FnCallArgs {
    /// The argument expressions, in source order.
    pub args: Vec<Expr>,
}
118
119impl std::ops::Deref for FnCallArgs {
120    type Target = Vec<Expr>;
121
122    fn deref(&self) -> &Self::Target {
123        &self.args
124    }
125}
126
127impl ParseUnit<Token> for FnCallArgs {
128    type Target = FnCallArgs;
129
130    fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
131        p.r#match(Symbol::FnCallL)?;
132        let Some(arg) = p.parse::<Expr>().apply(mapper::Try)? else {
133            p.r#match(Symbol::FnCallR).apply(mapper::MustMatch)?;
134            return Ok(FnCallArgs { args: vec![] });
135        };
136
137        let mut args = vec![arg];
138
139        while p.r#match(Symbol::Semicolon).is_ok() {
140            args.push(p.parse::<Expr>()?);
141        }
142
143        p.r#match(Symbol::FnCallR).apply(mapper::MustMatch)?;
144
145        Ok(FnCallArgs { args })
146    }
147}
148
/// A function call: an argument list followed by the callee's name.
#[derive(Debug, Clone)]
pub struct FnCall {
    /// Span recorded when the call was parsed.
    span: Span,
    /// Name of the function being called.
    pub fn_name: Ident,
    /// Arguments passed to the function.
    pub args: FnCallArgs,
}
155
156impl WithSpan for FnCall {
157    fn get_span(&self) -> Span {
158        self.span
159    }
160}
161
162impl ParseUnit<Token> for FnCall {
163    type Target = FnCall;
164
165    fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
166        let args = p.parse::<FnCallArgs>()?;
167        let fn_name = p.parse::<Ident>()?;
168
169        Ok(FnCall {
170            fn_name,
171            args,
172            span: p.get_span(),
173        })
174    }
175}
176
/// A variable reference; syntactically just an identifier.
pub type Variable = Ident;
178
/// An array literal.
#[derive(Debug, Clone)]
pub struct Array {
    /// The element expressions, in source order.
    elements: Vec<Expr>,
}
183
184impl std::ops::Deref for Array {
185    type Target = Vec<Expr>;
186
187    fn deref(&self) -> &Self::Target {
188        &self.elements
189    }
190}
191
192impl ParseUnit<Token> for Array {
193    type Target = Array;
194
195    fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
196        p.r#match(Symbol::ArrayL)?;
197        let mut elements = vec![];
198        while let Some(expr) = p.parse::<Expr>().apply(mapper::Try)? {
199            elements.push(expr);
200        }
201        p.r#match(Symbol::ArrayR).apply(mapper::MustMatch)?;
202        Ok(Self { elements })
203    }
204}
205
// Declares `AtomicExpr`, the operand of an expression, from the six parse
// units listed below via the crate's `complex_pu!` macro.
// NOTE(review): presumably this expands to an enum that tries each
// alternative in the listed order - confirm against the macro definition.
complex_pu! {
    cpu AtomicExpr {
        CharLiteral,
        StringLiteral,
        NumberLiteral,
        FnCall,
        Array,
        Variable
    }
}
216
/// One element of an expression's item sequence: an operand or an operator.
#[derive(Debug, Clone)]
pub enum ExprItem {
    /// An operand (literal, call, array or variable) together with its span.
    AtomicExpr(PU<AtomicExpr>),
    /// An operator (brackets included) together with its span.
    Operators(PU<Operators>),
}
222
223impl WithSpan for ExprItem {
224    fn get_span(&self) -> Span {
225        match self {
226            ExprItem::AtomicExpr(ws) => ws.get_span(),
227            ExprItem::Operators(ws) => ws.get_span(),
228        }
229    }
230}
231
232impl From<PU<Operators>> for ExprItem {
233    fn from(v: PU<Operators>) -> Self {
234        Self::Operators(v)
235    }
236}
237
238impl From<PU<AtomicExpr>> for ExprItem {
239    fn from(v: PU<AtomicExpr>) -> Self {
240        Self::AtomicExpr(v)
241    }
242}
243
/// Marker parse unit: parsing it yields the raw `Vec<ExprItem>` of one
/// expression, in the order the items appear in the source.
#[derive(Debug, Clone)]
struct ExprItems;
246
247impl ParseUnit<Token> for ExprItems {
248    type Target = Vec<ExprItem>;
249
250    fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
251        let get_unary_op = |p: &mut Parser<Token>| {
252            p.parse::<PU<Operators>>().apply(mapper::Satisfy::new(
253                |op: &PU<Operators>| op.associativity() == OperatorAssociativity::Unary,
254                |e| e.unmatch(""),
255            ))
256        };
257        let get_binary_op = |p: &mut Parser<Token>| {
258            p.parse::<PU<Operators>>().apply(mapper::Satisfy::new(
259                |op: &PU<Operators>| op.associativity() == OperatorAssociativity::Binary,
260                |e| e.unmatch(""),
261            ))
262        };
263
264        let left_bracket = |items: &[ExprItem], nth: usize| {
265            items
266                .iter()
267                .rev()
268                .filter_map(|item| match item {
269                    ExprItem::Operators(pu) if **pu == Operators::BracketL => Some(item.get_span()),
270                    _ => None,
271                })
272                .nth(nth)
273                .map(|span| span.make_message("left bracket here"))
274        };
275
276        enum Expect {
277            Val,
278            OP,
279        }
280        let mut items: Vec<ExprItem> = vec![];
281        let mut bracket_depth = 0;
282        let mut state = Expect::Val;
283        loop {
284            state = match state {
285                Expect::Val => {
286                    if let Some(lb) = p.r#match(RPU(Operators::BracketL)).apply(mapper::Try)? {
287                        items.push(lb.into());
288                        bracket_depth += 1;
289                        Expect::Val
290                    } else if let Some(unary) = p.once(get_unary_op).apply(mapper::Try)? {
291                        items.push(unary.into());
292                        Expect::Val
293                    } else {
294                        items.push(p.parse::<PU<AtomicExpr>>()?.into());
295                        Expect::OP
296                    }
297                }
298                Expect::OP => {
299                    if bracket_depth != 0
300                        && let Some(rb) = p.r#match(RPU(Operators::BracketR)).apply(mapper::Try)?
301                    {
302                        items.push(rb.into());
303
304                        bracket_depth -= 1;
305                        Expect::OP
306                    } else if let Some(unary) = p.once(get_binary_op).apply(mapper::Try)? {
307                        items.push(unary.into());
308                        Expect::Val
309                    } else if bracket_depth != 0 {
310                        let left_bracket = left_bracket(&items, bracket_depth);
311                        let current_span = p.get_span();
312                        let expect_next = format!("expect this to be `{}`", Operators::BracketR);
313                        let expect_next = p
314                            .parse::<PU<Token>>()
315                            .map(|tk| tk.make_message(expect_next));
316                        break current_span.throw("unclosed bracket").map_err(|mut e| {
317                            e.extend(left_bracket);
318                            e.extend(expect_next.ok());
319                            e
320                        });
321                    } else {
322                        break Ok(items);
323                    }
324                }
325            }
326        }
327    }
328}
329
/// A parsed expression.
#[derive(Debug, Clone)]
pub struct Expr {
    /// Operands and operators in the order produced by `Expr::parse`.
    items: Vec<ExprItem>,
    /// Span recorded when the expression was parsed.
    span: Span,
}
335
336impl WithSpan for Expr {
337    fn get_span(&self) -> Span {
338        self.span
339    }
340}
341
342impl std::ops::Deref for Expr {
343    type Target = Vec<ExprItem>;
344
345    fn deref(&self) -> &Self::Target {
346        &self.items
347    }
348}
349
350impl ParseUnit<Token> for Expr {
351    type Target = Expr;
352
353    fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
354        // is heap allocation fewer than previous algo?
355        let mut exprs = vec![];
356        let mut ops: Vec<PU<Operators>> = vec![];
357
358        fn could_fold(last: Operators, current: Operators) -> bool {
359            last.op_ty() != OperatorTypes::StructOperator && last.priority() <= current.priority()
360        }
361
362        for item in p.parse::<ExprItems>()? {
363            match item {
364                ExprItem::AtomicExpr(..) => {
365                    exprs.push(item);
366                }
367                ExprItem::Operators(op) => match *op {
368                    Operators::BracketL => ops.push(PU::new(item.get_span(), *op)),
369                    Operators::BracketR => {
370                        while let Some(op) = ops.pop() {
371                            if *op == Operators::BracketL {
372                                break;
373                            }
374                            exprs.push(op.into())
375                        }
376                    }
377                    current => {
378                        while ops.last().is_some_and(|last| {
379                            could_fold(**last, current) && exprs.len() >= last.cost()
380                        }) {
381                            let last = ops.pop().unwrap();
382                            exprs.push(last.into());
383                        }
384                        ops.push(PU::new(item.get_span(), *op));
385                    }
386                },
387            }
388        }
389
390        for op in ops.into_iter().rev() {
391            exprs.push(op.into());
392        }
393
394        Ok(Self {
395            items: exprs,
396            span: p.get_span(),
397        })
398    }
399}
400
// Smoke tests: each one hands a source snippet to `parse_test` and, unless
// noted otherwise, only checks that the requested parse unit parses without
// error.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse_test;

    // Character literal written with an escape sequence.
    #[test]
    fn char() {
        parse_test("wen2 _t", |p| {
            p.parse::<CharLiteral>()?;
            Ok(())
        });
    }

    // String literal mixing escapes (`_t`, `__`, `_n`) with plain characters.
    #[test]
    fn string() {
        parse_test("chuan4 _t11514___na", |p| {
            p.parse::<StringLiteral>()?;
            Ok(())
        })
    }

    // Number without the `f` separator.
    #[test]
    fn number1() {
        parse_test("114514", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    // Number with the `f` separator and no fractional digits.
    #[test]
    fn number2() {
        parse_test("114514f", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    // Number with the `f` separator and fractional digits.
    #[test]
    fn number3() {
        parse_test("1919f810", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    // Function call with two arguments.
    #[test]
    fn function_call() {
        parse_test("ya1 1919810 fen1 chuan4 acminoac ru4 han2shu4", |p| {
            p.parse::<FnCall>()?;
            Ok(())
        })
    }

    #[test]
    fn unary() {
        parse_test("fei1 191810", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    #[test]
    fn nested_unary() {
        parse_test("fei1 fei1 fei1 fei1 191810", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    #[test]
    fn bracket() {
        // unary + bracket
        parse_test("fei1 jie2 114514 he2", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    #[test]
    fn complex_expr() {
        // 1919 + 810 * 114514 - 12
        parse_test("1919 jia1 810 cheng2 114514 jian3 12", |p| {
            p.parse::<Expr>()?;
            Ok(())
        });
    }

    // Also asserts that the parsed array has no elements.
    #[test]
    fn empty_array() {
        parse_test("zu3 he2", |p| {
            assert!(p.parse::<Array>()?.elements.is_empty());
            Ok(())
        });
    }

    // The single element is a bracketed expression, so the closing token
    // appears twice: once for the bracket and once for the array.
    #[test]
    fn array_with_an_element() {
        parse_test("zu3 jie2 1 he2 he2", |p| {
            p.parse::<Array>()?;
            Ok(())
        });
    }

    #[test]
    fn array_with_elements() {
        parse_test(
            concat!(
                "zu3 ",             // start array
                "jie2 1 he2 ",      // bracket
                "ya1 ru4 foo ",     // fn_call
                "a b c d e ",       // variables
                "114514 1919f810 ", // number
                "chuan4 awa ",      // string
                "he2"               // end array
            ),
            |p| {
                p.parse::<Array>()?;
                Ok(())
            },
        );
    }

    // Same elements as `array_with_elements`, but separated by punctuation
    // and non-ASCII characters instead of spaces.
    #[test]
    fn array_with_elements_with_white_space() {
        parse_test(
            concat!(
                "zu3%",                                       // start array
                "jie2#$#$1*&)*(he2^&*(^&*(",                  // bracket
                "ya1{#$*()!@*}ru4<>#$%*$%&*(%^*(*^&foo{[&*}", // fn_call
                "a啊b波呲d地e鹅",                             // variables
                "114514-=-=-=-1919f810<?><{}(*)",             // number
                "chuan4<>(^&%^%^&*awa$%&^",                   // string
                "he2"                                         // end array
            ),
            |p| {
                p.parse::<Array>()?;
                Ok(())
            },
        );
    }
}