1use py_lex::{
2 ops::{OperatorAssociativity, OperatorTypes, Operators},
3 syntax::*,
4};
5
6use super::*;
7use crate::complex_pu;
8
/// A character literal.
///
/// `parsed` holds the character after any escape sequence has been resolved.
#[derive(Debug, Clone)]
pub struct CharLiteral {
    /// The decoded character value.
    pub parsed: char,
}
13
14fn escape(src: &Token, c: char) -> Result<char> {
15 Result::Ok(match c {
16 '_' => '_',
17 't' => '\t',
18 'n' => '\n',
19 's' => ' ',
20 _ => return src.throw(format!("Invalid or unsupported escape character: {}", c)),
21 })
22}
23
24impl ParseUnit<Token> for CharLiteral {
25 type Target = CharLiteral;
26
27 fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
28 p.r#match(Symbol::Char)?;
29 let unparsed = p.parse::<Token>()?;
30 if !(unparsed.len() == 1 || unparsed.len() == 2 && unparsed.starts_with('_')) {
31 return unparsed.throw(format!("Invalid CharLiteral {}", unparsed));
32 }
33 let parsed = if unparsed.len() == 1 {
34 unparsed.as_bytes()[0] as char
35 } else {
36 escape(&unparsed, unparsed.as_bytes()[1] as _)?
37 };
38
39 Ok(CharLiteral { parsed })
40 }
41}
42
/// A string literal.
///
/// `parsed` holds the text with every `_`-escape already resolved.
#[derive(Debug, Clone)]
pub struct StringLiteral {
    /// The decoded string contents.
    pub parsed: String,
}
47
48impl ParseUnit<Token> for StringLiteral {
49 type Target = StringLiteral;
50
51 fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
52 p.r#match(Symbol::String)?;
53 let unparsed = p.parse::<Token>()?;
54
55 let mut next_escape = false;
56 let mut parsed = String::new();
57 for c in unparsed.chars() {
58 if next_escape {
59 next_escape = false;
60 parsed.push(escape(&unparsed, c)?);
61 } else if c == '_' {
62 next_escape = true
63 } else {
64 parsed.push(c)
65 }
66 }
67 if next_escape {
68 return unparsed.throw("Invalid escape! maybe you losted a character");
69 }
70
71 Ok(StringLiteral { parsed })
72 }
73}
74
/// A numeric literal.
///
/// The parser produces `Float` when the token contains the `f` separator
/// (e.g. `1919f810`, `114514f`) and `Digit` otherwise.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NumberLiteral {
    /// A literal with an integer part and a (possibly empty) fractional part.
    Float(f64),
    /// A plain unsigned integer literal.
    Digit(usize),
}
80
81impl ParseUnit<Token> for NumberLiteral {
82 type Target = NumberLiteral;
83
84 fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
85 let number = p.parse::<Token>()?; let mut int_dec = number.split('f');
87
88 let Some(int) = int_dec.next() else {
89 unreachable!()
90 };
91 let int = match int.parse::<usize>() {
92 Ok(int) => int,
93 Err(e) => return p.unmatch(e),
94 };
95
96 let dec = match int_dec.next() {
97 Some("") => 0.0,
98 Some(dec) => match dec.parse::<usize>() {
99 Ok(0) => 0.0,
100 Ok(dec) => dec as f64 / 10f64.powi(dec.ilog10() as _),
101 Err(e) => return p.unmatch(e),
102 },
103 None => return Ok(Self::Digit(int)),
104 };
105 if let Some(next) = int_dec.next() {
107 return p.unmatch(format!("unexpect {}", next));
108 }
109
110 Ok(Self::Float(int as f64 + dec))
111 }
112}
113
/// The argument list of a function call.
#[derive(Debug, Clone)]
pub struct FnCallArgs {
    /// The argument expressions, in source order.
    pub args: Vec<Expr>,
}
118
/// Lets a `FnCallArgs` be used directly as its inner `Vec<Expr>`.
impl std::ops::Deref for FnCallArgs {
    type Target = Vec<Expr>;

    /// Borrows the underlying argument vector.
    fn deref(&self) -> &Self::Target {
        &self.args
    }
}
126
127impl ParseUnit<Token> for FnCallArgs {
128 type Target = FnCallArgs;
129
130 fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
131 p.r#match(Symbol::FnCallL)?;
132 let Some(arg) = p.parse::<Expr>().apply(mapper::Try)? else {
133 p.r#match(Symbol::FnCallR).apply(mapper::MustMatch)?;
134 return Ok(FnCallArgs { args: vec![] });
135 };
136
137 let mut args = vec![arg];
138
139 while p.r#match(Symbol::Semicolon).is_ok() {
140 args.push(p.parse::<Expr>()?);
141 }
142
143 p.r#match(Symbol::FnCallR).apply(mapper::MustMatch)?;
144
145 Ok(FnCallArgs { args })
146 }
147}
148
/// A function call: an argument list followed by the callee's name.
#[derive(Debug, Clone)]
pub struct FnCall {
    /// Span reported through `WithSpan`; set from the parser at the end of `parse`.
    span: Span,
    /// Name of the called function.
    pub fn_name: Ident,
    /// Arguments passed to the function.
    pub args: FnCallArgs,
}
155
impl WithSpan for FnCall {
    /// Returns the span recorded when the call was parsed.
    fn get_span(&self) -> Span {
        self.span
    }
}
161
162impl ParseUnit<Token> for FnCall {
163 type Target = FnCall;
164
165 fn parse(p: &mut Parser<Token>) -> ParseResult<Self, Token> {
166 let args = p.parse::<FnCallArgs>()?;
167 let fn_name = p.parse::<Ident>()?;
168
169 Ok(FnCall {
170 fn_name,
171 args,
172 span: p.get_span(),
173 })
174 }
175}
176
/// A variable reference is represented by a plain identifier.
pub type Variable = Ident;
178
/// An array literal.
#[derive(Debug, Clone)]
pub struct Array {
    /// The element expressions, in source order; exposed read-only via `Deref`.
    elements: Vec<Expr>,
}
183
/// Lets an `Array` be used directly as its inner `Vec<Expr>`.
impl std::ops::Deref for Array {
    type Target = Vec<Expr>;

    /// Borrows the underlying element vector.
    fn deref(&self) -> &Self::Target {
        &self.elements
    }
}
191
192impl ParseUnit<Token> for Array {
193 type Target = Array;
194
195 fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
196 p.r#match(Symbol::ArrayL)?;
197 let mut elements = vec![];
198 while let Some(expr) = p.parse::<Expr>().apply(mapper::Try)? {
199 elements.push(expr);
200 }
201 p.r#match(Symbol::ArrayR).apply(mapper::MustMatch)?;
202 Ok(Self { elements })
203 }
204}
205
// Declares `AtomicExpr` through the project's `complex_pu!` macro, listing the
// parse units that can stand as a single operand of an expression.
// NOTE(review): presumably the macro generates a sum type that tries the
// alternatives in the order listed — confirm against the macro definition.
complex_pu! {
    cpu AtomicExpr {
        CharLiteral,
        StringLiteral,
        NumberLiteral,
        FnCall,
        Array,
        Variable
    }
}
216
/// One element of a flattened expression: either an operand or an operator,
/// each wrapped in `PU` so that it carries its span.
#[derive(Debug, Clone)]
pub enum ExprItem {
    /// An operand.
    AtomicExpr(PU<AtomicExpr>),
    /// An operator (brackets are represented as operators as well).
    Operators(PU<Operators>),
}
222
impl WithSpan for ExprItem {
    /// Returns the span of the wrapped operand or operator.
    fn get_span(&self) -> Span {
        match self {
            ExprItem::AtomicExpr(ws) => ws.get_span(),
            ExprItem::Operators(ws) => ws.get_span(),
        }
    }
}
231
impl From<PU<Operators>> for ExprItem {
    /// Wraps a spanned operator as an `ExprItem::Operators`.
    fn from(v: PU<Operators>) -> Self {
        Self::Operators(v)
    }
}
237
impl From<PU<AtomicExpr>> for ExprItem {
    /// Wraps a spanned operand as an `ExprItem::AtomicExpr`.
    fn from(v: PU<AtomicExpr>) -> Self {
        Self::AtomicExpr(v)
    }
}
243
/// Private marker type: its `ParseUnit` impl reads an expression as a flat,
/// source-ordered `Vec<ExprItem>` (see `Target` on the impl).
#[derive(Debug, Clone)]
struct ExprItems;
246
247impl ParseUnit<Token> for ExprItems {
248 type Target = Vec<ExprItem>;
249
250 fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
251 let get_unary_op = |p: &mut Parser<Token>| {
252 p.parse::<PU<Operators>>().apply(mapper::Satisfy::new(
253 |op: &PU<Operators>| op.associativity() == OperatorAssociativity::Unary,
254 |e| e.unmatch(""),
255 ))
256 };
257 let get_binary_op = |p: &mut Parser<Token>| {
258 p.parse::<PU<Operators>>().apply(mapper::Satisfy::new(
259 |op: &PU<Operators>| op.associativity() == OperatorAssociativity::Binary,
260 |e| e.unmatch(""),
261 ))
262 };
263
264 let left_bracket = |items: &[ExprItem], nth: usize| {
265 items
266 .iter()
267 .rev()
268 .filter_map(|item| match item {
269 ExprItem::Operators(pu) if **pu == Operators::BracketL => Some(item.get_span()),
270 _ => None,
271 })
272 .nth(nth)
273 .map(|span| span.make_message("left bracket here"))
274 };
275
276 enum Expect {
277 Val,
278 OP,
279 }
280 let mut items: Vec<ExprItem> = vec![];
281 let mut bracket_depth = 0;
282 let mut state = Expect::Val;
283 loop {
284 state = match state {
285 Expect::Val => {
286 if let Some(lb) = p.r#match(RPU(Operators::BracketL)).apply(mapper::Try)? {
287 items.push(lb.into());
288 bracket_depth += 1;
289 Expect::Val
290 } else if let Some(unary) = p.once(get_unary_op).apply(mapper::Try)? {
291 items.push(unary.into());
292 Expect::Val
293 } else {
294 items.push(p.parse::<PU<AtomicExpr>>()?.into());
295 Expect::OP
296 }
297 }
298 Expect::OP => {
299 if bracket_depth != 0
300 && let Some(rb) = p.r#match(RPU(Operators::BracketR)).apply(mapper::Try)?
301 {
302 items.push(rb.into());
303
304 bracket_depth -= 1;
305 Expect::OP
306 } else if let Some(unary) = p.once(get_binary_op).apply(mapper::Try)? {
307 items.push(unary.into());
308 Expect::Val
309 } else if bracket_depth != 0 {
310 let left_bracket = left_bracket(&items, bracket_depth);
311 let current_span = p.get_span();
312 let expect_next = format!("expect this to be `{}`", Operators::BracketR);
313 let expect_next = p
314 .parse::<PU<Token>>()
315 .map(|tk| tk.make_message(expect_next));
316 break current_span.throw("unclosed bracket").map_err(|mut e| {
317 e.extend(left_bracket);
318 e.extend(expect_next.ok());
319 e
320 });
321 } else {
322 break Ok(items);
323 }
324 }
325 }
326 }
327 }
328}
329
/// A parsed expression.
///
/// `Expr::parse` stores the items in postfix order: operands first, each
/// operator after the operands it applies to, with brackets removed.
#[derive(Debug, Clone)]
pub struct Expr {
    /// Operands and operators of the expression; exposed read-only via `Deref`.
    items: Vec<ExprItem>,
    /// Span reported through `WithSpan`; set from the parser at the end of `parse`.
    span: Span,
}
335
impl WithSpan for Expr {
    /// Returns the span recorded when the expression was parsed.
    fn get_span(&self) -> Span {
        self.span
    }
}
341
/// Lets an `Expr` be used directly as its inner `Vec<ExprItem>`.
impl std::ops::Deref for Expr {
    type Target = Vec<ExprItem>;

    /// Borrows the underlying item vector.
    fn deref(&self) -> &Self::Target {
        &self.items
    }
}
349
350impl ParseUnit<Token> for Expr {
351 type Target = Expr;
352
353 fn parse(p: &mut Parser<Token>) -> terl::Result<Self::Target, ParseError> {
354 let mut exprs = vec![];
356 let mut ops: Vec<PU<Operators>> = vec![];
357
358 fn could_fold(last: Operators, current: Operators) -> bool {
359 last.op_ty() != OperatorTypes::StructOperator && last.priority() <= current.priority()
360 }
361
362 for item in p.parse::<ExprItems>()? {
363 match item {
364 ExprItem::AtomicExpr(..) => {
365 exprs.push(item);
366 }
367 ExprItem::Operators(op) => match *op {
368 Operators::BracketL => ops.push(PU::new(item.get_span(), *op)),
369 Operators::BracketR => {
370 while let Some(op) = ops.pop() {
371 if *op == Operators::BracketL {
372 break;
373 }
374 exprs.push(op.into())
375 }
376 }
377 current => {
378 while ops.last().is_some_and(|last| {
379 could_fold(**last, current) && exprs.len() >= last.cost()
380 }) {
381 let last = ops.pop().unwrap();
382 exprs.push(last.into());
383 }
384 ops.push(PU::new(item.get_span(), *op));
385 }
386 },
387 }
388 }
389
390 for op in ops.into_iter().rev() {
391 exprs.push(op.into());
392 }
393
394 Ok(Self {
395 items: exprs,
396 span: p.get_span(),
397 })
398 }
399}
400
// Smoke tests: each one feeds a source snippet to `parse_test` and checks that
// the requested parse unit can be parsed from it.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parse_test;

    /// A char literal using the `_t` escape.
    #[test]
    fn char() {
        parse_test("wen2 _t", |p| {
            p.parse::<CharLiteral>()?;
            Ok(())
        });
    }

    /// A string literal mixing plain characters with `_t`, `__` and `_n` escapes.
    #[test]
    fn string() {
        parse_test("chuan4 _t11514___na", |p| {
            p.parse::<StringLiteral>()?;
            Ok(())
        })
    }

    /// An integer literal without the `f` separator.
    #[test]
    fn number1() {
        parse_test("114514", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    /// A float literal with an empty fractional part.
    #[test]
    fn number2() {
        parse_test("114514f", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    /// A float literal with both integer and fractional parts.
    #[test]
    fn number3() {
        parse_test("1919f810", |p| {
            p.parse::<NumberLiteral>()?;
            Ok(())
        })
    }

    /// A call with two arguments (a number and a string) followed by the function name.
    #[test]
    fn function_call() {
        parse_test("ya1 1919810 fen1 chuan4 acminoac ru4 han2shu4", |p| {
            p.parse::<FnCall>()?;
            Ok(())
        })
    }

    /// A single unary operator applied to a number.
    #[test]
    fn unary() {
        parse_test("fei1 191810", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    /// Several stacked unary operators.
    #[test]
    fn nested_unary() {
        parse_test("fei1 fei1 fei1 fei1 191810", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    /// A unary operator applied to a bracketed operand.
    #[test]
    fn bracket() {
        parse_test("fei1 jie2 114514 he2", |p| {
            p.parse::<Expr>()?;
            Ok(())
        })
    }

    /// A chain of binary operators over number operands.
    #[test]
    fn complex_expr() {
        parse_test("1919 jia1 810 cheng2 114514 jian3 12", |p| {
            p.parse::<Expr>()?;
            Ok(())
        });
    }

    /// An array with no elements parses to an empty element list.
    #[test]
    fn empty_array() {
        parse_test("zu3 he2", |p| {
            assert!(p.parse::<Array>()?.elements.is_empty());
            Ok(())
        });
    }

    /// An array whose only element is a bracketed number.
    #[test]
    fn array_with_an_element() {
        parse_test("zu3 jie2 1 he2 he2", |p| {
            p.parse::<Array>()?;
            Ok(())
        });
    }

    /// An array mixing bracketed values, a call, identifiers, numbers and a string.
    #[test]
    fn array_with_elements() {
        parse_test(
            concat!(
                "zu3 ", "jie2 1 he2 ", "ya1 ru4 foo ", "a b c d e ", "114514 1919f810 ", "chuan4 awa ", "he2" ),
            |p| {
                p.parse::<Array>()?;
                Ok(())
            },
        );
    }

    /// Same shape as `array_with_elements`, but with punctuation and non-ASCII
    /// characters between the tokens instead of spaces.
    #[test]
    fn array_with_elements_with_white_space() {
        parse_test(
            concat!(
                "zu3%", "jie2#$#$1*&)*(he2^&*(^&*(", "ya1{#$*()!@*}ru4<>#$%*$%&*(%^*(*^&foo{[&*}", "a啊b波呲d地e鹅", "114514-=-=-=-1919f810<?><{}(*)", "chuan4<>(^&%^%^&*awa$%&^", "he2" ),
            |p| {
                p.parse::<Array>()?;
                Ok(())
            },
        );
    }
}