1use std::{collections::BTreeSet, fmt::Debug};
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A region of the source text, described by its `start` and `end` offsets.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-length span whose start and end are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Span::new(pos, pos)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn merge(self, other: Self) -> Self {
        let start = self.start.min(other.start);
        let end = self.end.max(other.end);
        Span { start, end }
    }
}
35
/// Cursor-based parser over a byte buffer, together with the bookkeeping used
/// for duplicate-declaration checks and nesting-depth limits.
#[derive(Debug)]
pub struct Parser {
    /// Index of the next unread byte in `buf`.
    pos: usize,
    /// The raw input being parsed.
    buf: Vec<u8>,
    /// Recorded offsets; `error_stmt` uses the last one as the start of the
    /// text it reports.
    spans: Vec<usize>,
    /// Stack of declaration scopes; each set holds the names declared in that scope.
    decl_scopes: Vec<BTreeSet<SmolStr>>,
    /// Number of `impl` bodies currently being parsed (see `impl_body`).
    impl_depth: usize,
    /// Current nesting depth, maintained by `enter_depth` / `exit_depth`.
    depth: usize,
    /// Set once the depth limit is exceeded; `try_parse!` stops backtracking when it is set.
    fatal: bool,
}
46
/// Largest nesting depth `Parser::enter_depth` accepts before it fails with
/// `ParserErr::TooDeep` and marks the parser as fatally failed.
pub const MAX_PARSE_DEPTH: usize = 128;
54
/// Bytes that terminate an identifier; every other byte is accepted by `Parser::ident`.
const NOT_IDENT: &[u8] = &[b' ', b'\t', b'\n', b'\r', b'/', b'*', b'+', b'-', b'=', b'(', b')', b'{', b'}', b'[', b']', b';', b':', b',', b'.', b'<', b'>', b'!', b'#', b'$', b'%', b'^', b'&', b'|', b'\\', b'"', b'\''];
/// Bytes skipped as blank space by `Parser::whitespace`.
const WHITE_SPACE: &[u8] = &[b' ', b'\t', b'\n', b'\r'];
/// Built-in type names and the `Type` each one maps to. `get_type` tries them
/// in this order; `numeric_suffix` reuses the table for literal suffixes.
const TYPES: &[(&str, Type)] = &[
    ("bool", Type::Bool),
    ("string", Type::Str),
    ("i8", Type::I8),
    ("i16", Type::I16),
    ("i32", Type::I32),
    ("i64", Type::I64),
    ("u8", Type::U8),
    ("u16", Type::U16),
    ("u32", Type::U32),
    ("u64", Type::U64),
    ("f16", Type::F16),
    ("f32", Type::F32),
    ("f64", Type::F64),
];
/// Reserved words that `Parser::ident` refuses to return as identifiers.
const KEYWORDS: &[&str] = &["true", "false", "null", "let", "if", "else", "for", "in", "while", "pub", "fn", "struct", "impl", "const", "static", "continue", "return", "break"];
73
/// Parses a delimited list of items into the collection given as `$start`.
///
/// Each round skips whitespace, then:
/// * if the next byte equals `$end`, consumes it and stops;
/// * otherwise evaluates `$item_expr`, pushes the result, skips whitespace and
///   consumes one `$sep` byte if it is present.
///
/// The separator is not mandatory: when it is missing the loop simply tries
/// to parse another item. Errors are propagated with `?`, so the macro must be
/// used inside a function returning a compatible `Result`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut items = $start;
        loop {
            $self.whitespace()?;
            // Closing delimiter: consume it and finish.
            if $self.get()? == $end {
                $self.pos += 1;
                break;
            }
            let item = $item_expr;
            items.push(item);
            $self.whitespace()?;
            // Optional separator between items.
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
        items
    }};
}
94
/// Evaluates `$method` as a speculative parse with backtracking.
///
/// The cursor position, the declaration scopes and the `impl` depth are saved
/// first. On success the result is returned unchanged. On failure the saved
/// state is restored before the error is returned, unless the parser is
/// flagged `fatal`, in which case the error is passed through without
/// restoring anything.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let save_pos = $self.pos;
        let save_decl_scopes = $self.decl_scopes.clone();
        let save_impl_depth = $self.impl_depth;
        match $method {
            Ok(expr) => Ok(expr),
            // A fatal error must not be retried, so the state is left as it is.
            Err(e) if $self.fatal => Err(e),
            Err(e) => {
                $self.pos = save_pos;
                $self.decl_scopes = save_decl_scopes;
                $self.impl_depth = save_impl_depth;
                Err(e)
            }
        }
    }};
}
114
/// Errors produced by the low-level parsing routines.
#[derive(Debug, thiserror::Error)]
pub enum ParserErr {
    /// A specific byte was expected (first field) but another one was found (second field).
    #[error("期望字符 {0} 实际字符 {1}")]
    ExpectChar(char, char),
    /// `collect` matched no bytes at all.
    #[error("未发现期望字符")]
    NoCharCollect,
    /// The given literal text was expected at the cursor.
    #[error("期望字符串 {0}")]
    ExpectedString(SmolStr),
    /// The input ended where another byte was required.
    #[error("输入结束")]
    EndofInput,
    /// A `/* ... */` comment was never closed.
    #[error("未关闭的注释")]
    UncloseComment,
    /// A raw string prefix was not followed by an opening quote.
    #[error("非法的原始字符串")]
    IllegalRawString,
    /// A string literal was never closed.
    #[error("未关闭字符串")]
    UnclosedString,
    /// A string literal was expected but the cursor is not at a `"`.
    #[error("非字符串")]
    NotString,
    /// A numeric literal was expected but no digits were found.
    #[error("非数字")]
    NotNumber,
    /// The named symbol has already been declared.
    #[error("符号 {0} 已经声明")]
    DuplicateSymbol(SmolStr),
    /// Nesting exceeded `MAX_PARSE_DEPTH`.
    #[error("表达式嵌套过深")]
    TooDeep,
}
140
141impl Parser {
142 pub fn new(buf: Vec<u8>) -> Self {
143 Self { pos: 0, buf, spans: Vec::new(), decl_scopes: vec![BTreeSet::new()], impl_depth: 0, depth: 0, fatal: false }
144 }
145
146 fn enter_depth(&mut self) -> Result<()> {
153 self.depth += 1;
154 if self.depth > MAX_PARSE_DEPTH {
155 self.depth -= 1;
156 self.fatal = true;
157 return Err(ParserErr::TooDeep.into());
158 }
159 Ok(())
160 }
161
162 fn exit_depth(&mut self) {
163 self.depth = self.depth.saturating_sub(1);
164 }
165
166 fn check_fatal(&self) -> Result<()> {
168 if self.fatal { Err(ParserErr::TooDeep.into()) } else { Ok(()) }
169 }
170
171 fn push_decl_scope(&mut self) {
172 self.decl_scopes.push(BTreeSet::new());
173 }
174
175 fn pop_decl_scope(&mut self) {
176 if self.decl_scopes.len() > 1 {
177 self.decl_scopes.pop();
178 }
179 }
180
181 fn declare_symbol(&mut self, name: &SmolStr) -> Result<()> {
182 if name.is_empty() {
183 return Ok(());
184 }
185 if self.decl_scopes.iter().rev().any(|scope| scope.contains(name)) {
186 return Err(ParserErr::DuplicateSymbol(name.clone()).into());
187 }
188 self.decl_scopes.last_mut().expect("parser always has a declaration scope").insert(name.clone());
189 Ok(())
190 }
191
192 fn declare_symbol_in_current_scope(&mut self, name: &SmolStr) -> Result<()> {
193 if name.is_empty() {
194 return Ok(());
195 }
196 let scope = self.decl_scopes.last_mut().expect("parser always has a declaration scope");
197 if scope.contains(name) {
198 return Err(ParserErr::DuplicateSymbol(name.clone()).into());
199 }
200 scope.insert(name.clone());
201 Ok(())
202 }
203
204 fn declare_function_name(&mut self, name: &SmolStr) -> Result<()> {
205 if self.impl_depth > 0 { self.declare_symbol_in_current_scope(name) } else { self.declare_symbol(name) }
206 }
207
208 fn declare_args(&mut self, args: &[(SmolStr, Type)]) -> Result<()> {
209 for (name, _) in args {
210 self.declare_symbol(name)?;
211 }
212 Ok(())
213 }
214
215 fn declare_pattern_symbols(&mut self, pat: &Pattern) -> Result<()> {
216 match &pat.kind {
217 PatternKind::Ident { name, .. } => self.declare_symbol_in_current_scope(name),
218 PatternKind::Tuple(items) => {
219 for item in items {
220 self.declare_pattern_symbols(item)?;
221 }
222 Ok(())
223 }
224 PatternKind::List { elems, .. } => {
225 for item in elems {
226 self.declare_pattern_symbols(item)?;
227 }
228 Ok(())
229 }
230 PatternKind::Wildcard | PatternKind::Var { .. } | PatternKind::Literal(_) | PatternKind::Member(_, _) | PatternKind::Idx(_, _) => Ok(()),
231 }
232 }
233
234 fn function_body(&mut self, args: &[(SmolStr, Type)]) -> Result<Stmt> {
235 self.push_decl_scope();
236 let result = (|| {
237 self.declare_args(args)?;
238 self.block()
239 })();
240 self.pop_decl_scope();
241 result
242 }
243
244 fn impl_body(&mut self) -> Result<Stmt> {
245 self.push_decl_scope();
246 self.impl_depth += 1;
247 let result = self.block();
248 self.impl_depth -= 1;
249 self.pop_decl_scope();
250 result
251 }
252
253 pub fn is_eof(&self) -> bool {
254 self.pos >= self.buf.len()
255 }
256
257 pub fn get(&self) -> Result<u8> {
258 self.buf.get(self.pos).cloned().ok_or(ParserErr::EndofInput.into())
260 }
261
262 pub fn take(&mut self, ch: u8) -> Result<()> {
263 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
265 self.pos += 1;
266 Ok(())
267 } else {
268 Err(ParserErr::ExpectChar(ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char).into())
269 }
270 }
271
272 pub fn until(&mut self, ch: u8) -> Result<()> {
273 self.whitespace()?;
275 self.take(ch)
276 }
277
278 pub fn ahead(&self) -> Result<u8> {
279 self.buf.get(self.pos + 1).cloned().ok_or(ParserErr::EndofInput.into())
281 }
282
283 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
284 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
285 }
286
287 pub fn error_stmt(&self) -> SmolStr {
288 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
289 }
290
291 pub fn current_pos(&self) -> usize {
292 self.pos
293 }
294
295 pub fn span_from(&self, start: usize) -> Span {
296 Span::new(start, self.pos)
297 }
298
299 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
300 let start = self.pos;
301 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
302 self.pos += 1;
303 }
304 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::NoCharCollect.into()) }
305 }
306
307 pub fn just(&mut self, pattern: &str) -> Result<()> {
308 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
309 self.pos += pattern.len();
310 Ok(())
311 } else {
312 Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into())
313 }
314 }
315
316 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
317 self.just(pattern)?;
318 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
319 self.pos -= pattern.len();
320 return Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into());
321 }
322 Ok(())
323 }
324
325 pub fn get_type(&mut self) -> Result<Type> {
326 self.whitespace()?;
327 if self.get()? == b'[' {
328 self.pos += 1;
329 let ty = self.get_type()?;
330 self.until(b';')?;
331 self.whitespace()?;
332 let len = self.get_type_param()?;
333 self.until(b']')?;
334 if let Type::ConstInt(number) = len {
335 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
336 Ok(Type::Array(std::rc::Rc::new(ty), number))
337 } else {
338 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
339 }
340 } else {
341 for ty in TYPES {
342 if self.just(ty.0).is_ok() {
343 return Ok(ty.1.clone());
344 }
345 }
346 let name = self.ident()?;
347 if self.take(b'<').is_ok() {
348 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
349 Ok(Type::Ident { name, params })
350 } else {
351 Ok(Type::Ident { name, params: Vec::new() })
352 }
353 }
354 }
355
356 pub fn get_type_param(&mut self) -> Result<Type> {
357 self.const_type_param_add()
358 }
359
360 fn const_type_param_add(&mut self) -> Result<Type> {
361 let mut left = self.const_type_param_mul()?;
362 loop {
363 self.whitespace()?;
364 let op = if self.take(b'+').is_ok() {
365 Some(ConstIntOp::Add)
366 } else if self.take(b'-').is_ok() {
367 Some(ConstIntOp::Sub)
368 } else {
369 None
370 };
371 let Some(op) = op else { break };
372 let right = self.const_type_param_mul()?;
373 left = Self::fold_const_type_binary(op, left, right)?;
374 }
375 Ok(left)
376 }
377
378 fn const_type_param_mul(&mut self) -> Result<Type> {
379 let mut left = self.const_type_param_primary()?;
380 loop {
381 self.whitespace()?;
382 let op = if self.take(b'*').is_ok() {
383 Some(ConstIntOp::Mul)
384 } else if self.take(b'/').is_ok() {
385 Some(ConstIntOp::Div)
386 } else if self.take(b'%').is_ok() {
387 Some(ConstIntOp::Mod)
388 } else {
389 None
390 };
391 let Some(op) = op else { break };
392 let right = self.const_type_param_primary()?;
393 left = Self::fold_const_type_binary(op, left, right)?;
394 }
395 Ok(left)
396 }
397
398 fn const_type_param_primary(&mut self) -> Result<Type> {
399 self.whitespace()?;
400 if self.take(b'(').is_ok() {
401 let ty = self.get_type_param()?;
402 self.until(b')')?;
403 return Ok(ty);
404 }
405 if self.get()?.is_ascii_digit() {
406 let value = self.number()?;
407 if let Some(value) = value.as_uint() {
408 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
409 Ok(Type::ConstInt(value))
410 } else if let Some(value) = value.as_int() {
411 Ok(Type::ConstInt(value))
412 } else {
413 Err(anyhow!("模板数字参数必须是整数"))
414 }
415 } else {
416 self.get_type()
417 }
418 }
419
420 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
421 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
422 let value = match op {
423 ConstIntOp::Add => left + right,
424 ConstIntOp::Sub => left - right,
425 ConstIntOp::Mul => left * right,
426 ConstIntOp::Div => {
427 if *right == 0 {
428 return Err(anyhow!("模板整数除以 0"));
429 }
430 left / right
431 }
432 ConstIntOp::Mod => {
433 if *right == 0 {
434 return Err(anyhow!("模板整数取模 0"));
435 }
436 left % right
437 }
438 };
439 Ok(Type::ConstInt(value))
440 } else {
441 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
442 }
443 }
444
445 pub fn comment(&mut self) -> Result<()> {
446 if self.get()? == b'/' && self.ahead()? == b'/' {
447 self.pos += 2;
448 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
449 self.pos += 1;
450 }
451 Ok(())
452 } else if self.get()? == b'/' && self.ahead()? == b'*' {
453 self.pos += 2;
454 while self.pos + 1 < self.buf.len() {
455 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
456 self.pos += 2;
457 return Ok(());
458 }
459 self.pos += 1;
460 }
461 Err(ParserErr::UncloseComment.into())
462 } else {
463 Ok(())
464 }
465 }
466
467 pub fn whitespace(&mut self) -> Result<()> {
468 while self.pos < self.buf.len() {
469 self.comment()?;
470 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
471 break;
472 }
473 self.pos += 1;
474 }
475 Ok(())
476 }
477
478 pub fn ident(&mut self) -> Result<SmolStr> {
479 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
480 loop {
481 let save_pos = self.pos;
482 if self.just("::").is_err() {
483 break;
484 }
485 match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
486 Ok((_, next_stop)) => {
487 stop = next_stop;
488 }
489 Err(_) => {
490 self.pos = save_pos;
491 break;
492 }
493 }
494 }
495 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
496 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
497 }
498 Ok(self.get_str(start, stop))
499 }
500
501 pub fn string(&mut self) -> Result<SmolStr> {
502 if self.get()? != b'"' {
503 return Err(ParserErr::NotString.into());
504 }
505 self.pos += 1;
506 let mut text_buf = Vec::new();
507 while self.pos < self.buf.len() {
508 if self.buf[self.pos] == b'\\' {
509 self.pos += 1;
511 match self.buf[self.pos] {
512 b'n' => { text_buf.push(b'\n'); self.pos += 1; }
513 b'r' => { text_buf.push(b'\r'); self.pos += 1; }
514 b't' => { text_buf.push(b'\t'); self.pos += 1; }
515 ch @ (b'\\' | b'"') => {
516 text_buf.push(ch);
517 self.pos += 1;
518 }
519 b'u' => {
520 self.pos += 1;
521 let unicode = if self.take(b'{').is_ok() {
522 let code = self.hex()?;
523 self.pos += 1;
524 code
525 } else {
526 self.hex()?
527 };
528 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
529 let mut utf8_buf = [0u8; 4];
530 let s = ch.encode_utf8(&mut utf8_buf);
531 text_buf.extend_from_slice(s.as_bytes());
532 }
533 b'x' => {
534 self.pos += 1;
535 if self.pos + 2 < self.buf.len() {
536 let start = self.pos;
537 self.pos += 2;
538 let hex = &self.buf[start..self.pos];
539 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
540 text_buf.push(code as u8);
541 }
542 }
543 other => {
544 return Err(anyhow!("invalid escape character: {}", other as char));
545 }
546 }
547 } else {
548 if self.buf[self.pos] == b'"' {
549 self.pos += 1;
550 return Ok(String::from_utf8(text_buf)?.into());
551 }
552 text_buf.push(self.buf[self.pos]);
553 self.pos += 1;
554 }
555 }
556 Err(ParserErr::UnclosedString.into())
557 }
558
559 pub fn text(&mut self) -> Result<SmolStr> {
560 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
561 self.pos += 1;
562 let mut end = String::from("\"");
563 while self.buf[self.pos] == b'#' {
564 end.push('#');
565 self.pos += 1;
566 }
567 if self.get()? != b'"' {
568 return Err(ParserErr::IllegalRawString.into());
569 }
570 self.pos += 1;
571 let start_pos = self.pos;
572 while self.pos < self.buf.len() {
573 if self.just(&end).is_ok() {
574 break;
575 }
576 self.pos += 1;
577 }
578 Ok(self.get_str(start_pos, self.pos - end.len()))
579 } else {
580 self.string()
581 }
582 }
583
584 fn hex(&mut self) -> Result<i32> {
585 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
587 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
588 }
589
590 fn numeric_suffix(&mut self) -> Option<Type> {
591 let save = self.pos;
592 for (name, ty) in TYPES {
593 if !ty.is_native() || *ty == Type::F16 {
594 continue;
595 }
596 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
597 self.pos += name.len();
598 return Some(ty.clone());
599 }
600 }
601 self.pos = save;
602 None
603 }
604
605 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
606 let ty = suffix.unwrap_or(Type::I64);
608 let magnitude = u128::from_str_radix(digits, radix).map_err(|_| anyhow!("整数字面量 {} 超出可表示范围", digits))?;
610 let (signed, bits) = match ty {
611 Type::I8 => (true, 8u32),
612 Type::I16 => (true, 16),
613 Type::I32 => (true, 32),
614 Type::I64 => (true, 64),
615 Type::U8 => (false, 8),
616 Type::U16 => (false, 16),
617 Type::U32 => (false, 32),
618 Type::U64 => (false, 64),
619 Type::F32 => return Ok(Dynamic::F32(magnitude as f32)),
620 Type::F64 => return Ok(Dynamic::F64(magnitude as f64)),
621 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
622 };
623 let unsigned_max = (1u128 << bits) - 1;
624 let max_allowed = if radix == 10 {
627 if signed { unsigned_max / 2 + 1 } else { unsigned_max }
628 } else {
629 unsigned_max
630 };
631 if magnitude > max_allowed {
632 return Err(anyhow!("整数字面量 {} 超出 {:?} 的范围", digits, ty));
633 }
634 Ok(match ty {
635 Type::I8 => Dynamic::I8(magnitude as i8),
636 Type::I16 => Dynamic::I16(magnitude as i16),
637 Type::I32 => Dynamic::I32(magnitude as i32),
638 Type::I64 => Dynamic::I64(magnitude as i64),
639 Type::U8 => Dynamic::U8(magnitude as u8),
640 Type::U16 => Dynamic::U16(magnitude as u16),
641 Type::U32 => Dynamic::U32(magnitude as u32),
642 Type::U64 => Dynamic::U64(magnitude as u64),
643 _ => unreachable!(),
644 })
645 }
646
647 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
648 let value: f64 = digits.parse()?;
649 Ok(match suffix.unwrap_or(Type::F32) {
650 Type::I8 => Dynamic::I8(value as i8),
651 Type::I16 => Dynamic::I16(value as i16),
652 Type::I32 => Dynamic::I32(value as i32),
653 Type::I64 => Dynamic::I64(value as i64),
654 Type::U8 => Dynamic::U8(value as u8),
655 Type::U16 => Dynamic::U16(value as u16),
656 Type::U32 => Dynamic::U32(value as u32),
657 Type::U64 => Dynamic::U64(value as u64),
658 Type::F32 => Dynamic::F32(value as f32),
659 Type::F64 => Dynamic::F64(value),
660 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
661 })
662 }
663
664 pub fn number(&mut self) -> Result<Dynamic> {
665 if self.get()? == b'0' {
666 if [b'b', b'B'].contains(&self.ahead()?) {
667 self.pos += 2;
668 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
669 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
670 let suffix = self.numeric_suffix();
671 return self.int_literal(&s, 2, suffix);
672 } else if [b'o', b'O'].contains(&self.ahead()?) {
673 self.pos += 2;
674 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
675 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
676 let suffix = self.numeric_suffix();
677 return self.int_literal(&s, 8, suffix);
678 } else if [b'x', b'X'].contains(&self.ahead()?) {
679 self.pos += 2;
680 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
681 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
682 let suffix = self.numeric_suffix();
683 return self.int_literal(&s, 16, suffix);
684 }
685 }
686 let start = self.pos;
687 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
688 self.pos += 1;
689 }
690 let mut is_float = false;
691 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
692 is_float = true;
693 self.pos += 1;
694 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
695 self.pos += 1;
696 }
697 }
698 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') {
699 let mut exp_pos = self.pos + 1;
700 if exp_pos < self.buf.len() && (self.buf[exp_pos] == b'+' || self.buf[exp_pos] == b'-') {
701 exp_pos += 1;
702 }
703 if exp_pos < self.buf.len() && self.buf[exp_pos] <= b'9' && self.buf[exp_pos] >= b'0' {
704 is_float = true;
705 self.pos = exp_pos + 1;
706 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
707 self.pos += 1;
708 }
709 }
710 }
711 if self.pos > start {
712 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
713 let suffix = self.numeric_suffix();
714 if is_float {
715 return self.float_literal(&text, suffix);
716 }
717 return self.int_literal(&text, 10, suffix);
718 }
719 Err(ParserErr::NotNumber.into())
720 }
721}
722
723#[cfg(test)]
724mod tests {
725 use super::*;
726
727 fn parse_all(code: &str) -> Result<Vec<Stmt>> {
728 let mut parser = Parser::new(code.as_bytes().to_vec());
729 let mut stmts = Vec::new();
730 loop {
731 match parser.stmt(false) {
732 Ok(stmt) => stmts.push(stmt),
733 Err(err) => {
734 if parser.is_eof() {
735 return Ok(stmts);
736 }
737 return Err(err);
738 }
739 }
740 }
741 }
742
743 fn run_with_big_stack(f: impl FnOnce() + Send + 'static) {
747 std::thread::Builder::new().stack_size(64 * 1024 * 1024).spawn(f).unwrap().join().unwrap();
748 }
749
750 #[test]
751 fn deeply_nested_parens_error_instead_of_stack_overflow() {
752 run_with_big_stack(|| {
753 let depth = MAX_PARSE_DEPTH + 50;
754 let code = format!("{}1{}", "(".repeat(depth), ")".repeat(depth));
755 let mut parser = Parser::new(code.into_bytes());
756 let err = parser.get_expr().unwrap_err();
757 assert!(err.to_string().contains("嵌套过深"), "got: {err}");
758 });
759 }
760
761 #[test]
762 fn deeply_nested_blocks_error_instead_of_stack_overflow() {
763 run_with_big_stack(|| {
764 let depth = MAX_PARSE_DEPTH + 50;
765 let code = format!("fn f() {}{}{}", "{".repeat(depth), "1", "}".repeat(depth));
766 let err = parse_all(&code).unwrap_err();
767 assert!(err.to_string().contains("嵌套过深"), "got: {err}");
768 });
769 }
770
771 #[test]
772 fn normal_nesting_within_limit_parses() {
773 let code = format!("{}1{}", "(".repeat(32), ")".repeat(32));
775 let mut parser = Parser::new(code.into_bytes());
776 parser.get_expr().unwrap();
777 }
778
779 fn parse_literal(code: &str) -> Result<Dynamic> {
780 let mut parser = Parser::new(code.as_bytes().to_vec());
781 match parser.get_expr()?.kind {
782 crate::ExprKind::Value(value) => Ok(value),
783 other => Err(anyhow!("不是字面量: {:?}", other)),
784 }
785 }
786
787 #[test]
788 fn unsuffixed_integer_defaults_to_i64() {
789 assert_eq!(parse_literal("5").unwrap(), Dynamic::I64(5));
790 assert_eq!(parse_literal("3000000000").unwrap(), Dynamic::I64(3000000000));
792 }
793
794 #[test]
795 fn out_of_range_integer_literals_error() {
796 assert!(parse_literal("99999999999999999999999999999999999999999").is_err());
798 assert!(parse_literal("255i8").unwrap_err().to_string().contains("超出"));
800 assert!(parse_literal("70000i16").unwrap_err().to_string().contains("超出"));
801 assert!(parse_literal("256u8").unwrap_err().to_string().contains("超出"));
802 }
803
804 #[test]
805 fn signed_min_magnitude_literals_allowed() {
806 assert_eq!(parse_literal("128i8").unwrap(), Dynamic::I8(-128));
808 assert_eq!(parse_literal("9223372036854775808").unwrap(), Dynamic::I64(i64::MIN));
809 }
810
811 #[test]
812 fn hex_literals_keep_bit_pattern() {
813 assert_eq!(parse_literal("0xFFFFFFFF").unwrap(), Dynamic::I64(0xFFFFFFFF));
815 assert_eq!(parse_literal("0xFFi8").unwrap(), Dynamic::I8(-1));
817 assert_eq!(parse_literal("0xFFFFFFFFu32").unwrap(), Dynamic::U32(u32::MAX));
818 }
819
820 fn shape(code: &str) -> String {
822 let mut parser = Parser::new(code.as_bytes().to_vec());
823 let expr = parser.get_expr().expect("parse");
824 fmt_shape(&expr)
825 }
826
827 fn binop_sym(op: &crate::BinaryOp) -> &'static str {
828 use crate::BinaryOp::*;
829 match op {
830 Add => "+", Sub => "-", Mul => "*", Div => "/", Mod => "%",
831 Shl => "<<", Shr => ">>", BitAnd => "&", BitOr => "|", BitXor => "^",
832 Assign => "=", AddAssign => "+=", Eq => "==", Ne => "!=", Lt => "<", Gt => ">",
833 Le => "<=", Ge => ">=", And => "&&", Or => "||", Idx => "idx",
834 other => {
835 let _ = other;
836 "?"
837 }
838 }
839 }
840
841 fn fmt_shape(expr: &crate::Expr) -> String {
842 use crate::ExprKind::*;
843 match &expr.kind {
844 Value(v) => format!("{:?}", v).replace("I64(", "").replace("I32(", "").trim_end_matches(')').to_string(),
845 Ident(name) => name.to_string(),
846 Unary { op, value } => {
847 let s = if matches!(op, crate::UnaryOp::Neg) { "-" } else { "!" };
848 format!("({} {})", s, fmt_shape(value))
849 }
850 Binary { left, op, right } => format!("({} {} {})", binop_sym(op), fmt_shape(left), fmt_shape(right)),
851 Range { start, stop, inclusive } => format!("({} {} {})", if *inclusive { "..=" } else { ".." }, fmt_shape(start), fmt_shape(stop)),
852 Typed { value, ty } => format!("(as {} {:?})", fmt_shape(value), ty),
853 other => format!("{:?}", other),
854 }
855 }
856
857 #[test]
858 fn precedence_and_associativity_golden() {
859 assert_eq!(shape("1 + 2 * 3"), "(+ 1 (* 2 3))");
861 assert_eq!(shape("1 * 2 + 3"), "(+ (* 1 2) 3)");
862 assert_eq!(shape("1 - 2 - 3"), "(- (- 1 2) 3)");
864 assert_eq!(shape("8 / 4 / 2"), "(/ (/ 8 4) 2)");
865 assert_eq!(shape("2 + 3 << 4"), "(<< (+ 2 3) 4)");
867 assert_eq!(shape("1 | 2 ^ 3 & 4"), "(| 1 (^ 2 (& 3 4)))");
869 assert_eq!(shape("1 + 2 == 3"), "(== (+ 1 2) 3)");
871 assert_eq!(shape("a && b || c"), "(|| (&& a b) c)");
873 assert_eq!(shape("-a * b"), "(* (- a) b)");
875 assert_eq!(shape("!a == b"), "(== (! a) b)");
876 }
877
878 #[test]
879 fn assignment_range_and_as_precedence_golden() {
880 assert_eq!(shape("a = b + c"), "(= a (+ b c))");
882 assert_eq!(shape("a = b = c"), "(= (= a b) c)");
886 assert_eq!(shape("a += b * c"), "(+= a (* b c))");
888 assert_eq!(shape("1 + 1 .. n * 2"), "(.. (+ 1 1) (* n 2))");
890 assert_eq!(shape("0 ..= n - 1"), "(..= 0 (- n 1))");
891 assert_eq!(shape("a + b as i64"), "(as (+ a b) I64)");
894 }
895
    /// Smoke test: feeds the parser 4000 pseudo-random token sequences and
    /// asserts that it never panics. Parse errors are fine; only panics fail
    /// the test.
    #[test]
    fn parser_never_panics_on_random_input() {
        run_with_big_stack(|| {
            // Token fragments that are glued together to form each input.
            const FRAGMENTS: &[&str] = &[
                "fn", "let", "if", "else", "for", "in", "while", "return", "struct", "impl", "pub", "(", ")", "{", "}", "[", "]", "<", ">", "+", "-", "*", "/", "%", "=", "==", "&&", "||", "..", "..=", "as", "i32", "u64", "f64", ".", ",", ";", ":", "::", "x", "0", "1", "255i8", "0xFF", "\"s\"", "true", "null", "|a|", "->",
            ];
            // Fixed seed, so every run generates the same inputs.
            let mut state: u64 = 0x9E3779B97F4A7C15;
            // Shift/xor/multiply pseudo-random generator over `state`.
            let mut next = || {
                state ^= state >> 12;
                state ^= state << 25;
                state ^= state >> 27;
                state = state.wrapping_mul(0x2545F4914F6CDD1D);
                state
            };

            for _ in 0..4000 {
                // Build an input of up to 39 fragments, randomly separated by spaces.
                let mut code = String::new();
                let tokens = (next() % 40) as usize;
                for _ in 0..tokens {
                    code.push_str(FRAGMENTS[(next() as usize) % FRAGMENTS.len()]);
                    if next() % 2 == 0 {
                        code.push(' ');
                    }
                }
                let result = std::panic::catch_unwind(|| {
                    let mut parser = Parser::new(code.clone().into_bytes());
                    let mut count = 0;
                    // Parse statements until an error or end of input; the
                    // counter guards against a parser that makes no progress.
                    loop {
                        match parser.stmt(false) {
                            Ok(_) => {
                                count += 1;
                                if parser.is_eof() || count > 1000 {
                                    break;
                                }
                            }
                            Err(_) => break,
                        }
                    }
                });
                assert!(result.is_ok(), "parser panicked on input: {:?}", code);
            }
        });
    }
944
    /// A local `let` inside one function may reuse the name of a function
    /// declared earlier.
    #[test]
    fn allows_local_name_to_shadow_prior_function() {
        parse_all(
            r#"
            fn chunk_id(x, y) {
                x + y
            }

            fn open() {
                let chunk_id = 1;
                chunk_id
            }
            "#,
        )
        .unwrap();
    }
961
962 #[test]
963 fn rejects_duplicate_function_args() {
964 let err = parse_all("fn open(value, value) { value }").unwrap_err();
965 assert!(err.to_string().contains("符号 value 已经声明"));
966 }
967
    /// Declaring the same local name twice in one function body is reported
    /// as a duplicate symbol.
    #[test]
    fn rejects_duplicate_local_let_names() {
        let err = parse_all(
            r#"
            fn open() {
                let value = 1;
                let value = 2;
                value
            }
            "#,
        )
        .unwrap_err();
        assert!(err.to_string().contains("符号 value 已经声明"));
    }
982
    /// Methods in separate `impl` blocks may share a name.
    #[test]
    fn allows_same_method_name_in_different_impl_blocks() {
        parse_all(
            r#"
            struct A {}
            struct B {}

            impl A {
                fn zero() { 0 }
            }

            impl B {
                fn zero() { 0 }
            }
            "#,
        )
        .unwrap();
    }
1001
1002 #[test]
1003 fn parses_scientific_float_suffixes() {
1004 let mut parser = Parser::new(b"1.7976931348623157e308f64".to_vec());
1005 assert_eq!(parser.number().unwrap(), Dynamic::F64(1.7976931348623157e308));
1006
1007 let mut parser = Parser::new(b"1e-3f32".to_vec());
1008 assert_eq!(parser.number().unwrap(), Dynamic::F32(1e-3f32));
1009 }
1010
1011 #[test]
1012 fn parses_immediate_closure_call() {
1013 let mut parser = Parser::new(b"|| { 1i32 }()".to_vec());
1014 let expr = parser.get_expr().unwrap();
1015 let ExprKind::Call { obj, params } = expr.kind else {
1016 panic!("expected closure call, got {expr:?}");
1017 };
1018 assert!(params.is_empty());
1019 let ExprKind::Closure { args, .. } = obj.kind else {
1020 panic!("expected closure callee, got {obj:?}");
1021 };
1022 assert!(args.is_empty());
1023 }
1024
1025 #[test]
1026 fn parses_empty_tuple_expression() {
1027 let mut parser = Parser::new(b"()".to_vec());
1028 let expr = parser.get_expr().unwrap();
1029 let ExprKind::Tuple(items) = expr.kind else {
1030 panic!("expected empty tuple, got {expr:?}");
1031 };
1032 assert!(items.is_empty());
1033 }
1034
1035 #[test]
1036 fn parses_explicit_generic_function_call() {
1037 let mut parser = Parser::new(b"value::<4>()".to_vec());
1038 let expr = parser.get_expr().unwrap();
1039 let ExprKind::Call { obj, params } = expr.kind else {
1040 panic!("expected function call, got {expr:?}");
1041 };
1042 assert!(params.is_empty());
1043 let ExprKind::Generic { obj, params } = obj.kind else {
1044 panic!("expected generic callee, got {obj:?}");
1045 };
1046 assert!(matches!(obj.kind, ExprKind::Ident(name) if name.as_str() == "value"));
1047 assert!(matches!(params.as_slice(), [Type::ConstInt(4)]));
1048 }
1049
    /// Regression fixture: a generic struct and an `impl` with two methods
    /// that mix casts, comparisons, indexing, nested `if`/`else` expressions
    /// and a `while` loop must parse without error.
    #[test]
    fn parses_bigfloat_cmp_context_segment() {
        let code = r#"
            struct BigFloat<N> { data: [u32; N], exp: i32, sign: bool }

            impl BigFloat<N> {
                fn abs_cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                    let self_high = self.exp + ((N - 1) as i32);
                    let rhs_high = rhs.exp + ((N - 1) as i32);
                    let high = if self_high >= rhs_high { self_high } else { rhs_high };
                    let low = if self.exp <= rhs.exp { self.exp } else { rhs.exp };
                    let result = 0i32;
                    let power = high;

                    while power >= low && result == 0i32 {
                        let a_idx = power - self.exp;
                        let b_idx = power - rhs.exp;
                        let a_limb = 0u32;
                        let b_limb = 0u32;

                        if a_idx >= 0i32 && a_idx < (N as i32) {
                            a_limb = self.data[a_idx as u32];
                        }
                        if b_idx >= 0i32 && b_idx < (N as i32) {
                            b_limb = rhs.data[b_idx as u32];
                        }

                        if a_limb > b_limb {
                            result = 1i32;
                        } else if a_limb < b_limb {
                            result = -1i32;
                        }

                        power -= 1i32;
                    }

                    result
                }

                pub fn cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                    if self.is_zero() && rhs.is_zero() {
                        0i32
                    } else if self.sign != rhs.sign {
                        if self.sign { -1i32 } else { 1i32 }
                    } else {
                        let cmp = self.abs_cmp(rhs);
                        if self.sign { -cmp } else { cmp }
                    }
                }
            }
        "#;
        parse_all(code).unwrap();
    }
1103
1104 #[test]
1105 fn parses_bigfloat_file() {
1106 let code = include_str!("../../zusts/bigfloat.zs");
1107 parse_all(code).unwrap();
1108 }
1109}