1use std::{collections::BTreeSet, fmt::Debug};
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A pair of byte offsets delimiting a region of the source buffer.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-width span whose start and end are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Self::new(pos, pos)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn merge(self, other: Self) -> Self {
        let start = usize::min(self.start, other.start);
        let end = usize::max(self.end, other.end);
        Self::new(start, end)
    }
}
35
36#[derive(Debug)]
37pub struct Parser {
38 pos: usize, buf: Vec<u8>, spans: Vec<usize>,
41 decl_scopes: Vec<BTreeSet<SmolStr>>,
42 impl_depth: usize,
43 depth: usize, fatal: bool, }
46
/// Upper bound on the nesting depth tracked by `Parser::enter_depth`.
pub const MAX_PARSE_DEPTH: usize = 128;

/// Bytes that can never be part of an identifier (whitespace, operators, brackets, quotes).
const NOT_IDENT: &[u8] = b" \t\n\r/*+-=(){}[];:,.<>!#$%^&|\\\"'";
/// Bytes treated as whitespace between tokens.
const WHITE_SPACE: &[u8] = b" \t\n\r";
57const TYPES: &[(&str, Type)] = &[
58 ("bool", Type::Bool),
59 ("string", Type::Str),
60 ("i8", Type::I8),
61 ("i16", Type::I16),
62 ("i32", Type::I32),
63 ("i64", Type::I64),
64 ("u8", Type::U8),
65 ("u16", Type::U16),
66 ("u32", Type::U32),
67 ("u64", Type::U64),
68 ("f16", Type::F16),
69 ("f32", Type::F32),
70 ("f64", Type::F64),
71];
/// Reserved words that `Parser::ident` refuses to return as identifiers.
const KEYWORDS: &[&str] = &[
    // Literals.
    "true", "false", "null",
    // Bindings and control flow.
    "let", "if", "else", "for", "in", "while",
    // Item declarations.
    "pub", "fn", "struct", "impl", "const", "static",
    // Jumps.
    "continue", "return", "break",
];
73
/// Parses a delimited list and evaluates to the filled collection.
///
/// * `$self` – parser-like value exposing `whitespace()`, `get()` and a `pos` field.
/// * `$start` – initial (usually empty) collection; must support `push`.
/// * `$end` – byte that terminates the list; it is consumed.
/// * `$sep` – separator byte; consumed when present after an item, but not required.
/// * `$item_expr` – expression evaluated once per item to parse it.
///
/// Uses `?`, so it must be expanded inside a function returning a compatible `Result`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut collected = $start;
        loop {
            $self.whitespace()?;
            // The terminator ends the list and is consumed.
            if $self.get()? == $end {
                $self.pos += 1;
                break collected;
            }
            collected.push($item_expr);
            $self.whitespace()?;
            // A separator after the item is optional.
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
    }};
}
94
/// Runs `$method` as a speculative parse and evaluates to its `Result`.
///
/// `pos`, `decl_scopes` and `impl_depth` of `$self` are snapshotted first. If `$method`
/// fails while `$self.fatal` is unset, the snapshot is restored so the caller can try an
/// alternative; on success, or on failure with `fatal` set, the state is left as it is.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let checkpoint = ($self.pos, $self.decl_scopes.clone(), $self.impl_depth);
        let outcome = $method;
        if outcome.is_err() && !$self.fatal {
            let (pos, scopes, impl_depth) = checkpoint;
            $self.pos = pos;
            $self.decl_scopes = scopes;
            $self.impl_depth = impl_depth;
        }
        outcome
    }};
}
114
115#[derive(Debug, thiserror::Error)]
116pub enum ParserErr {
117 #[error("期望字符 {0} 实际字符 {1}")]
118 ExpectChar(char, char),
119 #[error("未发现期望字符")]
120 NoCharCollect,
121 #[error("期望字符串 {0}")]
122 ExpectedString(SmolStr),
123 #[error("输入结束")]
124 EndofInput,
125 #[error("未关闭的注释")]
126 UncloseComment,
127 #[error("非法的原始字符串")]
128 IllegalRawString,
129 #[error("未关闭字符串")]
130 UnclosedString,
131 #[error("非字符串")]
132 NotString,
133 #[error("非数字")]
134 NotNumber,
135 #[error("符号 {0} 已经声明")]
136 DuplicateSymbol(SmolStr),
137 #[error("表达式嵌套过深")]
138 TooDeep,
139}
140
141impl Parser {
142 pub fn new(buf: Vec<u8>) -> Self {
143 Self { pos: 0, buf, spans: Vec::new(), decl_scopes: vec![BTreeSet::new()], impl_depth: 0, depth: 0, fatal: false }
144 }
145
146 fn enter_depth(&mut self) -> Result<()> {
153 self.depth += 1;
154 if self.depth > MAX_PARSE_DEPTH {
155 self.depth -= 1;
156 self.fatal = true;
157 return Err(ParserErr::TooDeep.into());
158 }
159 Ok(())
160 }
161
162 fn exit_depth(&mut self) {
163 self.depth = self.depth.saturating_sub(1);
164 }
165
166 fn check_fatal(&self) -> Result<()> {
168 if self.fatal { Err(ParserErr::TooDeep.into()) } else { Ok(()) }
169 }
170
171 fn push_decl_scope(&mut self) {
172 self.decl_scopes.push(BTreeSet::new());
173 }
174
175 fn pop_decl_scope(&mut self) {
176 if self.decl_scopes.len() > 1 {
177 self.decl_scopes.pop();
178 }
179 }
180
181 fn declare_symbol(&mut self, name: &SmolStr) -> Result<()> {
182 if name.is_empty() {
183 return Ok(());
184 }
185 if self.decl_scopes.iter().rev().any(|scope| scope.contains(name)) {
186 return Err(ParserErr::DuplicateSymbol(name.clone()).into());
187 }
188 self.decl_scopes.last_mut().expect("parser always has a declaration scope").insert(name.clone());
189 Ok(())
190 }
191
192 fn declare_symbol_in_current_scope(&mut self, name: &SmolStr) -> Result<()> {
193 if name.is_empty() {
194 return Ok(());
195 }
196 let scope = self.decl_scopes.last_mut().expect("parser always has a declaration scope");
197 if scope.contains(name) {
198 return Err(ParserErr::DuplicateSymbol(name.clone()).into());
199 }
200 scope.insert(name.clone());
201 Ok(())
202 }
203
204 fn declare_function_name(&mut self, name: &SmolStr) -> Result<()> {
205 if self.impl_depth > 0 { self.declare_symbol_in_current_scope(name) } else { self.declare_symbol(name) }
206 }
207
208 fn declare_args(&mut self, args: &[(SmolStr, Type)]) -> Result<()> {
209 for (name, _) in args {
210 self.declare_symbol(name)?;
211 }
212 Ok(())
213 }
214
215 fn declare_pattern_symbols(&mut self, pat: &Pattern) -> Result<()> {
216 match &pat.kind {
217 PatternKind::Ident { name, .. } => self.declare_symbol_in_current_scope(name),
218 PatternKind::Tuple(items) => {
219 for item in items {
220 self.declare_pattern_symbols(item)?;
221 }
222 Ok(())
223 }
224 PatternKind::List { elems, .. } => {
225 for item in elems {
226 self.declare_pattern_symbols(item)?;
227 }
228 Ok(())
229 }
230 PatternKind::Wildcard | PatternKind::Var { .. } | PatternKind::Literal(_) | PatternKind::Member(_, _) | PatternKind::Idx(_, _) => Ok(()),
231 }
232 }
233
234 fn function_body(&mut self, args: &[(SmolStr, Type)]) -> Result<Stmt> {
235 self.push_decl_scope();
236 let result = (|| {
237 self.declare_args(args)?;
238 self.block()
239 })();
240 self.pop_decl_scope();
241 result
242 }
243
244 fn impl_body(&mut self) -> Result<Stmt> {
245 self.push_decl_scope();
246 self.impl_depth += 1;
247 let result = self.block();
248 self.impl_depth -= 1;
249 self.pop_decl_scope();
250 result
251 }
252
253 pub fn is_eof(&self) -> bool {
254 self.pos >= self.buf.len()
255 }
256
257 pub fn get(&self) -> Result<u8> {
258 self.buf.get(self.pos).cloned().ok_or(ParserErr::EndofInput.into())
260 }
261
262 pub fn take(&mut self, ch: u8) -> Result<()> {
263 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
265 self.pos += 1;
266 Ok(())
267 } else {
268 Err(ParserErr::ExpectChar(ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char).into())
269 }
270 }
271
272 pub fn until(&mut self, ch: u8) -> Result<()> {
273 self.whitespace()?;
275 self.take(ch)
276 }
277
278 pub fn ahead(&self) -> Result<u8> {
279 self.buf.get(self.pos + 1).cloned().ok_or(ParserErr::EndofInput.into())
281 }
282
283 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
284 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
285 }
286
287 pub fn error_stmt(&self) -> SmolStr {
288 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
289 }
290
291 pub fn current_pos(&self) -> usize {
292 self.pos
293 }
294
295 pub fn span_from(&self, start: usize) -> Span {
296 Span::new(start, self.pos)
297 }
298
299 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
300 let start = self.pos;
301 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
302 self.pos += 1;
303 }
304 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::NoCharCollect.into()) }
305 }
306
307 pub fn just(&mut self, pattern: &str) -> Result<()> {
308 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
309 self.pos += pattern.len();
310 Ok(())
311 } else {
312 Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into())
313 }
314 }
315
316 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
317 self.just(pattern)?;
318 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
319 self.pos -= pattern.len();
320 return Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into());
321 }
322 Ok(())
323 }
324
325 pub fn get_type(&mut self) -> Result<Type> {
326 self.whitespace()?;
327 if self.get()? == b'[' {
328 self.pos += 1;
329 let ty = self.get_type()?;
330 self.until(b';')?;
331 self.whitespace()?;
332 let len = self.get_type_param()?;
333 self.until(b']')?;
334 if let Type::ConstInt(number) = len {
335 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
336 Ok(Type::Array(std::rc::Rc::new(ty), number))
337 } else {
338 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
339 }
340 } else {
341 for ty in TYPES {
342 if self.just(ty.0).is_ok() {
343 return Ok(ty.1.clone());
344 }
345 }
346 let name = self.ident()?;
347 if self.take(b'<').is_ok() {
348 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
349 Ok(Type::Ident { name, params })
350 } else {
351 Ok(Type::Ident { name, params: Vec::new() })
352 }
353 }
354 }
355
356 pub fn get_type_param(&mut self) -> Result<Type> {
357 self.const_type_param_add()
358 }
359
360 fn const_type_param_add(&mut self) -> Result<Type> {
361 let mut left = self.const_type_param_mul()?;
362 loop {
363 self.whitespace()?;
364 let op = if self.take(b'+').is_ok() {
365 Some(ConstIntOp::Add)
366 } else if self.take(b'-').is_ok() {
367 Some(ConstIntOp::Sub)
368 } else {
369 None
370 };
371 let Some(op) = op else { break };
372 let right = self.const_type_param_mul()?;
373 left = Self::fold_const_type_binary(op, left, right)?;
374 }
375 Ok(left)
376 }
377
378 fn const_type_param_mul(&mut self) -> Result<Type> {
379 let mut left = self.const_type_param_primary()?;
380 loop {
381 self.whitespace()?;
382 let op = if self.take(b'*').is_ok() {
383 Some(ConstIntOp::Mul)
384 } else if self.take(b'/').is_ok() {
385 Some(ConstIntOp::Div)
386 } else if self.take(b'%').is_ok() {
387 Some(ConstIntOp::Mod)
388 } else {
389 None
390 };
391 let Some(op) = op else { break };
392 let right = self.const_type_param_primary()?;
393 left = Self::fold_const_type_binary(op, left, right)?;
394 }
395 Ok(left)
396 }
397
398 fn const_type_param_primary(&mut self) -> Result<Type> {
399 self.whitespace()?;
400 if self.take(b'(').is_ok() {
401 let ty = self.get_type_param()?;
402 self.until(b')')?;
403 return Ok(ty);
404 }
405 if self.get()?.is_ascii_digit() {
406 let value = self.number()?;
407 if let Some(value) = value.as_uint() {
408 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
409 Ok(Type::ConstInt(value))
410 } else if let Some(value) = value.as_int() {
411 Ok(Type::ConstInt(value))
412 } else {
413 Err(anyhow!("模板数字参数必须是整数"))
414 }
415 } else {
416 self.get_type()
417 }
418 }
419
420 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
421 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
422 let value = match op {
423 ConstIntOp::Add => left + right,
424 ConstIntOp::Sub => left - right,
425 ConstIntOp::Mul => left * right,
426 ConstIntOp::Div => {
427 if *right == 0 {
428 return Err(anyhow!("模板整数除以 0"));
429 }
430 left / right
431 }
432 ConstIntOp::Mod => {
433 if *right == 0 {
434 return Err(anyhow!("模板整数取模 0"));
435 }
436 left % right
437 }
438 };
439 Ok(Type::ConstInt(value))
440 } else {
441 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
442 }
443 }
444
445 pub fn comment(&mut self) -> Result<()> {
446 if self.get()? == b'/' && self.ahead()? == b'/' {
447 self.pos += 2;
448 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
449 self.pos += 1;
450 }
451 Ok(())
452 } else if self.get()? == b'/' && self.ahead()? == b'*' {
453 self.pos += 2;
454 while self.pos + 1 < self.buf.len() {
455 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
456 self.pos += 2;
457 return Ok(());
458 }
459 self.pos += 1;
460 }
461 Err(ParserErr::UncloseComment.into())
462 } else {
463 Ok(())
464 }
465 }
466
467 pub fn whitespace(&mut self) -> Result<()> {
468 while self.pos < self.buf.len() {
469 self.comment()?;
470 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
471 break;
472 }
473 self.pos += 1;
474 }
475 Ok(())
476 }
477
478 pub fn ident(&mut self) -> Result<SmolStr> {
479 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
480 loop {
481 let save_pos = self.pos;
482 if self.just("::").is_err() {
483 break;
484 }
485 match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
486 Ok((_, next_stop)) => {
487 stop = next_stop;
488 }
489 Err(_) => {
490 self.pos = save_pos;
491 break;
492 }
493 }
494 }
495 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
496 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
497 }
498 Ok(self.get_str(start, stop))
499 }
500
501 pub fn string(&mut self) -> Result<SmolStr> {
502 if self.get()? != b'"' {
503 return Err(ParserErr::NotString.into());
504 }
505 self.pos += 1;
506 let mut text_buf = Vec::new();
507 while self.pos < self.buf.len() {
508 if self.buf[self.pos] == b'\\' {
509 self.pos += 1;
511 match self.buf[self.pos] {
512 b'n' => {
513 text_buf.push(b'\n');
514 self.pos += 1;
515 }
516 b'r' => {
517 text_buf.push(b'\r');
518 self.pos += 1;
519 }
520 b't' => {
521 text_buf.push(b'\t');
522 self.pos += 1;
523 }
524 ch @ (b'\\' | b'"') => {
525 text_buf.push(ch);
526 self.pos += 1;
527 }
528 b'u' => {
529 self.pos += 1;
530 let unicode = if self.take(b'{').is_ok() {
531 let code = self.hex()?;
532 self.pos += 1;
533 code
534 } else {
535 self.hex()?
536 };
537 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
538 let mut utf8_buf = [0u8; 4];
539 let s = ch.encode_utf8(&mut utf8_buf);
540 text_buf.extend_from_slice(s.as_bytes());
541 }
542 b'x' => {
543 self.pos += 1;
544 if self.pos + 2 < self.buf.len() {
545 let start = self.pos;
546 self.pos += 2;
547 let hex = &self.buf[start..self.pos];
548 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
549 text_buf.push(code as u8);
550 }
551 }
552 other => {
553 return Err(anyhow!("invalid escape character: {}", other as char));
554 }
555 }
556 } else {
557 if self.buf[self.pos] == b'"' {
558 self.pos += 1;
559 return Ok(String::from_utf8(text_buf)?.into());
560 }
561 text_buf.push(self.buf[self.pos]);
562 self.pos += 1;
563 }
564 }
565 Err(ParserErr::UnclosedString.into())
566 }
567
568 pub fn text(&mut self) -> Result<SmolStr> {
569 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
570 self.pos += 1;
571 let mut end = String::from("\"");
572 while self.buf[self.pos] == b'#' {
573 end.push('#');
574 self.pos += 1;
575 }
576 if self.get()? != b'"' {
577 return Err(ParserErr::IllegalRawString.into());
578 }
579 self.pos += 1;
580 let start_pos = self.pos;
581 while self.pos < self.buf.len() {
582 if self.just(&end).is_ok() {
583 break;
584 }
585 self.pos += 1;
586 }
587 Ok(self.get_str(start_pos, self.pos - end.len()))
588 } else {
589 self.string()
590 }
591 }
592
593 fn hex(&mut self) -> Result<i32> {
594 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
596 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
597 }
598
599 fn numeric_suffix(&mut self) -> Option<Type> {
600 let save = self.pos;
601 for (name, ty) in TYPES {
602 if !ty.is_native() || *ty == Type::F16 {
603 continue;
604 }
605 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
606 self.pos += name.len();
607 return Some(ty.clone());
608 }
609 }
610 self.pos = save;
611 None
612 }
613
614 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
615 let ty = suffix.unwrap_or(Type::I64);
617 let magnitude = u128::from_str_radix(digits, radix).map_err(|_| anyhow!("整数字面量 {} 超出可表示范围", digits))?;
619 let (signed, bits) = match ty {
620 Type::I8 => (true, 8u32),
621 Type::I16 => (true, 16),
622 Type::I32 => (true, 32),
623 Type::I64 => (true, 64),
624 Type::U8 => (false, 8),
625 Type::U16 => (false, 16),
626 Type::U32 => (false, 32),
627 Type::U64 => (false, 64),
628 Type::F32 => return Ok(Dynamic::F32(magnitude as f32)),
629 Type::F64 => return Ok(Dynamic::F64(magnitude as f64)),
630 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
631 };
632 let unsigned_max = (1u128 << bits) - 1;
633 let max_allowed = if radix == 10 { if signed { unsigned_max / 2 + 1 } else { unsigned_max } } else { unsigned_max };
636 if magnitude > max_allowed {
637 return Err(anyhow!("整数字面量 {} 超出 {:?} 的范围", digits, ty));
638 }
639 Ok(match ty {
640 Type::I8 => Dynamic::I8(magnitude as i8),
641 Type::I16 => Dynamic::I16(magnitude as i16),
642 Type::I32 => Dynamic::I32(magnitude as i32),
643 Type::I64 => Dynamic::I64(magnitude as i64),
644 Type::U8 => Dynamic::U8(magnitude as u8),
645 Type::U16 => Dynamic::U16(magnitude as u16),
646 Type::U32 => Dynamic::U32(magnitude as u32),
647 Type::U64 => Dynamic::U64(magnitude as u64),
648 _ => unreachable!(),
649 })
650 }
651
652 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
653 let value: f64 = digits.parse()?;
654 Ok(match suffix.unwrap_or(Type::F32) {
655 Type::I8 => Dynamic::I8(value as i8),
656 Type::I16 => Dynamic::I16(value as i16),
657 Type::I32 => Dynamic::I32(value as i32),
658 Type::I64 => Dynamic::I64(value as i64),
659 Type::U8 => Dynamic::U8(value as u8),
660 Type::U16 => Dynamic::U16(value as u16),
661 Type::U32 => Dynamic::U32(value as u32),
662 Type::U64 => Dynamic::U64(value as u64),
663 Type::F32 => Dynamic::F32(value as f32),
664 Type::F64 => Dynamic::F64(value),
665 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
666 })
667 }
668
669 pub fn number(&mut self) -> Result<Dynamic> {
670 if self.get()? == b'0' {
671 if [b'b', b'B'].contains(&self.ahead()?) {
672 self.pos += 2;
673 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
674 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
675 let suffix = self.numeric_suffix();
676 return self.int_literal(&s, 2, suffix);
677 } else if [b'o', b'O'].contains(&self.ahead()?) {
678 self.pos += 2;
679 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
680 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
681 let suffix = self.numeric_suffix();
682 return self.int_literal(&s, 8, suffix);
683 } else if [b'x', b'X'].contains(&self.ahead()?) {
684 self.pos += 2;
685 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
686 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
687 let suffix = self.numeric_suffix();
688 return self.int_literal(&s, 16, suffix);
689 }
690 }
691 let start = self.pos;
692 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
693 self.pos += 1;
694 }
695 let mut is_float = false;
696 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
697 is_float = true;
698 self.pos += 1;
699 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
700 self.pos += 1;
701 }
702 }
703 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') {
704 let mut exp_pos = self.pos + 1;
705 if exp_pos < self.buf.len() && (self.buf[exp_pos] == b'+' || self.buf[exp_pos] == b'-') {
706 exp_pos += 1;
707 }
708 if exp_pos < self.buf.len() && self.buf[exp_pos] <= b'9' && self.buf[exp_pos] >= b'0' {
709 is_float = true;
710 self.pos = exp_pos + 1;
711 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
712 self.pos += 1;
713 }
714 }
715 }
716 if self.pos > start {
717 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
718 let suffix = self.numeric_suffix();
719 if is_float {
720 return self.float_literal(&text, suffix);
721 }
722 return self.int_literal(&text, 10, suffix);
723 }
724 Err(ParserErr::NotNumber.into())
725 }
726}
727
728#[cfg(test)]
729mod tests {
730 use super::*;
731
732 fn parse_all(code: &str) -> Result<Vec<Stmt>> {
733 let mut parser = Parser::new(code.as_bytes().to_vec());
734 let mut stmts = Vec::new();
735 loop {
736 match parser.stmt(false) {
737 Ok(stmt) => stmts.push(stmt),
738 Err(err) => {
739 if parser.is_eof() {
740 return Ok(stmts);
741 }
742 return Err(err);
743 }
744 }
745 }
746 }
747
/// Runs `f` on a dedicated thread with a 64 MiB stack and waits for it.
///
/// Panics if the thread cannot be spawned or if `f` panics.
fn run_with_big_stack(f: impl FnOnce() + Send + 'static) {
    const STACK_BYTES: usize = 64 * 1024 * 1024;
    let worker = std::thread::Builder::new().stack_size(STACK_BYTES).spawn(f).unwrap();
    worker.join().unwrap();
}
754
/// Parentheses nested beyond the depth limit must yield the "too deep" error.
#[test]
fn deeply_nested_parens_error_instead_of_stack_overflow() {
    run_with_big_stack(|| {
        let levels = MAX_PARSE_DEPTH + 50;
        let (open, close) = ("(".repeat(levels), ")".repeat(levels));
        let source = format!("{}1{}", open, close);
        let mut parser = Parser::new(source.into_bytes());
        let err = parser.get_expr().unwrap_err();
        assert!(err.to_string().contains("嵌套过深"), "got: {err}");
    });
}
765
/// Blocks nested beyond the depth limit must yield the "too deep" error.
#[test]
fn deeply_nested_blocks_error_instead_of_stack_overflow() {
    run_with_big_stack(|| {
        let levels = MAX_PARSE_DEPTH + 50;
        let (open, close) = ("{".repeat(levels), "}".repeat(levels));
        let source = format!("fn f() {}{}{}", open, "1", close);
        let err = parse_all(&source).unwrap_err();
        assert!(err.to_string().contains("嵌套过深"), "got: {err}");
    });
}
775
/// Nesting well inside the limit must still parse.
#[test]
fn normal_nesting_within_limit_parses() {
    const LEVELS: usize = 32;
    let source = format!("{}1{}", "(".repeat(LEVELS), ")".repeat(LEVELS));
    Parser::new(source.into_bytes()).get_expr().unwrap();
}
783
784 fn parse_literal(code: &str) -> Result<Dynamic> {
785 let mut parser = Parser::new(code.as_bytes().to_vec());
786 match parser.get_expr()?.kind {
787 crate::ExprKind::Value(value) => Ok(value),
788 other => Err(anyhow!("不是字面量: {:?}", other)),
789 }
790 }
791
/// Integer literals without a suffix are typed as `i64`.
#[test]
fn unsuffixed_integer_defaults_to_i64() {
    let cases = [("5", 5i64), ("3000000000", 3000000000i64)];
    for (source, expected) in cases {
        assert_eq!(parse_literal(source).unwrap(), Dynamic::I64(expected));
    }
}
798
/// Literals that do not fit their (explicit or default) type are rejected.
#[test]
fn out_of_range_integer_literals_error() {
    assert!(parse_literal("99999999999999999999999999999999999999999").is_err());
    for source in ["255i8", "70000i16", "256u8"] {
        let message = parse_literal(source).unwrap_err().to_string();
        assert!(message.contains("超出"));
    }
}
808
/// The magnitude of a signed type's minimum value is accepted and maps to that minimum.
#[test]
fn signed_min_magnitude_literals_allowed() {
    let smallest_i8 = parse_literal("128i8").unwrap();
    assert_eq!(smallest_i8, Dynamic::I8(-128));

    let smallest_i64 = parse_literal("9223372036854775808").unwrap();
    assert_eq!(smallest_i64, Dynamic::I64(i64::MIN));
}
815
/// Hex literals are interpreted as bit patterns of the target type.
#[test]
fn hex_literals_keep_bit_pattern() {
    let cases = [
        ("0xFFFFFFFF", Dynamic::I64(0xFFFFFFFF)),
        ("0xFFi8", Dynamic::I8(-1)),
        ("0xFFFFFFFFu32", Dynamic::U32(u32::MAX)),
    ];
    for (source, expected) in cases {
        assert_eq!(parse_literal(source).unwrap(), expected);
    }
}
824
825 fn shape(code: &str) -> String {
827 let mut parser = Parser::new(code.as_bytes().to_vec());
828 let expr = parser.get_expr().expect("parse");
829 fmt_shape(&expr)
830 }
831
832 fn binop_sym(op: &crate::BinaryOp) -> &'static str {
833 use crate::BinaryOp::*;
834 match op {
835 Add => "+",
836 Sub => "-",
837 Mul => "*",
838 Div => "/",
839 Mod => "%",
840 Shl => "<<",
841 Shr => ">>",
842 BitAnd => "&",
843 BitOr => "|",
844 BitXor => "^",
845 Assign => "=",
846 AddAssign => "+=",
847 Eq => "==",
848 Ne => "!=",
849 Lt => "<",
850 Gt => ">",
851 Le => "<=",
852 Ge => ">=",
853 And => "&&",
854 Or => "||",
855 Idx => "idx",
856 other => {
857 let _ = other;
858 "?"
859 }
860 }
861 }
862
863 fn fmt_shape(expr: &crate::Expr) -> String {
864 use crate::ExprKind::*;
865 match &expr.kind {
866 Value(v) => format!("{:?}", v).replace("I64(", "").replace("I32(", "").trim_end_matches(')').to_string(),
867 Ident(name) => name.to_string(),
868 Unary { op, value } => {
869 let s = if matches!(op, crate::UnaryOp::Neg) { "-" } else { "!" };
870 format!("({} {})", s, fmt_shape(value))
871 }
872 Binary { left, op, right } => format!("({} {} {})", binop_sym(op), fmt_shape(left), fmt_shape(right)),
873 Range { start, stop, inclusive } => format!("({} {} {})", if *inclusive { "..=" } else { ".." }, fmt_shape(start), fmt_shape(stop)),
874 Typed { value, ty } => format!("(as {} {:?})", fmt_shape(value), ty),
875 other => format!("{:?}", other),
876 }
877 }
878
/// Golden shapes pinning operator precedence and associativity.
#[test]
fn precedence_and_associativity_golden() {
    let golden = [
        ("1 + 2 * 3", "(+ 1 (* 2 3))"),
        ("1 * 2 + 3", "(+ (* 1 2) 3)"),
        ("1 - 2 - 3", "(- (- 1 2) 3)"),
        ("8 / 4 / 2", "(/ (/ 8 4) 2)"),
        ("2 + 3 << 4", "(<< (+ 2 3) 4)"),
        ("1 | 2 ^ 3 & 4", "(| 1 (^ 2 (& 3 4)))"),
        ("1 + 2 == 3", "(== (+ 1 2) 3)"),
        ("a && b || c", "(|| (&& a b) c)"),
        ("-a * b", "(* (- a) b)"),
        ("!a == b", "(== (! a) b)"),
    ];
    for (source, expected) in golden {
        assert_eq!(shape(source), expected);
    }
}
899
/// Golden shapes pinning how assignment, ranges and `as` bind relative to arithmetic.
#[test]
fn assignment_range_and_as_precedence_golden() {
    let golden = [
        ("a = b + c", "(= a (+ b c))"),
        ("a = b = c", "(= (= a b) c)"),
        ("a += b * c", "(+= a (* b c))"),
        ("1 + 1 .. n * 2", "(.. (+ 1 1) (* n 2))"),
        ("0 ..= n - 1", "(..= 0 (- n 1))"),
        ("a + b as i64", "(as (+ a b) I64)"),
    ];
    for (source, expected) in golden {
        assert_eq!(shape(source), expected);
    }
}
917
/// Feeds pseudo-random token soup to the parser and asserts that it never panics.
///
/// Inputs are built from a fixed fragment list using a deterministic xorshift-style
/// generator, so every run exercises the same 4000 programs.
#[test]
fn parser_never_panics_on_random_input() {
    run_with_big_stack(|| {
        const FRAGMENTS: &[&str] = &[
            "fn", "let", "if", "else", "for", "in", "while", "return", "struct", "impl", "pub", "(", ")", "{", "}", "[", "]", "<", ">", "+", "-", "*", "/", "%", "=", "==", "&&", "||", "..", "..=", "as", "i32",
            "u64", "f64", ".", ",", ";", ":", "::", "x", "0", "1", "255i8", "0xFF", "\"s\"", "true", "null", "|a|", "->",
        ];
        const ROUNDS: usize = 4000;
        const MAX_STATEMENTS: usize = 1000;

        let mut state: u64 = 0x9E3779B97F4A7C15;
        let mut next = || {
            state ^= state >> 12;
            state ^= state << 25;
            state ^= state >> 27;
            state = state.wrapping_mul(0x2545F4914F6CDD1D);
            state
        };

        for _ in 0..ROUNDS {
            let token_count = (next() % 40) as usize;
            let mut code = String::new();
            for _ in 0..token_count {
                let fragment = FRAGMENTS[(next() as usize) % FRAGMENTS.len()];
                code.push_str(fragment);
                if next() % 2 == 0 {
                    code.push(' ');
                }
            }
            let outcome = std::panic::catch_unwind(|| {
                let mut parser = Parser::new(code.clone().into_bytes());
                let mut parsed = 0usize;
                // Stop at the first error, at end of input, or after a generous cap.
                while parser.stmt(false).is_ok() {
                    parsed += 1;
                    if parser.is_eof() || parsed > MAX_STATEMENTS {
                        break;
                    }
                }
            });
            assert!(outcome.is_ok(), "parser panicked on input: {:?}", code);
        }
    });
}
967
/// A `let` inside a function may reuse the name of an earlier top-level function.
#[test]
fn allows_local_name_to_shadow_prior_function() {
    let source = r#"
        fn chunk_id(x, y) {
            x + y
        }

        fn open() {
            let chunk_id = 1;
            chunk_id
        }
        "#;
    parse_all(source).unwrap();
}
984
/// Two parameters with the same name are reported as a duplicate symbol.
#[test]
fn rejects_duplicate_function_args() {
    let message = parse_all("fn open(value, value) { value }").unwrap_err().to_string();
    assert!(message.contains("符号 value 已经声明"));
}
990
/// Two `let` bindings with the same name in one scope are reported as a duplicate symbol.
#[test]
fn rejects_duplicate_local_let_names() {
    let source = r#"
        fn open() {
            let value = 1;
            let value = 2;
            value
        }
        "#;
    let message = parse_all(source).unwrap_err().to_string();
    assert!(message.contains("符号 value 已经声明"));
}
1005
/// Methods in separate `impl` blocks may share a name.
#[test]
fn allows_same_method_name_in_different_impl_blocks() {
    let source = r#"
        struct A {}
        struct B {}

        impl A {
            fn zero() { 0 }
        }

        impl B {
            fn zero() { 0 }
        }
        "#;
    parse_all(source).unwrap();
}
1024
/// Exponent notation combines with `f64` / `f32` suffixes.
#[test]
fn parses_scientific_float_suffixes() {
    let wide = Parser::new(b"1.7976931348623157e308f64".to_vec()).number().unwrap();
    assert_eq!(wide, Dynamic::F64(1.7976931348623157e308));

    let narrow = Parser::new(b"1e-3f32".to_vec()).number().unwrap();
    assert_eq!(narrow, Dynamic::F32(1e-3f32));
}
1033
/// A closure literal followed directly by `()` parses as a call on that closure.
#[test]
fn parses_immediate_closure_call() {
    let expr = Parser::new(b"|| { 1i32 }()".to_vec()).get_expr().unwrap();

    let ExprKind::Call { obj, params } = expr.kind else {
        panic!("expected closure call, got {expr:?}");
    };
    assert!(params.is_empty());

    let ExprKind::Closure { args, .. } = obj.kind else {
        panic!("expected closure callee, got {obj:?}");
    };
    assert!(args.is_empty());
}
1047
/// `()` parses as a tuple with no elements.
#[test]
fn parses_empty_tuple_expression() {
    let expr = Parser::new(b"()".to_vec()).get_expr().unwrap();
    let ExprKind::Tuple(items) = expr.kind else {
        panic!("expected empty tuple, got {expr:?}");
    };
    assert!(items.is_empty());
}
1057
/// `name::<4>()` parses as a call whose callee is a generic instantiation of `name`.
#[test]
fn parses_explicit_generic_function_call() {
    let expr = Parser::new(b"value::<4>()".to_vec()).get_expr().unwrap();

    let ExprKind::Call { obj, params } = expr.kind else {
        panic!("expected function call, got {expr:?}");
    };
    assert!(params.is_empty());

    let ExprKind::Generic { obj, params } = obj.kind else {
        panic!("expected generic callee, got {obj:?}");
    };
    assert!(matches!(obj.kind, ExprKind::Ident(name) if name.as_str() == "value"));
    assert!(matches!(params.as_slice(), [Type::ConstInt(4)]));
}
1072
/// Regression sample: a generic struct with comparison methods must parse end to end.
#[test]
fn parses_bigfloat_cmp_context_segment() {
    let source = r#"
        struct BigFloat<N> { data: [u32; N], exp: i32, sign: bool }

        impl BigFloat<N> {
            fn abs_cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                let self_high = self.exp + ((N - 1) as i32);
                let rhs_high = rhs.exp + ((N - 1) as i32);
                let high = if self_high >= rhs_high { self_high } else { rhs_high };
                let low = if self.exp <= rhs.exp { self.exp } else { rhs.exp };
                let result = 0i32;
                let power = high;

                while power >= low && result == 0i32 {
                    let a_idx = power - self.exp;
                    let b_idx = power - rhs.exp;
                    let a_limb = 0u32;
                    let b_limb = 0u32;

                    if a_idx >= 0i32 && a_idx < (N as i32) {
                        a_limb = self.data[a_idx as u32];
                    }
                    if b_idx >= 0i32 && b_idx < (N as i32) {
                        b_limb = rhs.data[b_idx as u32];
                    }

                    if a_limb > b_limb {
                        result = 1i32;
                    } else if a_limb < b_limb {
                        result = -1i32;
                    }

                    power -= 1i32;
                }

                result
            }

            pub fn cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                if self.is_zero() && rhs.is_zero() {
                    0i32
                } else if self.sign != rhs.sign {
                    if self.sign { -1i32 } else { 1i32 }
                } else {
                    let cmp = self.abs_cmp(rhs);
                    if self.sign { -cmp } else { cmp }
                }
            }
        }
        "#;
    parse_all(source).unwrap();
}
1126
/// The bundled `bigfloat.zs` script must parse without error.
#[test]
fn parses_bigfloat_file() {
    const SOURCE: &str = include_str!("../../zusts/bigfloat.zs");
    parse_all(SOURCE).unwrap();
}
1132}