1use std::{collections::BTreeSet, fmt::Debug};
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A byte range in the source, stored as a `start` and an `end` offset.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset where the range begins.
    pub start: usize,
    /// Offset where the range ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Builds a zero-width span sitting at `pos`.
    pub const fn empty(pos: usize) -> Self {
        Self { start: pos, end: pos }
    }

    /// Returns the smallest span that covers both `self` and `other`.
    ///
    /// Written with plain comparisons so that it can be a `const fn`, like the
    /// two constructors above.
    pub const fn merge(self, other: Self) -> Self {
        let start = if other.start < self.start { other.start } else { self.start };
        let end = if other.end > self.end { other.end } else { self.end };
        Self { start, end }
    }
}
35
/// Parser state: the input bytes, a cursor into them, and the bookkeeping used
/// for declaration scopes and nesting limits.
#[derive(Debug)]
pub struct Parser {
    /// Current byte offset into `buf`.
    pos: usize,
    /// Raw source bytes being parsed.
    buf: Vec<u8>,
    /// Recorded start offsets; `error_stmt` slices from the last one up to `pos`.
    spans: Vec<usize>,
    /// Stack of declared-name sets, innermost scope last. It starts with one
    /// entry and `pop_decl_scope` never removes that last entry.
    decl_scopes: Vec<BTreeSet<SmolStr>>,
    /// Incremented for the duration of `impl_body`; while non-zero, function
    /// names are only checked against the current scope.
    impl_depth: usize,
    /// Incremented for the duration of `function_body`.
    fn_body_depth: usize,
    /// Incremented for the duration of `impl_body`.
    impl_body_depth: usize,
    /// NOTE(review): not used in this part of the file; presumably numbers
    /// generated `match` temporaries. Confirm against the users in the crate.
    pub(crate) match_counter: usize,
    /// Current nesting depth, maintained by `enter_depth` / `exit_depth`.
    depth: usize,
    /// Set once `MAX_PARSE_DEPTH` is exceeded; `try_parse!` stops rewinding
    /// when this is set.
    fatal: bool,
}
55
/// Largest value `Parser::depth` may reach before `enter_depth` fails.
pub const MAX_PARSE_DEPTH: usize = 128;

/// Bytes that end an identifier; `ident` accepts every byte not listed here.
const NOT_IDENT: &[u8] = &[
    b' ', b'\t', b'\n', b'\r', b'/', b'*', b'+', b'-', b'=', b'(', b')', b'{', b'}', b'[', b']', b';', b':', b',',
    b'.', b'<', b'>', b'!', b'#', b'$', b'%', b'^', b'&', b'|', b'\\', b'"', b'\'',
];
/// Bytes skipped by `whitespace`.
const WHITE_SPACE: &[u8] = &[b' ', b'\t', b'\n', b'\r'];
/// Built-in type names and the `Type` each maps to. Also scanned by
/// `numeric_suffix`, which only uses the entries whose `is_native()` is true.
const TYPES: &[(&str, Type)] = &[
    ("bool", Type::Bool),
    ("string", Type::Str),
    ("i8", Type::I8),
    ("i16", Type::I16),
    ("i32", Type::I32),
    ("i64", Type::I64),
    ("u8", Type::U8),
    ("u16", Type::U16),
    ("u32", Type::U32),
    ("u64", Type::U64),
    ("f16", Type::F16),
    ("f32", Type::F32),
    ("f64", Type::F64),
];
/// Reserved words; `ident` rejects any identifier equal to one of these.
const KEYWORDS: &[&str] = &[
    "true", "false", "null", "let", "if", "else", "for", "in", "while", "pub", "fn", "struct", "impl", "const",
    "static", "continue", "return", "break", "match",
];
82
/// Parses a delimited list into the collection `$start`.
///
/// Repeats: skip whitespace; if the next byte is `$end`, consume it and stop;
/// otherwise evaluate `$item_expr`, push the result, skip whitespace, and
/// consume one `$sep` if it is present. A separator between items is accepted
/// but not required. Uses `?`, so it must be expanded inside a function that
/// returns `Result`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut items = $start;
        loop {
            $self.whitespace()?;
            if $self.get()? == $end {
                $self.pos += 1;
                break;
            }
            let item = $item_expr;
            items.push(item);
            $self.whitespace()?;
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
        items
    }};
}
103
/// Evaluates `$method` as a speculative parse.
///
/// On `Ok` the result is passed through. On `Err` the parser's `pos`,
/// `decl_scopes` and `impl_depth` are put back to the values they had before
/// the attempt, and the error is returned. If `fatal` is set the error is
/// returned without rewinding.
///
/// NOTE(review): `depth`, `fn_body_depth` and `impl_body_depth` are not
/// restored here; confirm every caller balances them on its own error paths.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        // Snapshot taken before running the attempt.
        let save_pos = $self.pos;
        let save_decl_scopes = $self.decl_scopes.clone();
        let save_impl_depth = $self.impl_depth;
        match $method {
            Ok(expr) => Ok(expr),
            // A fatal error must surface as-is, so no backtracking.
            Err(e) if $self.fatal => Err(e),
            Err(e) => {
                $self.pos = save_pos;
                $self.decl_scopes = save_decl_scopes;
                $self.impl_depth = save_impl_depth;
                Err(e)
            }
        }
    }};
}
123
124#[derive(Debug, thiserror::Error)]
125pub enum ParserErr {
126 #[error("{message}")]
127 Spanned { message: String, span: Span },
128}
129
130impl ParserErr {
131 pub fn new(message: impl Into<String>, span: Span) -> Self {
133 Self::Spanned { message: message.into(), span }
134 }
135
136 pub fn at(message: impl Into<String>, pos: usize) -> Self {
138 Self::Spanned { message: message.into(), span: Span::new(pos, pos) }
139 }
140
141 pub fn span(&self) -> Span {
142 match self {
143 Self::Spanned { span, .. } => *span,
144 }
145 }
146
147 pub fn message(&self) -> &str {
148 match self {
149 Self::Spanned { message, .. } => message,
150 }
151 }
152}
153
154#[derive(Debug, thiserror::Error)]
157#[error("{err}")]
158pub struct SpannedParseError {
159 pub err: ParserErr,
160 pub pos: usize,
161}
162
163impl SpannedParseError {
164 pub fn new(err: ParserErr, pos: usize) -> Self {
165 Self { err, pos }
166 }
167}
168
169impl Parser {
170 pub fn new(buf: Vec<u8>) -> Self {
171 Self { pos: 0, buf, spans: Vec::new(), decl_scopes: vec![BTreeSet::new()], impl_depth: 0, fn_body_depth: 0, impl_body_depth: 0, match_counter: 0, depth: 0, fatal: false }
172 }
173
174 fn enter_depth(&mut self) -> Result<()> {
181 self.depth += 1;
182 if self.depth > MAX_PARSE_DEPTH {
183 self.depth -= 1;
184 self.fatal = true;
185 return Err(ParserErr::at("表达式嵌套过深", self.current_pos()).into());
186 }
187 Ok(())
188 }
189
190 fn exit_depth(&mut self) {
191 self.depth = self.depth.saturating_sub(1);
192 }
193
194 fn check_fatal(&self) -> Result<()> {
196 if self.fatal { Err(ParserErr::at("表达式嵌套过深", self.current_pos()).into()) } else { Ok(()) }
197 }
198
199 pub(crate) fn push_decl_scope(&mut self) {
200 self.decl_scopes.push(BTreeSet::new());
201 }
202
203 pub(crate) fn pop_decl_scope(&mut self) {
204 if self.decl_scopes.len() > 1 {
205 self.decl_scopes.pop();
206 }
207 }
208
209 fn declare_symbol(&mut self, name: &SmolStr) -> Result<()> {
210 if name.is_empty() {
211 return Ok(());
212 }
213 if self.decl_scopes.iter().rev().any(|scope| scope.contains(name)) {
214 return Err(ParserErr::at(format!("符号 {} 已经声明", name), self.current_pos()).into());
215 }
216 self.decl_scopes.last_mut().expect("parser always has a declaration scope").insert(name.clone());
217 Ok(())
218 }
219
220 pub(crate) fn declare_symbol_in_current_scope(&mut self, name: &SmolStr) -> Result<()> {
221 if name.is_empty() {
222 return Ok(());
223 }
224 let scope = self.decl_scopes.last_mut().expect("parser always has a declaration scope");
225 if scope.contains(name) {
226 return Err(ParserErr::at(format!("符号 {} 已经声明", name), self.current_pos()).into());
227 }
228 scope.insert(name.clone());
229 Ok(())
230 }
231
232 fn declare_function_name(&mut self, name: &SmolStr) -> Result<()> {
233 if self.impl_depth > 0 { self.declare_symbol_in_current_scope(name) } else { self.declare_symbol(name) }
234 }
235
236 fn declare_args(&mut self, args: &[(SmolStr, Type)]) -> Result<()> {
237 for (name, _) in args {
238 self.declare_symbol(name)?;
239 }
240 Ok(())
241 }
242
243 pub(crate) fn declare_pattern_symbols(&mut self, pat: &Pattern) -> Result<()> {
244 match &pat.kind {
245 PatternKind::Ident { name, .. } => self.declare_symbol_in_current_scope(name),
246 PatternKind::Tuple(items) => {
247 for item in items {
248 self.declare_pattern_symbols(item)?;
249 }
250 Ok(())
251 }
252 PatternKind::List { elems, .. } => {
253 for item in elems {
254 self.declare_pattern_symbols(item)?;
255 }
256 Ok(())
257 }
258 PatternKind::Struct { fields, .. } => {
259 for (name, sub) in fields {
260 if let Some(sub) = sub {
261 self.declare_pattern_symbols(sub)?;
262 } else {
263 self.declare_symbol_in_current_scope(name)?;
264 }
265 }
266 Ok(())
267 }
268 PatternKind::Wildcard | PatternKind::Var { .. } | PatternKind::Literal(_) | PatternKind::Member(_, _) | PatternKind::Idx(_, _) => Ok(()),
269 }
270 }
271
272 fn function_body(&mut self, args: &[(SmolStr, Type)]) -> Result<Stmt> {
273 self.push_decl_scope();
274 self.fn_body_depth += 1;
275 let result = (|| {
276 self.declare_args(args)?;
277 self.block()
278 })();
279 self.fn_body_depth -= 1;
280 self.pop_decl_scope();
281 result
282 }
283
284 fn impl_body(&mut self) -> Result<Stmt> {
285 self.push_decl_scope();
286 self.impl_depth += 1;
287 self.impl_body_depth += 1;
288 let result = self.block();
289 self.impl_body_depth -= 1;
290 self.impl_depth -= 1;
291 self.pop_decl_scope();
292 result
293 }
294
295 pub fn is_eof(&self) -> bool {
296 self.pos >= self.buf.len()
297 }
298
299 pub fn get(&self) -> Result<u8> {
300 self.buf.get(self.pos).cloned().ok_or_else(|| ParserErr::at("输入结束", self.pos).into())
302 }
303
304 pub fn take(&mut self, ch: u8) -> Result<()> {
305 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
307 self.pos += 1;
308 Ok(())
309 } else {
310 Err(SpannedParseError::new(ParserErr::at(format!("期望字符 {} 实际字符 {}", ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char), self.pos), self.pos).into())
311 }
312 }
313
314 pub fn until(&mut self, ch: u8) -> Result<()> {
315 self.whitespace()?;
317 self.take(ch)
318 }
319
320 pub fn ahead(&self) -> Result<u8> {
321 self.buf.get(self.pos + 1).cloned().ok_or_else(|| ParserErr::at("输入结束", self.pos).into())
323 }
324
325 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
326 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
327 }
328
329 pub fn error_stmt(&self) -> SmolStr {
330 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
331 }
332
333 pub fn current_pos(&self) -> usize {
334 self.pos
335 }
336
337 pub fn span_from(&self, start: usize) -> Span {
338 Span::new(start, self.pos)
339 }
340
341 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
342 let start = self.pos;
343 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
344 self.pos += 1;
345 }
346 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::at("未发现期望字符", start).into()) }
347 }
348
349 pub fn just(&mut self, pattern: &str) -> Result<()> {
350 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
351 self.pos += pattern.len();
352 Ok(())
353 } else {
354 Err(ParserErr::at(format!("期望字符串 {}", pattern), self.pos).into())
355 }
356 }
357
358 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
359 self.just(pattern)?;
360 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
361 self.pos -= pattern.len();
362 return Err(ParserErr::at(format!("期望字符串 {}", pattern), self.pos).into());
363 }
364 Ok(())
365 }
366
367 pub fn get_type(&mut self) -> Result<Type> {
368 self.whitespace()?;
369 if self.get()? == b'[' {
370 self.pos += 1;
371 let ty = self.get_type()?;
372 self.until(b';')?;
373 self.whitespace()?;
374 let len = self.get_type_param()?;
375 self.until(b']')?;
376 if let Type::ConstInt(number) = len {
377 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
378 Ok(Type::Array(std::rc::Rc::new(ty), number))
379 } else {
380 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
381 }
382 } else {
383 for ty in TYPES {
384 if self.just(ty.0).is_ok() {
385 return Ok(ty.1.clone());
386 }
387 }
388 let name = self.ident()?;
389 if self.take(b'<').is_ok() {
390 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
391 Ok(Type::Ident { name, params })
392 } else {
393 Ok(Type::Ident { name, params: Vec::new() })
394 }
395 }
396 }
397
398 pub fn get_type_param(&mut self) -> Result<Type> {
399 self.const_type_param_add()
400 }
401
402 fn const_type_param_add(&mut self) -> Result<Type> {
403 let mut left = self.const_type_param_mul()?;
404 loop {
405 self.whitespace()?;
406 let op = if self.take(b'+').is_ok() {
407 Some(ConstIntOp::Add)
408 } else if self.take(b'-').is_ok() {
409 Some(ConstIntOp::Sub)
410 } else {
411 None
412 };
413 let Some(op) = op else { break };
414 let right = self.const_type_param_mul()?;
415 left = Self::fold_const_type_binary(op, left, right)?;
416 }
417 Ok(left)
418 }
419
420 fn const_type_param_mul(&mut self) -> Result<Type> {
421 let mut left = self.const_type_param_primary()?;
422 loop {
423 self.whitespace()?;
424 let op = if self.take(b'*').is_ok() {
425 Some(ConstIntOp::Mul)
426 } else if self.take(b'/').is_ok() {
427 Some(ConstIntOp::Div)
428 } else if self.take(b'%').is_ok() {
429 Some(ConstIntOp::Mod)
430 } else {
431 None
432 };
433 let Some(op) = op else { break };
434 let right = self.const_type_param_primary()?;
435 left = Self::fold_const_type_binary(op, left, right)?;
436 }
437 Ok(left)
438 }
439
440 fn const_type_param_primary(&mut self) -> Result<Type> {
441 self.whitespace()?;
442 if self.take(b'(').is_ok() {
443 let ty = self.get_type_param()?;
444 self.until(b')')?;
445 return Ok(ty);
446 }
447 if self.get()?.is_ascii_digit() {
448 let value = self.number()?;
449 if let Some(value) = value.as_uint() {
450 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
451 Ok(Type::ConstInt(value))
452 } else if let Some(value) = value.as_int() {
453 Ok(Type::ConstInt(value))
454 } else {
455 Err(anyhow!("模板数字参数必须是整数"))
456 }
457 } else {
458 self.get_type()
459 }
460 }
461
462 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
463 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
464 let value = match op {
465 ConstIntOp::Add => left + right,
466 ConstIntOp::Sub => left - right,
467 ConstIntOp::Mul => left * right,
468 ConstIntOp::Div => {
469 if *right == 0 {
470 return Err(anyhow!("模板整数除以 0"));
471 }
472 left / right
473 }
474 ConstIntOp::Mod => {
475 if *right == 0 {
476 return Err(anyhow!("模板整数取模 0"));
477 }
478 left % right
479 }
480 };
481 Ok(Type::ConstInt(value))
482 } else {
483 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
484 }
485 }
486
487 pub fn comment(&mut self) -> Result<()> {
488 if self.get()? == b'/' && self.ahead()? == b'/' {
489 self.pos += 2;
490 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
491 self.pos += 1;
492 }
493 Ok(())
494 } else if self.get()? == b'/' && self.ahead()? == b'*' {
495 self.pos += 2;
496 while self.pos + 1 < self.buf.len() {
497 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
498 self.pos += 2;
499 return Ok(());
500 }
501 self.pos += 1;
502 }
503 Err(ParserErr::at("未关闭的注释", self.pos).into())
504 } else {
505 Ok(())
506 }
507 }
508
509 pub fn whitespace(&mut self) -> Result<()> {
510 while self.pos < self.buf.len() {
511 self.comment()?;
512 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
513 break;
514 }
515 self.pos += 1;
516 }
517 Ok(())
518 }
519
520 pub fn ident(&mut self) -> Result<SmolStr> {
521 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
522 loop {
523 let save_pos = self.pos;
524 if self.just("::").is_err() {
525 break;
526 }
527 match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
528 Ok((_, next_stop)) => {
529 stop = next_stop;
530 }
531 Err(_) => {
532 self.pos = save_pos;
533 break;
534 }
535 }
536 }
537 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
538 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
539 }
540 Ok(self.get_str(start, stop))
541 }
542
543 pub fn string(&mut self) -> Result<SmolStr> {
544 if self.get()? != b'"' {
545 return Err(ParserErr::at("非字符串", self.current_pos()).into());
546 }
547 self.pos += 1;
548 let mut text_buf = Vec::new();
549 while self.pos < self.buf.len() {
550 if self.buf[self.pos] == b'\\' {
551 self.pos += 1;
553 match self.buf[self.pos] {
554 b'n' => {
555 text_buf.push(b'\n');
556 self.pos += 1;
557 }
558 b'r' => {
559 text_buf.push(b'\r');
560 self.pos += 1;
561 }
562 b't' => {
563 text_buf.push(b'\t');
564 self.pos += 1;
565 }
566 ch @ (b'\\' | b'"') => {
567 text_buf.push(ch);
568 self.pos += 1;
569 }
570 b'u' => {
571 self.pos += 1;
572 let unicode = if self.take(b'{').is_ok() {
573 let code = self.hex()?;
574 self.pos += 1;
575 code
576 } else {
577 self.hex()?
578 };
579 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
580 let mut utf8_buf = [0u8; 4];
581 let s = ch.encode_utf8(&mut utf8_buf);
582 text_buf.extend_from_slice(s.as_bytes());
583 }
584 b'x' => {
585 self.pos += 1;
586 if self.pos + 2 > self.buf.len() {
587 return Err(anyhow!("非法 \\x 转义:需要 2 位十六进制"));
588 }
589 let start = self.pos;
590 self.pos += 2;
591 let hex = &self.buf[start..self.pos];
592 if hex.iter().any(|b| !b.is_ascii_hexdigit()) {
593 return Err(anyhow!("非法 \\x 转义:仅允许十六进制字符"));
594 }
595 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
596 if code > 0xFF {
597 return Err(anyhow!("\\x 转义值 0x{:02X} 超出 0xFF", code));
598 }
599 text_buf.push(code as u8);
600 }
601 other => {
602 return Err(anyhow!("invalid escape character: {}", other as char));
603 }
604 }
605 } else {
606 if self.buf[self.pos] == b'"' {
607 self.pos += 1;
608 return Ok(String::from_utf8(text_buf)?.into());
609 }
610 text_buf.push(self.buf[self.pos]);
611 self.pos += 1;
612 }
613 }
614 Err(ParserErr::at("未关闭字符串", self.pos).into())
615 }
616
617 pub fn text(&mut self) -> Result<SmolStr> {
618 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
619 self.pos += 1;
620 let mut end = String::from("\"");
621 while self.buf[self.pos] == b'#' {
622 end.push('#');
623 self.pos += 1;
624 }
625 if self.get()? != b'"' {
626 return Err(ParserErr::at("非法的原始字符串", self.current_pos()).into());
627 }
628 self.pos += 1;
629 let start_pos = self.pos;
630 while self.pos < self.buf.len() {
631 if self.just(&end).is_ok() {
632 break;
633 }
634 self.pos += 1;
635 }
636 Ok(self.get_str(start_pos, self.pos - end.len()))
637 } else {
638 self.string()
639 }
640 }
641
642 fn hex(&mut self) -> Result<i32> {
643 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
645 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
646 }
647
648 fn numeric_suffix(&mut self) -> Option<Type> {
649 let save = self.pos;
650 for (name, ty) in TYPES {
651 if !ty.is_native() {
652 continue;
653 }
654 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
655 self.pos += name.len();
656 return Some(ty.clone());
657 }
658 }
659 self.pos = save;
660 None
661 }
662
663 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
664 let ty = suffix.unwrap_or(Type::I64);
666 let magnitude = u128::from_str_radix(digits, radix).map_err(|_| anyhow!("整数字面量 {} 超出可表示范围", digits))?;
668 let (signed, bits) = match ty {
669 Type::I8 => (true, 8u32),
670 Type::I16 => (true, 16),
671 Type::I32 => (true, 32),
672 Type::I64 => (true, 64),
673 Type::U8 => (false, 8),
674 Type::U16 => (false, 16),
675 Type::U32 => (false, 32),
676 Type::U64 => (false, 64),
677 Type::F16 => return Ok(Dynamic::F16(dynamic::f64_to_f16(magnitude as f64))),
678 Type::F32 => return Ok(Dynamic::F32(magnitude as f32)),
679 Type::F64 => return Ok(Dynamic::F64(magnitude as f64)),
680 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
681 };
682 let unsigned_max = (1u128 << bits) - 1;
683 let max_allowed = if radix == 10 { if signed { unsigned_max / 2 + 1 } else { unsigned_max } } else { unsigned_max };
686 if magnitude > max_allowed {
687 return Err(anyhow!("整数字面量 {} 超出 {:?} 的范围", digits, ty));
688 }
689 Ok(match ty {
690 Type::I8 => Dynamic::I8(magnitude as i8),
691 Type::I16 => Dynamic::I16(magnitude as i16),
692 Type::I32 => Dynamic::I32(magnitude as i32),
693 Type::I64 => Dynamic::I64(magnitude as i64),
694 Type::U8 => Dynamic::U8(magnitude as u8),
695 Type::U16 => Dynamic::U16(magnitude as u16),
696 Type::U32 => Dynamic::U32(magnitude as u32),
697 Type::U64 => Dynamic::U64(magnitude as u64),
698 _ => unreachable!(),
699 })
700 }
701
702 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
703 let value: f64 = digits.parse()?;
704 if let Some(ref ty) = suffix {
705 let is_int_suffix = matches!(ty, Type::I8 | Type::I16 | Type::I32 | Type::I64 | Type::U8 | Type::U16 | Type::U32 | Type::U64);
708 if is_int_suffix {
709 let (min, max): (f64, f64) = match ty {
710 Type::I8 => (i8::MIN as f64, i8::MAX as f64),
711 Type::I16 => (i16::MIN as f64, i16::MAX as f64),
712 Type::I32 => (i32::MIN as f64, i32::MAX as f64),
713 Type::I64 => (i64::MIN as f64, i64::MAX as f64),
714 Type::U8 => (0.0, u8::MAX as f64),
715 Type::U16 => (0.0, u16::MAX as f64),
716 Type::U32 => (0.0, u32::MAX as f64),
717 Type::U64 => (0.0, u64::MAX as f64),
718 _ => unreachable!(),
719 };
720 if !value.is_finite() || value < min || value > max || value.fract() != 0.0 {
721 return Err(anyhow!("浮点字面量 {:?} 超出 {:?} 范围", value, ty));
722 }
723 } else if !value.is_finite() {
724 return Err(anyhow!("非法浮点字面量: {:?}", value));
725 }
726 }
727 Ok(match suffix.unwrap_or(Type::F32) {
728 Type::I8 => Dynamic::I8(value as i8),
729 Type::I16 => Dynamic::I16(value as i16),
730 Type::I32 => Dynamic::I32(value as i32),
731 Type::I64 => Dynamic::I64(value as i64),
732 Type::U8 => Dynamic::U8(value as u8),
733 Type::U16 => Dynamic::U16(value as u16),
734 Type::U32 => Dynamic::U32(value as u32),
735 Type::U64 => Dynamic::U64(value as u64),
736 Type::F16 => Dynamic::F16(dynamic::f64_to_f16(value)),
737 Type::F32 => Dynamic::F32(value as f32),
738 Type::F64 => Dynamic::F64(value),
739 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
740 })
741 }
742
743 pub fn number(&mut self) -> Result<Dynamic> {
744 if self.get()? == b'0' {
745 if [b'b', b'B'].contains(&self.ahead()?) {
746 self.pos += 2;
747 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
748 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
749 let suffix = self.numeric_suffix();
750 return self.int_literal(&s, 2, suffix);
751 } else if [b'o', b'O'].contains(&self.ahead()?) {
752 self.pos += 2;
753 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
754 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
755 let suffix = self.numeric_suffix();
756 return self.int_literal(&s, 8, suffix);
757 } else if [b'x', b'X'].contains(&self.ahead()?) {
758 self.pos += 2;
759 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
760 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
761 let suffix = self.numeric_suffix();
762 return self.int_literal(&s, 16, suffix);
763 }
764 }
765 let start = self.pos;
766 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
767 self.pos += 1;
768 }
769 let mut is_float = false;
770 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
771 is_float = true;
772 self.pos += 1;
773 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
774 self.pos += 1;
775 }
776 }
777 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') {
778 let mut exp_pos = self.pos + 1;
779 if exp_pos < self.buf.len() && (self.buf[exp_pos] == b'+' || self.buf[exp_pos] == b'-') {
780 exp_pos += 1;
781 }
782 if exp_pos < self.buf.len() && self.buf[exp_pos] <= b'9' && self.buf[exp_pos] >= b'0' {
783 is_float = true;
784 self.pos = exp_pos + 1;
785 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
786 self.pos += 1;
787 }
788 }
789 }
790 if self.pos > start {
791 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
792 let suffix = self.numeric_suffix();
793 if is_float {
794 return self.float_literal(&text, suffix);
795 }
796 return self.int_literal(&text, 10, suffix);
797 }
798 Err(ParserErr::at("非数字", start).into())
799 }
800}
801
802#[cfg(test)]
803mod tests {
804 use super::*;
805
806 fn parse_all(code: &str) -> Result<Vec<Stmt>> {
807 let mut parser = Parser::new(code.as_bytes().to_vec());
808 let mut stmts = Vec::new();
809 loop {
810 match parser.stmt(false) {
811 Ok(stmt) => stmts.push(stmt),
812 Err(err) => {
813 if parser.is_eof() {
814 return Ok(stmts);
815 }
816 return Err(err);
817 }
818 }
819 }
820 }
821
/// Runs `f` to completion on a thread with a 64 MiB stack and propagates a
/// panic from it.
fn run_with_big_stack(f: impl FnOnce() + Send + 'static) {
    const STACK_BYTES: usize = 64 * 1024 * 1024;
    let worker = std::thread::Builder::new().stack_size(STACK_BYTES).spawn(f).unwrap();
    worker.join().unwrap();
}
828
/// Parentheses nested past `MAX_PARSE_DEPTH` must produce the depth error.
#[test]
fn deeply_nested_parens_error_instead_of_stack_overflow() {
    run_with_big_stack(|| {
        let levels = MAX_PARSE_DEPTH + 50;
        let (open, close) = ("(".repeat(levels), ")".repeat(levels));
        let code = format!("{open}1{close}");
        let err = Parser::new(code.into_bytes()).get_expr().unwrap_err();
        assert!(err.to_string().contains("嵌套过深"), "got: {err}");
    });
}
839
/// Blocks nested past `MAX_PARSE_DEPTH` must produce the depth error.
#[test]
fn deeply_nested_blocks_error_instead_of_stack_overflow() {
    run_with_big_stack(|| {
        let levels = MAX_PARSE_DEPTH + 50;
        let mut code = String::from("fn f() ");
        code.push_str(&"{".repeat(levels));
        code.push('1');
        code.push_str(&"}".repeat(levels));
        let err = parse_all(&code).unwrap_err();
        assert!(err.to_string().contains("嵌套过深"), "got: {err}");
    });
}
849
/// Nesting well inside the limit must still parse.
#[test]
fn normal_nesting_within_limit_parses() {
    let levels = 32;
    let mut code = "(".repeat(levels);
    code.push('1');
    code.push_str(&")".repeat(levels));
    let mut parser = Parser::new(code.into_bytes());
    parser.get_expr().unwrap();
}
857
858 fn parse_literal(code: &str) -> Result<Dynamic> {
859 let mut parser = Parser::new(code.as_bytes().to_vec());
860 match parser.get_expr()?.kind {
861 crate::ExprKind::Value(value) => Ok(value),
862 other => Err(anyhow!("不是字面量: {:?}", other)),
863 }
864 }
865
/// Integer literals without a suffix are `I64`.
#[test]
fn unsuffixed_integer_defaults_to_i64() {
    for (src, expected) in [("5", 5i64), ("3000000000", 3000000000)] {
        assert_eq!(parse_literal(src).unwrap(), Dynamic::I64(expected));
    }
}
872
/// Literals that do not fit their (explicit or default) type are rejected.
#[test]
fn out_of_range_integer_literals_error() {
    assert!(parse_literal("99999999999999999999999999999999999999999").is_err());
    for src in ["255i8", "70000i16", "256u8"] {
        let message = parse_literal(src).unwrap_err().to_string();
        assert!(message.contains("超出"));
    }
}
882
/// The magnitude of a signed type's minimum is accepted and maps to that
/// minimum.
#[test]
fn signed_min_magnitude_literals_allowed() {
    let min_i8 = parse_literal("128i8").unwrap();
    assert_eq!(min_i8, Dynamic::I8(-128));

    let min_i64 = parse_literal("9223372036854775808").unwrap();
    assert_eq!(min_i64, Dynamic::I64(i64::MIN));
}
889
/// Hex literals are taken as bit patterns of the target type.
#[test]
fn hex_literals_keep_bit_pattern() {
    let cases = [
        ("0xFFFFFFFF", Dynamic::I64(0xFFFFFFFF)),
        ("0xFFi8", Dynamic::I8(-1)),
        ("0xFFFFFFFFu32", Dynamic::U32(u32::MAX)),
    ];
    for (src, expected) in cases {
        assert_eq!(parse_literal(src).unwrap(), expected);
    }
}
898
899 fn shape(code: &str) -> String {
901 let mut parser = Parser::new(code.as_bytes().to_vec());
902 let expr = parser.get_expr().expect("parse");
903 fmt_shape(&expr)
904 }
905
906 fn binop_sym(op: &crate::BinaryOp) -> &'static str {
907 use crate::BinaryOp::*;
908 match op {
909 Add => "+",
910 Sub => "-",
911 Mul => "*",
912 Div => "/",
913 Mod => "%",
914 Shl => "<<",
915 Shr => ">>",
916 BitAnd => "&",
917 BitOr => "|",
918 BitXor => "^",
919 Assign => "=",
920 AddAssign => "+=",
921 Eq => "==",
922 Ne => "!=",
923 Lt => "<",
924 Gt => ">",
925 Le => "<=",
926 Ge => ">=",
927 And => "&&",
928 Or => "||",
929 Idx => "idx",
930 other => {
931 let _ = other;
932 "?"
933 }
934 }
935 }
936
937 fn fmt_shape(expr: &crate::Expr) -> String {
938 use crate::ExprKind::*;
939 match &expr.kind {
940 Value(v) => format!("{:?}", v).replace("I64(", "").replace("I32(", "").trim_end_matches(')').to_string(),
941 Ident(name) => name.to_string(),
942 Unary { op, value } => {
943 let s = if matches!(op, crate::UnaryOp::Neg) { "-" } else { "!" };
944 format!("({} {})", s, fmt_shape(value))
945 }
946 Binary { left, op, right } => format!("({} {} {})", binop_sym(op), fmt_shape(left), fmt_shape(right)),
947 Range { start, stop, inclusive } => format!("({} {} {})", if *inclusive { "..=" } else { ".." }, fmt_shape(start), fmt_shape(stop)),
948 Typed { value, ty } => format!("(as {} {:?})", fmt_shape(value), ty),
949 other => format!("{:?}", other),
950 }
951 }
952
/// Golden shapes for binary and unary operator precedence and associativity.
#[test]
fn precedence_and_associativity_golden() {
    let cases = [
        ("1 + 2 * 3", "(+ 1 (* 2 3))"),
        ("1 * 2 + 3", "(+ (* 1 2) 3)"),
        ("1 - 2 - 3", "(- (- 1 2) 3)"),
        ("8 / 4 / 2", "(/ (/ 8 4) 2)"),
        ("2 + 3 << 4", "(<< (+ 2 3) 4)"),
        ("1 | 2 ^ 3 & 4", "(| 1 (^ 2 (& 3 4)))"),
        ("1 + 2 == 3", "(== (+ 1 2) 3)"),
        ("a && b || c", "(|| (&& a b) c)"),
        ("-a * b", "(* (- a) b)"),
        ("!a == b", "(== (! a) b)"),
    ];
    for (src, expected) in cases {
        assert_eq!(shape(src), expected);
    }
}
973
/// Golden shapes for assignment, range and `as` precedence.
#[test]
fn assignment_range_and_as_precedence_golden() {
    let cases = [
        ("a = b + c", "(= a (+ b c))"),
        ("a = b = c", "(= a (= b c))"),
        ("a = b = c = d", "(= a (= b (= c d)))"),
        ("a += b * c", "(+= a (* b c))"),
        ("1 + 1 .. n * 2", "(.. (+ 1 1) (* n 2))"),
        ("0 ..= n - 1", "(..= 0 (- n 1))"),
        ("a + b as i64", "(+ a (as b I64))"),
        ("a as i64 + b", "(+ (as a I64) b)"),
        ("(a + b) as i64", "(as (+ a b) I64)"),
    ];
    for (src, expected) in cases {
        assert_eq!(shape(src), expected);
    }
}
990
/// Feeds 4000 pseudo-random token sequences to the parser and asserts that
/// none of them makes it panic. Parse errors are fine; only unwinding fails
/// the test.
#[test]
fn parser_never_panics_on_random_input() {
    run_with_big_stack(|| {
        // Token fragments that get glued together into inputs.
        const FRAGMENTS: &[&str] = &[
            "fn", "let", "if", "else", "for", "in", "while", "return", "struct", "impl", "pub", "(", ")", "{", "}", "[", "]", "<", ">", "+", "-", "*", "/", "%", "=", "==", "&&", "||", "..", "..=", "as", "i32",
            "u64", "f64", ".", ",", ";", ":", "::", "x", "0", "1", "255i8", "0xFF", "\"s\"", "true", "null", "|a|", "->",
        ];
        // Fixed seed, so the generated inputs are the same on every run.
        let mut state: u64 = 0x9E3779B97F4A7C15;
        // Shift/xor scramble followed by a wrapping multiply.
        let mut next = || {
            state ^= state >> 12;
            state ^= state << 25;
            state ^= state >> 27;
            state = state.wrapping_mul(0x2545F4914F6CDD1D);
            state
        };

        for _ in 0..4000 {
            let mut code = String::new();
            // Up to 39 fragments, each followed by a space half of the time.
            let tokens = (next() % 40) as usize;
            for _ in 0..tokens {
                code.push_str(FRAGMENTS[(next() as usize) % FRAGMENTS.len()]);
                if next() % 2 == 0 {
                    code.push(' ');
                }
            }
            let result = std::panic::catch_unwind(|| {
                let mut parser = Parser::new(code.clone().into_bytes());
                let mut count = 0;
                loop {
                    match parser.stmt(false) {
                        Ok(_) => {
                            count += 1;
                            // The statement cap guards against a parser that
                            // keeps succeeding without reaching end of input.
                            if parser.is_eof() || count > 1000 {
                                break;
                            }
                        }
                        Err(_) => break,
                    }
                }
            });
            assert!(result.is_ok(), "parser panicked on input: {:?}", code);
        }
    });
}
1040
/// A `let` inside a function may reuse the name of an earlier top-level
/// function.
#[test]
fn allows_local_name_to_shadow_prior_function() {
    let src = r#"
        fn chunk_id(x, y) {
            x + y
        }

        fn open() {
            let chunk_id = 1;
            chunk_id
        }
        "#;
    parse_all(src).unwrap();
}
1057
/// Two parameters with the same name are a declaration error.
#[test]
fn rejects_duplicate_function_args() {
    let message = parse_all("fn open(value, value) { value }").unwrap_err().to_string();
    assert!(message.contains("符号 value 已经声明"));
}
1063
/// Two `let`s of the same name in one scope are a declaration error.
#[test]
fn rejects_duplicate_local_let_names() {
    let src = r#"
        fn open() {
            let value = 1;
            let value = 2;
            value
        }
        "#;
    let message = parse_all(src).unwrap_err().to_string();
    assert!(message.contains("符号 value 已经声明"));
}
1078
/// Method names are scoped per `impl` block, so two impls may both define
/// `zero`.
#[test]
fn allows_same_method_name_in_different_impl_blocks() {
    let src = r#"
        struct A {}
        struct B {}

        impl A {
            fn zero() { 0 }
        }

        impl B {
            fn zero() { 0 }
        }
        "#;
    parse_all(src).unwrap();
}
1097
/// A `fn` item inside a function body is rejected.
#[test]
fn rejects_nested_fn_inside_function_body() {
    let msg = parse_all("fn outer() { fn inner() { 1 } }").unwrap_err().to_string();
    assert!(msg.contains("函数体内不能定义"), "got: {msg}");
}
1103
/// A `struct` item inside a function body is rejected.
#[test]
fn rejects_nested_struct_inside_function_body() {
    let msg = parse_all("fn outer() { struct S { x: i32 } S{x: 1} }").unwrap_err().to_string();
    assert!(msg.contains("函数体内不能定义"), "got: {msg}");
}
1109
/// A `const` item inside a function body is rejected.
#[test]
fn rejects_nested_const_inside_function_body() {
    let msg = parse_all("fn outer() { const K = 1 } K").unwrap_err().to_string();
    assert!(msg.contains("函数体内不能定义"), "got: {msg}");
}
1115
/// A `\xHH` escape right before the closing quote is decoded, not dropped.
#[test]
fn hex_escape_at_end_of_string_preserves_byte() {
    let mut parser = Parser::new(br#""abc\x41""#.to_vec());
    let decoded = parser.string().unwrap();
    assert_eq!(decoded.as_str(), "abcA");
}
1122
/// A `\x` escape with too few bytes after it names the escape in its error.
#[test]
fn hex_escape_truncated_reports_clear_error() {
    let mut parser = Parser::new(br#""abc\x""#.to_vec());
    let msg = parser.string().unwrap_err().to_string();
    assert!(msg.contains("\\x"), "got: {msg}");
}
1129
/// A `\x` escape followed by non-hex characters names the escape in its
/// error.
#[test]
fn hex_escape_non_hex_char_reports_clear_error() {
    let mut parser = Parser::new(br#""abc\xZZ""#.to_vec());
    let msg = parser.string().unwrap_err().to_string();
    assert!(msg.contains("\\x"), "got: {msg}");
}
1136
/// An `else` with no block after it must fail to parse; either of the two
/// accepted diagnostics (`not code block` or `未结束的`) satisfies the test.
#[test]
fn else_with_invalid_body_reports_error() {
    let msg = parse_all("fn f() { if true { 1 } else }").unwrap_err().to_string();
    let accepted = ["not code block", "未结束的"];
    let recognized = accepted.iter().any(|needle| msg.contains(*needle));
    assert!(recognized, "got: {msg}");
}
1144
/// `1e30u8` must be rejected by `number()` with a message containing `超出`.
#[test]
fn float_literal_with_int_suffix_out_of_range_errors() {
    let mut parser = Parser::new(b"1e30u8".to_vec());
    let err = parser.number().unwrap_err();
    let out_of_range = err.to_string().contains("超出");
    assert!(out_of_range, "got: {err}");
}
1151
/// `1.5i32` (fractional value with an integer suffix) must be rejected by
/// `number()` with a message containing `超出`.
#[test]
fn float_literal_with_int_suffix_fractional_errors() {
    let mut parser = Parser::new(b"1.5i32".to_vec());
    let err = parser.number().unwrap_err();
    let out_of_range = err.to_string().contains("超出");
    assert!(out_of_range, "got: {err}");
}
1158
/// `1e-3f32` must parse to a `Dynamic::F32` within `1e-8` of `1e-3`.
#[test]
fn float_literal_with_float_suffix_accepts_fractional() {
    let mut parser = Parser::new(b"1e-3f32".to_vec());
    let parsed = parser.number().unwrap();
    assert!(matches!(parsed, Dynamic::F32(v) if (v - 1e-3).abs() < 1e-8));
}
1164
/// A closure bound with `let` and then called inside a function body must
/// parse successfully.
#[test]
fn allows_closure_inside_function_body() {
    let source = "fn outer() { let f = |x: i32| { x + 1 }; f(1) }";
    parse_all(source).unwrap();
}
1169
/// A `const` inside an `impl` body must be rejected; the message has to
/// contain both `impl 体内不能定义` and `const`.
#[test]
fn rejects_const_inside_impl_body() {
    let source = "struct S {}\nimpl S { const K = 1 }";
    let msg = parse_all(source).unwrap_err().to_string();
    let required = ["impl 体内不能定义", "const"];
    let complete = required.iter().all(|needle| msg.contains(*needle));
    assert!(complete, "got: {msg}");
}
1176
/// A `pub fn` method declared inside an `impl` body must parse successfully.
#[test]
fn allows_fn_inside_impl_body() {
    let source = "struct S {}\nimpl S { pub fn m(self: S) { 1 } }";
    parse_all(source).unwrap();
}
1181
/// Declaring `fn f` twice must fail with an error that downcasts to
/// `ParserErr`, whose message contains `f` and whose span starts inside
/// the source text.
///
/// Diagnostics are attached to the assertions instead of being printed
/// unconditionally, so they show up only when a check fails and identify
/// which one it was.
#[test]
fn parser_err_carries_span() {
    let src = "fn f() {}\nfn f() {}\n";
    let err = parse_all(src).unwrap_err();
    let parser_err = err.downcast_ref::<ParserErr>().expect("ParserErr");

    let message = parser_err.message();
    assert!(
        message.contains('f'),
        "message should mention `f`; message: {message}; display: {err}"
    );

    let span = parser_err.span();
    assert!(
        span.start < src.len(),
        "span should start inside the source (len {}); span: {span:?}; display: {err}",
        src.len()
    );
}
1195
/// A `{ ... }` block containing statements, used as the value of a `let`,
/// must parse successfully.
#[test]
fn block_as_let_value_is_expression() {
    let source = "pub fn f() { let x = { let y = 1; y + 1 }; x }";
    parse_all(source).unwrap();
}
1200
/// A `let` whose value is `{ key: 1 }` must parse successfully.
///
/// Going by the test name, this form is meant to be read as a dict literal
/// rather than a block; the test itself only asserts that parsing succeeds.
#[test]
fn dict_still_takes_priority_over_block() {
    let source = "pub fn f() { let d = { key: 1 }; d }";
    parse_all(source).unwrap();
}
1206
/// A list pattern with a leading element and a `..rest` tail must parse.
#[test]
fn list_pattern_with_rest_parses() {
    let source = "pub fn f(items) { let [first, ..rest] = items; first }";
    parse_all(source).unwrap();
}
1211
/// A list pattern consisting solely of a `..all` rest binding must parse.
#[test]
fn list_pattern_with_only_rest_parses() {
    let source = "pub fn f(items) { let [..all] = items; all }";
    parse_all(source).unwrap();
}
1216
/// When `take` fails on an unexpected byte, the error must downcast to
/// `SpannedParseError` and report the position the parser was at before
/// the call.
#[test]
fn take_error_carries_precise_pos() {
    let mut parser = Parser::new(b"ab".to_vec());
    let expected_pos = parser.current_pos();

    // The input starts with `a`, so asking for `c` has to fail.
    let err = parser.take(b'c').unwrap_err();
    let spanned = err
        .downcast_ref::<crate::SpannedParseError>()
        .expect("take should wrap in SpannedParseError");
    assert_eq!(spanned.pos, expected_pos);
}
1229
/// Scientific-notation literals with `f64` / `f32` suffixes must parse to
/// the matching `Dynamic` float variant with the exact value.
#[test]
fn parses_scientific_float_suffixes() {
    let mut wide = Parser::new(b"1.7976931348623157e308f64".to_vec());
    let wide_value = wide.number().unwrap();
    assert_eq!(wide_value, Dynamic::F64(1.7976931348623157e308));

    let mut narrow = Parser::new(b"1e-3f32".to_vec());
    let narrow_value = narrow.number().unwrap();
    assert_eq!(narrow_value, Dynamic::F32(1e-3f32));
}
1238
/// `|| { 1i32 }()` must parse as a call with no arguments whose callee is
/// a closure with no parameters.
#[test]
fn parses_immediate_closure_call() {
    let mut p = Parser::new(b"|| { 1i32 }()".to_vec());
    let expr = p.get_expr().unwrap();

    // Outer node: the call itself.
    let ExprKind::Call { obj, params } = expr.kind else {
        panic!("expected closure call, got {expr:?}");
    };
    assert!(params.is_empty());

    // Callee: the zero-parameter closure.
    let ExprKind::Closure { args, .. } = obj.kind else {
        panic!("expected closure callee, got {obj:?}");
    };
    assert!(args.is_empty());
}
1252
/// `()` must parse as an `ExprKind::Tuple` with no items.
#[test]
fn parses_empty_tuple_expression() {
    let mut p = Parser::new(b"()".to_vec());
    let expr = p.get_expr().unwrap();

    let ExprKind::Tuple(items) = expr.kind else {
        panic!("expected empty tuple, got {expr:?}");
    };
    assert!(items.is_empty());
}
1262
/// `value::<4>()` must parse as a call with no arguments whose callee is a
/// generic instantiation of the identifier `value` with the single
/// parameter `Type::ConstInt(4)`.
#[test]
fn parses_explicit_generic_function_call() {
    let mut p = Parser::new(b"value::<4>()".to_vec());
    let expr = p.get_expr().unwrap();

    // Outer node: the call, with an empty argument list.
    let ExprKind::Call { obj, params } = expr.kind else {
        panic!("expected function call, got {expr:?}");
    };
    assert!(params.is_empty());

    // Callee: the generic instantiation; `obj` / `params` are shadowed on purpose.
    let ExprKind::Generic { obj, params } = obj.kind else {
        panic!("expected generic callee, got {obj:?}");
    };
    let targets_value = matches!(obj.kind, ExprKind::Ident(name) if name.as_str() == "value");
    assert!(targets_value);
    let single_const_four = matches!(params.as_slice(), [Type::ConstInt(4)]);
    assert!(single_const_four);
}
1277
/// Parses a generic `BigFloat<N>` struct together with its `abs_cmp` and
/// `cmp` methods, a fixture that combines `while`, chained `if`/`else if`,
/// `as` casts, indexing and compound assignment. The test only asserts
/// that parsing succeeds.
#[test]
fn parses_bigfloat_cmp_context_segment() {
    let source = r#"
        struct BigFloat<N> { data: [u32; N], exp: i32, sign: bool }

        impl BigFloat<N> {
            fn abs_cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                let self_high = self.exp + ((N - 1) as i32);
                let rhs_high = rhs.exp + ((N - 1) as i32);
                let high = if self_high >= rhs_high { self_high } else { rhs_high };
                let low = if self.exp <= rhs.exp { self.exp } else { rhs.exp };
                let result = 0i32;
                let power = high;

                while power >= low && result == 0i32 {
                    let a_idx = power - self.exp;
                    let b_idx = power - rhs.exp;
                    let a_limb = 0u32;
                    let b_limb = 0u32;

                    if a_idx >= 0i32 && a_idx < (N as i32) {
                        a_limb = self.data[a_idx as u32];
                    }
                    if b_idx >= 0i32 && b_idx < (N as i32) {
                        b_limb = rhs.data[b_idx as u32];
                    }

                    if a_limb > b_limb {
                        result = 1i32;
                    } else if a_limb < b_limb {
                        result = -1i32;
                    }

                    power -= 1i32;
                }

                result
            }

            pub fn cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                if self.is_zero() && rhs.is_zero() {
                    0i32
                } else if self.sign != rhs.sign {
                    if self.sign { -1i32 } else { 1i32 }
                } else {
                    let cmp = self.abs_cmp(rhs);
                    if self.sign { -cmp } else { cmp }
                }
            }
        }
    "#;
    parse_all(source).unwrap();
}
1331
/// The script at `../../zusts/bigfloat.zs` (embedded at compile time) must
/// parse without error.
#[test]
fn parses_bigfloat_file() {
    parse_all(include_str!("../../zusts/bigfloat.zs")).unwrap();
}
1337}