1use std::{collections::BTreeSet, fmt::Debug};
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A pair of offsets delimiting a region of the parsed input.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-width span whose start and end are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Span::new(pos, pos)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn merge(self, other: Self) -> Self {
        let start = self.start.min(other.start);
        let end = self.end.max(other.end);
        Span::new(start, end)
    }
}
35
36#[derive(Debug)]
37pub struct Parser {
38 pos: usize, buf: Vec<u8>, spans: Vec<usize>,
41 decl_scopes: Vec<BTreeSet<SmolStr>>,
42 impl_depth: usize,
43 fn_body_depth: usize,
46 impl_body_depth: usize,
49 depth: usize, fatal: bool, }
52
/// Upper bound on the parser's nesting depth counter.
///
/// `Parser::enter_depth` fails (and marks the parser fatal) instead of going beyond this value.
pub const MAX_PARSE_DEPTH: usize = 128;
60
/// Bytes that terminate an identifier: ASCII whitespace plus the listed punctuation.
const NOT_IDENT: &[u8] = b" \t\n\r/*+-=(){}[];:,.<>!#$%^&|\\\"'";
/// Bytes skipped as whitespace.
const WHITE_SPACE: &[u8] = b" \t\n\r";
63const TYPES: &[(&str, Type)] = &[
64 ("bool", Type::Bool),
65 ("string", Type::Str),
66 ("i8", Type::I8),
67 ("i16", Type::I16),
68 ("i32", Type::I32),
69 ("i64", Type::I64),
70 ("u8", Type::U8),
71 ("u16", Type::U16),
72 ("u32", Type::U32),
73 ("u64", Type::U64),
74 ("f16", Type::F16),
75 ("f32", Type::F32),
76 ("f64", Type::F64),
77];
/// Reserved words that `ident` refuses to return as an identifier.
const KEYWORDS: &[&str] = &[
    // Literals.
    "true", "false", "null",
    // Bindings and control flow.
    "let", "if", "else", "for", "in", "while",
    // Items.
    "pub", "fn", "struct", "impl", "const", "static",
    // Jumps.
    "continue", "return", "break",
];
79
/// Parses a delimited list into the collection `$start`.
///
/// Repeatedly skips whitespace, stops (consuming it) at the closing byte `$end`, otherwise
/// evaluates `$item_expr`, pushes the result, and consumes one `$sep` byte if it follows.
/// The separator is optional between items. `?` is used on `whitespace()` and `get()`, so the
/// macro must be expanded inside a function returning a compatible `Result`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut collected = $start;
        loop {
            $self.whitespace()?;
            if $self.get()? == $end {
                break;
            }
            collected.push($item_expr);
            $self.whitespace()?;
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
        // Step over the closing delimiter that ended the loop.
        $self.pos += 1;
        collected
    }};
}
100
/// Evaluates `$method` as a speculative parse and yields its `Result` unchanged.
///
/// When the result is an error and the parser is not marked `fatal`, the cursor, the
/// declaration scopes and `impl_depth` are rewound to their values from before the attempt.
/// A fatal parser is left exactly as the failed attempt left it.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let checkpoint = ($self.pos, $self.decl_scopes.clone(), $self.impl_depth);
        let outcome = $method;
        if outcome.is_err() && !$self.fatal {
            let (saved_pos, saved_scopes, saved_impl_depth) = checkpoint;
            $self.pos = saved_pos;
            $self.decl_scopes = saved_scopes;
            $self.impl_depth = saved_impl_depth;
        }
        outcome
    }};
}
120
121#[derive(Debug, thiserror::Error)]
122pub enum ParserErr {
123 #[error("{message}")]
124 Spanned { message: String, span: Span },
125}
126
127impl ParserErr {
128 pub fn new(message: impl Into<String>, span: Span) -> Self {
130 Self::Spanned { message: message.into(), span }
131 }
132
133 pub fn at(message: impl Into<String>, pos: usize) -> Self {
135 Self::Spanned { message: message.into(), span: Span::new(pos, pos) }
136 }
137
138 pub fn span(&self) -> Span {
139 match self {
140 Self::Spanned { span, .. } => *span,
141 }
142 }
143
144 pub fn message(&self) -> &str {
145 match self {
146 Self::Spanned { message, .. } => message,
147 }
148 }
149}
150
151#[derive(Debug, thiserror::Error)]
154#[error("{err}")]
155pub struct SpannedParseError {
156 pub err: ParserErr,
157 pub pos: usize,
158}
159
160impl SpannedParseError {
161 pub fn new(err: ParserErr, pos: usize) -> Self {
162 Self { err, pos }
163 }
164}
165
166impl Parser {
167 pub fn new(buf: Vec<u8>) -> Self {
168 Self { pos: 0, buf, spans: Vec::new(), decl_scopes: vec![BTreeSet::new()], impl_depth: 0, fn_body_depth: 0, impl_body_depth: 0, depth: 0, fatal: false }
169 }
170
171 fn enter_depth(&mut self) -> Result<()> {
178 self.depth += 1;
179 if self.depth > MAX_PARSE_DEPTH {
180 self.depth -= 1;
181 self.fatal = true;
182 return Err(ParserErr::at("表达式嵌套过深", self.current_pos()).into());
183 }
184 Ok(())
185 }
186
187 fn exit_depth(&mut self) {
188 self.depth = self.depth.saturating_sub(1);
189 }
190
191 fn check_fatal(&self) -> Result<()> {
193 if self.fatal { Err(ParserErr::at("表达式嵌套过深", self.current_pos()).into()) } else { Ok(()) }
194 }
195
196 fn push_decl_scope(&mut self) {
197 self.decl_scopes.push(BTreeSet::new());
198 }
199
200 fn pop_decl_scope(&mut self) {
201 if self.decl_scopes.len() > 1 {
202 self.decl_scopes.pop();
203 }
204 }
205
206 fn declare_symbol(&mut self, name: &SmolStr) -> Result<()> {
207 if name.is_empty() {
208 return Ok(());
209 }
210 if self.decl_scopes.iter().rev().any(|scope| scope.contains(name)) {
211 return Err(ParserErr::at(format!("符号 {} 已经声明", name), self.current_pos()).into());
212 }
213 self.decl_scopes.last_mut().expect("parser always has a declaration scope").insert(name.clone());
214 Ok(())
215 }
216
217 fn declare_symbol_in_current_scope(&mut self, name: &SmolStr) -> Result<()> {
218 if name.is_empty() {
219 return Ok(());
220 }
221 let scope = self.decl_scopes.last_mut().expect("parser always has a declaration scope");
222 if scope.contains(name) {
223 return Err(ParserErr::at(format!("符号 {} 已经声明", name), self.current_pos()).into());
224 }
225 scope.insert(name.clone());
226 Ok(())
227 }
228
229 fn declare_function_name(&mut self, name: &SmolStr) -> Result<()> {
230 if self.impl_depth > 0 { self.declare_symbol_in_current_scope(name) } else { self.declare_symbol(name) }
231 }
232
233 fn declare_args(&mut self, args: &[(SmolStr, Type)]) -> Result<()> {
234 for (name, _) in args {
235 self.declare_symbol(name)?;
236 }
237 Ok(())
238 }
239
240 fn declare_pattern_symbols(&mut self, pat: &Pattern) -> Result<()> {
241 match &pat.kind {
242 PatternKind::Ident { name, .. } => self.declare_symbol_in_current_scope(name),
243 PatternKind::Tuple(items) => {
244 for item in items {
245 self.declare_pattern_symbols(item)?;
246 }
247 Ok(())
248 }
249 PatternKind::List { elems, .. } => {
250 for item in elems {
251 self.declare_pattern_symbols(item)?;
252 }
253 Ok(())
254 }
255 PatternKind::Wildcard | PatternKind::Var { .. } | PatternKind::Literal(_) | PatternKind::Member(_, _) | PatternKind::Idx(_, _) => Ok(()),
256 }
257 }
258
259 fn function_body(&mut self, args: &[(SmolStr, Type)]) -> Result<Stmt> {
260 self.push_decl_scope();
261 self.fn_body_depth += 1;
262 let result = (|| {
263 self.declare_args(args)?;
264 self.block()
265 })();
266 self.fn_body_depth -= 1;
267 self.pop_decl_scope();
268 result
269 }
270
271 fn impl_body(&mut self) -> Result<Stmt> {
272 self.push_decl_scope();
273 self.impl_depth += 1;
274 self.impl_body_depth += 1;
275 let result = self.block();
276 self.impl_body_depth -= 1;
277 self.impl_depth -= 1;
278 self.pop_decl_scope();
279 result
280 }
281
282 pub fn is_eof(&self) -> bool {
283 self.pos >= self.buf.len()
284 }
285
286 pub fn get(&self) -> Result<u8> {
287 self.buf.get(self.pos).cloned().ok_or_else(|| ParserErr::at("输入结束", self.pos).into())
289 }
290
291 pub fn take(&mut self, ch: u8) -> Result<()> {
292 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
294 self.pos += 1;
295 Ok(())
296 } else {
297 Err(SpannedParseError::new(ParserErr::at(format!("期望字符 {} 实际字符 {}", ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char), self.pos), self.pos).into())
298 }
299 }
300
301 pub fn until(&mut self, ch: u8) -> Result<()> {
302 self.whitespace()?;
304 self.take(ch)
305 }
306
307 pub fn ahead(&self) -> Result<u8> {
308 self.buf.get(self.pos + 1).cloned().ok_or_else(|| ParserErr::at("输入结束", self.pos).into())
310 }
311
312 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
313 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
314 }
315
316 pub fn error_stmt(&self) -> SmolStr {
317 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
318 }
319
320 pub fn current_pos(&self) -> usize {
321 self.pos
322 }
323
324 pub fn span_from(&self, start: usize) -> Span {
325 Span::new(start, self.pos)
326 }
327
328 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
329 let start = self.pos;
330 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
331 self.pos += 1;
332 }
333 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::at("未发现期望字符", start).into()) }
334 }
335
336 pub fn just(&mut self, pattern: &str) -> Result<()> {
337 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
338 self.pos += pattern.len();
339 Ok(())
340 } else {
341 Err(ParserErr::at(format!("期望字符串 {}", pattern), self.pos).into())
342 }
343 }
344
345 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
346 self.just(pattern)?;
347 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
348 self.pos -= pattern.len();
349 return Err(ParserErr::at(format!("期望字符串 {}", pattern), self.pos).into());
350 }
351 Ok(())
352 }
353
354 pub fn get_type(&mut self) -> Result<Type> {
355 self.whitespace()?;
356 if self.get()? == b'[' {
357 self.pos += 1;
358 let ty = self.get_type()?;
359 self.until(b';')?;
360 self.whitespace()?;
361 let len = self.get_type_param()?;
362 self.until(b']')?;
363 if let Type::ConstInt(number) = len {
364 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
365 Ok(Type::Array(std::rc::Rc::new(ty), number))
366 } else {
367 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
368 }
369 } else {
370 for ty in TYPES {
371 if self.just(ty.0).is_ok() {
372 return Ok(ty.1.clone());
373 }
374 }
375 let name = self.ident()?;
376 if self.take(b'<').is_ok() {
377 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
378 Ok(Type::Ident { name, params })
379 } else {
380 Ok(Type::Ident { name, params: Vec::new() })
381 }
382 }
383 }
384
385 pub fn get_type_param(&mut self) -> Result<Type> {
386 self.const_type_param_add()
387 }
388
389 fn const_type_param_add(&mut self) -> Result<Type> {
390 let mut left = self.const_type_param_mul()?;
391 loop {
392 self.whitespace()?;
393 let op = if self.take(b'+').is_ok() {
394 Some(ConstIntOp::Add)
395 } else if self.take(b'-').is_ok() {
396 Some(ConstIntOp::Sub)
397 } else {
398 None
399 };
400 let Some(op) = op else { break };
401 let right = self.const_type_param_mul()?;
402 left = Self::fold_const_type_binary(op, left, right)?;
403 }
404 Ok(left)
405 }
406
407 fn const_type_param_mul(&mut self) -> Result<Type> {
408 let mut left = self.const_type_param_primary()?;
409 loop {
410 self.whitespace()?;
411 let op = if self.take(b'*').is_ok() {
412 Some(ConstIntOp::Mul)
413 } else if self.take(b'/').is_ok() {
414 Some(ConstIntOp::Div)
415 } else if self.take(b'%').is_ok() {
416 Some(ConstIntOp::Mod)
417 } else {
418 None
419 };
420 let Some(op) = op else { break };
421 let right = self.const_type_param_primary()?;
422 left = Self::fold_const_type_binary(op, left, right)?;
423 }
424 Ok(left)
425 }
426
427 fn const_type_param_primary(&mut self) -> Result<Type> {
428 self.whitespace()?;
429 if self.take(b'(').is_ok() {
430 let ty = self.get_type_param()?;
431 self.until(b')')?;
432 return Ok(ty);
433 }
434 if self.get()?.is_ascii_digit() {
435 let value = self.number()?;
436 if let Some(value) = value.as_uint() {
437 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
438 Ok(Type::ConstInt(value))
439 } else if let Some(value) = value.as_int() {
440 Ok(Type::ConstInt(value))
441 } else {
442 Err(anyhow!("模板数字参数必须是整数"))
443 }
444 } else {
445 self.get_type()
446 }
447 }
448
449 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
450 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
451 let value = match op {
452 ConstIntOp::Add => left + right,
453 ConstIntOp::Sub => left - right,
454 ConstIntOp::Mul => left * right,
455 ConstIntOp::Div => {
456 if *right == 0 {
457 return Err(anyhow!("模板整数除以 0"));
458 }
459 left / right
460 }
461 ConstIntOp::Mod => {
462 if *right == 0 {
463 return Err(anyhow!("模板整数取模 0"));
464 }
465 left % right
466 }
467 };
468 Ok(Type::ConstInt(value))
469 } else {
470 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
471 }
472 }
473
474 pub fn comment(&mut self) -> Result<()> {
475 if self.get()? == b'/' && self.ahead()? == b'/' {
476 self.pos += 2;
477 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
478 self.pos += 1;
479 }
480 Ok(())
481 } else if self.get()? == b'/' && self.ahead()? == b'*' {
482 self.pos += 2;
483 while self.pos + 1 < self.buf.len() {
484 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
485 self.pos += 2;
486 return Ok(());
487 }
488 self.pos += 1;
489 }
490 Err(ParserErr::at("未关闭的注释", self.pos).into())
491 } else {
492 Ok(())
493 }
494 }
495
496 pub fn whitespace(&mut self) -> Result<()> {
497 while self.pos < self.buf.len() {
498 self.comment()?;
499 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
500 break;
501 }
502 self.pos += 1;
503 }
504 Ok(())
505 }
506
507 pub fn ident(&mut self) -> Result<SmolStr> {
508 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
509 loop {
510 let save_pos = self.pos;
511 if self.just("::").is_err() {
512 break;
513 }
514 match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
515 Ok((_, next_stop)) => {
516 stop = next_stop;
517 }
518 Err(_) => {
519 self.pos = save_pos;
520 break;
521 }
522 }
523 }
524 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
525 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
526 }
527 Ok(self.get_str(start, stop))
528 }
529
530 pub fn string(&mut self) -> Result<SmolStr> {
531 if self.get()? != b'"' {
532 return Err(ParserErr::at("非字符串", self.current_pos()).into());
533 }
534 self.pos += 1;
535 let mut text_buf = Vec::new();
536 while self.pos < self.buf.len() {
537 if self.buf[self.pos] == b'\\' {
538 self.pos += 1;
540 match self.buf[self.pos] {
541 b'n' => {
542 text_buf.push(b'\n');
543 self.pos += 1;
544 }
545 b'r' => {
546 text_buf.push(b'\r');
547 self.pos += 1;
548 }
549 b't' => {
550 text_buf.push(b'\t');
551 self.pos += 1;
552 }
553 ch @ (b'\\' | b'"') => {
554 text_buf.push(ch);
555 self.pos += 1;
556 }
557 b'u' => {
558 self.pos += 1;
559 let unicode = if self.take(b'{').is_ok() {
560 let code = self.hex()?;
561 self.pos += 1;
562 code
563 } else {
564 self.hex()?
565 };
566 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
567 let mut utf8_buf = [0u8; 4];
568 let s = ch.encode_utf8(&mut utf8_buf);
569 text_buf.extend_from_slice(s.as_bytes());
570 }
571 b'x' => {
572 self.pos += 1;
573 if self.pos + 2 > self.buf.len() {
574 return Err(anyhow!("非法 \\x 转义:需要 2 位十六进制"));
575 }
576 let start = self.pos;
577 self.pos += 2;
578 let hex = &self.buf[start..self.pos];
579 if hex.iter().any(|b| !b.is_ascii_hexdigit()) {
580 return Err(anyhow!("非法 \\x 转义:仅允许十六进制字符"));
581 }
582 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
583 if code > 0xFF {
584 return Err(anyhow!("\\x 转义值 0x{:02X} 超出 0xFF", code));
585 }
586 text_buf.push(code as u8);
587 }
588 other => {
589 return Err(anyhow!("invalid escape character: {}", other as char));
590 }
591 }
592 } else {
593 if self.buf[self.pos] == b'"' {
594 self.pos += 1;
595 return Ok(String::from_utf8(text_buf)?.into());
596 }
597 text_buf.push(self.buf[self.pos]);
598 self.pos += 1;
599 }
600 }
601 Err(ParserErr::at("未关闭字符串", self.pos).into())
602 }
603
604 pub fn text(&mut self) -> Result<SmolStr> {
605 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
606 self.pos += 1;
607 let mut end = String::from("\"");
608 while self.buf[self.pos] == b'#' {
609 end.push('#');
610 self.pos += 1;
611 }
612 if self.get()? != b'"' {
613 return Err(ParserErr::at("非法的原始字符串", self.current_pos()).into());
614 }
615 self.pos += 1;
616 let start_pos = self.pos;
617 while self.pos < self.buf.len() {
618 if self.just(&end).is_ok() {
619 break;
620 }
621 self.pos += 1;
622 }
623 Ok(self.get_str(start_pos, self.pos - end.len()))
624 } else {
625 self.string()
626 }
627 }
628
629 fn hex(&mut self) -> Result<i32> {
630 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
632 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
633 }
634
635 fn numeric_suffix(&mut self) -> Option<Type> {
636 let save = self.pos;
637 for (name, ty) in TYPES {
638 if !ty.is_native() {
639 continue;
640 }
641 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
642 self.pos += name.len();
643 return Some(ty.clone());
644 }
645 }
646 self.pos = save;
647 None
648 }
649
650 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
651 let ty = suffix.unwrap_or(Type::I64);
653 let magnitude = u128::from_str_radix(digits, radix).map_err(|_| anyhow!("整数字面量 {} 超出可表示范围", digits))?;
655 let (signed, bits) = match ty {
656 Type::I8 => (true, 8u32),
657 Type::I16 => (true, 16),
658 Type::I32 => (true, 32),
659 Type::I64 => (true, 64),
660 Type::U8 => (false, 8),
661 Type::U16 => (false, 16),
662 Type::U32 => (false, 32),
663 Type::U64 => (false, 64),
664 Type::F16 => return Ok(Dynamic::F16(dynamic::f64_to_f16(magnitude as f64))),
665 Type::F32 => return Ok(Dynamic::F32(magnitude as f32)),
666 Type::F64 => return Ok(Dynamic::F64(magnitude as f64)),
667 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
668 };
669 let unsigned_max = (1u128 << bits) - 1;
670 let max_allowed = if radix == 10 { if signed { unsigned_max / 2 + 1 } else { unsigned_max } } else { unsigned_max };
673 if magnitude > max_allowed {
674 return Err(anyhow!("整数字面量 {} 超出 {:?} 的范围", digits, ty));
675 }
676 Ok(match ty {
677 Type::I8 => Dynamic::I8(magnitude as i8),
678 Type::I16 => Dynamic::I16(magnitude as i16),
679 Type::I32 => Dynamic::I32(magnitude as i32),
680 Type::I64 => Dynamic::I64(magnitude as i64),
681 Type::U8 => Dynamic::U8(magnitude as u8),
682 Type::U16 => Dynamic::U16(magnitude as u16),
683 Type::U32 => Dynamic::U32(magnitude as u32),
684 Type::U64 => Dynamic::U64(magnitude as u64),
685 _ => unreachable!(),
686 })
687 }
688
689 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
690 let value: f64 = digits.parse()?;
691 if let Some(ref ty) = suffix {
692 let is_int_suffix = matches!(ty, Type::I8 | Type::I16 | Type::I32 | Type::I64 | Type::U8 | Type::U16 | Type::U32 | Type::U64);
695 if is_int_suffix {
696 let (min, max): (f64, f64) = match ty {
697 Type::I8 => (i8::MIN as f64, i8::MAX as f64),
698 Type::I16 => (i16::MIN as f64, i16::MAX as f64),
699 Type::I32 => (i32::MIN as f64, i32::MAX as f64),
700 Type::I64 => (i64::MIN as f64, i64::MAX as f64),
701 Type::U8 => (0.0, u8::MAX as f64),
702 Type::U16 => (0.0, u16::MAX as f64),
703 Type::U32 => (0.0, u32::MAX as f64),
704 Type::U64 => (0.0, u64::MAX as f64),
705 _ => unreachable!(),
706 };
707 if !value.is_finite() || value < min || value > max || value.fract() != 0.0 {
708 return Err(anyhow!("浮点字面量 {:?} 超出 {:?} 范围", value, ty));
709 }
710 } else if !value.is_finite() {
711 return Err(anyhow!("非法浮点字面量: {:?}", value));
712 }
713 }
714 Ok(match suffix.unwrap_or(Type::F32) {
715 Type::I8 => Dynamic::I8(value as i8),
716 Type::I16 => Dynamic::I16(value as i16),
717 Type::I32 => Dynamic::I32(value as i32),
718 Type::I64 => Dynamic::I64(value as i64),
719 Type::U8 => Dynamic::U8(value as u8),
720 Type::U16 => Dynamic::U16(value as u16),
721 Type::U32 => Dynamic::U32(value as u32),
722 Type::U64 => Dynamic::U64(value as u64),
723 Type::F16 => Dynamic::F16(dynamic::f64_to_f16(value)),
724 Type::F32 => Dynamic::F32(value as f32),
725 Type::F64 => Dynamic::F64(value),
726 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
727 })
728 }
729
730 pub fn number(&mut self) -> Result<Dynamic> {
731 if self.get()? == b'0' {
732 if [b'b', b'B'].contains(&self.ahead()?) {
733 self.pos += 2;
734 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
735 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
736 let suffix = self.numeric_suffix();
737 return self.int_literal(&s, 2, suffix);
738 } else if [b'o', b'O'].contains(&self.ahead()?) {
739 self.pos += 2;
740 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
741 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
742 let suffix = self.numeric_suffix();
743 return self.int_literal(&s, 8, suffix);
744 } else if [b'x', b'X'].contains(&self.ahead()?) {
745 self.pos += 2;
746 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
747 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
748 let suffix = self.numeric_suffix();
749 return self.int_literal(&s, 16, suffix);
750 }
751 }
752 let start = self.pos;
753 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
754 self.pos += 1;
755 }
756 let mut is_float = false;
757 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
758 is_float = true;
759 self.pos += 1;
760 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
761 self.pos += 1;
762 }
763 }
764 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') {
765 let mut exp_pos = self.pos + 1;
766 if exp_pos < self.buf.len() && (self.buf[exp_pos] == b'+' || self.buf[exp_pos] == b'-') {
767 exp_pos += 1;
768 }
769 if exp_pos < self.buf.len() && self.buf[exp_pos] <= b'9' && self.buf[exp_pos] >= b'0' {
770 is_float = true;
771 self.pos = exp_pos + 1;
772 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
773 self.pos += 1;
774 }
775 }
776 }
777 if self.pos > start {
778 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
779 let suffix = self.numeric_suffix();
780 if is_float {
781 return self.float_literal(&text, suffix);
782 }
783 return self.int_literal(&text, 10, suffix);
784 }
785 Err(ParserErr::at("非数字", start).into())
786 }
787}
788
789#[cfg(test)]
790mod tests {
791 use super::*;
792
793 fn parse_all(code: &str) -> Result<Vec<Stmt>> {
794 let mut parser = Parser::new(code.as_bytes().to_vec());
795 let mut stmts = Vec::new();
796 loop {
797 match parser.stmt(false) {
798 Ok(stmt) => stmts.push(stmt),
799 Err(err) => {
800 if parser.is_eof() {
801 return Ok(stmts);
802 }
803 return Err(err);
804 }
805 }
806 }
807 }
808
809 fn run_with_big_stack(f: impl FnOnce() + Send + 'static) {
813 std::thread::Builder::new().stack_size(64 * 1024 * 1024).spawn(f).unwrap().join().unwrap();
814 }
815
816 #[test]
817 fn deeply_nested_parens_error_instead_of_stack_overflow() {
818 run_with_big_stack(|| {
819 let depth = MAX_PARSE_DEPTH + 50;
820 let code = format!("{}1{}", "(".repeat(depth), ")".repeat(depth));
821 let mut parser = Parser::new(code.into_bytes());
822 let err = parser.get_expr().unwrap_err();
823 assert!(err.to_string().contains("嵌套过深"), "got: {err}");
824 });
825 }
826
827 #[test]
828 fn deeply_nested_blocks_error_instead_of_stack_overflow() {
829 run_with_big_stack(|| {
830 let depth = MAX_PARSE_DEPTH + 50;
831 let code = format!("fn f() {}{}{}", "{".repeat(depth), "1", "}".repeat(depth));
832 let err = parse_all(&code).unwrap_err();
833 assert!(err.to_string().contains("嵌套过深"), "got: {err}");
834 });
835 }
836
837 #[test]
838 fn normal_nesting_within_limit_parses() {
839 let code = format!("{}1{}", "(".repeat(32), ")".repeat(32));
841 let mut parser = Parser::new(code.into_bytes());
842 parser.get_expr().unwrap();
843 }
844
845 fn parse_literal(code: &str) -> Result<Dynamic> {
846 let mut parser = Parser::new(code.as_bytes().to_vec());
847 match parser.get_expr()?.kind {
848 crate::ExprKind::Value(value) => Ok(value),
849 other => Err(anyhow!("不是字面量: {:?}", other)),
850 }
851 }
852
853 #[test]
854 fn unsuffixed_integer_defaults_to_i64() {
855 assert_eq!(parse_literal("5").unwrap(), Dynamic::I64(5));
856 assert_eq!(parse_literal("3000000000").unwrap(), Dynamic::I64(3000000000));
858 }
859
860 #[test]
861 fn out_of_range_integer_literals_error() {
862 assert!(parse_literal("99999999999999999999999999999999999999999").is_err());
864 assert!(parse_literal("255i8").unwrap_err().to_string().contains("超出"));
866 assert!(parse_literal("70000i16").unwrap_err().to_string().contains("超出"));
867 assert!(parse_literal("256u8").unwrap_err().to_string().contains("超出"));
868 }
869
870 #[test]
871 fn signed_min_magnitude_literals_allowed() {
872 assert_eq!(parse_literal("128i8").unwrap(), Dynamic::I8(-128));
874 assert_eq!(parse_literal("9223372036854775808").unwrap(), Dynamic::I64(i64::MIN));
875 }
876
877 #[test]
878 fn hex_literals_keep_bit_pattern() {
879 assert_eq!(parse_literal("0xFFFFFFFF").unwrap(), Dynamic::I64(0xFFFFFFFF));
881 assert_eq!(parse_literal("0xFFi8").unwrap(), Dynamic::I8(-1));
883 assert_eq!(parse_literal("0xFFFFFFFFu32").unwrap(), Dynamic::U32(u32::MAX));
884 }
885
886 fn shape(code: &str) -> String {
888 let mut parser = Parser::new(code.as_bytes().to_vec());
889 let expr = parser.get_expr().expect("parse");
890 fmt_shape(&expr)
891 }
892
893 fn binop_sym(op: &crate::BinaryOp) -> &'static str {
894 use crate::BinaryOp::*;
895 match op {
896 Add => "+",
897 Sub => "-",
898 Mul => "*",
899 Div => "/",
900 Mod => "%",
901 Shl => "<<",
902 Shr => ">>",
903 BitAnd => "&",
904 BitOr => "|",
905 BitXor => "^",
906 Assign => "=",
907 AddAssign => "+=",
908 Eq => "==",
909 Ne => "!=",
910 Lt => "<",
911 Gt => ">",
912 Le => "<=",
913 Ge => ">=",
914 And => "&&",
915 Or => "||",
916 Idx => "idx",
917 other => {
918 let _ = other;
919 "?"
920 }
921 }
922 }
923
924 fn fmt_shape(expr: &crate::Expr) -> String {
925 use crate::ExprKind::*;
926 match &expr.kind {
927 Value(v) => format!("{:?}", v).replace("I64(", "").replace("I32(", "").trim_end_matches(')').to_string(),
928 Ident(name) => name.to_string(),
929 Unary { op, value } => {
930 let s = if matches!(op, crate::UnaryOp::Neg) { "-" } else { "!" };
931 format!("({} {})", s, fmt_shape(value))
932 }
933 Binary { left, op, right } => format!("({} {} {})", binop_sym(op), fmt_shape(left), fmt_shape(right)),
934 Range { start, stop, inclusive } => format!("({} {} {})", if *inclusive { "..=" } else { ".." }, fmt_shape(start), fmt_shape(stop)),
935 Typed { value, ty } => format!("(as {} {:?})", fmt_shape(value), ty),
936 other => format!("{:?}", other),
937 }
938 }
939
940 #[test]
941 fn precedence_and_associativity_golden() {
942 assert_eq!(shape("1 + 2 * 3"), "(+ 1 (* 2 3))");
944 assert_eq!(shape("1 * 2 + 3"), "(+ (* 1 2) 3)");
945 assert_eq!(shape("1 - 2 - 3"), "(- (- 1 2) 3)");
947 assert_eq!(shape("8 / 4 / 2"), "(/ (/ 8 4) 2)");
948 assert_eq!(shape("2 + 3 << 4"), "(<< (+ 2 3) 4)");
950 assert_eq!(shape("1 | 2 ^ 3 & 4"), "(| 1 (^ 2 (& 3 4)))");
952 assert_eq!(shape("1 + 2 == 3"), "(== (+ 1 2) 3)");
954 assert_eq!(shape("a && b || c"), "(|| (&& a b) c)");
956 assert_eq!(shape("-a * b"), "(* (- a) b)");
958 assert_eq!(shape("!a == b"), "(== (! a) b)");
959 }
960
961 #[test]
962 fn assignment_range_and_as_precedence_golden() {
963 assert_eq!(shape("a = b + c"), "(= a (+ b c))");
965 assert_eq!(shape("a = b = c"), "(= a (= b c))");
966 assert_eq!(shape("a = b = c = d"), "(= a (= b (= c d)))");
967 assert_eq!(shape("a += b * c"), "(+= a (* b c))");
969 assert_eq!(shape("1 + 1 .. n * 2"), "(.. (+ 1 1) (* n 2))");
971 assert_eq!(shape("0 ..= n - 1"), "(..= 0 (- n 1))");
972 assert_eq!(shape("a + b as i64"), "(+ a (as b I64))");
974 assert_eq!(shape("a as i64 + b"), "(+ (as a I64) b)");
975 assert_eq!(shape("(a + b) as i64"), "(as (+ a b) I64)");
976 }
977
978 #[test]
982 fn parser_never_panics_on_random_input() {
983 run_with_big_stack(|| {
984 const FRAGMENTS: &[&str] = &[
985 "fn", "let", "if", "else", "for", "in", "while", "return", "struct", "impl", "pub", "(", ")", "{", "}", "[", "]", "<", ">", "+", "-", "*", "/", "%", "=", "==", "&&", "||", "..", "..=", "as", "i32",
986 "u64", "f64", ".", ",", ";", ":", "::", "x", "0", "1", "255i8", "0xFF", "\"s\"", "true", "null", "|a|", "->",
987 ];
988 let mut state: u64 = 0x9E3779B97F4A7C15;
990 let mut next = || {
991 state ^= state >> 12;
992 state ^= state << 25;
993 state ^= state >> 27;
994 state = state.wrapping_mul(0x2545F4914F6CDD1D);
995 state
996 };
997
998 for _ in 0..4000 {
999 let mut code = String::new();
1000 let tokens = (next() % 40) as usize;
1001 for _ in 0..tokens {
1002 code.push_str(FRAGMENTS[(next() as usize) % FRAGMENTS.len()]);
1003 if next() % 2 == 0 {
1004 code.push(' ');
1005 }
1006 }
1007 let result = std::panic::catch_unwind(|| {
1009 let mut parser = Parser::new(code.clone().into_bytes());
1010 let mut count = 0;
1011 loop {
1012 match parser.stmt(false) {
1013 Ok(_) => {
1014 count += 1;
1015 if parser.is_eof() || count > 1000 {
1016 break;
1017 }
1018 }
1019 Err(_) => break,
1020 }
1021 }
1022 });
1023 assert!(result.is_ok(), "parser panicked on input: {:?}", code);
1024 }
1025 });
1026 }
1027
1028 #[test]
1029 fn allows_local_name_to_shadow_prior_function() {
1030 parse_all(
1031 r#"
1032 fn chunk_id(x, y) {
1033 x + y
1034 }
1035
1036 fn open() {
1037 let chunk_id = 1;
1038 chunk_id
1039 }
1040 "#,
1041 )
1042 .unwrap();
1043 }
1044
1045 #[test]
1046 fn rejects_duplicate_function_args() {
1047 let err = parse_all("fn open(value, value) { value }").unwrap_err();
1048 assert!(err.to_string().contains("符号 value 已经声明"));
1049 }
1050
1051 #[test]
1052 fn rejects_duplicate_local_let_names() {
1053 let err = parse_all(
1054 r#"
1055 fn open() {
1056 let value = 1;
1057 let value = 2;
1058 value
1059 }
1060 "#,
1061 )
1062 .unwrap_err();
1063 assert!(err.to_string().contains("符号 value 已经声明"));
1064 }
1065
1066 #[test]
1067 fn allows_same_method_name_in_different_impl_blocks() {
1068 parse_all(
1069 r#"
1070 struct A {}
1071 struct B {}
1072
1073 impl A {
1074 fn zero() { 0 }
1075 }
1076
1077 impl B {
1078 fn zero() { 0 }
1079 }
1080 "#,
1081 )
1082 .unwrap();
1083 }
1084
1085 #[test]
1086 fn rejects_nested_fn_inside_function_body() {
1087 let err = parse_all("fn outer() { fn inner() { 1 } }").unwrap_err();
1088 assert!(err.to_string().contains("函数体内不能定义"), "got: {err}");
1089 }
1090
1091 #[test]
1092 fn rejects_nested_struct_inside_function_body() {
1093 let err = parse_all("fn outer() { struct S { x: i32 } S{x: 1} }").unwrap_err();
1094 assert!(err.to_string().contains("函数体内不能定义"), "got: {err}");
1095 }
1096
1097 #[test]
1098 fn rejects_nested_const_inside_function_body() {
1099 let err = parse_all("fn outer() { const K = 1 } K").unwrap_err();
1100 assert!(err.to_string().contains("函数体内不能定义"), "got: {err}");
1101 }
1102
1103 #[test]
1104 fn hex_escape_at_end_of_string_preserves_byte() {
1105 let mut p = Parser::new(br#""abc\x41""#.to_vec());
1106 let s = p.string().unwrap();
1107 assert_eq!(s.as_str(), "abcA");
1108 }
1109
1110 #[test]
1111 fn hex_escape_truncated_reports_clear_error() {
1112 let mut p = Parser::new(br#""abc\x""#.to_vec());
1113 let err = p.string().unwrap_err();
1114 assert!(err.to_string().contains("\\x"), "got: {err}");
1115 }
1116
1117 #[test]
1118 fn hex_escape_non_hex_char_reports_clear_error() {
1119 let mut p = Parser::new(br#""abc\xZZ""#.to_vec());
1120 let err = p.string().unwrap_err();
1121 assert!(err.to_string().contains("\\x"), "got: {err}");
1122 }
1123
1124 #[test]
1125 fn else_with_invalid_body_reports_error() {
1126 let err = parse_all("fn f() { if true { 1 } else }").unwrap_err();
1128 let msg = err.to_string();
1129 assert!(msg.contains("not code block") || msg.contains("未结束的"), "got: {msg}");
1130 }
1131
/// `1e30u8` pairs a float-form literal with a `u8` suffix; `number` must fail
/// with a message containing "超出" ("exceeds").
#[test]
fn float_literal_with_int_suffix_out_of_range_errors() {
    let input = b"1e30u8".to_vec();
    let mut parser = Parser::new(input);
    let msg = parser.number().unwrap_err().to_string();
    assert!(msg.contains("超出"), "got: {msg}");
}
1138
/// `1.5i32` pairs a fractional literal with an `i32` suffix; `number` must
/// fail with a message containing "超出" ("exceeds").
#[test]
fn float_literal_with_int_suffix_fractional_errors() {
    let input = b"1.5i32".to_vec();
    let mut parser = Parser::new(input);
    let msg = parser.number().unwrap_err().to_string();
    assert!(msg.contains("超出"), "got: {msg}");
}
1145
/// `1e-3f32` must parse to a `Dynamic::F32` within `1e-8` of `1e-3`.
#[test]
fn float_literal_with_float_suffix_accepts_fractional() {
    let mut parser = Parser::new(b"1e-3f32".to_vec());
    let parsed = parser.number().unwrap();
    assert!(matches!(parsed, Dynamic::F32(v) if (v - 1e-3).abs() < 1e-8));
}
1151
/// A closure bound with `let` inside a function body, then called, must parse.
#[test]
fn allows_closure_inside_function_body() {
    let source = "fn outer() { let f = |x: i32| { x + 1 }; f(1) }";
    parse_all(source).unwrap();
}
1156
/// A `const` item inside an `impl` body must be rejected; the error text must
/// contain both "impl 体内不能定义" ("cannot be defined inside an impl body")
/// and `const`.
#[test]
fn rejects_const_inside_impl_body() {
    let source = "struct S {}\nimpl S { const K = 1 }";
    let msg = parse_all(source).unwrap_err().to_string();
    let mentions_impl_rule = msg.contains("impl 体内不能定义");
    let mentions_const = msg.contains("const");
    assert!(mentions_impl_rule && mentions_const, "got: {msg}");
}
1163
/// A `pub fn` method declared inside an `impl` body must parse.
#[test]
fn allows_fn_inside_impl_body() {
    let source = "struct S {}\nimpl S { pub fn m(self: S) { 1 } }";
    parse_all(source).unwrap();
}
1168
/// Parsing a source that defines `fn f` twice must fail with an error that
/// downcasts to `ParserErr`, whose message mentions `f` and whose span starts
/// inside the source text.
///
/// Diagnostics are attached to the assertions themselves rather than printed
/// unconditionally, so a failing run explains itself and a passing run is
/// silent.
#[test]
fn parser_err_carries_span() {
    let src = "fn f() {}\nfn f() {}\n";
    let err = parse_all(src).unwrap_err();
    // Include the rendered error so a wrong error type is easy to diagnose.
    let Some(parser_err) = err.downcast_ref::<ParserErr>() else {
        panic!("expected ParserErr, got: {err}");
    };
    let message = parser_err.message();
    let span = parser_err.span();
    assert!(message.contains('f'), "message should mention `f`, got: {message}");
    assert!(
        span.start < src.len(),
        "span {span:?} should start inside the {}-byte source",
        src.len()
    );
}
1182
/// A braced block containing statements, used as the right-hand side of a
/// `let`, must parse.
#[test]
fn block_as_let_value_is_expression() {
    let source = "pub fn f() { let x = { let y = 1; y + 1 }; x }";
    parse_all(source).unwrap();
}
1187
/// `{ key: 1 }` on the right-hand side of a `let` must still parse.
/// This test only asserts that parsing succeeds; it does not inspect the
/// resulting node.
#[test]
fn dict_still_takes_priority_over_block() {
    let source = "pub fn f() { let d = { key: 1 }; d }";
    parse_all(source).unwrap();
}
1193
/// A list pattern with a leading binding followed by `..rest` must parse.
#[test]
fn list_pattern_with_rest_parses() {
    let source = "pub fn f(items) { let [first, ..rest] = items; first }";
    parse_all(source).unwrap();
}
1198
/// A list pattern consisting solely of a `..all` rest binding must parse.
#[test]
fn list_pattern_with_only_rest_parses() {
    let source = "pub fn f(items) { let [..all] = items; all }";
    parse_all(source).unwrap();
}
1203
/// `take(b'c')` on the input `ab` must fail with an error that downcasts to
/// `SpannedParseError`, and its `pos` must equal the parser position observed
/// just before the call.
#[test]
fn take_error_carries_precise_pos() {
    use crate::SpannedParseError;

    let mut parser = Parser::new(b"ab".to_vec());
    let expected_pos = parser.current_pos();
    let failure = parser.take(b'c').unwrap_err();
    match failure.downcast_ref::<SpannedParseError>() {
        Some(spanned) => assert_eq!(spanned.pos, expected_pos),
        None => panic!("take should wrap in SpannedParseError"),
    }
}
1216
/// Scientific-notation literals with an explicit float suffix must parse to
/// the matching `Dynamic` float variant with the exact expected value.
#[test]
fn parses_scientific_float_suffixes() {
    // (input bytes, expected parsed value)
    let cases = [
        (b"1.7976931348623157e308f64".to_vec(), Dynamic::F64(1.7976931348623157e308)),
        (b"1e-3f32".to_vec(), Dynamic::F32(1e-3f32)),
    ];
    for (input, expected) in cases {
        let mut parser = Parser::new(input);
        assert_eq!(parser.number().unwrap(), expected);
    }
}
1225
/// `|| { 1i32 }()` must parse as a `Call` with no parameters whose callee is
/// a `Closure` with no arguments.
#[test]
fn parses_immediate_closure_call() {
    let mut parser = Parser::new(b"|| { 1i32 }()".to_vec());
    let expr = parser.get_expr().unwrap();

    // Outer node: the call itself.
    let (obj, params) = match expr.kind {
        ExprKind::Call { obj, params } => (obj, params),
        _ => panic!("expected closure call, got {expr:?}"),
    };
    assert!(params.is_empty());

    // Callee: the closure being invoked.
    let args = match obj.kind {
        ExprKind::Closure { args, .. } => args,
        _ => panic!("expected closure callee, got {obj:?}"),
    };
    assert!(args.is_empty());
}
1239
/// `()` must parse as a `Tuple` expression with no items.
#[test]
fn parses_empty_tuple_expression() {
    let mut parser = Parser::new(b"()".to_vec());
    let expr = parser.get_expr().unwrap();
    let items = match expr.kind {
        ExprKind::Tuple(items) => items,
        _ => panic!("expected empty tuple, got {expr:?}"),
    };
    assert!(items.is_empty());
}
1249
/// `value::<4>()` must parse as a `Call` with no parameters whose callee is a
/// `Generic` node wrapping the identifier `value` with the single generic
/// parameter `Type::ConstInt(4)`.
#[test]
fn parses_explicit_generic_function_call() {
    let mut parser = Parser::new(b"value::<4>()".to_vec());
    let expr = parser.get_expr().unwrap();

    // Outer node: the call with an empty argument list.
    let (obj, params) = match expr.kind {
        ExprKind::Call { obj, params } => (obj, params),
        _ => panic!("expected function call, got {expr:?}"),
    };
    assert!(params.is_empty());

    // Callee: the generic instantiation `value::<4>`.
    let (obj, params) = match obj.kind {
        ExprKind::Generic { obj, params } => (obj, params),
        _ => panic!("expected generic callee, got {obj:?}"),
    };
    assert!(matches!(obj.kind, ExprKind::Ident(name) if name.as_str() == "value"));
    assert!(matches!(params.as_slice(), [Type::ConstInt(4)]));
}
1264
/// A self-contained excerpt of a generic `BigFloat<N>` implementation
/// (`abs_cmp` and `cmp`) must parse without error. The excerpt combines
/// const-generic parameters, `as` casts, `while` loops, `if`/`else if`
/// chains, indexing, compound assignment and unary negation.
#[test]
fn parses_bigfloat_cmp_context_segment() {
    parse_all(
        r#"
        struct BigFloat<N> { data: [u32; N], exp: i32, sign: bool }

        impl BigFloat<N> {
            fn abs_cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                let self_high = self.exp + ((N - 1) as i32);
                let rhs_high = rhs.exp + ((N - 1) as i32);
                let high = if self_high >= rhs_high { self_high } else { rhs_high };
                let low = if self.exp <= rhs.exp { self.exp } else { rhs.exp };
                let result = 0i32;
                let power = high;

                while power >= low && result == 0i32 {
                    let a_idx = power - self.exp;
                    let b_idx = power - rhs.exp;
                    let a_limb = 0u32;
                    let b_limb = 0u32;

                    if a_idx >= 0i32 && a_idx < (N as i32) {
                        a_limb = self.data[a_idx as u32];
                    }
                    if b_idx >= 0i32 && b_idx < (N as i32) {
                        b_limb = rhs.data[b_idx as u32];
                    }

                    if a_limb > b_limb {
                        result = 1i32;
                    } else if a_limb < b_limb {
                        result = -1i32;
                    }

                    power -= 1i32;
                }

                result
            }

            pub fn cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
                if self.is_zero() && rhs.is_zero() {
                    0i32
                } else if self.sign != rhs.sign {
                    if self.sign { -1i32 } else { 1i32 }
                } else {
                    let cmp = self.abs_cmp(rhs);
                    if self.sign { -cmp } else { cmp }
                }
            }
        }
        "#,
    )
    .unwrap();
}
1318
/// The `bigfloat.zs` script, embedded at compile time, must parse without
/// error.
#[test]
fn parses_bigfloat_file() {
    parse_all(include_str!("../../zusts/bigfloat.zs")).unwrap();
}
1324}