Skip to main content

parser/
lib.rs

1use std::{collections::BTreeSet, fmt::Debug};
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A pair of offsets (`start`, `end`) delimiting a region of the parsed input.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region begins.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-width span whose `start` and `end` are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Self::new(pos, pos)
    }

    /// Returns the smallest span covering both `self` and `other`:
    /// the lower of the two starts paired with the higher of the two ends.
    pub fn merge(self, other: Self) -> Self {
        let start = usize::min(self.start, other.start);
        let end = usize::max(self.end, other.end);
        Self::new(start, end)
    }
}
35
/// Mutable state of the hand-written parser: the input bytes, a cursor into them,
/// and the bookkeeping used for declaration checks and recursion limiting.
#[derive(Debug)]
pub struct Parser {
    pos: usize,   // current parse position (index into `buf`)
    buf: Vec<u8>, // input bytes being parsed
    // Offsets into `buf`; `error_stmt` starts its excerpt at the last entry (or 0 when empty).
    spans: Vec<usize>,
    // Stack of declared-name sets; `push_decl_scope` / `pop_decl_scope` grow and shrink it.
    decl_scopes: Vec<BTreeSet<SmolStr>>,
    // Incremented by `impl_body` for the duration of an impl block.
    impl_depth: usize,
    depth: usize, // current expression/statement recursion depth; stops maliciously deep nesting from blowing the call stack
    fatal: bool,  // unrecoverable error (e.g. recursion too deep); once set, every parse entry point fails immediately so backtracking retries cannot loop forever
}
46
/// Upper bound on parse recursion depth. Exceeding it yields [`ParserErr::TooDeep`],
/// downgrading a would-be stack-overflow crash to an ordinary parse error.
///
/// A single `expr_with_min_weight` frame is about 7KB and worker threads default to a
/// 2MB stack, so the limit is 128 (the same as rustc's default `recursion_limit`):
/// 128×7KB≈0.9MB, which still leaves headroom on the smallest stack, while normal
/// code rarely nests more than a few dozen levels.
pub const MAX_PARSE_DEPTH: usize = 128;
54
// Bytes that can never be part of an identifier; `ident` and `keyword` treat any
// other byte as an identifier character.
const NOT_IDENT: &[u8] = &[b' ', b'\t', b'\n', b'\r', b'/', b'*', b'+', b'-', b'=', b'(', b')', b'{', b'}', b'[', b']', b';', b':', b',', b'.', b'<', b'>', b'!', b'#', b'$', b'%', b'^', b'&', b'|', b'\\', b'"', b'\''];
// Bytes skipped by `whitespace`.
const WHITE_SPACE: &[u8] = &[b' ', b'\t', b'\n', b'\r'];
// Built-in type names and the `Type` each one maps to; also the source of the
// numeric literal suffixes recognised by `numeric_suffix`.
const TYPES: &[(&str, Type)] = &[
    ("bool", Type::Bool),
    ("string", Type::Str),
    ("i8", Type::I8),
    ("i16", Type::I16),
    ("i32", Type::I32),
    ("i64", Type::I64),
    ("u8", Type::U8),
    ("u16", Type::U16),
    ("u32", Type::U32),
    ("u64", Type::U64),
    ("f16", Type::F16),
    ("f32", Type::F32),
    ("f64", Type::F64),
];
// Reserved words; `ident` rejects any identifier that is exactly one of these.
const KEYWORDS: &[&str] = &["true", "false", "null", "let", "if", "else", "for", "in", "while", "pub", "fn", "struct", "impl", "const", "static", "continue", "return", "break"];
73
/// Parses a delimited list and evaluates to the filled collection.
///
/// * `$self` - the parser (needs `whitespace()`, `get()` and a `pos` field).
/// * `$start` - initial collection expression; items are appended with `push`.
/// * `$end` - byte that terminates the list; it is consumed.
/// * `$sep` - separator byte; consumed when present after an item, but not required.
/// * `$item_expr` - expression evaluated once per item (typically ending in `?`).
///
/// The expansion uses `?`, so it can only be used inside a function returning a
/// compatible `Result`; reaching end of input before `$end` propagates the error
/// from `get()`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut items = $start;
        loop {
            // Skip whitespace/comments, then stop as soon as the terminator is next.
            $self.whitespace()?;
            if $self.get()? == $end {
                $self.pos += 1;
                break;
            }
            let item = $item_expr;
            items.push(item);
            // A separator after the item is optional: it is consumed only if present.
            $self.whitespace()?;
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
        items
    }};
}
94
/// Runs `$method` speculatively and evaluates to its `Result`.
///
/// On an ordinary error the parser's `pos`, `decl_scopes` and `impl_depth` are
/// restored to their values from before the attempt, so the caller can try another
/// production. When the parser is in the `fatal` state the error is returned
/// without restoring anything.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let save_pos = $self.pos; // remember the current pos
        let save_decl_scopes = $self.decl_scopes.clone();
        let save_impl_depth = $self.impl_depth;
        match $method {
            Ok(expr) => Ok(expr),
            // fatal (e.g. recursion too deep) is unrecoverable: do not rewind pos, just propagate,
            // so the caller cannot retry another production and spin forever
            Err(e) if $self.fatal => Err(e),
            Err(e) => {
                $self.pos = save_pos;
                $self.decl_scopes = save_decl_scopes;
                $self.impl_depth = save_impl_depth;
                Err(e)
            }
        }
    }};
}
114
/// Structured errors raised by the low-level parsing primitives.
#[derive(Debug, thiserror::Error)]
pub enum ParserErr {
    /// `take` expected the first char but found the second (`'\0'` at end of input).
    #[error("期望字符 {0} 实际字符 {1}")]
    ExpectChar(char, char),
    /// `collect` matched zero bytes.
    #[error("未发现期望字符")]
    NoCharCollect,
    /// `just` / `keyword` did not find the given literal text at the cursor.
    #[error("期望字符串 {0}")]
    ExpectedString(SmolStr),
    /// `get` / `ahead` tried to read past the end of the buffer.
    #[error("输入结束")]
    EndofInput,
    /// A `/* ... */` comment was not terminated before end of input.
    #[error("未关闭的注释")]
    UncloseComment,
    /// A raw string's `r#...` prefix was not followed by a `"`.
    #[error("非法的原始字符串")]
    IllegalRawString,
    /// A string literal was not terminated before end of input.
    #[error("未关闭字符串")]
    UnclosedString,
    /// `string` was called where the cursor is not on a `"`.
    #[error("非字符串")]
    NotString,
    /// `number` found no digits at the cursor.
    #[error("非数字")]
    NotNumber,
    /// The named symbol is already declared in a scope that must stay unique.
    #[error("符号 {0} 已经声明")]
    DuplicateSymbol(SmolStr),
    /// Recursion exceeded [`MAX_PARSE_DEPTH`].
    #[error("表达式嵌套过深")]
    TooDeep,
}
140
141impl Parser {
142    pub fn new(buf: Vec<u8>) -> Self {
143        Self { pos: 0, buf, spans: Vec::new(), decl_scopes: vec![BTreeSet::new()], impl_depth: 0, depth: 0, fatal: false }
144    }
145
146    /// 进入一层递归:自增深度并校验上限。配合 [`Parser::exit_depth`] 使用。
147    ///
148    /// 超限时置 [`Parser::fatal`]:这是不可恢复错误。否则 `try_parse!` 的回溯会
149    /// 把 [`ParserErr::TooDeep`] 当成"换个产生式再试",pos 回退后外层循环原地重试,
150    /// 形成死循环。置位后 [`Parser::check_fatal`] 让每个解析入口立即失败,错误一路
151    /// 通过 `?` 上抛终止解析。
152    fn enter_depth(&mut self) -> Result<()> {
153        self.depth += 1;
154        if self.depth > MAX_PARSE_DEPTH {
155            self.depth -= 1;
156            self.fatal = true;
157            return Err(ParserErr::TooDeep.into());
158        }
159        Ok(())
160    }
161
162    fn exit_depth(&mut self) {
163        self.depth = self.depth.saturating_sub(1);
164    }
165
166    /// 解析入口的快速失败检查:一旦进入 fatal 状态,立即返回错误,阻止任何回溯重试。
167    fn check_fatal(&self) -> Result<()> {
168        if self.fatal { Err(ParserErr::TooDeep.into()) } else { Ok(()) }
169    }
170
171    fn push_decl_scope(&mut self) {
172        self.decl_scopes.push(BTreeSet::new());
173    }
174
175    fn pop_decl_scope(&mut self) {
176        if self.decl_scopes.len() > 1 {
177            self.decl_scopes.pop();
178        }
179    }
180
181    fn declare_symbol(&mut self, name: &SmolStr) -> Result<()> {
182        if name.is_empty() {
183            return Ok(());
184        }
185        if self.decl_scopes.iter().rev().any(|scope| scope.contains(name)) {
186            return Err(ParserErr::DuplicateSymbol(name.clone()).into());
187        }
188        self.decl_scopes.last_mut().expect("parser always has a declaration scope").insert(name.clone());
189        Ok(())
190    }
191
192    fn declare_symbol_in_current_scope(&mut self, name: &SmolStr) -> Result<()> {
193        if name.is_empty() {
194            return Ok(());
195        }
196        let scope = self.decl_scopes.last_mut().expect("parser always has a declaration scope");
197        if scope.contains(name) {
198            return Err(ParserErr::DuplicateSymbol(name.clone()).into());
199        }
200        scope.insert(name.clone());
201        Ok(())
202    }
203
204    fn declare_function_name(&mut self, name: &SmolStr) -> Result<()> {
205        if self.impl_depth > 0 { self.declare_symbol_in_current_scope(name) } else { self.declare_symbol(name) }
206    }
207
208    fn declare_args(&mut self, args: &[(SmolStr, Type)]) -> Result<()> {
209        for (name, _) in args {
210            self.declare_symbol(name)?;
211        }
212        Ok(())
213    }
214
215    fn declare_pattern_symbols(&mut self, pat: &Pattern) -> Result<()> {
216        match &pat.kind {
217            PatternKind::Ident { name, .. } => self.declare_symbol_in_current_scope(name),
218            PatternKind::Tuple(items) => {
219                for item in items {
220                    self.declare_pattern_symbols(item)?;
221                }
222                Ok(())
223            }
224            PatternKind::List { elems, .. } => {
225                for item in elems {
226                    self.declare_pattern_symbols(item)?;
227                }
228                Ok(())
229            }
230            PatternKind::Wildcard | PatternKind::Var { .. } | PatternKind::Literal(_) | PatternKind::Member(_, _) | PatternKind::Idx(_, _) => Ok(()),
231        }
232    }
233
234    fn function_body(&mut self, args: &[(SmolStr, Type)]) -> Result<Stmt> {
235        self.push_decl_scope();
236        let result = (|| {
237            self.declare_args(args)?;
238            self.block()
239        })();
240        self.pop_decl_scope();
241        result
242    }
243
244    fn impl_body(&mut self) -> Result<Stmt> {
245        self.push_decl_scope();
246        self.impl_depth += 1;
247        let result = self.block();
248        self.impl_depth -= 1;
249        self.pop_decl_scope();
250        result
251    }
252
253    pub fn is_eof(&self) -> bool {
254        self.pos >= self.buf.len()
255    }
256
257    pub fn get(&self) -> Result<u8> {
258        //查看当前字符
259        self.buf.get(self.pos).cloned().ok_or(ParserErr::EndofInput.into())
260    }
261
262    pub fn take(&mut self, ch: u8) -> Result<()> {
263        //如果当前字符为 ch 消费该字符 返回 Ok(())
264        if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
265            self.pos += 1;
266            Ok(())
267        } else {
268            Err(ParserErr::ExpectChar(ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char).into())
269        }
270    }
271
272    pub fn until(&mut self, ch: u8) -> Result<()> {
273        //消费直到指定字符 ch 忽略空白和注释
274        self.whitespace()?;
275        self.take(ch)
276    }
277
278    pub fn ahead(&self) -> Result<u8> {
279        //朝前看
280        self.buf.get(self.pos + 1).cloned().ok_or(ParserErr::EndofInput.into())
281    }
282
283    pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
284        SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
285    }
286
287    pub fn error_stmt(&self) -> SmolStr {
288        SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
289    }
290
291    pub fn current_pos(&self) -> usize {
292        self.pos
293    }
294
295    pub fn span_from(&self, start: usize) -> Span {
296        Span::new(start, self.pos)
297    }
298
299    pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
300        let start = self.pos;
301        while self.pos < self.buf.len() && f(self.buf[self.pos]) {
302            self.pos += 1;
303        }
304        if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::NoCharCollect.into()) }
305    }
306
307    pub fn just(&mut self, pattern: &str) -> Result<()> {
308        if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
309            self.pos += pattern.len();
310            Ok(())
311        } else {
312            Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into())
313        }
314    }
315
316    pub fn keyword(&mut self, pattern: &str) -> Result<()> {
317        self.just(pattern)?;
318        if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
319            self.pos -= pattern.len();
320            return Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into());
321        }
322        Ok(())
323    }
324
325    pub fn get_type(&mut self) -> Result<Type> {
326        self.whitespace()?;
327        if self.get()? == b'[' {
328            self.pos += 1;
329            let ty = self.get_type()?;
330            self.until(b';')?;
331            self.whitespace()?;
332            let len = self.get_type_param()?;
333            self.until(b']')?;
334            if let Type::ConstInt(number) = len {
335                let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
336                Ok(Type::Array(std::rc::Rc::new(ty), number))
337            } else {
338                Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
339            }
340        } else {
341            for ty in TYPES {
342                if self.just(ty.0).is_ok() {
343                    return Ok(ty.1.clone());
344                }
345            }
346            let name = self.ident()?;
347            if self.take(b'<').is_ok() {
348                let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
349                Ok(Type::Ident { name, params })
350            } else {
351                Ok(Type::Ident { name, params: Vec::new() })
352            }
353        }
354    }
355
356    pub fn get_type_param(&mut self) -> Result<Type> {
357        self.const_type_param_add()
358    }
359
360    fn const_type_param_add(&mut self) -> Result<Type> {
361        let mut left = self.const_type_param_mul()?;
362        loop {
363            self.whitespace()?;
364            let op = if self.take(b'+').is_ok() {
365                Some(ConstIntOp::Add)
366            } else if self.take(b'-').is_ok() {
367                Some(ConstIntOp::Sub)
368            } else {
369                None
370            };
371            let Some(op) = op else { break };
372            let right = self.const_type_param_mul()?;
373            left = Self::fold_const_type_binary(op, left, right)?;
374        }
375        Ok(left)
376    }
377
378    fn const_type_param_mul(&mut self) -> Result<Type> {
379        let mut left = self.const_type_param_primary()?;
380        loop {
381            self.whitespace()?;
382            let op = if self.take(b'*').is_ok() {
383                Some(ConstIntOp::Mul)
384            } else if self.take(b'/').is_ok() {
385                Some(ConstIntOp::Div)
386            } else if self.take(b'%').is_ok() {
387                Some(ConstIntOp::Mod)
388            } else {
389                None
390            };
391            let Some(op) = op else { break };
392            let right = self.const_type_param_primary()?;
393            left = Self::fold_const_type_binary(op, left, right)?;
394        }
395        Ok(left)
396    }
397
398    fn const_type_param_primary(&mut self) -> Result<Type> {
399        self.whitespace()?;
400        if self.take(b'(').is_ok() {
401            let ty = self.get_type_param()?;
402            self.until(b')')?;
403            return Ok(ty);
404        }
405        if self.get()?.is_ascii_digit() {
406            let value = self.number()?;
407            if let Some(value) = value.as_uint() {
408                let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
409                Ok(Type::ConstInt(value))
410            } else if let Some(value) = value.as_int() {
411                Ok(Type::ConstInt(value))
412            } else {
413                Err(anyhow!("模板数字参数必须是整数"))
414            }
415        } else {
416            self.get_type()
417        }
418    }
419
420    fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
421        if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
422            let value = match op {
423                ConstIntOp::Add => left + right,
424                ConstIntOp::Sub => left - right,
425                ConstIntOp::Mul => left * right,
426                ConstIntOp::Div => {
427                    if *right == 0 {
428                        return Err(anyhow!("模板整数除以 0"));
429                    }
430                    left / right
431                }
432                ConstIntOp::Mod => {
433                    if *right == 0 {
434                        return Err(anyhow!("模板整数取模 0"));
435                    }
436                    left % right
437                }
438            };
439            Ok(Type::ConstInt(value))
440        } else {
441            Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
442        }
443    }
444
445    pub fn comment(&mut self) -> Result<()> {
446        if self.get()? == b'/' && self.ahead()? == b'/' {
447            self.pos += 2;
448            while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
449                self.pos += 1;
450            }
451            Ok(())
452        } else if self.get()? == b'/' && self.ahead()? == b'*' {
453            self.pos += 2;
454            while self.pos + 1 < self.buf.len() {
455                if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
456                    self.pos += 2;
457                    return Ok(());
458                }
459                self.pos += 1;
460            }
461            Err(ParserErr::UncloseComment.into())
462        } else {
463            Ok(())
464        }
465    }
466
467    pub fn whitespace(&mut self) -> Result<()> {
468        while self.pos < self.buf.len() {
469            self.comment()?;
470            if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
471                break;
472            }
473            self.pos += 1;
474        }
475        Ok(())
476    }
477
478    pub fn ident(&mut self) -> Result<SmolStr> {
479        let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
480        loop {
481            let save_pos = self.pos;
482            if self.just("::").is_err() {
483                break;
484            }
485            match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
486                Ok((_, next_stop)) => {
487                    stop = next_stop;
488                }
489                Err(_) => {
490                    self.pos = save_pos;
491                    break;
492                }
493            }
494        }
495        if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
496            return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
497        }
498        Ok(self.get_str(start, stop))
499    }
500
501    pub fn string(&mut self) -> Result<SmolStr> {
502        if self.get()? != b'"' {
503            return Err(ParserErr::NotString.into());
504        }
505        self.pos += 1;
506        let mut text_buf = Vec::new();
507        while self.pos < self.buf.len() {
508            if self.buf[self.pos] == b'\\' {
509                //转义字符
510                self.pos += 1;
511                match self.buf[self.pos] {
512                    b'n' => { text_buf.push(b'\n'); self.pos += 1; }
513                    b'r' => { text_buf.push(b'\r'); self.pos += 1; }
514                    b't' => { text_buf.push(b'\t'); self.pos += 1; }
515                    ch @ (b'\\' | b'"') => {
516                        text_buf.push(ch);
517                        self.pos += 1;
518                    }
519                    b'u' => {
520                        self.pos += 1;
521                        let unicode = if self.take(b'{').is_ok() {
522                            let code = self.hex()?;
523                            self.pos += 1;
524                            code
525                        } else {
526                            self.hex()?
527                        };
528                        let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
529                        let mut utf8_buf = [0u8; 4];
530                        let s = ch.encode_utf8(&mut utf8_buf);
531                        text_buf.extend_from_slice(s.as_bytes());
532                    }
533                    b'x' => {
534                        self.pos += 1;
535                        if self.pos + 2 < self.buf.len() {
536                            let start = self.pos;
537                            self.pos += 2;
538                            let hex = &self.buf[start..self.pos];
539                            let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
540                            text_buf.push(code as u8);
541                        }
542                    }
543                    other => {
544                        return Err(anyhow!("invalid escape character: {}", other as char));
545                    }
546                }
547            } else {
548                if self.buf[self.pos] == b'"' {
549                    self.pos += 1;
550                    return Ok(String::from_utf8(text_buf)?.into());
551                }
552                text_buf.push(self.buf[self.pos]);
553                self.pos += 1;
554            }
555        }
556        Err(ParserErr::UnclosedString.into())
557    }
558
559    pub fn text(&mut self) -> Result<SmolStr> {
560        if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
561            self.pos += 1;
562            let mut end = String::from("\"");
563            while self.buf[self.pos] == b'#' {
564                end.push('#');
565                self.pos += 1;
566            }
567            if self.get()? != b'"' {
568                return Err(ParserErr::IllegalRawString.into());
569            }
570            self.pos += 1;
571            let start_pos = self.pos;
572            while self.pos < self.buf.len() {
573                if self.just(&end).is_ok() {
574                    break;
575                }
576                self.pos += 1;
577            }
578            Ok(self.get_str(start_pos, self.pos - end.len()))
579        } else {
580            self.string()
581        }
582    }
583
584    fn hex(&mut self) -> Result<i32> {
585        //注意 hex 会消耗当前字符 设置新的 self.pos
586        let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
587        Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
588    }
589
590    fn numeric_suffix(&mut self) -> Option<Type> {
591        let save = self.pos;
592        for (name, ty) in TYPES {
593            if !ty.is_native() || *ty == Type::F16 {
594                continue;
595            }
596            if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
597                self.pos += name.len();
598                return Some(ty.clone());
599            }
600        }
601        self.pos = save;
602        None
603    }
604
605    fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
606        // 默认整数类型为 I64:常见的较大十进制数(如 30 亿)不再静默回绕成负数。
607        let ty = suffix.unwrap_or(Type::I64);
608        // 负号由一元运算符单独解析,这里的字面量恒为非负,因此统一解析成 u128。
609        let magnitude = u128::from_str_radix(digits, radix).map_err(|_| anyhow!("整数字面量 {} 超出可表示范围", digits))?;
610        let (signed, bits) = match ty {
611            Type::I8 => (true, 8u32),
612            Type::I16 => (true, 16),
613            Type::I32 => (true, 32),
614            Type::I64 => (true, 64),
615            Type::U8 => (false, 8),
616            Type::U16 => (false, 16),
617            Type::U32 => (false, 32),
618            Type::U64 => (false, 64),
619            Type::F32 => return Ok(Dynamic::F32(magnitude as f32)),
620            Type::F64 => return Ok(Dynamic::F64(magnitude as f64)),
621            ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
622        };
623        let unsigned_max = (1u128 << bits) - 1;
624        // 十进制按数值语义判界(有符号允许到 |MIN|,即 2^(bits-1),以支持 -128i8、i64::MIN);
625        // 十六/八/二进制按位模式语义判界,允许写满整型位宽(如 0xFFFFFFFF 仍是合法的位掩码)。
626        let max_allowed = if radix == 10 {
627            if signed { unsigned_max / 2 + 1 } else { unsigned_max }
628        } else {
629            unsigned_max
630        };
631        if magnitude > max_allowed {
632            return Err(anyhow!("整数字面量 {} 超出 {:?} 的范围", digits, ty));
633        }
634        Ok(match ty {
635            Type::I8 => Dynamic::I8(magnitude as i8),
636            Type::I16 => Dynamic::I16(magnitude as i16),
637            Type::I32 => Dynamic::I32(magnitude as i32),
638            Type::I64 => Dynamic::I64(magnitude as i64),
639            Type::U8 => Dynamic::U8(magnitude as u8),
640            Type::U16 => Dynamic::U16(magnitude as u16),
641            Type::U32 => Dynamic::U32(magnitude as u32),
642            Type::U64 => Dynamic::U64(magnitude as u64),
643            _ => unreachable!(),
644        })
645    }
646
647    fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
648        let value: f64 = digits.parse()?;
649        Ok(match suffix.unwrap_or(Type::F32) {
650            Type::I8 => Dynamic::I8(value as i8),
651            Type::I16 => Dynamic::I16(value as i16),
652            Type::I32 => Dynamic::I32(value as i32),
653            Type::I64 => Dynamic::I64(value as i64),
654            Type::U8 => Dynamic::U8(value as u8),
655            Type::U16 => Dynamic::U16(value as u16),
656            Type::U32 => Dynamic::U32(value as u32),
657            Type::U64 => Dynamic::U64(value as u64),
658            Type::F32 => Dynamic::F32(value as f32),
659            Type::F64 => Dynamic::F64(value),
660            ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
661        })
662    }
663
664    pub fn number(&mut self) -> Result<Dynamic> {
665        if self.get()? == b'0' {
666            if [b'b', b'B'].contains(&self.ahead()?) {
667                self.pos += 2;
668                let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
669                let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
670                let suffix = self.numeric_suffix();
671                return self.int_literal(&s, 2, suffix);
672            } else if [b'o', b'O'].contains(&self.ahead()?) {
673                self.pos += 2;
674                let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
675                let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
676                let suffix = self.numeric_suffix();
677                return self.int_literal(&s, 8, suffix);
678            } else if [b'x', b'X'].contains(&self.ahead()?) {
679                self.pos += 2;
680                let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
681                let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
682                let suffix = self.numeric_suffix();
683                return self.int_literal(&s, 16, suffix);
684            }
685        }
686        let start = self.pos;
687        while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
688            self.pos += 1;
689        }
690        let mut is_float = false;
691        if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
692            is_float = true;
693            self.pos += 1;
694            while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
695                self.pos += 1;
696            }
697        }
698        if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') {
699            let mut exp_pos = self.pos + 1;
700            if exp_pos < self.buf.len() && (self.buf[exp_pos] == b'+' || self.buf[exp_pos] == b'-') {
701                exp_pos += 1;
702            }
703            if exp_pos < self.buf.len() && self.buf[exp_pos] <= b'9' && self.buf[exp_pos] >= b'0' {
704                is_float = true;
705                self.pos = exp_pos + 1;
706                while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
707                    self.pos += 1;
708                }
709            }
710        }
711        if self.pos > start {
712            let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
713            let suffix = self.numeric_suffix();
714            if is_float {
715                return self.float_literal(&text, suffix);
716            }
717            return self.int_literal(&text, 10, suffix);
718        }
719        Err(ParserErr::NotNumber.into())
720    }
721}
722
723#[cfg(test)]
724mod tests {
725    use super::*;
726
727    fn parse_all(code: &str) -> Result<Vec<Stmt>> {
728        let mut parser = Parser::new(code.as_bytes().to_vec());
729        let mut stmts = Vec::new();
730        loop {
731            match parser.stmt(false) {
732                Ok(stmt) => stmts.push(stmt),
733                Err(err) => {
734                    if parser.is_eof() {
735                        return Ok(stmts);
736                    }
737                    return Err(err);
738                }
739            }
740        }
741    }
742
    // In debug builds a single frame is about 16KB, so pathologically deep nesting
    // blows the default 2MB test-thread stack *before* the depth guard fires, even
    // with the guard in place. These tests therefore run on a big-stack thread to
    // verify that the guard really returns TooDeep (rather than crashing).
    // Production is a release build where a frame is only a few KB, so the 128-level
    // limit has ample headroom on the 8MB main stack.
    fn run_with_big_stack(f: impl FnOnce() + Send + 'static) {
        std::thread::Builder::new().stack_size(64 * 1024 * 1024).spawn(f).unwrap().join().unwrap();
    }
749
    // An expression wrapped in more parentheses than MAX_PARSE_DEPTH must fail with
    // the "nested too deep" error rather than overflowing the stack.
    #[test]
    fn deeply_nested_parens_error_instead_of_stack_overflow() {
        run_with_big_stack(|| {
            let depth = MAX_PARSE_DEPTH + 50;
            let code = format!("{}1{}", "(".repeat(depth), ")".repeat(depth));
            let mut parser = Parser::new(code.into_bytes());
            let err = parser.get_expr().unwrap_err();
            assert!(err.to_string().contains("嵌套过深"), "got: {err}");
        });
    }
760
    // A function body nested in more blocks than MAX_PARSE_DEPTH must fail with the
    // "nested too deep" error rather than overflowing the stack.
    #[test]
    fn deeply_nested_blocks_error_instead_of_stack_overflow() {
        run_with_big_stack(|| {
            let depth = MAX_PARSE_DEPTH + 50;
            let code = format!("fn f() {}{}{}", "{".repeat(depth), "1", "}".repeat(depth));
            let err = parse_all(&code).unwrap_err();
            assert!(err.to_string().contains("嵌套过深"), "got: {err}");
        });
    }
770
    #[test]
    fn normal_nesting_within_limit_parses() {
        // Ordinary nesting far below the limit is unaffected
        let code = format!("{}1{}", "(".repeat(32), ")".repeat(32));
        let mut parser = Parser::new(code.into_bytes());
        parser.get_expr().unwrap();
    }
778
    // Parses `code` as an expression and returns its value if the expression is a
    // single literal; any other expression kind is reported as an error.
    fn parse_literal(code: &str) -> Result<Dynamic> {
        let mut parser = Parser::new(code.as_bytes().to_vec());
        match parser.get_expr()?.kind {
            crate::ExprKind::Value(value) => Ok(value),
            other => Err(anyhow!("不是字面量: {:?}", other)),
        }
    }
786
    #[test]
    fn unsuffixed_integer_defaults_to_i64() {
        assert_eq!(parse_literal("5").unwrap(), Dynamic::I64(5));
        // 3 billion: the old I32 default silently wrapped to a negative number; the I64 default keeps the correct value
        assert_eq!(parse_literal("3000000000").unwrap(), Dynamic::I64(3000000000));
    }
793
    #[test]
    fn out_of_range_integer_literals_error() {
        // Beyond u64, and too large even for the u128 parse -> an error instead of wrapping
        assert!(parse_literal("99999999999999999999999999999999999999999").is_err());
        // Out of range for a narrow suffix
        assert!(parse_literal("255i8").unwrap_err().to_string().contains("超出"));
        assert!(parse_literal("70000i16").unwrap_err().to_string().contains("超出"));
        assert!(parse_literal("256u8").unwrap_err().to_string().contains("超出"));
    }
803
    #[test]
    fn signed_min_magnitude_literals_allowed() {
        // -128i8 is a unary minus applied to the literal 128, so the literal 128 itself must be accepted
        assert_eq!(parse_literal("128i8").unwrap(), Dynamic::I8(-128));
        assert_eq!(parse_literal("9223372036854775808").unwrap(), Dynamic::I64(i64::MIN));
    }
810
    #[test]
    fn hex_literals_keep_bit_pattern() {
        // Hex literals have bit-pattern semantics: 0xFFFFFFFF is a legal mask and fits the default I64 as a positive value
        assert_eq!(parse_literal("0xFFFFFFFF").unwrap(), Dynamic::I64(0xFFFFFFFF));
        // A mask that fills the whole target width is accepted (0xFF -> -1 as i8)
        assert_eq!(parse_literal("0xFFi8").unwrap(), Dynamic::I8(-1));
        assert_eq!(parse_literal("0xFFFFFFFFu32").unwrap(), Dynamic::U32(u32::MAX));
    }
819
    // Renders the expression AST as an S-expression, used to pin down precedence and
    // associativity (the hand-written tree-rotation logic in expr.rs).
    fn shape(code: &str) -> String {
        let mut parser = Parser::new(code.as_bytes().to_vec());
        let expr = parser.get_expr().expect("parse");
        fmt_shape(&expr)
    }
826
    // Maps a binary operator to the symbol used in the S-expression rendering;
    // operators not listed here render as "?".
    fn binop_sym(op: &crate::BinaryOp) -> &'static str {
        use crate::BinaryOp::*;
        match op {
            Add => "+", Sub => "-", Mul => "*", Div => "/", Mod => "%",
            Shl => "<<", Shr => ">>", BitAnd => "&", BitOr => "|", BitXor => "^",
            Assign => "=", AddAssign => "+=", Eq => "==", Ne => "!=", Lt => "<", Gt => ">",
            Le => "<=", Ge => ">=", And => "&&", Or => "||", Idx => "idx",
            other => {
                let _ = other;
                "?"
            }
        }
    }
840
841    fn fmt_shape(expr: &crate::Expr) -> String {
842        use crate::ExprKind::*;
843        match &expr.kind {
844            Value(v) => format!("{:?}", v).replace("I64(", "").replace("I32(", "").trim_end_matches(')').to_string(),
845            Ident(name) => name.to_string(),
846            Unary { op, value } => {
847                let s = if matches!(op, crate::UnaryOp::Neg) { "-" } else { "!" };
848                format!("({} {})", s, fmt_shape(value))
849            }
850            Binary { left, op, right } => format!("({} {} {})", binop_sym(op), fmt_shape(left), fmt_shape(right)),
851            Range { start, stop, inclusive } => format!("({} {} {})", if *inclusive { "..=" } else { ".." }, fmt_shape(start), fmt_shape(stop)),
852            Typed { value, ty } => format!("(as {} {:?})", fmt_shape(value), ty),
853            other => format!("{:?}", other),
854        }
855    }
856
857    #[test]
858    fn precedence_and_associativity_golden() {
859        // 乘法高于加法
860        assert_eq!(shape("1 + 2 * 3"), "(+ 1 (* 2 3))");
861        assert_eq!(shape("1 * 2 + 3"), "(+ (* 1 2) 3)");
862        // 同级左结合
863        assert_eq!(shape("1 - 2 - 3"), "(- (- 1 2) 3)");
864        assert_eq!(shape("8 / 4 / 2"), "(/ (/ 8 4) 2)");
865        // 移位低于加法
866        assert_eq!(shape("2 + 3 << 4"), "(<< (+ 2 3) 4)");
867        // 位运算优先级:& 高于 ^ 高于 |
868        assert_eq!(shape("1 | 2 ^ 3 & 4"), "(| 1 (^ 2 (& 3 4)))");
869        // 比较低于算术
870        assert_eq!(shape("1 + 2 == 3"), "(== (+ 1 2) 3)");
871        // 逻辑:&& 高于 ||
872        assert_eq!(shape("a && b || c"), "(|| (&& a b) c)");
873        // 一元高于乘法
874        assert_eq!(shape("-a * b"), "(* (- a) b)");
875        assert_eq!(shape("!a == b"), "(== (! a) b)");
876    }
877
878    #[test]
879    fn assignment_range_and_as_precedence_golden() {
880        // 赋值最低优先级
881        assert_eq!(shape("a = b + c"), "(= a (+ b c))");
882        // 已知限制:链式赋值当前为左结合 (= (= a b) c),理想应为右结合。
883        // 由于外层 = 的左侧不是 lvalue,这会在编译期报错而非静默误算;
884        // 锁定现状以防回归,正确的右结合修复见后续独立任务。
885        assert_eq!(shape("a = b = c"), "(= (= a b) c)");
886        // 复合赋值
887        assert_eq!(shape("a += b * c"), "(+= a (* b c))");
888        // range 边界是完整算术表达式(已修复:上界按完整子表达式解析)
889        assert_eq!(shape("1 + 1 .. n * 2"), "(.. (+ 1 1) (* n 2))");
890        assert_eq!(shape("0 ..= n - 1"), "(..= 0 (- n 1))");
891        // 已知限制:as 当前绑定整个左侧表达式 (as (+ a b) T),Rust 语义应为 (+ a (as b T))。
892        // 现有代码依赖此松绑定,改动有破坏风险;锁定现状,正确优先级见后续独立任务。
893        assert_eq!(shape("a + b as i64"), "(as (+ a b) I64)");
894    }
895
896    // 轻量 fuzz:用确定性 PRNG 生成大量随机/半结构化输入喂给解析器,断言它永远
897    // 不 panic、不崩溃(返回 Ok 或 Err 都可),也不卡死(B2 的深度守卫保证有界)。
898    // 在大栈线程上跑,避免深嵌套合法解析在调试构建里耗尽测试线程的 2MB 栈。
899    #[test]
900    fn parser_never_panics_on_random_input() {
901        run_with_big_stack(|| {
902            const FRAGMENTS: &[&str] = &[
903                "fn", "let", "if", "else", "for", "in", "while", "return", "struct", "impl", "pub", "(", ")", "{", "}", "[", "]", "<", ">", "+", "-", "*", "/", "%", "=", "==", "&&", "||", "..", "..=", "as", "i32", "u64", "f64", ".", ",", ";", ":", "::", "x", "0", "1", "255i8", "0xFF", "\"s\"", "true", "null", "|a|", "->",
904            ];
905            // xorshift64* 确定性 PRNG
906            let mut state: u64 = 0x9E3779B97F4A7C15;
907            let mut next = || {
908                state ^= state >> 12;
909                state ^= state << 25;
910                state ^= state >> 27;
911                state = state.wrapping_mul(0x2545F4914F6CDD1D);
912                state
913            };
914
915            for _ in 0..4000 {
916                let mut code = String::new();
917                let tokens = (next() % 40) as usize;
918                for _ in 0..tokens {
919                    code.push_str(FRAGMENTS[(next() as usize) % FRAGMENTS.len()]);
920                    if next() % 2 == 0 {
921                        code.push(' ');
922                    }
923                }
924                // 解析全程不应 panic;parse_all 返回 Ok/Err 均可接受。
925                let result = std::panic::catch_unwind(|| {
926                    let mut parser = Parser::new(code.clone().into_bytes());
927                    let mut count = 0;
928                    loop {
929                        match parser.stmt(false) {
930                            Ok(_) => {
931                                count += 1;
932                                if parser.is_eof() || count > 1000 {
933                                    break;
934                                }
935                            }
936                            Err(_) => break,
937                        }
938                    }
939                });
940                assert!(result.is_ok(), "parser panicked on input: {:?}", code);
941            }
942        });
943    }
944
945    #[test]
946    fn allows_local_name_to_shadow_prior_function() {
947        parse_all(
948            r#"
949            fn chunk_id(x, y) {
950                x + y
951            }
952
953            fn open() {
954                let chunk_id = 1;
955                chunk_id
956            }
957            "#,
958        )
959        .unwrap();
960    }
961
962    #[test]
963    fn rejects_duplicate_function_args() {
964        let err = parse_all("fn open(value, value) { value }").unwrap_err();
965        assert!(err.to_string().contains("符号 value 已经声明"));
966    }
967
968    #[test]
969    fn rejects_duplicate_local_let_names() {
970        let err = parse_all(
971            r#"
972            fn open() {
973                let value = 1;
974                let value = 2;
975                value
976            }
977            "#,
978        )
979        .unwrap_err();
980        assert!(err.to_string().contains("符号 value 已经声明"));
981    }
982
983    #[test]
984    fn allows_same_method_name_in_different_impl_blocks() {
985        parse_all(
986            r#"
987            struct A {}
988            struct B {}
989
990            impl A {
991                fn zero() { 0 }
992            }
993
994            impl B {
995                fn zero() { 0 }
996            }
997            "#,
998        )
999        .unwrap();
1000    }
1001
1002    #[test]
1003    fn parses_scientific_float_suffixes() {
1004        let mut parser = Parser::new(b"1.7976931348623157e308f64".to_vec());
1005        assert_eq!(parser.number().unwrap(), Dynamic::F64(1.7976931348623157e308));
1006
1007        let mut parser = Parser::new(b"1e-3f32".to_vec());
1008        assert_eq!(parser.number().unwrap(), Dynamic::F32(1e-3f32));
1009    }
1010
1011    #[test]
1012    fn parses_immediate_closure_call() {
1013        let mut parser = Parser::new(b"|| { 1i32 }()".to_vec());
1014        let expr = parser.get_expr().unwrap();
1015        let ExprKind::Call { obj, params } = expr.kind else {
1016            panic!("expected closure call, got {expr:?}");
1017        };
1018        assert!(params.is_empty());
1019        let ExprKind::Closure { args, .. } = obj.kind else {
1020            panic!("expected closure callee, got {obj:?}");
1021        };
1022        assert!(args.is_empty());
1023    }
1024
1025    #[test]
1026    fn parses_empty_tuple_expression() {
1027        let mut parser = Parser::new(b"()".to_vec());
1028        let expr = parser.get_expr().unwrap();
1029        let ExprKind::Tuple(items) = expr.kind else {
1030            panic!("expected empty tuple, got {expr:?}");
1031        };
1032        assert!(items.is_empty());
1033    }
1034
1035    #[test]
1036    fn parses_explicit_generic_function_call() {
1037        let mut parser = Parser::new(b"value::<4>()".to_vec());
1038        let expr = parser.get_expr().unwrap();
1039        let ExprKind::Call { obj, params } = expr.kind else {
1040            panic!("expected function call, got {expr:?}");
1041        };
1042        assert!(params.is_empty());
1043        let ExprKind::Generic { obj, params } = obj.kind else {
1044            panic!("expected generic callee, got {obj:?}");
1045        };
1046        assert!(matches!(obj.kind, ExprKind::Ident(name) if name.as_str() == "value"));
1047        assert!(matches!(params.as_slice(), [Type::ConstInt(4)]));
1048    }
1049
1050    #[test]
1051    fn parses_bigfloat_cmp_context_segment() {
1052        let code = r#"
1053            struct BigFloat<N> { data: [u32; N], exp: i32, sign: bool }
1054
1055            impl BigFloat<N> {
1056                fn abs_cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
1057                    let self_high = self.exp + ((N - 1) as i32);
1058                    let rhs_high = rhs.exp + ((N - 1) as i32);
1059                    let high = if self_high >= rhs_high { self_high } else { rhs_high };
1060                    let low = if self.exp <= rhs.exp { self.exp } else { rhs.exp };
1061                    let result = 0i32;
1062                    let power = high;
1063
1064                    while power >= low && result == 0i32 {
1065                        let a_idx = power - self.exp;
1066                        let b_idx = power - rhs.exp;
1067                        let a_limb = 0u32;
1068                        let b_limb = 0u32;
1069
1070                        if a_idx >= 0i32 && a_idx < (N as i32) {
1071                            a_limb = self.data[a_idx as u32];
1072                        }
1073                        if b_idx >= 0i32 && b_idx < (N as i32) {
1074                            b_limb = rhs.data[b_idx as u32];
1075                        }
1076
1077                        if a_limb > b_limb {
1078                            result = 1i32;
1079                        } else if a_limb < b_limb {
1080                            result = -1i32;
1081                        }
1082
1083                        power -= 1i32;
1084                    }
1085
1086                    result
1087                }
1088
1089                pub fn cmp(self: BigFloat<N>, rhs: BigFloat<N>) {
1090                    if self.is_zero() && rhs.is_zero() {
1091                        0i32
1092                    } else if self.sign != rhs.sign {
1093                        if self.sign { -1i32 } else { 1i32 }
1094                    } else {
1095                        let cmp = self.abs_cmp(rhs);
1096                        if self.sign { -cmp } else { cmp }
1097                    }
1098                }
1099            }
1100            "#;
1101        parse_all(code).unwrap();
1102    }
1103
1104    #[test]
1105    fn parses_bigfloat_file() {
1106        let code = include_str!("../../zusts/bigfloat.zs");
1107        parse_all(code).unwrap();
1108    }
1109}