1use std::fmt::Debug;
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A pair of offsets marking where a region of the input begins and ends.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset at which the region starts.
    pub start: usize,
    /// Offset at which the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-width span whose `start` and `end` are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Span::new(pos, pos)
    }

    /// Returns the smallest span that covers both `self` and `other`.
    pub fn merge(self, other: Self) -> Self {
        let start = usize::min(self.start, other.start);
        let end = usize::max(self.end, other.end);
        Span::new(start, end)
    }
}
35
/// Cursor-based parser state over a raw byte buffer.
#[derive(Debug)]
pub struct Parser {
    /// Index into `buf` of the next byte to examine.
    pos: usize,
    /// The input being parsed, as raw bytes.
    buf: Vec<u8>,
    /// Recorded offsets; the most recent one is where `error_stmt` starts quoting.
    spans: Vec<usize>,
}
42
/// Bytes that terminate an identifier: ASCII whitespace plus punctuation and operators.
const NOT_IDENT: &[u8] = b" \t\n\r/*+-=(){}[];:,.<>!#$%^&|\\\"'";
/// Bytes skipped as insignificant whitespace.
const WHITE_SPACE: &[u8] = b" \t\n\r";
45const TYPES: &[(&str, Type)] = &[
46 ("bool", Type::Bool),
47 ("string", Type::Str),
48 ("i8", Type::I8),
49 ("i16", Type::I16),
50 ("i32", Type::I32),
51 ("i64", Type::I64),
52 ("u8", Type::U8),
53 ("u16", Type::U16),
54 ("u32", Type::U32),
55 ("u64", Type::U64),
56 ("f16", Type::F16),
57 ("f32", Type::F32),
58 ("f64", Type::F64),
59];
/// Reserved words; `ident` refuses to return any of these as an identifier.
const KEYWORDS: &[&str] = &[
    // Literals.
    "true", "false", "null",
    // Bindings and control flow.
    "let", "if", "else", "for", "in", "while",
    // Items and modifiers.
    "pub", "fn", "struct", "impl", "const", "static",
    // Jumps.
    "continue", "return", "break",
];
61
/// Parses a delimited list and evaluates to the collection of parsed items.
///
/// * `$self` - the parser; must expose `whitespace()`, `get()` and a `pos` field.
/// * `$start` - the initial (usually empty) collection; items are appended with `push`.
/// * `$end` - the byte that closes the list; it is consumed.
/// * `$sep` - the separator byte; it is consumed when present but is not required.
/// * `$item_expr` - expression evaluated once per item to parse it.
///
/// The expansion uses `?`, so it must appear inside a function returning a compatible
/// `Result`. Reaching the end of input before `$end` propagates the error from `get()`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut collected = $start;
        loop {
            $self.whitespace()?;
            let upcoming = $self.get()?;
            if upcoming == $end {
                // Consume the closing delimiter and hand back what was gathered.
                $self.pos += 1;
                break collected;
            }
            collected.push($item_expr);
            $self.whitespace()?;
            let upcoming = $self.get()?;
            if upcoming == $sep {
                $self.pos += 1;
            }
        }
    }};
}
82
/// Evaluates `$method` and, if it fails, rewinds `$self.pos` to where it was beforehand.
///
/// The result of `$method` is passed through unchanged; only the cursor is restored on
/// `Err`, so a failed attempt leaves the parser ready to try an alternative.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let rewind_to = $self.pos;
        match $method {
            Err(failure) => {
                $self.pos = rewind_to;
                Err(failure)
            }
            success => success,
        }
    }};
}
96
97#[derive(Debug, thiserror::Error)]
98pub enum ParserErr {
99 #[error("期望字符 {0} 实际字符 {1}")]
100 ExpectChar(char, char),
101 #[error("未发现期望字符")]
102 NoCharCollect,
103 #[error("期望字符串 {0}")]
104 ExpectedString(SmolStr),
105 #[error("输入结束")]
106 EndofInput,
107 #[error("未关闭的注释")]
108 UncloseComment,
109 #[error("非法的原始字符串")]
110 IllegalRawString,
111 #[error("未关闭字符串")]
112 UnclosedString,
113 #[error("非字符串")]
114 NotString,
115 #[error("非数字")]
116 NotNumber,
117}
118
119impl Parser {
120 pub fn new(buf: Vec<u8>) -> Self {
121 Self { pos: 0, buf, spans: Vec::new() }
122 }
123
124 pub fn is_eof(&self) -> bool {
125 self.pos >= self.buf.len()
126 }
127
128 pub fn get(&self) -> Result<u8> {
129 self.buf.get(self.pos).cloned().ok_or(ParserErr::EndofInput.into())
131 }
132
133 pub fn take(&mut self, ch: u8) -> Result<()> {
134 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
136 self.pos += 1;
137 Ok(())
138 } else {
139 Err(ParserErr::ExpectChar(ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char).into())
140 }
141 }
142
143 pub fn until(&mut self, ch: u8) -> Result<()> {
144 self.whitespace()?;
146 self.take(ch)
147 }
148
149 pub fn ahead(&self) -> Result<u8> {
150 self.buf.get(self.pos + 1).cloned().ok_or(ParserErr::EndofInput.into())
152 }
153
154 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
155 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
156 }
157
158 pub fn error_stmt(&self) -> SmolStr {
159 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
160 }
161
162 pub fn current_pos(&self) -> usize {
163 self.pos
164 }
165
166 pub fn span_from(&self, start: usize) -> Span {
167 Span::new(start, self.pos)
168 }
169
170 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
171 let start = self.pos;
172 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
173 self.pos += 1;
174 }
175 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::NoCharCollect.into()) }
176 }
177
178 pub fn just(&mut self, pattern: &str) -> Result<()> {
179 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
180 self.pos += pattern.len();
181 Ok(())
182 } else {
183 Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into())
184 }
185 }
186
187 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
188 self.just(pattern)?;
189 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
190 self.pos -= pattern.len();
191 return Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into());
192 }
193 Ok(())
194 }
195
196 pub fn get_type(&mut self) -> Result<Type> {
197 self.whitespace()?;
198 if self.get()? == b'[' {
199 self.pos += 1;
200 let ty = self.get_type()?;
201 self.until(b';')?;
202 self.whitespace()?;
203 let len = self.get_type_param()?;
204 self.until(b']')?;
205 if let Type::ConstInt(number) = len {
206 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
207 Ok(Type::Array(std::rc::Rc::new(ty), number))
208 } else {
209 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
210 }
211 } else {
212 for ty in TYPES {
213 if self.just(ty.0).is_ok() {
214 return Ok(ty.1.clone());
215 }
216 }
217 let name = self.ident()?;
218 if self.take(b'<').is_ok() {
219 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
220 Ok(Type::Ident { name, params })
221 } else {
222 Ok(Type::Ident { name, params: Vec::new() })
223 }
224 }
225 }
226
227 pub fn get_type_param(&mut self) -> Result<Type> {
228 self.const_type_param_add()
229 }
230
231 fn const_type_param_add(&mut self) -> Result<Type> {
232 let mut left = self.const_type_param_mul()?;
233 loop {
234 self.whitespace()?;
235 let op = if self.take(b'+').is_ok() {
236 Some(ConstIntOp::Add)
237 } else if self.take(b'-').is_ok() {
238 Some(ConstIntOp::Sub)
239 } else {
240 None
241 };
242 let Some(op) = op else { break };
243 let right = self.const_type_param_mul()?;
244 left = Self::fold_const_type_binary(op, left, right)?;
245 }
246 Ok(left)
247 }
248
249 fn const_type_param_mul(&mut self) -> Result<Type> {
250 let mut left = self.const_type_param_primary()?;
251 loop {
252 self.whitespace()?;
253 let op = if self.take(b'*').is_ok() {
254 Some(ConstIntOp::Mul)
255 } else if self.take(b'/').is_ok() {
256 Some(ConstIntOp::Div)
257 } else if self.take(b'%').is_ok() {
258 Some(ConstIntOp::Mod)
259 } else {
260 None
261 };
262 let Some(op) = op else { break };
263 let right = self.const_type_param_primary()?;
264 left = Self::fold_const_type_binary(op, left, right)?;
265 }
266 Ok(left)
267 }
268
269 fn const_type_param_primary(&mut self) -> Result<Type> {
270 self.whitespace()?;
271 if self.take(b'(').is_ok() {
272 let ty = self.get_type_param()?;
273 self.until(b')')?;
274 return Ok(ty);
275 }
276 if self.get()?.is_ascii_digit() {
277 let value = self.number()?;
278 if let Some(value) = value.as_uint() {
279 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
280 Ok(Type::ConstInt(value))
281 } else if let Some(value) = value.as_int() {
282 Ok(Type::ConstInt(value))
283 } else {
284 Err(anyhow!("模板数字参数必须是整数"))
285 }
286 } else {
287 self.get_type()
288 }
289 }
290
291 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
292 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
293 let value = match op {
294 ConstIntOp::Add => left + right,
295 ConstIntOp::Sub => left - right,
296 ConstIntOp::Mul => left * right,
297 ConstIntOp::Div => {
298 if *right == 0 {
299 return Err(anyhow!("模板整数除以 0"));
300 }
301 left / right
302 }
303 ConstIntOp::Mod => {
304 if *right == 0 {
305 return Err(anyhow!("模板整数取模 0"));
306 }
307 left % right
308 }
309 };
310 Ok(Type::ConstInt(value))
311 } else {
312 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
313 }
314 }
315
316 pub fn comment(&mut self) -> Result<()> {
317 if self.get()? == b'/' && self.ahead()? == b'/' {
318 self.pos += 2;
319 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
320 self.pos += 1;
321 }
322 Ok(())
323 } else if self.get()? == b'/' && self.ahead()? == b'*' {
324 self.pos += 2;
325 while self.pos + 1 < self.buf.len() {
326 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
327 self.pos += 2;
328 return Ok(());
329 }
330 self.pos += 1;
331 }
332 Err(ParserErr::UncloseComment.into())
333 } else {
334 Ok(())
335 }
336 }
337
338 pub fn whitespace(&mut self) -> Result<()> {
339 while self.pos < self.buf.len() {
340 self.comment()?;
341 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
342 break;
343 }
344 self.pos += 1;
345 }
346 Ok(())
347 }
348
349 pub fn ident(&mut self) -> Result<SmolStr> {
350 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
351 while self.just("::").is_ok() {
352 (_, stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
353 }
354 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
355 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
356 }
357 Ok(self.get_str(start, stop))
358 }
359
360 pub fn string(&mut self) -> Result<SmolStr> {
361 if self.buf[self.pos] == b'"' {
362 self.pos += 1;
363 let mut text_buf = Vec::new();
364 while self.pos < self.buf.len() {
365 if self.buf[self.pos] == b'\\' {
366 self.pos += 1;
368 match self.buf[self.pos] {
369 ch @ (b'n' | b'r' | b't' | b'\\' | b'"') => {
370 text_buf.push(ch);
371 self.pos += 1;
372 }
373 b'u' => {
374 self.pos += 1;
375 let unicode = if self.take(b'{').is_ok() {
376 let code = self.hex()?;
377 self.pos += 1;
378 code
379 } else {
380 self.hex()?
381 };
382 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
383 let mut utf8_buf = [0u8; 4];
384 let s = ch.encode_utf8(&mut utf8_buf);
385 text_buf.extend_from_slice(s.as_bytes());
386 }
387 b'x' => {
388 self.pos += 1;
389 if self.pos + 2 < self.buf.len() {
390 let start = self.pos;
391 self.pos += 2;
392 let hex = &self.buf[start..self.pos];
393 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
394 text_buf.push(code as u8);
395 }
396 }
397 other => {
398 return Err(anyhow!("invalid escape character: {}", other as char));
399 }
400 }
401 } else {
402 if self.buf[self.pos] == b'"' {
403 self.pos += 1;
404 return Ok(String::from_utf8(text_buf)?.into());
405 }
406 text_buf.push(self.buf[self.pos]);
407 self.pos += 1;
408 }
409 }
410 Err(ParserErr::UnclosedString.into())
411 } else {
412 Err(ParserErr::NotString.into())
413 }
414 }
415
416 pub fn text(&mut self) -> Result<SmolStr> {
417 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
418 self.pos += 1;
419 let mut end = String::new();
420 while self.buf[self.pos] == b'#' {
421 end.push('#');
422 self.pos += 1;
423 }
424 if self.get()? != b'"' {
425 return Err(ParserErr::IllegalRawString.into());
426 }
427 self.pos += 1;
428 let start_pos = self.pos;
429 while self.pos < self.buf.len() {
430 if self.just(&end).is_ok() {
431 break;
432 }
433 self.pos += 1;
434 }
435 Ok(self.get_str(start_pos, self.pos - end.len()))
436 } else {
437 self.string()
438 }
439 }
440
441 fn hex(&mut self) -> Result<i32> {
442 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
444 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
445 }
446
447 fn numeric_suffix(&mut self) -> Option<Type> {
448 let save = self.pos;
449 for (name, ty) in TYPES {
450 if !ty.is_native() || *ty == Type::F16 {
451 continue;
452 }
453 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
454 self.pos += name.len();
455 return Some(ty.clone());
456 }
457 }
458 self.pos = save;
459 None
460 }
461
462 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
463 Ok(match suffix.unwrap_or(Type::I32) {
464 Type::I8 => Dynamic::I8(i128::from_str_radix(digits, radix)? as i8),
465 Type::I16 => Dynamic::I16(i128::from_str_radix(digits, radix)? as i16),
466 Type::I32 => Dynamic::I32(i128::from_str_radix(digits, radix)? as i32),
467 Type::I64 => Dynamic::I64(i128::from_str_radix(digits, radix)? as i64),
468 Type::U8 => Dynamic::U8(u128::from_str_radix(digits, radix)? as u8),
469 Type::U16 => Dynamic::U16(u128::from_str_radix(digits, radix)? as u16),
470 Type::U32 => Dynamic::U32(u128::from_str_radix(digits, radix)? as u32),
471 Type::U64 => Dynamic::U64(u128::from_str_radix(digits, radix)? as u64),
472 Type::F32 => Dynamic::F32(u128::from_str_radix(digits, radix)? as f32),
473 Type::F64 => Dynamic::F64(u128::from_str_radix(digits, radix)? as f64),
474 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
475 })
476 }
477
478 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
479 let value: f64 = digits.parse()?;
480 Ok(match suffix.unwrap_or(Type::F32) {
481 Type::I8 => Dynamic::I8(value as i8),
482 Type::I16 => Dynamic::I16(value as i16),
483 Type::I32 => Dynamic::I32(value as i32),
484 Type::I64 => Dynamic::I64(value as i64),
485 Type::U8 => Dynamic::U8(value as u8),
486 Type::U16 => Dynamic::U16(value as u16),
487 Type::U32 => Dynamic::U32(value as u32),
488 Type::U64 => Dynamic::U64(value as u64),
489 Type::F32 => Dynamic::F32(value as f32),
490 Type::F64 => Dynamic::F64(value),
491 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
492 })
493 }
494
495 pub fn number(&mut self) -> Result<Dynamic> {
496 if self.get()? == b'0' {
497 if [b'b', b'B'].contains(&self.ahead()?) {
498 self.pos += 2;
499 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
500 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
501 let suffix = self.numeric_suffix();
502 return self.int_literal(&s, 2, suffix);
503 } else if [b'o', b'O'].contains(&self.ahead()?) {
504 self.pos += 2;
505 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
506 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
507 let suffix = self.numeric_suffix();
508 return self.int_literal(&s, 8, suffix);
509 } else if [b'x', b'X'].contains(&self.ahead()?) {
510 self.pos += 2;
511 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
512 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
513 let suffix = self.numeric_suffix();
514 return self.int_literal(&s, 16, suffix);
515 }
516 }
517 let start = self.pos;
518 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
519 self.pos += 1;
520 }
521 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
522 self.pos += 1;
523 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
524 self.pos += 1;
525 }
526 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
527 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
528 self.pos += 1;
529 }
530 }
531 if self.pos > start {
532 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
533 let suffix = self.numeric_suffix();
534 return self.float_literal(&text, suffix);
535 }
536 } else {
537 if self.pos > start {
538 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
539 let suffix = self.numeric_suffix();
540 return self.int_literal(&text, 10, suffix);
541 }
542 }
543 Err(ParserErr::NotNumber.into())
544 }
545}