1use std::fmt::Debug;
2
3use anyhow::{Result, anyhow};
4use dynamic::{ConstIntOp, Dynamic, Type};
5use smol_str::SmolStr;
6
7mod expr;
8pub use expr::{BinaryOp, Expr, ExprKind, UnaryOp};
9
10mod pattern;
11pub use pattern::{Pattern, PatternKind};
12
13mod stmt;
14pub use stmt::{Stmt, StmtKind};
15
/// A pair of `start`/`end` offsets marking a region of source text.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Span {
    /// Offset where the region begins.
    pub start: usize,
    /// Offset where the region ends.
    pub end: usize,
}

impl Span {
    /// Builds a span from explicit `start` and `end` offsets.
    pub const fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Builds a zero-length span whose `start` and `end` are both `pos`.
    pub const fn empty(pos: usize) -> Self {
        Span::new(pos, pos)
    }

    /// Returns the smallest span that covers both `self` and `other`:
    /// the lower of the two starts paired with the higher of the two ends.
    pub fn merge(self, other: Self) -> Self {
        let start = usize::min(self.start, other.start);
        let end = usize::max(self.end, other.end);
        Span { start, end }
    }
}
35
/// Cursor-based parser state over an in-memory byte buffer.
#[derive(Debug)]
pub struct Parser {
    /// Index into `buf` of the next byte to be read.
    pos: usize,
    /// The raw input bytes being parsed.
    buf: Vec<u8>,
    /// Recorded offsets; `error_stmt` uses the last entry as the start of the text it reports.
    /// NOTE(review): nothing in this part of the file pushes to it — presumably filled by the
    /// statement parser; confirm.
    spans: Vec<usize>,
}
42
/// Bytes that can never be part of an identifier: ASCII whitespace plus punctuation and
/// operator characters. Any byte not listed here is treated as an identifier byte.
const NOT_IDENT: &[u8] = b" \t\n\r/*+-=(){}[];:,.<>!#$%^&|\\\"'";
/// Bytes skipped as whitespace: space, tab, line feed and carriage return.
const WHITE_SPACE: &[u8] = b" \t\n\r";
/// Built-in type names paired with the `Type` each one denotes.
///
/// `get_type` tries these names before falling back to an identifier, and
/// `numeric_suffix` scans the same table when looking for a literal suffix.
const TYPES: &[(&str, Type)] = &[
    ("bool", Type::Bool),
    ("string", Type::Str),
    // Signed integers.
    ("i8", Type::I8),
    ("i16", Type::I16),
    ("i32", Type::I32),
    ("i64", Type::I64),
    // Unsigned integers.
    ("u8", Type::U8),
    ("u16", Type::U16),
    ("u32", Type::U32),
    ("u64", Type::U64),
    // Floating point.
    ("f16", Type::F16),
    ("f32", Type::F32),
    ("f64", Type::F64),
];
/// Reserved words. `ident` rejects any identifier whose text equals one of these.
const KEYWORDS: &[&str] = &[
    // Literals.
    "true", "false", "null",
    // Bindings and control flow.
    "let", "if", "else", "for", "in", "while",
    // Items and modifiers.
    "pub", "fn", "struct", "impl", "const", "static",
    // Jumps.
    "continue", "return", "break",
];
61
/// Parses a delimited sequence of items into a growable collection and evaluates to it.
///
/// * `$self` – the parser; must expose `whitespace()`, `get()` and a `pos` field.
/// * `$start` – the initial collection (anything with `push`).
/// * `$end` – the byte that closes the list; it is consumed.
/// * `$sep` – the byte that separates items; it is consumed when present.
/// * `$item_expr` – expression that parses one item.
///
/// Uses `?`, so it must be expanded inside a function returning a compatible `Result`.
#[macro_export]
macro_rules! parse_list {
    ($self: ident, $start: expr, $end: expr, $sep: expr, $item_expr: expr) => {{
        let mut collected = $start;
        loop {
            $self.whitespace()?;
            // The closing byte ends the list, including right after a separator.
            if $self.get()? == $end {
                $self.pos += 1;
                break;
            }
            collected.push($item_expr);
            $self.whitespace()?;
            // A separator is swallowed when present; its absence is not an error here.
            if $self.get()? == $sep {
                $self.pos += 1;
            }
        }
        collected
    }};
}
82
/// Evaluates `$method` (an expression yielding a `Result`) and, if it fails, rewinds
/// `$self.pos` to the value it had before the attempt. The result is passed through unchanged.
#[macro_export]
macro_rules! try_parse {
    ($self: ident, $method: expr) => {{
        let checkpoint = $self.pos;
        match $method {
            Err(failure) => {
                // Undo whatever input the failed attempt consumed.
                $self.pos = checkpoint;
                Err(failure)
            }
            success => success,
        }
    }};
}
96
/// Errors produced by the low-level `Parser` primitives.
/// The `#[error]` strings are the user-facing messages.
#[derive(Debug, thiserror::Error)]
pub enum ParserErr {
    /// A specific byte was expected (first field) but another was found (second field).
    /// `take` reports `'\0'` as the found character when the input has ended.
    #[error("期望字符 {0} 实际字符 {1}")]
    ExpectChar(char, char),
    /// `collect` matched zero bytes.
    #[error("未发现期望字符")]
    NoCharCollect,
    /// The given literal text was expected at the cursor (`just` / `keyword`).
    #[error("期望字符串 {0}")]
    ExpectedString(SmolStr),
    /// A byte was requested at or past the end of the buffer (`get` / `ahead`).
    #[error("输入结束")]
    EndofInput,
    /// A `/* ... */` comment was opened but never closed.
    #[error("未关闭的注释")]
    UncloseComment,
    /// A raw string prefix (`r` followed by `#`s) was not followed by an opening quote.
    #[error("非法的原始字符串")]
    IllegalRawString,
    /// A string literal reached the end of input before its closing quote.
    #[error("未关闭字符串")]
    UnclosedString,
    /// `string` was called at a position that does not start with a double quote.
    #[error("非字符串")]
    NotString,
    /// `number` found no digits at the cursor.
    #[error("非数字")]
    NotNumber,
}
118
119impl Parser {
120 pub fn new(buf: Vec<u8>) -> Self {
121 Self { pos: 0, buf, spans: Vec::new() }
122 }
123
124 pub fn is_eof(&self) -> bool {
125 self.pos >= self.buf.len()
126 }
127
128 pub fn get(&self) -> Result<u8> {
129 self.buf.get(self.pos).cloned().ok_or(ParserErr::EndofInput.into())
131 }
132
133 pub fn take(&mut self, ch: u8) -> Result<()> {
134 if self.buf.get(self.pos).map(|b| *b == ch).unwrap_or(false) {
136 self.pos += 1;
137 Ok(())
138 } else {
139 Err(ParserErr::ExpectChar(ch as char, self.buf.get(self.pos as usize).cloned().unwrap_or(0) as char).into())
140 }
141 }
142
143 pub fn until(&mut self, ch: u8) -> Result<()> {
144 self.whitespace()?;
146 self.take(ch)
147 }
148
149 pub fn ahead(&self) -> Result<u8> {
150 self.buf.get(self.pos + 1).cloned().ok_or(ParserErr::EndofInput.into())
152 }
153
154 pub fn get_str(&self, start: usize, stop: usize) -> SmolStr {
155 SmolStr::from(String::from_utf8_lossy(&self.buf[start..stop]))
156 }
157
158 pub fn error_stmt(&self) -> SmolStr {
159 SmolStr::from(String::from_utf8_lossy(&self.buf[self.spans.last().cloned().unwrap_or(0)..self.pos]))
160 }
161
162 pub fn current_pos(&self) -> usize {
163 self.pos
164 }
165
166 pub fn span_from(&self, start: usize) -> Span {
167 Span::new(start, self.pos)
168 }
169
170 pub fn collect<F: Fn(u8) -> bool>(&mut self, f: F) -> Result<(usize, usize)> {
171 let start = self.pos;
172 while self.pos < self.buf.len() && f(self.buf[self.pos]) {
173 self.pos += 1;
174 }
175 if self.pos > start { Ok((start, self.pos)) } else { Err(ParserErr::NoCharCollect.into()) }
176 }
177
178 pub fn just(&mut self, pattern: &str) -> Result<()> {
179 if self.buf.len() - self.pos >= pattern.len() && self.buf[self.pos..self.pos + pattern.len()].eq(pattern.as_bytes()) {
180 self.pos += pattern.len();
181 Ok(())
182 } else {
183 Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into())
184 }
185 }
186
187 pub fn keyword(&mut self, pattern: &str) -> Result<()> {
188 self.just(pattern)?;
189 if self.pos < self.buf.len() && !NOT_IDENT.contains(&self.buf[self.pos]) {
190 self.pos -= pattern.len();
191 return Err(ParserErr::ExpectedString(SmolStr::new(pattern)).into());
192 }
193 Ok(())
194 }
195
196 pub fn get_type(&mut self) -> Result<Type> {
197 self.whitespace()?;
198 if self.get()? == b'[' {
199 self.pos += 1;
200 let ty = self.get_type()?;
201 self.until(b';')?;
202 self.whitespace()?;
203 let len = self.get_type_param()?;
204 self.until(b']')?;
205 if let Type::ConstInt(number) = len {
206 let number = u32::try_from(number).map_err(|_| anyhow!("数组长度超出 u32 范围"))?;
207 Ok(Type::Array(std::rc::Rc::new(ty), number))
208 } else {
209 Ok(Type::ArrayParam(std::rc::Rc::new(ty), std::rc::Rc::new(len)))
210 }
211 } else {
212 for ty in TYPES {
213 if self.just(ty.0).is_ok() {
214 return Ok(ty.1.clone());
215 }
216 }
217 let name = self.ident()?;
218 if self.take(b'<').is_ok() {
219 let params = crate::parse_list!(self, Vec::new(), b'>', b',', self.get_type_param()?);
220 Ok(Type::Ident { name, params })
221 } else {
222 Ok(Type::Ident { name, params: Vec::new() })
223 }
224 }
225 }
226
227 pub fn get_type_param(&mut self) -> Result<Type> {
228 self.const_type_param_add()
229 }
230
231 fn const_type_param_add(&mut self) -> Result<Type> {
232 let mut left = self.const_type_param_mul()?;
233 loop {
234 self.whitespace()?;
235 let op = if self.take(b'+').is_ok() {
236 Some(ConstIntOp::Add)
237 } else if self.take(b'-').is_ok() {
238 Some(ConstIntOp::Sub)
239 } else {
240 None
241 };
242 let Some(op) = op else { break };
243 let right = self.const_type_param_mul()?;
244 left = Self::fold_const_type_binary(op, left, right)?;
245 }
246 Ok(left)
247 }
248
249 fn const_type_param_mul(&mut self) -> Result<Type> {
250 let mut left = self.const_type_param_primary()?;
251 loop {
252 self.whitespace()?;
253 let op = if self.take(b'*').is_ok() {
254 Some(ConstIntOp::Mul)
255 } else if self.take(b'/').is_ok() {
256 Some(ConstIntOp::Div)
257 } else if self.take(b'%').is_ok() {
258 Some(ConstIntOp::Mod)
259 } else {
260 None
261 };
262 let Some(op) = op else { break };
263 let right = self.const_type_param_primary()?;
264 left = Self::fold_const_type_binary(op, left, right)?;
265 }
266 Ok(left)
267 }
268
269 fn const_type_param_primary(&mut self) -> Result<Type> {
270 self.whitespace()?;
271 if self.take(b'(').is_ok() {
272 let ty = self.get_type_param()?;
273 self.until(b')')?;
274 return Ok(ty);
275 }
276 if self.get()?.is_ascii_digit() {
277 let value = self.number()?;
278 if let Some(value) = value.as_uint() {
279 let value = i64::try_from(value).map_err(|_| anyhow!("模板数字参数超出 i64 范围"))?;
280 Ok(Type::ConstInt(value))
281 } else if let Some(value) = value.as_int() {
282 Ok(Type::ConstInt(value))
283 } else {
284 Err(anyhow!("模板数字参数必须是整数"))
285 }
286 } else {
287 self.get_type()
288 }
289 }
290
291 fn fold_const_type_binary(op: ConstIntOp, left: Type, right: Type) -> Result<Type> {
292 if let (Type::ConstInt(left), Type::ConstInt(right)) = (&left, &right) {
293 let value = match op {
294 ConstIntOp::Add => left + right,
295 ConstIntOp::Sub => left - right,
296 ConstIntOp::Mul => left * right,
297 ConstIntOp::Div => {
298 if *right == 0 {
299 return Err(anyhow!("模板整数除以 0"));
300 }
301 left / right
302 }
303 ConstIntOp::Mod => {
304 if *right == 0 {
305 return Err(anyhow!("模板整数取模 0"));
306 }
307 left % right
308 }
309 };
310 Ok(Type::ConstInt(value))
311 } else {
312 Ok(Type::ConstBinary { op, left: std::rc::Rc::new(left), right: std::rc::Rc::new(right) })
313 }
314 }
315
316 pub fn comment(&mut self) -> Result<()> {
317 if self.get()? == b'/' && self.ahead()? == b'/' {
318 self.pos += 2;
319 while self.pos < self.buf.len() && self.buf[self.pos] != b'\n' {
320 self.pos += 1;
321 }
322 Ok(())
323 } else if self.get()? == b'/' && self.ahead()? == b'*' {
324 self.pos += 2;
325 while self.pos + 1 < self.buf.len() {
326 if self.buf[self.pos] == b'*' && self.buf[self.pos + 1] == b'/' {
327 self.pos += 2;
328 return Ok(());
329 }
330 self.pos += 1;
331 }
332 Err(ParserErr::UncloseComment.into())
333 } else {
334 Ok(())
335 }
336 }
337
338 pub fn whitespace(&mut self) -> Result<()> {
339 while self.pos < self.buf.len() {
340 self.comment()?;
341 if self.pos >= self.buf.len() || !WHITE_SPACE.contains(&self.buf[self.pos]) {
342 break;
343 }
344 self.pos += 1;
345 }
346 Ok(())
347 }
348
349 pub fn ident(&mut self) -> Result<SmolStr> {
350 let (start, mut stop) = self.collect(|ch| !NOT_IDENT.contains(&ch))?;
351 loop {
352 let save_pos = self.pos;
353 if self.just("::").is_err() {
354 break;
355 }
356 match self.collect(|ch| !NOT_IDENT.contains(&ch)) {
357 Ok((_, next_stop)) => {
358 stop = next_stop;
359 }
360 Err(_) => {
361 self.pos = save_pos;
362 break;
363 }
364 }
365 }
366 if KEYWORDS.iter().position(|k| k.as_bytes() == &self.buf[start..stop]).is_some() {
367 return Err(anyhow!("发现关键字{}", String::from_utf8_lossy(&self.buf[start..stop])));
368 }
369 Ok(self.get_str(start, stop))
370 }
371
372 pub fn string(&mut self) -> Result<SmolStr> {
373 if self.buf[self.pos] == b'"' {
374 self.pos += 1;
375 let mut text_buf = Vec::new();
376 while self.pos < self.buf.len() {
377 if self.buf[self.pos] == b'\\' {
378 self.pos += 1;
380 match self.buf[self.pos] {
381 ch @ (b'n' | b'r' | b't' | b'\\' | b'"') => {
382 text_buf.push(ch);
383 self.pos += 1;
384 }
385 b'u' => {
386 self.pos += 1;
387 let unicode = if self.take(b'{').is_ok() {
388 let code = self.hex()?;
389 self.pos += 1;
390 code
391 } else {
392 self.hex()?
393 };
394 let ch = char::from_u32(unicode as u32).ok_or(anyhow!("非法 unicode {}", unicode))?;
395 let mut utf8_buf = [0u8; 4];
396 let s = ch.encode_utf8(&mut utf8_buf);
397 text_buf.extend_from_slice(s.as_bytes());
398 }
399 b'x' => {
400 self.pos += 1;
401 if self.pos + 2 < self.buf.len() {
402 let start = self.pos;
403 self.pos += 2;
404 let hex = &self.buf[start..self.pos];
405 let code = u32::from_str_radix(String::from_utf8_lossy(hex).as_ref(), 16)?;
406 text_buf.push(code as u8);
407 }
408 }
409 other => {
410 return Err(anyhow!("invalid escape character: {}", other as char));
411 }
412 }
413 } else {
414 if self.buf[self.pos] == b'"' {
415 self.pos += 1;
416 return Ok(String::from_utf8(text_buf)?.into());
417 }
418 text_buf.push(self.buf[self.pos]);
419 self.pos += 1;
420 }
421 }
422 Err(ParserErr::UnclosedString.into())
423 } else {
424 Err(ParserErr::NotString.into())
425 }
426 }
427
428 pub fn text(&mut self) -> Result<SmolStr> {
429 if self.get()? == b'r' && [b'#', b'"'].contains(&self.ahead()?) {
430 self.pos += 1;
431 let mut end = String::new();
432 while self.buf[self.pos] == b'#' {
433 end.push('#');
434 self.pos += 1;
435 }
436 if self.get()? != b'"' {
437 return Err(ParserErr::IllegalRawString.into());
438 }
439 self.pos += 1;
440 let start_pos = self.pos;
441 while self.pos < self.buf.len() {
442 if self.just(&end).is_ok() {
443 break;
444 }
445 self.pos += 1;
446 }
447 Ok(self.get_str(start_pos, self.pos - end.len()))
448 } else {
449 self.string()
450 }
451 }
452
453 fn hex(&mut self) -> Result<i32> {
454 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
456 Ok(i32::from_str_radix(&String::from_utf8_lossy(&self.buf[start..stop]), 16)?)
457 }
458
459 fn numeric_suffix(&mut self) -> Option<Type> {
460 let save = self.pos;
461 for (name, ty) in TYPES {
462 if !ty.is_native() || *ty == Type::F16 {
463 continue;
464 }
465 if self.buf.len() >= self.pos + name.len() && self.buf[self.pos..self.pos + name.len()].eq(name.as_bytes()) {
466 self.pos += name.len();
467 return Some(ty.clone());
468 }
469 }
470 self.pos = save;
471 None
472 }
473
474 fn int_literal(&mut self, digits: &str, radix: u32, suffix: Option<Type>) -> Result<Dynamic> {
475 Ok(match suffix.unwrap_or(Type::I32) {
476 Type::I8 => Dynamic::I8(i128::from_str_radix(digits, radix)? as i8),
477 Type::I16 => Dynamic::I16(i128::from_str_radix(digits, radix)? as i16),
478 Type::I32 => Dynamic::I32(i128::from_str_radix(digits, radix)? as i32),
479 Type::I64 => Dynamic::I64(i128::from_str_radix(digits, radix)? as i64),
480 Type::U8 => Dynamic::U8(u128::from_str_radix(digits, radix)? as u8),
481 Type::U16 => Dynamic::U16(u128::from_str_radix(digits, radix)? as u16),
482 Type::U32 => Dynamic::U32(u128::from_str_radix(digits, radix)? as u32),
483 Type::U64 => Dynamic::U64(u128::from_str_radix(digits, radix)? as u64),
484 Type::F32 => Dynamic::F32(u128::from_str_radix(digits, radix)? as f32),
485 Type::F64 => Dynamic::F64(u128::from_str_radix(digits, radix)? as f64),
486 ty => return Err(anyhow!("{:?} 不能作为数字后缀", ty)),
487 })
488 }
489
490 fn float_literal(&mut self, digits: &str, suffix: Option<Type>) -> Result<Dynamic> {
491 let value: f64 = digits.parse()?;
492 Ok(match suffix.unwrap_or(Type::F32) {
493 Type::I8 => Dynamic::I8(value as i8),
494 Type::I16 => Dynamic::I16(value as i16),
495 Type::I32 => Dynamic::I32(value as i32),
496 Type::I64 => Dynamic::I64(value as i64),
497 Type::U8 => Dynamic::U8(value as u8),
498 Type::U16 => Dynamic::U16(value as u16),
499 Type::U32 => Dynamic::U32(value as u32),
500 Type::U64 => Dynamic::U64(value as u64),
501 Type::F32 => Dynamic::F32(value as f32),
502 Type::F64 => Dynamic::F64(value),
503 ty => return Err(anyhow!("{:?} 不能作为浮点数字后缀", ty)),
504 })
505 }
506
507 pub fn number(&mut self) -> Result<Dynamic> {
508 if self.get()? == b'0' {
509 if [b'b', b'B'].contains(&self.ahead()?) {
510 self.pos += 2;
511 let (start, stop) = self.collect(|ch| ch == b'0' || ch == b'1')?;
512 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
513 let suffix = self.numeric_suffix();
514 return self.int_literal(&s, 2, suffix);
515 } else if [b'o', b'O'].contains(&self.ahead()?) {
516 self.pos += 2;
517 let (start, stop) = self.collect(|ch| ch >= b'0' && ch <= b'7')?;
518 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
519 let suffix = self.numeric_suffix();
520 return self.int_literal(&s, 8, suffix);
521 } else if [b'x', b'X'].contains(&self.ahead()?) {
522 self.pos += 2;
523 let (start, stop) = self.collect(|ch| (ch >= b'0' && ch <= b'9') || (ch >= b'a' && ch <= b'f') || (ch >= b'A' && ch <= b'F'))?;
524 let s = String::from_utf8_lossy(&self.buf[start..stop]).to_string();
525 let suffix = self.numeric_suffix();
526 return self.int_literal(&s, 16, suffix);
527 }
528 }
529 let start = self.pos;
530 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
531 self.pos += 1;
532 }
533 if self.pos < self.buf.len() && self.buf[self.pos] == b'.' && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
534 self.pos += 1;
535 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
536 self.pos += 1;
537 }
538 if self.pos < self.buf.len() && (self.buf[self.pos] == b'e' || self.buf[self.pos] == b'E') && self.ahead().map(|ch| ch <= b'9' && ch >= b'0').unwrap_or(false) {
539 while self.pos < self.buf.len() && self.buf[self.pos] <= b'9' && self.buf[self.pos] >= b'0' {
540 self.pos += 1;
541 }
542 }
543 if self.pos > start {
544 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
545 let suffix = self.numeric_suffix();
546 return self.float_literal(&text, suffix);
547 }
548 } else {
549 if self.pos > start {
550 let text = String::from_utf8_lossy(&self.buf[start..self.pos]).to_string();
551 let suffix = self.numeric_suffix();
552 return self.int_literal(&text, 10, suffix);
553 }
554 }
555 Err(ParserErr::NotNumber.into())
556 }
557}