1use std::collections::VecDeque;
5use std::fmt;
6use std::hash::{Hash, Hasher};
7
8use erg_common::error::Location;
9use erg_common::impl_displayable_deque_stream_for_wrapper;
10use erg_common::opcode311::BinOpCode;
11use erg_common::str::Str;
12use erg_common::traits::{DequeStream, Locational};
13#[cfg(not(feature = "pylib"))]
18use erg_proc_macros::pyclass;
19#[cfg(feature = "pylib")]
20use pyo3::prelude::*;
21
/// Fine-grained kind of a lexical token.
///
/// The enum is `#[repr(u8)]` with no explicit discriminants, so each variant's
/// numeric value is its position in this declaration (starting at 0).
/// NOTE(review): reordering or inserting variants changes those values —
/// confirm nothing depends on them before doing so.
///
/// The group comments below follow the classification made by
/// `TokenKind::category`.
#[pyclass]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum TokenKind {
    // Category `Symbol`.
    Symbol,
    // Literals (category `Literal`).
    NatLit,
    IntLit,
    BinLit,
    OctLit,
    HexLit,
    RatioLit,
    BoolLit,
    StrLit,
    // Pieces of an interpolated string; each has its own category of the same name.
    StrInterpLeft,
    StrInterpMid,
    StrInterpRight,
    // More literals (category `Literal`); note that `DocComment` is classified as a literal.
    NoneLit,
    EllipsisLit,
    InfLit,
    DocComment,
    // Prefix operators (category `UnaryOp`).
    PrePlus,
    PreMinus,
    PreBitNot,
    Mutate,
    // `PreStar` and `PreDblStar` are `UnaryOp`; `Try` is the only `PostfixOp`.
    PreStar, PreDblStar, Try,
    // Arithmetic operators (category `BinOp`); these convert to `BinOpCode`.
    Plus,
    Minus,
    Star,
    Slash,
    FloorDiv,
    Pow,
    Mod,
    // Range operators (category `BinOp`; recognised by `is_range_op`).
    Closed,
    RightOpen,
    LeftOpen,
    Open,
    // Bitwise and shift operators (category `BinOp`); these convert to `BinOpCode`.
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,
    // Comparison, membership, identity and logical operators (category `BinOp`).
    Less,
    Gre,
    LessEq,
    GreEq,
    DblEq,
    NotEq,
    InOp,
    NotInOp,
    ContainsOp,
    // `SubOp` is a `BinOp` but has no entry in `precedence` (it yields `None`).
    SubOp,
    IsOp,
    IsNotOp,
    AndOp,
    OrOp,
    // Reference operators (category `UnaryOp`).
    RefOp,
    RefMutOp,
    // Category `DefOp`; the `EQUAL` constant pairs this kind with the text "=".
    Assign,
    // Category `SpecialBinOp`.
    Inclusion,
    Walrus,
    // Category `LambdaOp`.
    FuncArrow,
    ProcArrow,
    // Opening/closing delimiters (categories `LEnclosure` / `REnclosure`);
    // `Indent` and `Dedent` are classified as enclosures as well.
    LParen,
    RParen,
    LSqBr,
    RSqBr,
    LBrace,
    RBrace,
    Indent,
    Dedent,
    // Category `SpecialBinOp`.
    Dot,
    Pipe,
    Colon,
    DblColon,
    SupertypeOf,
    SubtypeOf,
    As,
    Comma,
    // Category `Reserved`.
    Caret,
    Amper,
    // Each of the following three has a dedicated category of the same name.
    AtSign,
    VBar,
    UBar,
    // Category `Separator`.
    Newline,
    Semi,
    // Category `Illegal`; also the kind used by `Token::DUMMY`.
    Illegal,
    // Categories `BOF` / `EOF` (presumably beginning/end of file — confirm with the lexer).
    BOF,
    EOF,
}
190
191use TokenKind::*;
192
/// Coarse classification of a `TokenKind`, as computed by `TokenKind::category`.
#[pyclass]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenCategory {
    /// `TokenKind::Symbol`.
    Symbol,
    /// All `*Lit` kinds plus `DocComment`.
    Literal,
    /// `TokenKind::StrInterpLeft`.
    StrInterpLeft,
    /// `TokenKind::StrInterpMid`.
    StrInterpMid,
    /// `TokenKind::StrInterpRight`.
    StrInterpRight,
    /// Fallback category: every kind not explicitly mapped elsewhere.
    BinOp,
    /// `PrePlus`, `PreMinus`, `PreBitNot`, `Mutate`, `PreStar`, `PreDblStar`, `RefOp`, `RefMutOp`.
    UnaryOp,
    /// `Try`.
    PostfixOp,
    /// `LParen`, `LBrace`, `LSqBr`, `Indent`.
    LEnclosure,
    /// `RParen`, `RBrace`, `RSqBr`, `Dedent`.
    REnclosure,
    /// `Comma`, `Colon`, `DblColon`, `SupertypeOf`, `SubtypeOf`, `As`, `Dot`, `Pipe`,
    /// `Walrus`, `Inclusion`.
    SpecialBinOp,
    /// `Assign`. Counted as a block operator by `is_block_op`.
    DefOp,
    /// `FuncArrow`, `ProcArrow`. Counted as a block operator by `is_block_op`.
    LambdaOp,
    /// `Semi`, `Newline`.
    Separator,
    /// `Caret`, `Amper`.
    Reserved,
    /// `TokenKind::AtSign`.
    AtSign,
    /// `TokenKind::VBar`.
    VBar,
    /// `TokenKind::UBar`.
    UBar,
    /// `TokenKind::BOF`.
    BOF,
    /// `TokenKind::EOF`.
    EOF,
    /// `TokenKind::Illegal`.
    Illegal,
}
229
230impl fmt::Display for TokenCategory {
231 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232 write!(f, "{self:?}")
233 }
234}
235
236impl TokenCategory {
237 pub const fn is_block_op(&self) -> bool {
238 matches!(self, Self::DefOp | Self::LambdaOp)
239 }
240}
241
242impl TokenKind {
243 pub const fn category(&self) -> TokenCategory {
244 match self {
245 Symbol => TokenCategory::Symbol,
246 NatLit | BinLit | OctLit | HexLit | IntLit | RatioLit | StrLit | BoolLit | NoneLit
247 | EllipsisLit | InfLit | DocComment => TokenCategory::Literal,
248 StrInterpLeft => TokenCategory::StrInterpLeft,
249 StrInterpMid => TokenCategory::StrInterpMid,
250 StrInterpRight => TokenCategory::StrInterpRight,
251 PrePlus | PreMinus | PreBitNot | Mutate | PreStar | PreDblStar | RefOp | RefMutOp => {
252 TokenCategory::UnaryOp
253 }
254 Try => TokenCategory::PostfixOp,
255 Comma | Colon | DblColon | SupertypeOf | SubtypeOf | As | Dot | Pipe | Walrus
256 | Inclusion => TokenCategory::SpecialBinOp,
257 Assign => TokenCategory::DefOp,
258 FuncArrow | ProcArrow => TokenCategory::LambdaOp,
259 Semi | Newline => TokenCategory::Separator,
260 LParen | LBrace | LSqBr | Indent => TokenCategory::LEnclosure,
261 RParen | RBrace | RSqBr | Dedent => TokenCategory::REnclosure,
262 Caret | Amper => TokenCategory::Reserved,
263 AtSign => TokenCategory::AtSign,
264 VBar => TokenCategory::VBar,
265 UBar => TokenCategory::UBar,
266 BOF => TokenCategory::BOF,
267 EOF => TokenCategory::EOF,
268 Illegal => TokenCategory::Illegal,
269 _ => TokenCategory::BinOp,
270 }
271 }
272
273 pub const fn precedence(&self) -> Option<usize> {
274 let prec = match self {
275 Dot | DblColon => 200, Pow => 190, PrePlus | PreMinus | PreBitNot | RefOp | RefMutOp => 180, Star | Slash | FloorDiv | Mod => 170, Plus | Minus => 160, Shl | Shr => 150, BitAnd => 140, BitXor => 130, BitOr => 120, Closed | LeftOpen | RightOpen | Open => 100, Less | Gre | LessEq | GreEq | DblEq | NotEq | InOp | NotInOp | ContainsOp | IsOp
286 | IsNotOp => 90, AndOp => 80, OrOp => 70, FuncArrow | ProcArrow | Inclusion => 60, Colon | SupertypeOf | SubtypeOf | As => 50, Comma => 40, Assign | Walrus => 20, Newline | Semi => 10, LParen | LBrace | LSqBr | Indent => 0, _ => return None,
296 };
297 Some(prec)
298 }
299
300 pub const fn is_right_associative(&self) -> bool {
301 matches!(
302 self,
303 FuncArrow | ProcArrow | Assign )
305 }
306
307 pub const fn is_range_op(&self) -> bool {
308 matches!(self, Closed | LeftOpen | RightOpen | Open)
309 }
310}
311
312impl fmt::Display for TokenKind {
313 #[inline]
314 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
315 write!(f, "{self:?}")
316 }
317}
318
319impl From<TokenKind> for BinOpCode {
320 fn from(tk: TokenKind) -> Self {
321 match tk {
322 Plus => BinOpCode::Add,
323 Minus => BinOpCode::Subtract,
324 Star => BinOpCode::Multiply,
325 Slash => BinOpCode::TrueDivide,
326 FloorDiv => BinOpCode::FloorDiv,
327 Mod => BinOpCode::Remainder,
328 Pow => BinOpCode::Power,
329 BitAnd => BinOpCode::And,
330 BitOr => BinOpCode::Or,
331 BitXor => BinOpCode::Xor,
332 Shl => BinOpCode::LShift,
333 Shr => BinOpCode::RShift,
334 _ => panic!("invalid token kind for binop"),
335 }
336 }
337}
338
/// A lexical token: its kind, its textual content and its position.
///
/// Equality and hashing are implemented by hand elsewhere in this file and
/// take only `kind` and `content` into account; use `Token::deep_eq` when the
/// position should be compared too.
#[pyclass(get_all, set_all)]
#[derive(Clone, Eq)]
pub struct Token {
    /// What sort of token this is.
    pub kind: TokenKind,
    /// The token's text.
    pub content: Str,
    /// Line number. A value of `0` makes `loc()` report `Location::Unknown`.
    pub lineno: u32,
    /// Column at which the token starts.
    pub col_begin: u32,
    /// Column at which the token ends. `Token::new` sets it to
    /// `col_begin` plus the number of `char`s in `content`; other
    /// constructors store whatever the caller or `Location` provides.
    pub col_end: u32,
}
353
// Ready-made tokens without position information: `Token::dummy` sets
// `lineno`, `col_begin` and `col_end` to 0.

/// `Colon` token with content `":"`.
pub const COLON: Token = Token::dummy(TokenKind::Colon, ":");
/// `As` token with content `"as"`.
pub const AS: Token = Token::dummy(TokenKind::As, "as");
/// `Dot` token with content `"."`.
pub const DOT: Token = Token::dummy(TokenKind::Dot, ".");
/// `Assign` token with content `"="`.
pub const EQUAL: Token = Token::dummy(TokenKind::Assign, "=");
358
359impl fmt::Debug for Token {
360 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
361 f.debug_struct("Token")
362 .field("kind", &self.kind)
363 .field("content", &self.content.replace('\n', "\\n"))
364 .field("lineno", &self.lineno)
365 .field("col_begin", &self.col_begin)
366 .field("col_end", &self.col_end)
367 .finish()
368 }
369}
370
371impl fmt::Display for Token {
372 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
373 write!(f, "{:?} {}", self.kind, self.content.replace('\n', "\\n"))
374 }
375}
376
377impl PartialEq for Token {
380 #[inline]
381 fn eq(&self, other: &Self) -> bool {
382 self.is(other.kind) && self.content == other.content
383 }
384}
385
386impl Hash for Token {
387 fn hash<H: Hasher>(&self, state: &mut H) {
388 self.kind.hash(state);
389 self.content.hash(state);
390 }
391}
392
393impl Locational for Token {
394 fn loc(&self) -> Location {
395 if self.lineno == 0 {
396 Location::Unknown
397 } else {
398 Location::range(self.lineno, self.col_begin, self.lineno, self.col_end)
399 }
400 }
401
402 #[inline]
403 fn col_end(&self) -> Option<u32> {
404 Some(self.col_begin + self.content.len() as u32)
405 }
406}
407
408impl Token {
409 pub const DUMMY: Token = Token {
410 kind: TokenKind::Illegal,
411 content: Str::ever("DUMMY"),
412 lineno: 0,
413 col_begin: 0,
414 col_end: 0,
415 };
416
417 pub const fn dummy(kind: TokenKind, content: &'static str) -> Self {
418 Self {
419 kind,
420 content: Str::ever(content),
421 lineno: 0,
422 col_begin: 0,
423 col_end: 0,
424 }
425 }
426
427 #[inline]
428 pub fn new<S: Into<Str>>(kind: TokenKind, cont: S, lineno: u32, col_begin: u32) -> Self {
429 let content = cont.into();
430 let col_end = col_begin + content.chars().count() as u32;
431 Token {
432 kind,
433 content,
434 lineno,
435 col_begin,
436 col_end,
437 }
438 }
439
440 #[inline]
441 pub fn new_fake<S: Into<Str>>(
442 kind: TokenKind,
443 cont: S,
444 lineno: u32,
445 col_begin: u32,
446 col_end: u32,
447 ) -> Self {
448 Token {
449 kind,
450 content: cont.into(),
451 lineno,
452 col_begin,
453 col_end,
454 }
455 }
456
457 pub fn new_with_loc(kind: TokenKind, cont: impl Into<Str>, loc: Location) -> Self {
458 Token {
459 kind,
460 content: cont.into(),
461 lineno: loc.ln_begin().unwrap_or(0),
462 col_begin: loc.col_begin().unwrap_or(0),
463 col_end: loc.col_end().unwrap_or(1),
464 }
465 }
466
467 #[inline]
468 pub fn from_str(kind: TokenKind, cont: &str) -> Self {
469 Token {
470 kind,
471 content: Str::rc(cont),
472 lineno: 0,
473 col_begin: 0,
474 col_end: 0,
475 }
476 }
477
478 #[inline]
479 pub fn symbol(cont: &str) -> Self {
480 Self::from_str(TokenKind::Symbol, cont)
481 }
482
483 #[inline]
484 pub fn symbol_with_line(cont: &str, lineno: u32) -> Self {
485 Token {
486 kind: TokenKind::Symbol,
487 content: Str::rc(cont),
488 lineno,
489 col_begin: 0,
490 col_end: 1,
491 }
492 }
493
494 pub fn symbol_with_loc<S: Into<Str>>(cont: S, loc: Location) -> Self {
495 Token {
496 kind: TokenKind::Symbol,
497 content: cont.into(),
498 lineno: loc.ln_begin().unwrap_or(0),
499 col_begin: loc.col_begin().unwrap_or(0),
500 col_end: loc.col_end().unwrap_or(1),
501 }
502 }
503
504 pub const fn static_symbol(s: &'static str) -> Self {
505 Token {
506 kind: TokenKind::Symbol,
507 content: Str::ever(s),
508 lineno: 0,
509 col_begin: 0,
510 col_end: 1,
511 }
512 }
513
514 pub fn deep_eq(&self, other: &Self) -> bool {
515 self.kind == other.kind
516 && self.content == other.content
517 && self.lineno == other.lineno
518 && self.col_begin == other.col_begin
519 }
520
521 pub fn loc(&self) -> Location {
522 Locational::loc(self)
523 }
524
525 pub const fn category(&self) -> TokenCategory {
526 self.kind.category()
527 }
528
529 pub fn category_is(&self, category: TokenCategory) -> bool {
530 self.kind.category() == category
531 }
532
533 pub fn is(&self, kind: TokenKind) -> bool {
534 self.kind == kind
535 }
536
537 pub fn is_number(&self) -> bool {
538 matches!(
539 self.kind,
540 NatLit | IntLit | BinLit | OctLit | HexLit | RatioLit | InfLit
541 )
542 }
543
544 pub fn is_str(&self) -> bool {
545 matches!(self.kind, StrLit | DocComment)
546 }
547
548 pub const fn is_block_op(&self) -> bool {
549 self.category().is_block_op()
550 }
551
552 pub const fn inspect(&self) -> &Str {
553 &self.content
554 }
555
556 pub fn is_procedural(&self) -> bool {
557 self.inspect().ends_with('!')
558 }
559
560 pub fn is_const(&self) -> bool {
561 self.inspect().is_uppercase()
562 }
563}
564
/// A sequence of tokens, wrapping a `VecDeque<Token>`.
#[pyclass]
#[derive(Debug, Clone)]
pub struct TokenStream(VecDeque<Token>);

// NOTE(review): presumably generates the display and `DequeStream` plumbing for
// the wrapper — confirm against the macro definition in `erg_common`.
impl_displayable_deque_stream_for_wrapper!(TokenStream, Token);