1use std::sync::atomic::{AtomicU64, Ordering};
2
3use crate::intern::{InternedStr, StringInterner};
4use crate::source::SourceLocation;
5
/// Numeric identifier carried by every [`Token`].
///
/// Fresh identifiers are obtained from [`TokenId::next`]. The raw value `0`
/// is reserved as [`TokenId::INVALID`], which is also what the derived
/// `Default` produces.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct TokenId(pub u64);

/// Global source of raw identifier values.
///
/// It starts at 1 so the first allocated id differs from
/// [`TokenId::INVALID`].
static TOKEN_ID_COUNTER: AtomicU64 = AtomicU64::new(1);

impl TokenId {
    /// Sentinel identifier with raw value `0`.
    pub const INVALID: Self = Self(0);

    /// Allocates the next identifier from the global counter.
    ///
    /// The counter is advanced with an atomic `fetch_add`, so each call
    /// observes a different previous value. `Relaxed` ordering is used
    /// because only the counter value itself is consumed.
    pub fn next() -> Self {
        let raw = TOKEN_ID_COUNTER.fetch_add(1, Ordering::Relaxed);
        TokenId(raw)
    }

    /// Returns `true` unless this identifier is [`TokenId::INVALID`].
    pub fn is_valid(&self) -> bool {
        *self != Self::INVALID
    }
}

/// Renders the identifier as `TokenId(<raw value>)`.
impl std::fmt::Display for TokenId {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let TokenId(raw) = *self;
        write!(f, "TokenId({})", raw)
    }
}
40
41#[derive(Debug, Clone, PartialEq)]
47pub enum MacroInvocationKind {
48 Object,
50 Function {
52 args: Vec<Vec<Token>>,
54 },
55}
56
57#[derive(Debug, Clone, PartialEq)]
62pub struct MacroBeginInfo {
63 pub marker_id: TokenId,
65 pub trigger_token_id: TokenId,
67 pub macro_name: InternedStr,
69 pub kind: MacroInvocationKind,
71 pub call_loc: SourceLocation,
73 pub is_wrapped: bool,
75 pub preserve_call: bool,
85}
86
87#[derive(Debug, Clone, PartialEq)]
91pub struct MacroEndInfo {
92 pub begin_marker_id: TokenId,
94}
95
/// Syntactic form of a [`Comment`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CommentKind {
    /// Line-style comment.
    Line,
    /// Block-style comment.
    Block,
}
108
109#[derive(Debug, Clone, PartialEq)]
111pub struct Comment {
112 pub kind: CommentKind,
113 pub text: String,
114 pub loc: SourceLocation,
115}
116
117impl Comment {
118 pub fn new(kind: CommentKind, text: String, loc: SourceLocation) -> Self {
120 Self { kind, text, loc }
121 }
122
123 pub fn is_doc(&self) -> bool {
125 false
126 }
127}
128
129#[derive(Debug, Clone, PartialEq)]
131pub enum TokenKind {
132 IntLit(i64),
135 UIntLit(u64),
137 FloatLit(f64),
139 CharLit(u8),
141 WideCharLit(u32),
143 StringLit(Vec<u8>),
145 WideStringLit(Vec<u32>),
147
148 Ident(InternedStr),
150
151 KwAuto,
154 KwExtern,
155 KwRegister,
156 KwStatic,
157 KwTypedef,
158 KwChar,
160 KwDouble,
161 KwFloat,
162 KwInt,
163 KwLong,
164 KwShort,
165 KwSigned,
166 KwUnsigned,
167 KwVoid,
168 KwConst,
170 KwVolatile,
171 KwRestrict,
172 KwStruct,
174 KwUnion,
175 KwEnum,
176 KwBreak,
178 KwCase,
179 KwContinue,
180 KwDefault,
181 KwDo,
182 KwElse,
183 KwFor,
184 KwGoto,
185 KwIf,
186 KwReturn,
187 KwSwitch,
188 KwWhile,
189 KwInline,
191 KwSizeof,
192 KwBool,
194 KwComplex,
195 KwImaginary,
196 KwAlignas,
198 KwAlignof,
199 KwAtomic,
200 KwGeneric,
201 KwNoreturn,
202 KwStaticAssert,
203 KwThreadLocal,
204 KwFloat16,
206 KwFloat32,
207 KwFloat64,
208 KwFloat128,
209 KwFloat32x,
210 KwFloat64x,
211 KwInt128,
213
214 KwInline2, KwInline3, KwSigned2, KwConst2, KwConst3, KwVolatile2, KwVolatile3, KwRestrict2, KwRestrict3, KwBool2, KwAlignof2, KwAlignof3, KwTypeof, KwTypeof2, KwTypeof3, KwAttribute, KwAttribute2, KwAsm, KwAsm2, KwAsm3, KwExtension,
247 KwThread,
249
250 Plus, Minus, Star, Slash, Percent, Amp, Pipe, Caret, Tilde, LtLt, GtGt, Bang, AmpAmp, PipePipe, Lt, Gt, LtEq, GtEq, EqEq, BangEq, Eq, PlusEq, MinusEq, StarEq, SlashEq, PercentEq, AmpEq, PipeEq, CaretEq, LtLtEq, GtGtEq, PlusPlus, MinusMinus, Question, Colon, Arrow, Dot, Ellipsis, Comma, Semi, LParen, RParen, LBracket, RBracket, LBrace, RBrace, Hash, HashHash, Backslash, Eof,
315 Newline,
317 Space,
319
320 MacroBegin(Box<MacroBeginInfo>),
326 MacroEnd(MacroEndInfo),
330}
331
332impl TokenKind {
333 pub fn from_keyword(s: &str) -> Option<TokenKind> {
335 match s {
336 "auto" => Some(TokenKind::KwAuto),
338 "extern" => Some(TokenKind::KwExtern),
339 "register" => Some(TokenKind::KwRegister),
340 "static" => Some(TokenKind::KwStatic),
341 "typedef" => Some(TokenKind::KwTypedef),
342 "char" => Some(TokenKind::KwChar),
344 "double" => Some(TokenKind::KwDouble),
345 "float" => Some(TokenKind::KwFloat),
346 "int" => Some(TokenKind::KwInt),
347 "long" => Some(TokenKind::KwLong),
348 "short" => Some(TokenKind::KwShort),
349 "signed" => Some(TokenKind::KwSigned),
350 "unsigned" => Some(TokenKind::KwUnsigned),
351 "void" => Some(TokenKind::KwVoid),
352 "const" => Some(TokenKind::KwConst),
354 "volatile" => Some(TokenKind::KwVolatile),
355 "restrict" => Some(TokenKind::KwRestrict),
356 "struct" => Some(TokenKind::KwStruct),
358 "union" => Some(TokenKind::KwUnion),
359 "enum" => Some(TokenKind::KwEnum),
360 "break" => Some(TokenKind::KwBreak),
362 "case" => Some(TokenKind::KwCase),
363 "continue" => Some(TokenKind::KwContinue),
364 "default" => Some(TokenKind::KwDefault),
365 "do" => Some(TokenKind::KwDo),
366 "else" => Some(TokenKind::KwElse),
367 "for" => Some(TokenKind::KwFor),
368 "goto" => Some(TokenKind::KwGoto),
369 "if" => Some(TokenKind::KwIf),
370 "return" => Some(TokenKind::KwReturn),
371 "switch" => Some(TokenKind::KwSwitch),
372 "while" => Some(TokenKind::KwWhile),
373 "inline" => Some(TokenKind::KwInline),
375 "__inline" => Some(TokenKind::KwInline2),
376 "__inline__" => Some(TokenKind::KwInline3),
377 "sizeof" => Some(TokenKind::KwSizeof),
378 "_Bool" => Some(TokenKind::KwBool),
380 "bool" => Some(TokenKind::KwBool2),
381 "_Complex" => Some(TokenKind::KwComplex),
382 "_Imaginary" => Some(TokenKind::KwImaginary),
383 "_Alignas" => Some(TokenKind::KwAlignas),
385 "_Alignof" => Some(TokenKind::KwAlignof),
386 "__alignof" => Some(TokenKind::KwAlignof2),
387 "__alignof__" => Some(TokenKind::KwAlignof3),
388 "_Atomic" => Some(TokenKind::KwAtomic),
389 "_Generic" => Some(TokenKind::KwGeneric),
390 "_Noreturn" => Some(TokenKind::KwNoreturn),
391 "_Static_assert" => Some(TokenKind::KwStaticAssert),
392 "_Thread_local" => Some(TokenKind::KwThreadLocal),
393 "__thread" => Some(TokenKind::KwThread),
394 "_Float16" => Some(TokenKind::KwFloat16),
396 "_Float32" => Some(TokenKind::KwFloat32),
397 "_Float64" => Some(TokenKind::KwFloat64),
398 "_Float128" => Some(TokenKind::KwFloat128),
399 "_Float32x" => Some(TokenKind::KwFloat32x),
400 "_Float64x" => Some(TokenKind::KwFloat64x),
401 "__int128" => Some(TokenKind::KwInt128),
403 "__signed__" => Some(TokenKind::KwSigned2),
405 "__const" => Some(TokenKind::KwConst2),
407 "__const__" => Some(TokenKind::KwConst3),
408 "__volatile" => Some(TokenKind::KwVolatile2),
410 "__volatile__" => Some(TokenKind::KwVolatile3),
411 "__restrict" => Some(TokenKind::KwRestrict2),
413 "__restrict__" => Some(TokenKind::KwRestrict3),
414 "typeof" => Some(TokenKind::KwTypeof),
416 "__typeof" => Some(TokenKind::KwTypeof2),
417 "__typeof__" => Some(TokenKind::KwTypeof3),
418 "__attribute" => Some(TokenKind::KwAttribute),
420 "__attribute__" => Some(TokenKind::KwAttribute2),
421 "asm" => Some(TokenKind::KwAsm),
423 "__asm" => Some(TokenKind::KwAsm2),
424 "__asm__" => Some(TokenKind::KwAsm3),
425 "__extension__" => Some(TokenKind::KwExtension),
427 _ => None,
428 }
429 }
430
431 pub fn is_keyword(&self) -> bool {
433 matches!(
434 self,
435 TokenKind::KwAuto
436 | TokenKind::KwBreak
437 | TokenKind::KwCase
438 | TokenKind::KwChar
439 | TokenKind::KwConst
440 | TokenKind::KwConst2
441 | TokenKind::KwConst3
442 | TokenKind::KwContinue
443 | TokenKind::KwDefault
444 | TokenKind::KwDo
445 | TokenKind::KwDouble
446 | TokenKind::KwElse
447 | TokenKind::KwEnum
448 | TokenKind::KwExtern
449 | TokenKind::KwFloat
450 | TokenKind::KwFor
451 | TokenKind::KwGoto
452 | TokenKind::KwIf
453 | TokenKind::KwInline
454 | TokenKind::KwInline2
455 | TokenKind::KwInline3
456 | TokenKind::KwInt
457 | TokenKind::KwLong
458 | TokenKind::KwRegister
459 | TokenKind::KwRestrict
460 | TokenKind::KwRestrict2
461 | TokenKind::KwRestrict3
462 | TokenKind::KwReturn
463 | TokenKind::KwShort
464 | TokenKind::KwSigned
465 | TokenKind::KwSigned2
466 | TokenKind::KwSizeof
467 | TokenKind::KwStatic
468 | TokenKind::KwStruct
469 | TokenKind::KwSwitch
470 | TokenKind::KwTypedef
471 | TokenKind::KwUnion
472 | TokenKind::KwUnsigned
473 | TokenKind::KwVoid
474 | TokenKind::KwVolatile
475 | TokenKind::KwVolatile2
476 | TokenKind::KwVolatile3
477 | TokenKind::KwWhile
478 | TokenKind::KwBool
479 | TokenKind::KwBool2
480 | TokenKind::KwComplex
481 | TokenKind::KwImaginary
482 | TokenKind::KwAlignas
483 | TokenKind::KwAlignof
484 | TokenKind::KwAlignof2
485 | TokenKind::KwAlignof3
486 | TokenKind::KwAtomic
487 | TokenKind::KwGeneric
488 | TokenKind::KwNoreturn
489 | TokenKind::KwStaticAssert
490 | TokenKind::KwThreadLocal
491 | TokenKind::KwTypeof
492 | TokenKind::KwTypeof2
493 | TokenKind::KwTypeof3
494 | TokenKind::KwAttribute
495 | TokenKind::KwAttribute2
496 | TokenKind::KwAsm
497 | TokenKind::KwAsm2
498 | TokenKind::KwAsm3
499 | TokenKind::KwExtension
500 | TokenKind::KwThread
501 | TokenKind::KwInt128
502 )
503 }
504
505 pub fn keyword_str(&self) -> Option<&'static str> {
514 match self {
515 TokenKind::KwAuto => Some("auto"),
517 TokenKind::KwExtern => Some("extern"),
518 TokenKind::KwRegister => Some("register"),
519 TokenKind::KwStatic => Some("static"),
520 TokenKind::KwTypedef => Some("typedef"),
521 TokenKind::KwChar => Some("char"),
522 TokenKind::KwDouble => Some("double"),
523 TokenKind::KwFloat => Some("float"),
524 TokenKind::KwInt => Some("int"),
525 TokenKind::KwLong => Some("long"),
526 TokenKind::KwShort => Some("short"),
527 TokenKind::KwSigned => Some("signed"),
528 TokenKind::KwUnsigned => Some("unsigned"),
529 TokenKind::KwVoid => Some("void"),
530 TokenKind::KwConst => Some("const"),
531 TokenKind::KwVolatile => Some("volatile"),
532 TokenKind::KwRestrict => Some("restrict"),
533 TokenKind::KwStruct => Some("struct"),
534 TokenKind::KwUnion => Some("union"),
535 TokenKind::KwEnum => Some("enum"),
536 TokenKind::KwBreak => Some("break"),
537 TokenKind::KwCase => Some("case"),
538 TokenKind::KwContinue => Some("continue"),
539 TokenKind::KwDefault => Some("default"),
540 TokenKind::KwDo => Some("do"),
541 TokenKind::KwElse => Some("else"),
542 TokenKind::KwFor => Some("for"),
543 TokenKind::KwGoto => Some("goto"),
544 TokenKind::KwIf => Some("if"),
545 TokenKind::KwReturn => Some("return"),
546 TokenKind::KwSwitch => Some("switch"),
547 TokenKind::KwWhile => Some("while"),
548 TokenKind::KwInline => Some("inline"),
549 TokenKind::KwSizeof => Some("sizeof"),
550 TokenKind::KwBool => Some("_Bool"),
552 TokenKind::KwComplex => Some("_Complex"),
553 TokenKind::KwImaginary => Some("_Imaginary"),
554 TokenKind::KwAlignas => Some("_Alignas"),
556 TokenKind::KwAlignof => Some("_Alignof"),
557 TokenKind::KwAtomic => Some("_Atomic"),
558 TokenKind::KwGeneric => Some("_Generic"),
559 TokenKind::KwNoreturn => Some("_Noreturn"),
560 TokenKind::KwStaticAssert => Some("_Static_assert"),
561 TokenKind::KwThreadLocal => Some("_Thread_local"),
562 TokenKind::KwFloat16 => Some("_Float16"),
564 TokenKind::KwFloat32 => Some("_Float32"),
565 TokenKind::KwFloat64 => Some("_Float64"),
566 TokenKind::KwFloat128 => Some("_Float128"),
567 TokenKind::KwFloat32x => Some("_Float32x"),
568 TokenKind::KwFloat64x => Some("_Float64x"),
569 TokenKind::KwInline2 => Some("__inline"),
571 TokenKind::KwInline3 => Some("__inline__"),
572 TokenKind::KwSigned2 => Some("__signed__"),
573 TokenKind::KwConst2 => Some("__const"),
574 TokenKind::KwConst3 => Some("__const__"),
575 TokenKind::KwVolatile2 => Some("__volatile"),
576 TokenKind::KwVolatile3 => Some("__volatile__"),
577 TokenKind::KwRestrict2 => Some("__restrict"),
578 TokenKind::KwRestrict3 => Some("__restrict__"),
579 TokenKind::KwBool2 => Some("bool"),
580 TokenKind::KwAlignof2 => Some("__alignof"),
581 TokenKind::KwAlignof3 => Some("__alignof__"),
582 TokenKind::KwTypeof => Some("typeof"),
583 TokenKind::KwTypeof2 => Some("__typeof"),
584 TokenKind::KwTypeof3 => Some("__typeof__"),
585 TokenKind::KwAttribute => Some("__attribute"),
586 TokenKind::KwAttribute2 => Some("__attribute__"),
587 TokenKind::KwAsm => Some("asm"),
588 TokenKind::KwAsm2 => Some("__asm"),
589 TokenKind::KwAsm3 => Some("__asm__"),
590 TokenKind::KwExtension => Some("__extension__"),
591 TokenKind::KwThread => Some("__thread"),
592 TokenKind::KwInt128 => Some("__int128"),
593 _ => None,
594 }
595 }
596
597 pub fn format(&self, interner: &StringInterner) -> String {
599 match self {
600 TokenKind::Ident(id) => interner.get(*id).to_string(),
602 TokenKind::IntLit(n) => n.to_string(),
603 TokenKind::UIntLit(n) => format!("{}u", n),
604 TokenKind::FloatLit(f) => f.to_string(),
605 TokenKind::CharLit(c) => format!("'{}'", escape_char(*c)),
606 TokenKind::WideCharLit(c) => format!("L'{}'", escape_wide_char(*c)),
607 TokenKind::StringLit(s) => format!("\"{}\"", escape_string(s)),
608 TokenKind::WideStringLit(s) => format!("L\"{}\"", escape_wide_string(s)),
609 TokenKind::KwAuto => "auto".to_string(),
611 TokenKind::KwExtern => "extern".to_string(),
612 TokenKind::KwRegister => "register".to_string(),
613 TokenKind::KwStatic => "static".to_string(),
614 TokenKind::KwTypedef => "typedef".to_string(),
615 TokenKind::KwChar => "char".to_string(),
616 TokenKind::KwDouble => "double".to_string(),
617 TokenKind::KwFloat => "float".to_string(),
618 TokenKind::KwInt => "int".to_string(),
619 TokenKind::KwLong => "long".to_string(),
620 TokenKind::KwShort => "short".to_string(),
621 TokenKind::KwSigned => "signed".to_string(),
622 TokenKind::KwUnsigned => "unsigned".to_string(),
623 TokenKind::KwVoid => "void".to_string(),
624 TokenKind::KwConst => "const".to_string(),
625 TokenKind::KwVolatile => "volatile".to_string(),
626 TokenKind::KwRestrict => "restrict".to_string(),
627 TokenKind::KwStruct => "struct".to_string(),
628 TokenKind::KwUnion => "union".to_string(),
629 TokenKind::KwEnum => "enum".to_string(),
630 TokenKind::KwBreak => "break".to_string(),
631 TokenKind::KwCase => "case".to_string(),
632 TokenKind::KwContinue => "continue".to_string(),
633 TokenKind::KwDefault => "default".to_string(),
634 TokenKind::KwDo => "do".to_string(),
635 TokenKind::KwElse => "else".to_string(),
636 TokenKind::KwFor => "for".to_string(),
637 TokenKind::KwGoto => "goto".to_string(),
638 TokenKind::KwIf => "if".to_string(),
639 TokenKind::KwReturn => "return".to_string(),
640 TokenKind::KwSwitch => "switch".to_string(),
641 TokenKind::KwWhile => "while".to_string(),
642 TokenKind::KwInline => "inline".to_string(),
643 TokenKind::KwSizeof => "sizeof".to_string(),
644 TokenKind::KwBool => "_Bool".to_string(),
645 TokenKind::KwComplex => "_Complex".to_string(),
646 TokenKind::KwImaginary => "_Imaginary".to_string(),
647 TokenKind::KwAlignas => "_Alignas".to_string(),
648 TokenKind::KwAlignof => "_Alignof".to_string(),
649 TokenKind::KwAtomic => "_Atomic".to_string(),
650 TokenKind::KwGeneric => "_Generic".to_string(),
651 TokenKind::KwNoreturn => "_Noreturn".to_string(),
652 TokenKind::KwStaticAssert => "_Static_assert".to_string(),
653 TokenKind::KwThreadLocal => "_Thread_local".to_string(),
654 TokenKind::KwFloat16 => "_Float16".to_string(),
655 TokenKind::KwFloat32 => "_Float32".to_string(),
656 TokenKind::KwFloat64 => "_Float64".to_string(),
657 TokenKind::KwFloat128 => "_Float128".to_string(),
658 TokenKind::KwFloat32x => "_Float32x".to_string(),
659 TokenKind::KwFloat64x => "_Float64x".to_string(),
660 TokenKind::KwInline2 => "__inline".to_string(),
662 TokenKind::KwInline3 => "__inline__".to_string(),
663 TokenKind::KwSigned2 => "__signed__".to_string(),
664 TokenKind::KwConst2 => "__const".to_string(),
665 TokenKind::KwConst3 => "__const__".to_string(),
666 TokenKind::KwVolatile2 => "__volatile".to_string(),
667 TokenKind::KwVolatile3 => "__volatile__".to_string(),
668 TokenKind::KwRestrict2 => "__restrict".to_string(),
669 TokenKind::KwRestrict3 => "__restrict__".to_string(),
670 TokenKind::KwBool2 => "bool".to_string(),
671 TokenKind::KwAlignof2 => "__alignof".to_string(),
672 TokenKind::KwAlignof3 => "__alignof__".to_string(),
673 TokenKind::KwTypeof => "typeof".to_string(),
674 TokenKind::KwTypeof2 => "__typeof".to_string(),
675 TokenKind::KwTypeof3 => "__typeof__".to_string(),
676 TokenKind::KwAttribute => "__attribute".to_string(),
677 TokenKind::KwAttribute2 => "__attribute__".to_string(),
678 TokenKind::KwAsm => "asm".to_string(),
679 TokenKind::KwAsm2 => "__asm".to_string(),
680 TokenKind::KwAsm3 => "__asm__".to_string(),
681 TokenKind::KwExtension => "__extension__".to_string(),
682 TokenKind::KwThread => "__thread".to_string(),
683 TokenKind::KwInt128 => "__int128".to_string(),
684 TokenKind::Plus => "+".to_string(),
686 TokenKind::Minus => "-".to_string(),
687 TokenKind::Star => "*".to_string(),
688 TokenKind::Slash => "/".to_string(),
689 TokenKind::Percent => "%".to_string(),
690 TokenKind::Amp => "&".to_string(),
691 TokenKind::Pipe => "|".to_string(),
692 TokenKind::Caret => "^".to_string(),
693 TokenKind::Tilde => "~".to_string(),
694 TokenKind::LtLt => "<<".to_string(),
695 TokenKind::GtGt => ">>".to_string(),
696 TokenKind::Bang => "!".to_string(),
697 TokenKind::AmpAmp => "&&".to_string(),
698 TokenKind::PipePipe => "||".to_string(),
699 TokenKind::Lt => "<".to_string(),
700 TokenKind::Gt => ">".to_string(),
701 TokenKind::LtEq => "<=".to_string(),
702 TokenKind::GtEq => ">=".to_string(),
703 TokenKind::EqEq => "==".to_string(),
704 TokenKind::BangEq => "!=".to_string(),
705 TokenKind::Eq => "=".to_string(),
706 TokenKind::PlusEq => "+=".to_string(),
707 TokenKind::MinusEq => "-=".to_string(),
708 TokenKind::StarEq => "*=".to_string(),
709 TokenKind::SlashEq => "/=".to_string(),
710 TokenKind::PercentEq => "%=".to_string(),
711 TokenKind::AmpEq => "&=".to_string(),
712 TokenKind::PipeEq => "|=".to_string(),
713 TokenKind::CaretEq => "^=".to_string(),
714 TokenKind::LtLtEq => "<<=".to_string(),
715 TokenKind::GtGtEq => ">>=".to_string(),
716 TokenKind::PlusPlus => "++".to_string(),
717 TokenKind::MinusMinus => "--".to_string(),
718 TokenKind::Question => "?".to_string(),
719 TokenKind::Colon => ":".to_string(),
720 TokenKind::Arrow => "->".to_string(),
721 TokenKind::Dot => ".".to_string(),
722 TokenKind::Ellipsis => "...".to_string(),
723 TokenKind::Comma => ",".to_string(),
725 TokenKind::Semi => ";".to_string(),
726 TokenKind::LParen => "(".to_string(),
727 TokenKind::RParen => ")".to_string(),
728 TokenKind::LBracket => "[".to_string(),
729 TokenKind::RBracket => "]".to_string(),
730 TokenKind::LBrace => "{".to_string(),
731 TokenKind::RBrace => "}".to_string(),
732 TokenKind::Hash => "#".to_string(),
734 TokenKind::HashHash => "##".to_string(),
735 TokenKind::Backslash => "\\".to_string(),
736 TokenKind::Newline => "\n".to_string(),
738 TokenKind::Eof => "".to_string(),
739 TokenKind::Space => " ".to_string(),
740 TokenKind::MacroBegin(info) => {
742 format!("/*<MACRO_BEGIN:{}>*/", interner.get(info.macro_name))
743 }
744 TokenKind::MacroEnd(info) => {
745 format!("/*<MACRO_END:{}>*/", info.begin_marker_id)
746 }
747 }
748 }
749}
750
/// Escapes one byte for use inside a character literal (`'…'`).
///
/// Newline, carriage return, tab, backslash and single quote get their
/// two-character escapes; other printable ASCII (including space) is passed
/// through; everything else becomes a two-digit `\xNN` escape.
fn escape_char(c: u8) -> String {
    match c {
        b'\n' => "\\n".to_string(),
        b'\r' => "\\r".to_string(),
        b'\t' => "\\t".to_string(),
        b'\\' => "\\\\".to_string(),
        b'\'' => "\\'".to_string(),
        c if c.is_ascii_graphic() || c == b' ' => (c as char).to_string(),
        c => format!("\\x{:02x}", c),
    }
}

/// Escapes one wide-character code unit for use inside `L'…'`.
///
/// Printable ASCII is passed through, the common control characters get
/// their two-character escapes, and everything else is written as `\uXXXX`
/// (values up to 0xFFFF) or `\UXXXXXXXX`. Values that are not Unicode scalar
/// values are also written as `\UXXXXXXXX`.
///
/// NOTE(review): C99/C11 reject universal character names below U+00A0
/// (other than `$`, `@`, `` ` ``) and surrogates, so control characters
/// rendered here may not compile as C — confirm whether consumers need that.
fn escape_wide_char(c: u32) -> String {
    let Some(ch) = char::from_u32(c) else {
        return format!("\\U{:08x}", c);
    };
    match ch {
        '\n' => "\\n".to_string(),
        '\r' => "\\r".to_string(),
        '\t' => "\\t".to_string(),
        '\\' => "\\\\".to_string(),
        '\'' => "\\'".to_string(),
        ch if ch.is_ascii_graphic() || ch == ' ' => ch.to_string(),
        ch if u32::from(ch) <= 0xFFFF => format!("\\u{:04x}", u32::from(ch)),
        ch => format!("\\U{:08x}", u32::from(ch)),
    }
}

/// Escapes a byte string for use inside a string literal (`"…"`).
///
/// Uses the same per-byte rules as [`escape_char`], with two additions that
/// only matter inside strings:
///
/// * `"` is written as `\"`, otherwise it would terminate the literal;
/// * a hex digit that directly follows a `\xNN` escape is itself written as
///   a `\xNN` escape, because C hex escapes have no length limit and would
///   otherwise absorb the digit (`\x01` + `2` must not become `\x012`).
fn escape_string(s: &[u8]) -> String {
    let mut out = String::with_capacity(s.len());
    // True while the most recently emitted piece was a `\xNN` escape.
    let mut after_hex_escape = false;
    for &byte in s {
        if byte == b'"' {
            out.push_str("\\\"");
            after_hex_escape = false;
        } else if after_hex_escape && byte.is_ascii_hexdigit() {
            // Still a hex escape, so the flag stays set for the next byte.
            out.push_str(&format!("\\x{:02x}", byte));
        } else {
            let piece = escape_char(byte);
            after_hex_escape = piece.starts_with("\\x");
            out.push_str(&piece);
        }
    }
    out
}

/// Escapes a wide string for use inside `L"…"`.
///
/// Uses the same per-unit rules as [`escape_wide_char`], except that `"` is
/// written as `\"` so it cannot terminate the literal. The `\u`/`\U` escapes
/// have a fixed width, so no digit-absorption handling is needed here.
fn escape_wide_string(s: &[u32]) -> String {
    s.iter()
        .map(|&unit| {
            if unit == u32::from(b'"') {
                "\\\"".to_string()
            } else {
                escape_wide_char(unit)
            }
        })
        .collect()
}
791
792#[derive(Debug, Clone, PartialEq)]
794pub struct Token {
795 pub id: TokenId,
797 pub kind: TokenKind,
798 pub loc: SourceLocation,
799 pub leading_comments: Vec<Comment>,
801}
802
803impl Default for Token {
804 fn default() -> Self {
805 Self {
806 id: TokenId::default(),
807 kind: TokenKind::Eof,
808 loc: SourceLocation::default(),
809 leading_comments: Vec::new(),
810 }
811 }
812}
813
814impl Token {
815 pub fn new(kind: TokenKind, loc: SourceLocation) -> Self {
817 Self {
818 id: TokenId::next(),
819 kind,
820 loc,
821 leading_comments: Vec::new(),
822 }
823 }
824
825 pub fn with_comments(kind: TokenKind, loc: SourceLocation, comments: Vec<Comment>) -> Self {
827 Self {
828 id: TokenId::next(),
829 kind,
830 loc,
831 leading_comments: comments,
832 }
833 }
834
835 pub fn clone_with_new_id(&self) -> Self {
840 Self {
841 id: TokenId::next(),
842 kind: self.kind.clone(),
843 loc: self.loc.clone(),
844 leading_comments: self.leading_comments.clone(),
845 }
846 }
847}
848
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every spelling against the result `from_keyword` must produce.
    fn assert_keywords(cases: &[(&str, Option<TokenKind>)]) {
        for (spelling, expected) in cases {
            assert_eq!(
                &TokenKind::from_keyword(spelling),
                expected,
                "unexpected lookup result for {:?}",
                spelling
            );
        }
    }

    #[test]
    fn test_keyword_lookup() {
        assert_keywords(&[
            ("int", Some(TokenKind::KwInt)),
            ("if", Some(TokenKind::KwIf)),
            ("foo", None),
        ]);
    }

    #[test]
    fn test_inline_variants() {
        assert_keywords(&[
            ("inline", Some(TokenKind::KwInline)),
            ("__inline", Some(TokenKind::KwInline2)),
            ("__inline__", Some(TokenKind::KwInline3)),
        ]);
    }

    #[test]
    fn test_gcc_extension_keywords() {
        assert_keywords(&[
            ("const", Some(TokenKind::KwConst)),
            ("__const", Some(TokenKind::KwConst2)),
            ("__const__", Some(TokenKind::KwConst3)),
            ("volatile", Some(TokenKind::KwVolatile)),
            ("__volatile", Some(TokenKind::KwVolatile2)),
            ("__volatile__", Some(TokenKind::KwVolatile3)),
            ("restrict", Some(TokenKind::KwRestrict)),
            ("__restrict", Some(TokenKind::KwRestrict2)),
            ("__restrict__", Some(TokenKind::KwRestrict3)),
            ("typeof", Some(TokenKind::KwTypeof)),
            ("__typeof", Some(TokenKind::KwTypeof2)),
            ("__typeof__", Some(TokenKind::KwTypeof3)),
            ("__attribute", Some(TokenKind::KwAttribute)),
            ("__attribute__", Some(TokenKind::KwAttribute2)),
            ("asm", Some(TokenKind::KwAsm)),
            ("__asm", Some(TokenKind::KwAsm2)),
            ("__asm__", Some(TokenKind::KwAsm3)),
        ]);
    }

    #[test]
    fn test_token_id_uniqueness() {
        let ids = [TokenId::next(), TokenId::next(), TokenId::next()];

        assert_ne!(ids[0], ids[1]);
        assert_ne!(ids[1], ids[2]);
        assert_ne!(ids[0], ids[2]);
    }

    #[test]
    fn test_token_id_invalid() {
        let fresh = TokenId::next();

        assert!(!TokenId::INVALID.is_valid());
        assert!(fresh.is_valid());
    }

    #[test]
    fn test_token_has_unique_id() {
        let first = Token::new(TokenKind::KwInt, SourceLocation::default());
        let second = Token::new(TokenKind::KwInt, SourceLocation::default());

        assert_ne!(first.id, second.id);
    }

    #[test]
    fn test_clone_with_new_id() {
        let original = Token::new(TokenKind::KwInt, SourceLocation::default());
        let copy = original.clone_with_new_id();

        // Same content, different identity.
        assert_eq!(original.kind, copy.kind);
        assert_ne!(original.id, copy.id);
    }

    #[test]
    fn test_clone_preserves_id() {
        let original = Token::new(TokenKind::KwInt, SourceLocation::default());
        let copy = original.clone();

        // A plain clone keeps the identity.
        assert_eq!(original.id, copy.id);
    }
}