ruast/
token.rs

1use std::fmt;
2use std::ops::{Deref, DerefMut};
3
4use crate::expr::Lit;
5
#[cfg(feature = "fuzzing")]
pub mod depth_limiter {
    //! Per-thread countdown used while fuzzing.
    //!
    //! [`set`] stores a budget for the current thread and every call to
    //! [`reached`] that does not report exhaustion consumes one unit of it.
    use std::cell::Cell;

    thread_local! {
        // Remaining budget for the current thread; zero until `set` is called.
        static DEPTH: Cell<u32> = const { Cell::new(0) };
    }

    /// Replaces the current thread's remaining budget with `max_depth`.
    pub fn set(max_depth: u32) {
        DEPTH.with(|budget| budget.set(max_depth));
    }

    /// Returns `true` when the budget is already zero.
    ///
    /// Otherwise decrements the budget by one and returns `false`.
    pub fn reached() -> bool {
        DEPTH.with(|budget| match budget.get().checked_sub(1) {
            Some(left) => {
                budget.set(left);
                false
            }
            // Nothing left to spend: report exhaustion without touching the counter.
            None => true,
        })
    }
}
26
/// Text newtype that stands in for the standard `String` when fuzzing.
///
/// It wraps a `std::string::String`; its `Arbitrary` implementation is meant
/// to generate only text that is usable as an identifier.
#[cfg(feature = "fuzzing")]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct String(std::string::String);
31
#[cfg(feature = "fuzzing")]
impl fmt::Display for String {
    /// Writes the wrapped text exactly as stored.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(inner) = self;
        f.write_str(inner)
    }
}
38
#[cfg(feature = "fuzzing")]
impl Deref for String {
    type Target = str;

    /// Borrows the wrapped text as a string slice.
    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
47
#[cfg(feature = "fuzzing")]
impl AsRef<str> for String {
    /// Borrows the wrapped text as `&str`.
    fn as_ref(&self) -> &str {
        self.0.as_str()
    }
}

#[cfg(feature = "fuzzing")]
impl AsRef<std::ffi::OsStr> for String {
    /// Views the wrapped text as an OS string slice.
    fn as_ref(&self) -> &std::ffi::OsStr {
        std::ffi::OsStr::new(self.0.as_str())
    }
}
61
#[cfg(feature = "fuzzing")]
impl From<std::string::String> for String {
    /// Wraps an owned standard string without copying it.
    fn from(s: std::string::String) -> Self {
        String(s)
    }
}

#[cfg(feature = "fuzzing")]
impl From<&String> for String {
    /// Produces an independent copy of `s`.
    fn from(s: &String) -> Self {
        Self(s.0.clone())
    }
}

#[cfg(feature = "fuzzing")]
impl From<&str> for String {
    /// Copies the slice into a newly allocated wrapper.
    fn from(s: &str) -> Self {
        Self(s.to_owned())
    }
}
82
// Equality between the fuzzing `String` wrapper and the standard text types,
// in both directions. Every comparison is done on the underlying `&str`.

#[cfg(feature = "fuzzing")]
impl PartialEq<str> for String {
    fn eq(&self, other: &str) -> bool {
        self.0.as_str() == other
    }
}

#[cfg(feature = "fuzzing")]
impl PartialEq<&str> for String {
    fn eq(&self, other: &&str) -> bool {
        self.0.as_str() == *other
    }
}

#[cfg(feature = "fuzzing")]
impl PartialEq<String> for &str {
    fn eq(&self, other: &String) -> bool {
        *self == other.0.as_str()
    }
}

#[cfg(feature = "fuzzing")]
impl PartialEq<String> for str {
    fn eq(&self, other: &String) -> bool {
        self == other.0.as_str()
    }
}

#[cfg(feature = "fuzzing")]
impl PartialEq<std::string::String> for String {
    fn eq(&self, other: &std::string::String) -> bool {
        self.0.as_str() == other.as_str()
    }
}

#[cfg(feature = "fuzzing")]
impl PartialEq<String> for std::string::String {
    fn eq(&self, other: &String) -> bool {
        self.as_str() == other.0.as_str()
    }
}
124
#[cfg(feature = "fuzzing")]
impl<'a> arbitrary::Arbitrary<'a> for String {
    /// Generates an ASCII identifier of 1 to 10 characters, possibly prefixed with `_`.
    ///
    /// The first character is a letter or `_`; the rest are letters, digits or `_`.
    /// If the result is a keyword, a reserved word, or the lone `_`, an extra `_` is
    /// prepended so the output is still usable as an identifier.
    ///
    /// # Errors
    /// Propagates any error reported by `u` while drawing the length or characters.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        const HEAD: &[u8] = b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
        const TAIL: &[u8] = b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        // Words reserved by the language that `KeywordToken` has no variant for.
        // They are not valid identifiers either, so they must be escaped as well.
        const RESERVED: &[&str] = &[
            "abstract", "become", "do", "final", "gen", "macro", "override", "priv", "typeof",
            "unsized", "virtual",
        ];

        let len = u.int_in_range(1..=10)?;
        // One spare byte so the optional `_` prefix never forces a reallocation.
        let mut s = std::string::String::with_capacity(len + 1);
        s.push(char::from(*u.choose(HEAD)?));
        for _ in 1..len {
            s.push(char::from(*u.choose(TAIL)?));
        }

        let is_reserved =
            KeywordToken::try_from(s.as_str()).is_ok() || RESERVED.contains(&s.as_str());
        if is_reserved || s == "_" {
            s.insert(0, '_');
        }
        Ok(String(s))
    }
}
142
/// A string that was checked to be a valid Rust identifier when it was constructed.
///
/// The only constructor in this file, `Identifier::new`, runs the text through
/// `check_ident` before wrapping it.
#[cfg(feature = "checked-ident")]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Identifier(String);
147
#[cfg(feature = "checked-ident")]
impl fmt::Display for Identifier {
    /// Writes the identifier text exactly as stored.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(name) = self;
        f.write_str(name)
    }
}
154
#[cfg(feature = "checked-ident")]
impl Deref for Identifier {
    type Target = String;

    /// Gives read-only access to the wrapped string.
    fn deref(&self) -> &String {
        let Self(name) = self;
        name
    }
}
163
#[cfg(feature = "checked-ident")]
impl Identifier {
    /// Validates `ident` with [`check_ident`] and wraps it on success.
    ///
    /// # Errors
    /// Returns the converted string back to the caller when validation fails.
    pub fn new(ident: impl Into<String>) -> Result<Self, String> {
        check_ident(ident).map(Self)
    }

    /// Borrows the identifier text as a string slice.
    pub fn as_str(&self) -> &str {
        let Self(name) = self;
        name
    }
}
175
/// Checks that `maybe_ident` is a valid (non-raw) Rust identifier.
///
/// Accepted shapes follow the language grammar:
/// * an `XID_Start` character followed by any number of `XID_Continue` characters, or
/// * `_` followed by at least one `XID_Continue` character (so `_foo` and `__` pass,
///   while the lone `_` does not).
///
/// Words recognised by [`KeywordToken`] are rejected.
///
/// # Errors
/// Returns the converted string back as `Err` when it is empty, malformed, or a keyword.
#[cfg(feature = "checked-ident")]
pub fn check_ident(maybe_ident: impl Into<String>) -> Result<String, String> {
    let ident = maybe_ident.into();
    let mut chars = ident.chars();
    let Some(first) = chars.next() else {
        return Err(ident);
    };

    // `_` is not `XID_Start`, but it may begin an identifier as long as
    // something follows it.
    let valid_start = match first {
        '_' => ident.len() > 1,
        other => unicode_ident::is_xid_start(other),
    };
    let valid_rest = chars.all(unicode_ident::is_xid_continue);
    let is_keyword = KeywordToken::try_from(&ident[..]).is_ok();

    if valid_start && valid_rest && !is_keyword {
        Ok(ident)
    } else {
        Err(ident)
    }
}
195
/// Keywords known to this crate, one variant per keyword.
///
/// `Self_` stands for both `self` and `Self` when converting from text, and is
/// always rendered as `self`.
#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum KeywordToken {
    As,
    Async,
    Await,
    Box,
    Break,
    Const,
    Continue,
    Crate,
    Dyn,
    Else,
    Enum,
    Extern,
    False,
    Fn,
    For,
    If,
    Impl,
    In,
    Let,
    Loop,
    Match,
    Mod,
    Move,
    Mut,
    Pub,
    Ref,
    Return,
    Self_,
    Static,
    Struct,
    Super,
    Trait,
    True,
    Try,
    Type,
    Unsafe,
    Use,
    Where,
    While,
    Yield,
}

impl fmt::Display for KeywordToken {
    /// Writes the keyword's source spelling (lowercase).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let spelling = match self {
            Self::As => "as",
            Self::Async => "async",
            Self::Await => "await",
            Self::Box => "box",
            Self::Break => "break",
            Self::Const => "const",
            Self::Continue => "continue",
            Self::Crate => "crate",
            Self::Dyn => "dyn",
            Self::Else => "else",
            Self::Enum => "enum",
            Self::Extern => "extern",
            Self::False => "false",
            Self::Fn => "fn",
            Self::For => "for",
            Self::If => "if",
            Self::Impl => "impl",
            Self::In => "in",
            Self::Let => "let",
            Self::Loop => "loop",
            Self::Match => "match",
            Self::Mod => "mod",
            Self::Move => "move",
            Self::Mut => "mut",
            Self::Pub => "pub",
            Self::Ref => "ref",
            Self::Return => "return",
            Self::Self_ => "self",
            Self::Static => "static",
            Self::Struct => "struct",
            Self::Super => "super",
            Self::Trait => "trait",
            Self::True => "true",
            Self::Try => "try",
            Self::Type => "type",
            Self::Unsafe => "unsafe",
            Self::Use => "use",
            Self::Where => "where",
            Self::While => "while",
            Self::Yield => "yield",
        };
        f.write_str(spelling)
    }
}

impl TryFrom<&str> for KeywordToken {
    type Error = ();

    /// Maps a keyword's exact spelling to its variant.
    ///
    /// # Errors
    /// Returns `Err(())` for any text that is not one of the listed keywords.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let keyword = match value {
            "as" => Self::As,
            "async" => Self::Async,
            "await" => Self::Await,
            "box" => Self::Box,
            "break" => Self::Break,
            "const" => Self::Const,
            "continue" => Self::Continue,
            "crate" => Self::Crate,
            "dyn" => Self::Dyn,
            "else" => Self::Else,
            "enum" => Self::Enum,
            "extern" => Self::Extern,
            "false" => Self::False,
            "fn" => Self::Fn,
            "for" => Self::For,
            "if" => Self::If,
            "impl" => Self::Impl,
            "in" => Self::In,
            "let" => Self::Let,
            "loop" => Self::Loop,
            "match" => Self::Match,
            "mod" => Self::Mod,
            "move" => Self::Move,
            "mut" => Self::Mut,
            "pub" => Self::Pub,
            "ref" => Self::Ref,
            "return" => Self::Return,
            // Both the value and the type spelling share one variant.
            "self" | "Self" => Self::Self_,
            "static" => Self::Static,
            "struct" => Self::Struct,
            "super" => Self::Super,
            "trait" => Self::Trait,
            "true" => Self::True,
            "try" => Self::Try,
            "type" => Self::Type,
            "unsafe" => Self::Unsafe,
            "use" => Self::Use,
            "where" => Self::Where,
            "while" => Self::While,
            "yield" => Self::Yield,
            _ => return Err(()),
        };
        Ok(keyword)
    }
}
337
/// Binary operator symbols.
///
/// The enum carries no data, so it is `Copy` like the other field-less token
/// enums in this file. Note that `Caret` and `BitXor` are both rendered as `^`.
#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BinOpToken {
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `^`
    Caret,
    /// `&&`
    LazyAnd,
    /// `||`
    LazyOr,
    /// `&`
    BitAnd,
    /// `|`
    BitOr,
    /// `^`
    BitXor,
    /// `<<`
    Shl,
    /// `>>`
    Shr,
}

impl fmt::Display for BinOpToken {
    /// Writes the operator's source spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = match self {
            Self::Plus => "+",
            Self::Minus => "-",
            Self::Star => "*",
            Self::Slash => "/",
            Self::Percent => "%",
            Self::Caret => "^",
            Self::LazyAnd => "&&",
            Self::LazyOr => "||",
            Self::BitAnd => "&",
            Self::BitOr => "|",
            Self::BitXor => "^",
            Self::Shl => "<<",
            Self::Shr => ">>",
        };
        f.write_str(symbol)
    }
}
388
/// Bracket kinds that can open or close a group of tokens.
#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Delimiter {
    /// ()
    Parenthesis,
    /// {}
    Brace,
    /// []
    Bracket,
    /// A delimiter with no textual form; both sides are the empty string.
    Invisible,
}

impl Delimiter {
    /// Opening and closing text of this delimiter, in that order.
    fn open_close(self) -> (&'static str, &'static str) {
        match self {
            Self::Parenthesis => ("(", ")"),
            Self::Brace => ("{", "}"),
            Self::Bracket => ("[", "]"),
            Self::Invisible => ("", ""),
        }
    }

    /// Text that opens this delimiter (empty for `Invisible`).
    pub fn open(&self) -> &'static str {
        let (opening, _) = self.open_close();
        opening
    }

    /// Text that closes this delimiter (empty for `Invisible`).
    pub fn close(&self) -> &'static str {
        let (_, closing) = self.open_close();
        closing
    }
}
420
#[cfg(feature = "tokenize")]
impl From<Delimiter> for proc_macro2::Delimiter {
    /// Maps each delimiter to its `proc_macro2` counterpart; `Invisible` becomes `None`.
    fn from(delim: Delimiter) -> Self {
        use proc_macro2::Delimiter as Target;

        match delim {
            Delimiter::Parenthesis => Target::Parenthesis,
            Delimiter::Brace => Target::Brace,
            Delimiter::Bracket => Target::Bracket,
            Delimiter::Invisible => Target::None,
        }
    }
}
432
433#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))]
434#[derive(Debug, Clone, PartialEq, Eq, Hash)]
435pub enum Token {
436    /// `=`
437    Eq,
438    /// `<`
439    Lt,
440    /// `<=`
441    Le,
442    /// `==`
443    EqEq,
444    /// `!=`
445    Ne,
446    /// `>=`
447    Ge,
448    /// `>`
449    Gt,
450    /// `&`
451    And,
452    /// `|`
453    Or,
454    /// `!`
455    Not,
456    /// `~`
457    Tilde,
458    BinOp(BinOpToken),
459    BinOpEq(BinOpToken),
460    /* Structural symbols */
461    /// `@`
462    At,
463    /// `.`
464    Dot,
465    /// `..`
466    DotDot,
467    /// `...`
468    DotDotDot,
469    /// `..=`
470    DotDotEq,
471    /// `,`
472    Comma,
473    /// `;`
474    Semi,
475    /// `:`
476    Colon,
477    /// `::`
478    ModSep,
479    /// `<-`
480    LArrow,
481    /// `->`
482    RArrow,
483    /// `=>`
484    FatArrow,
485    /// `#`
486    Pound,
487    /// `$`
488    Dollar,
489    /// `?`
490    Question,
491    /// `'`
492    SingleQuote,
493    OpenDelim(Delimiter),
494    CloseDelim(Delimiter),
495    Lit(Lit),
496    Ident(String),
497    Lifetime(String),
498    Keyword(KeywordToken),
499    /// Note that this variant outputs the stored string as it is (without displaying a leading `///`).
500    DocComment(String),
501    /// When print this variant as an element of a `TokenStream`, it is displayed combined with the following tokens (no spacing).
502    Joint(Box<Token>),
503    Eof,
504}
505
506impl fmt::Display for Token {
507    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
508        match self {
509            Self::Eq => write!(f, "="),
510            Self::Lt => write!(f, "<"),
511            Self::Le => write!(f, "<="),
512            Self::EqEq => write!(f, "=="),
513            Self::Ne => write!(f, "!="),
514            Self::Ge => write!(f, ">="),
515            Self::Gt => write!(f, ">"),
516            Self::And => write!(f, "&"),
517            Self::Or => write!(f, "|"),
518            Self::Not => write!(f, "!"),
519            Self::Tilde => write!(f, "~"),
520            Self::BinOp(op) => write!(f, "{op}"),
521            Self::BinOpEq(op) => write!(f, "{op}="),
522            Self::At => write!(f, "@"),
523            Self::Dot => write!(f, "."),
524            Self::DotDot => write!(f, ".."),
525            Self::DotDotDot => write!(f, "..."),
526            Self::DotDotEq => write!(f, "..="),
527            Self::Comma => write!(f, ","),
528            Self::Semi => write!(f, ";"),
529            Self::Colon => write!(f, ":"),
530            Self::ModSep => write!(f, "::"),
531            Self::LArrow => write!(f, "<-"),
532            Self::RArrow => write!(f, "->"),
533            Self::FatArrow => write!(f, "=>"),
534            Self::Pound => write!(f, "#"),
535            Self::Dollar => write!(f, "$"),
536            Self::Question => write!(f, "?"),
537            Self::SingleQuote => write!(f, "'"),
538            Self::OpenDelim(delim) => write!(f, "{}", delim.open()),
539            Self::CloseDelim(delim) => write!(f, "{}", delim.close()),
540            Self::Lit(lit) => write!(f, "{lit}"),
541            Self::Ident(ident) => write!(f, "{ident}"),
542            Self::Lifetime(lifetime) => write!(f, "'{lifetime}"),
543            Self::Keyword(keyword) => write!(f, "{keyword}"),
544            Self::DocComment(comment) => write!(f, "{comment}"),
545            Self::Joint(token) => write!(f, "{token}"),
546            Self::Eof => write!(f, ""),
547        }
548    }
549}
550
551impl Token {
552    pub fn lit(lit: impl Into<Lit>) -> Self {
553        Self::Lit(lit.into())
554    }
555
556    pub fn verbatim(lit: impl Into<String>) -> Self {
557        Self::DocComment(lit.into())
558    }
559
560    pub fn ident(ident: impl Into<String>) -> Self {
561        Self::Ident(ident.into())
562    }
563
564    #[cfg(feature = "checked-ident")]
565    pub fn checked_ident(ident: impl Into<String>) -> Result<Self, String> {
566        let ident = check_ident(ident)?;
567        Ok(Self::Ident(ident))
568    }
569
570    /// `Token::lifetime("a")` => `'a`
571    pub fn lifetime(lifetime: impl Into<String>) -> Self {
572        Self::Lifetime(lifetime.into())
573    }
574
575    pub const fn is_keyword(&self) -> bool {
576        matches!(self, Self::Keyword(_))
577    }
578
579    pub const fn is_ident(&self) -> bool {
580        matches!(self, Self::Ident(_))
581    }
582
583    pub const fn is_lit(&self) -> bool {
584        matches!(self, Self::Lit(_))
585    }
586
587    pub const fn is_joint(&self) -> bool {
588        matches!(self, Self::Joint(_))
589    }
590
591    pub const fn is_delimiter(&self) -> bool {
592        matches!(self, Self::OpenDelim(_) | Self::CloseDelim(_))
593    }
594
595    pub const fn is_square_bracket(&self) -> bool {
596        matches!(self, Self::Lt | Self::Gt)
597    }
598
599    pub fn into_joint(self) -> Self {
600        match self {
601            Self::Joint(_) => self,
602            _ => Self::Joint(Box::new(self)),
603        }
604    }
605
606    pub fn as_unjoint(&self) -> &Self {
607        match self {
608            Self::Joint(token) => token,
609            _ => self,
610        }
611    }
612}
613
614/// This structure is not related to `proc_macro2::TokenStream`.
615/// However, it can be converted to `proc_marco2::TokenStream` by enabling the `quote` feature.
616#[derive(Debug, Clone, PartialEq, Eq, Hash, Default)]
617pub struct TokenStream(Vec<Token>);
618
#[cfg(feature = "fuzzing")]
impl<'a> arbitrary::Arbitrary<'a> for TokenStream {
    /// Draws up to 20 arbitrary tokens and keeps the ones that are safe to emit alone.
    ///
    /// Delimiters, `<` / `>`, `Eof` and the bare single quote are discarded, so the
    /// resulting stream may be shorter than the number of tokens drawn.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let draws = u.int_in_range(0..=20)?;
        let mut kept = Vec::new();
        for _ in 0..draws {
            let candidate = <Token as arbitrary::Arbitrary>::arbitrary(u)?;
            let rejected = candidate.is_delimiter()
                || candidate.is_square_bracket()
                || candidate == Token::Eof
                || candidate == Token::SingleQuote;
            if !rejected {
                kept.push(candidate);
            }
        }
        Ok(Self(kept))
    }
}
637
638impl fmt::Display for TokenStream {
639    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
640        let mut previous_was_joint = false;
641        for (i, token) in self.0.iter().enumerate() {
642            if i > 0 && !previous_was_joint {
643                write!(f, " ")?;
644            }
645            write!(f, "{token}")?;
646            previous_was_joint = token.is_joint();
647        }
648        Ok(())
649    }
650}
651
652impl From<Vec<Token>> for TokenStream {
653    fn from(tokens: Vec<Token>) -> Self {
654        Self(tokens)
655    }
656}
657
658impl From<Token> for TokenStream {
659    fn from(token: Token) -> Self {
660        Self(vec![token])
661    }
662}
663
664impl Deref for TokenStream {
665    type Target = Vec<Token>;
666
667    fn deref(&self) -> &Self::Target {
668        &self.0
669    }
670}
671
672impl DerefMut for TokenStream {
673    fn deref_mut(&mut self) -> &mut Self::Target {
674        &mut self.0
675    }
676}
677
678impl IntoIterator for TokenStream {
679    type Item = Token;
680    type IntoIter = std::vec::IntoIter<Self::Item>;
681
682    fn into_iter(self) -> Self::IntoIter {
683        self.0.into_iter()
684    }
685}
686
687impl TokenStream {
688    pub fn new() -> Self {
689        Self(vec![])
690    }
691
692    pub fn and(mut self, other: Self) -> Self {
693        self.extend(other);
694        self
695    }
696
697    pub fn aggregate(tss: impl IntoIterator<Item = TokenStream>) -> Self {
698        let mut tokens = vec![];
699        for ts in tss {
700            tokens.extend(ts);
701        }
702        Self(tokens)
703    }
704
705    /// Convert the last token to a joint token.
706    pub fn into_joint(mut self) -> Self {
707        if let Some(last) = self.0.pop() {
708            if last.is_joint() {
709                self.0.push(last);
710            } else {
711                self.0.push(last.into_joint());
712            }
713        }
714        self
715    }
716}