Skip to main content

java_lang/
token.rs

1use std::{
2    fmt,
3    hash::{Hash, Hasher},
4};
5
6use crate::span::Span;
7
8/// A token produced by the lexer.
9#[derive(Debug, Clone)]
10pub struct Token {
11    pub kind: TokenKind,
12    pub span: Span,
13}
14
15impl Token {
16    pub fn new(kind: TokenKind, span: Span) -> Self {
17        Token { kind, span }
18    }
19}
20
/// The kind of token.
///
/// Literal, identifier and comment variants carry a payload (`String`, or
/// `bool` for `BoolLit`). Every other variant is a unit variant standing for
/// one fixed keyword, separator or operator; the `Display` impl for this type
/// gives the exact text each of them stands for.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // Literals
    /// Integer literal; `Display` prints the payload verbatim.
    IntegerLit(String),
    /// Floating-point literal; `Display` prints the payload verbatim.
    FloatLit(String),
    /// Boolean literal (`true` / `false`).
    BoolLit(bool),
    /// Character literal. `Display` adds the surrounding single quotes, so the
    /// payload presumably excludes them — TODO confirm against the lexer.
    CharLit(String),
    /// String literal. `Display` adds the surrounding double quotes, so the
    /// payload presumably excludes them — TODO confirm against the lexer.
    StringLit(String),
    /// The `null` literal.
    NullLit,

    // Identifiers and keywords
    /// Identifier; the payload is the identifier text.
    Ident(String),
    // Reserved keywords
    // NOTE(review): `Module` and `Requires` are grouped here, but the Java
    // Language Specification lists `module` and `requires` as contextual
    // keywords — confirm whether this grouping is intentional.
    Abstract,
    Assert,
    Boolean,
    Break,
    Byte,
    Case,
    Catch,
    Char,
    Class,
    Const,
    Continue,
    Default,
    Do,
    Double,
    Else,
    Enum,
    Extends,
    Final,
    Finally,
    Float,
    For,
    Goto,
    If,
    Implements,
    Import,
    Instanceof,
    Int,
    Interface,
    Long,
    Module,
    Native,
    New,
    Package,
    Private,
    Protected,
    Public,
    Requires,
    Return,
    Short,
    Static,
    Strictfp,
    Super,
    Switch,
    Synchronized,
    This,
    Throw,
    Throws,
    Transient,
    Try,
    Void,
    Volatile,
    While,
    /// The single underscore `_`.
    Underscore,
    // Contextual keywords
    Open,
    Opens,
    Exports,
    Provides,
    To,
    Transitive,
    Uses,
    With,
    Yield,
    Var,
    Record,
    Sealed,
    Permits,
    /// Displayed as the hyphenated word `non-sealed`.
    NonSealed,
    When,

    // Separators / punctuation
    LParen,     // (
    RParen,     // )
    LBrace,     // {
    RBrace,     // }
    LBracket,   // [
    RBracket,   // ]
    Semicolon,  // ;
    Comma,      // ,
    Dot,        // .
    DotDot,     // ..
    Ellipsis,   // ...
    At,         // @
    ColonColon, // ::

    // Operators
    Eq,         // =
    Gt,         // >
    Lt,         // <
    Bang,       // !
    Tilde,      // ~
    Question,   // ?
    Colon,      // :
    Arrow,      // ->
    EqEq,       // ==
    GtEq,       // >=
    LtEq,       // <=
    BangEq,     // !=
    AmpAmp,     // &&
    PipePipe,   // ||
    PlusPlus,   // ++
    MinusMinus, // --
    Plus,       // +
    Minus,      // -
    Star,       // *
    Slash,      // /
    Amp,        // &
    Pipe,       // |
    Caret,      // ^
    Percent,    // %
    LtLt,       // <<
    GtGt,       // >>
    GtGtGt,     // >>>
    PlusEq,     // +=
    MinusEq,    // -=
    StarEq,     // *=
    SlashEq,    // /=
    AmpEq,      // &=
    PipeEq,     // |=
    CaretEq,    // ^=
    PercentEq,  // %=
    LtLtEq,     // <<=
    GtGtEq,     // >>=
    GtGtGtEq,   // >>>=

    // Comments
    // `Display` prints each comment payload verbatim, with no delimiters added;
    // whether the payload itself contains the delimiters is decided by the lexer.
    LineComment(String),     // // ...
    BlockComment(String),    // /* ... */
    DocLineComment(String),  // /// ...
    DocBlockComment(String), // /** ... */

    // End of input
    /// End-of-input marker; displayed as `<eof>`.
    Eof,
}
169
170impl fmt::Display for TokenKind {
171    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
172        match self {
173            Self::IntegerLit(s) => write!(f, "{}", s),
174            Self::FloatLit(s) => write!(f, "{}", s),
175            Self::BoolLit(b) => write!(f, "{}", b),
176            Self::CharLit(s) => write!(f, "'{}'", s),
177            Self::StringLit(s) => write!(f, "\"{}\"", s),
178            Self::NullLit => write!(f, "null"),
179            Self::Ident(s) => write!(f, "{}", s),
180
181            Self::Abstract => write!(f, "abstract"),
182            Self::Assert => write!(f, "assert"),
183            Self::Boolean => write!(f, "boolean"),
184            Self::Break => write!(f, "break"),
185            Self::Byte => write!(f, "byte"),
186            Self::Case => write!(f, "case"),
187            Self::Catch => write!(f, "catch"),
188            Self::Char => write!(f, "char"),
189            Self::Class => write!(f, "class"),
190            Self::Const => write!(f, "const"),
191            Self::Continue => write!(f, "continue"),
192            Self::Default => write!(f, "default"),
193            Self::Do => write!(f, "do"),
194            Self::Double => write!(f, "double"),
195            Self::Else => write!(f, "else"),
196            Self::Enum => write!(f, "enum"),
197            Self::Extends => write!(f, "extends"),
198            Self::Final => write!(f, "final"),
199            Self::Finally => write!(f, "finally"),
200            Self::Float => write!(f, "float"),
201            Self::For => write!(f, "for"),
202            Self::Goto => write!(f, "goto"),
203            Self::If => write!(f, "if"),
204            Self::Implements => write!(f, "implements"),
205            Self::Import => write!(f, "import"),
206            Self::Instanceof => write!(f, "instanceof"),
207            Self::Int => write!(f, "int"),
208            Self::Interface => write!(f, "interface"),
209            Self::Long => write!(f, "long"),
210            Self::Module => write!(f, "module"),
211            Self::Native => write!(f, "native"),
212            Self::New => write!(f, "new"),
213            Self::Package => write!(f, "package"),
214            Self::Private => write!(f, "private"),
215            Self::Protected => write!(f, "protected"),
216            Self::Public => write!(f, "public"),
217            Self::Requires => write!(f, "requires"),
218            Self::Return => write!(f, "return"),
219            Self::Short => write!(f, "short"),
220            Self::Static => write!(f, "static"),
221            Self::Strictfp => write!(f, "strictfp"),
222            Self::Super => write!(f, "super"),
223            Self::Switch => write!(f, "switch"),
224            Self::Synchronized => write!(f, "synchronized"),
225            Self::This => write!(f, "this"),
226            Self::Throw => write!(f, "throw"),
227            Self::Throws => write!(f, "throws"),
228            Self::Transient => write!(f, "transient"),
229            Self::Try => write!(f, "try"),
230            Self::Void => write!(f, "void"),
231            Self::Volatile => write!(f, "volatile"),
232            Self::While => write!(f, "while"),
233            Self::Underscore => write!(f, "_"),
234
235            Self::Open => write!(f, "open"),
236            Self::Opens => write!(f, "opens"),
237            Self::Exports => write!(f, "exports"),
238            Self::Provides => write!(f, "provides"),
239            Self::To => write!(f, "to"),
240            Self::Transitive => write!(f, "transitive"),
241            Self::Uses => write!(f, "uses"),
242            Self::With => write!(f, "with"),
243            Self::Yield => write!(f, "yield"),
244            Self::Var => write!(f, "var"),
245            Self::Record => write!(f, "record"),
246            Self::Sealed => write!(f, "sealed"),
247            Self::Permits => write!(f, "permits"),
248            Self::NonSealed => write!(f, "non-sealed"),
249            Self::When => write!(f, "when"),
250
251            Self::LParen => write!(f, "("),
252            Self::RParen => write!(f, ")"),
253            Self::LBrace => write!(f, "{{"),
254            Self::RBrace => write!(f, "}}"),
255            Self::LBracket => write!(f, "["),
256            Self::RBracket => write!(f, "]"),
257            Self::Semicolon => write!(f, ";"),
258            Self::Comma => write!(f, ","),
259            Self::Dot => write!(f, "."),
260            Self::DotDot => write!(f, ".."),
261            Self::Ellipsis => write!(f, "..."),
262            Self::At => write!(f, "@"),
263            Self::ColonColon => write!(f, "::"),
264            Self::Eq => write!(f, "="),
265            Self::Gt => write!(f, ">"),
266            Self::Lt => write!(f, "<"),
267            Self::Bang => write!(f, "!"),
268            Self::Tilde => write!(f, "~"),
269            Self::Question => write!(f, "?"),
270            Self::Colon => write!(f, ":"),
271            Self::Arrow => write!(f, "->"),
272            Self::EqEq => write!(f, "=="),
273            Self::GtEq => write!(f, ">="),
274            Self::LtEq => write!(f, "<="),
275            Self::BangEq => write!(f, "!="),
276            Self::AmpAmp => write!(f, "&&"),
277            Self::PipePipe => write!(f, "||"),
278            Self::PlusPlus => write!(f, "++"),
279            Self::MinusMinus => write!(f, "--"),
280            Self::Plus => write!(f, "+"),
281            Self::Minus => write!(f, "-"),
282            Self::Star => write!(f, "*"),
283            Self::Slash => write!(f, "/"),
284            Self::Amp => write!(f, "&"),
285            Self::Pipe => write!(f, "|"),
286            Self::Caret => write!(f, "^"),
287            Self::Percent => write!(f, "%"),
288            Self::LtLt => write!(f, "<<"),
289            Self::GtGt => write!(f, ">>"),
290            Self::GtGtGt => write!(f, ">>>"),
291            Self::PlusEq => write!(f, "+="),
292            Self::MinusEq => write!(f, "-="),
293            Self::StarEq => write!(f, "*="),
294            Self::SlashEq => write!(f, "/="),
295            Self::AmpEq => write!(f, "&="),
296            Self::PipeEq => write!(f, "|="),
297            Self::CaretEq => write!(f, "^="),
298            Self::PercentEq => write!(f, "%="),
299            Self::LtLtEq => write!(f, "<<="),
300            Self::GtGtEq => write!(f, ">>="),
301            Self::GtGtGtEq => write!(f, ">>>="),
302            Self::LineComment(s) => write!(f, "{}", s),
303            Self::BlockComment(s) => write!(f, "{}", s),
304            Self::DocLineComment(s) => write!(f, "{}", s),
305            Self::DocBlockComment(s) => write!(f, "{}", s),
306            Self::Eof => write!(f, "<eof>"),
307        }
308    }
309}
310
311impl Hash for TokenKind {
312    fn hash<H: Hasher>(&self, state: &mut H) {
313        std::mem::discriminant(self).hash(state);
314        match self {
315            Self::Ident(s) => s.hash(state),
316            Self::IntegerLit(s) => s.hash(state),
317            Self::FloatLit(s) => s.hash(state),
318            Self::LineComment(s) => s.hash(state),
319            Self::BlockComment(s) => s.hash(state),
320            Self::DocLineComment(s) => s.hash(state),
321            Self::DocBlockComment(s) => s.hash(state),
322            _ => {}
323        }
324    }
325}
326
/// Check if a string is a reserved Java keyword.
///
/// Returns `true` only for the reserved keywords of the Java Language
/// Specification (§3.9), including the single underscore `_`. Contextual
/// keywords such as `module`, `requires` or `var` are not reserved and yield
/// `false`, as do the literals `true`, `false` and `null`. Matching is
/// case-sensitive.
#[allow(dead_code)]
pub fn is_reserved_keyword(s: &str) -> bool {
    matches!(
        s,
        "abstract"
            | "assert"
            | "boolean"
            | "break"
            | "byte"
            | "case"
            | "catch"
            | "char"
            | "class"
            | "const"
            | "continue"
            | "default"
            | "do"
            | "double"
            | "else"
            | "enum"
            | "extends"
            | "final"
            | "finally"
            | "float"
            | "for"
            | "goto"
            | "if"
            | "implements"
            | "import"
            | "instanceof"
            | "int"
            | "interface"
            | "long"
            | "native"
            | "new"
            | "package"
            | "private"
            | "protected"
            | "public"
            | "return"
            | "short"
            | "static"
            | "strictfp"
            | "super"
            | "switch"
            | "synchronized"
            | "this"
            | "throw"
            | "throws"
            | "transient"
            | "try"
            | "void"
            | "volatile"
            | "while"
            | "_"
    )
}
386
/// Check if a string is a contextual keyword.
///
/// Returns `true` for the contextual keywords of the Java Language
/// Specification (§3.9): the module-declaration words (`module`, `open`,
/// `opens`, `exports`, `provides`, `requires`, `to`, `transitive`, `uses`,
/// `with`) and `var`, `yield`, `record`, `sealed`, `permits`, `non-sealed`,
/// `when`. Matching is case-sensitive.
#[allow(dead_code)]
pub fn is_contextual_keyword(s: &str) -> bool {
    matches!(
        s,
        "open"
            | "opens"
            | "exports"
            | "provides"
            | "requires"
            | "to"
            | "transitive"
            | "uses"
            | "with"
            | "yield"
            | "var"
            | "module"
            | "record"
            | "sealed"
            | "permits"
            | "non-sealed"
            | "when"
    )
}
410
411/// Map a keyword string to its token kind. Returns None for non-keywords.
412pub fn keyword_to_token(s: &str) -> Option<TokenKind> {
413    match s {
414        "abstract" => Some(TokenKind::Abstract),
415        "assert" => Some(TokenKind::Assert),
416        "boolean" => Some(TokenKind::Boolean),
417        "break" => Some(TokenKind::Break),
418        "byte" => Some(TokenKind::Byte),
419        "case" => Some(TokenKind::Case),
420        "catch" => Some(TokenKind::Catch),
421        "char" => Some(TokenKind::Char),
422        "class" => Some(TokenKind::Class),
423        "const" => Some(TokenKind::Const),
424        "continue" => Some(TokenKind::Continue),
425        "default" => Some(TokenKind::Default),
426        "do" => Some(TokenKind::Do),
427        "double" => Some(TokenKind::Double),
428        "else" => Some(TokenKind::Else),
429        "enum" => Some(TokenKind::Enum),
430        "extends" => Some(TokenKind::Extends),
431        "final" => Some(TokenKind::Final),
432        "finally" => Some(TokenKind::Finally),
433        "float" => Some(TokenKind::Float),
434        "for" => Some(TokenKind::For),
435        "goto" => Some(TokenKind::Goto),
436        "if" => Some(TokenKind::If),
437        "implements" => Some(TokenKind::Implements),
438        "import" => Some(TokenKind::Import),
439        "instanceof" => Some(TokenKind::Instanceof),
440        "int" => Some(TokenKind::Int),
441        "interface" => Some(TokenKind::Interface),
442        "long" => Some(TokenKind::Long),
443        "native" => Some(TokenKind::Native),
444        "new" => Some(TokenKind::New),
445        "package" => Some(TokenKind::Package),
446        "private" => Some(TokenKind::Private),
447        "protected" => Some(TokenKind::Protected),
448        "public" => Some(TokenKind::Public),
449        "requires" => Some(TokenKind::Requires),
450        "return" => Some(TokenKind::Return),
451        "short" => Some(TokenKind::Short),
452        "static" => Some(TokenKind::Static),
453        "strictfp" => Some(TokenKind::Strictfp),
454        "super" => Some(TokenKind::Super),
455        "switch" => Some(TokenKind::Switch),
456        "synchronized" => Some(TokenKind::Synchronized),
457        "this" => Some(TokenKind::This),
458        "throw" => Some(TokenKind::Throw),
459        "throws" => Some(TokenKind::Throws),
460        "transient" => Some(TokenKind::Transient),
461        "try" => Some(TokenKind::Try),
462        "void" => Some(TokenKind::Void),
463        "volatile" => Some(TokenKind::Volatile),
464        "while" => Some(TokenKind::While),
465        "_" => Some(TokenKind::Underscore),
466        // Contextual keywords
467        "open" => Some(TokenKind::Open),
468        "opens" => Some(TokenKind::Opens),
469        "exports" => Some(TokenKind::Exports),
470        "provides" => Some(TokenKind::Provides),
471        "to" => Some(TokenKind::To),
472        "transitive" => Some(TokenKind::Transitive),
473        "uses" => Some(TokenKind::Uses),
474        "with" => Some(TokenKind::With),
475        "yield" => Some(TokenKind::Yield),
476        "var" => Some(TokenKind::Var),
477        "record" => Some(TokenKind::Record),
478        "sealed" => Some(TokenKind::Sealed),
479        "permits" => Some(TokenKind::Permits),
480        "non-sealed" => Some(TokenKind::NonSealed),
481        "when" => Some(TokenKind::When),
482        _ => None,
483    }
484}
485
486/// Check if a token kind is a keyword (reserved or contextual).
487#[allow(dead_code)]
488pub fn is_keyword(kind: &TokenKind) -> bool {
489    matches!(
490        kind,
491        TokenKind::Abstract
492            | TokenKind::Assert
493            | TokenKind::Boolean
494            | TokenKind::Break
495            | TokenKind::Byte
496            | TokenKind::Case
497            | TokenKind::Catch
498            | TokenKind::Char
499            | TokenKind::Class
500            | TokenKind::Const
501            | TokenKind::Continue
502            | TokenKind::Default
503            | TokenKind::Do
504            | TokenKind::Double
505            | TokenKind::Else
506            | TokenKind::Enum
507            | TokenKind::Extends
508            | TokenKind::Final
509            | TokenKind::Finally
510            | TokenKind::Float
511            | TokenKind::For
512            | TokenKind::Goto
513            | TokenKind::If
514            | TokenKind::Implements
515            | TokenKind::Import
516            | TokenKind::Instanceof
517            | TokenKind::Int
518            | TokenKind::Interface
519            | TokenKind::Long
520            | TokenKind::Module
521            | TokenKind::Native
522            | TokenKind::New
523            | TokenKind::Package
524            | TokenKind::Private
525            | TokenKind::Protected
526            | TokenKind::Public
527            | TokenKind::Requires
528            | TokenKind::Return
529            | TokenKind::Short
530            | TokenKind::Static
531            | TokenKind::Strictfp
532            | TokenKind::Super
533            | TokenKind::Switch
534            | TokenKind::Synchronized
535            | TokenKind::This
536            | TokenKind::Throw
537            | TokenKind::Throws
538            | TokenKind::Transient
539            | TokenKind::Try
540            | TokenKind::Void
541            | TokenKind::Volatile
542            | TokenKind::While
543            | TokenKind::Underscore
544    )
545}
546
547/// Check if a token kind is a primitive type keyword.
548pub fn is_primitive_type(kind: &TokenKind) -> bool {
549    matches!(
550        kind,
551        TokenKind::Byte
552            | TokenKind::Short
553            | TokenKind::Int
554            | TokenKind::Long
555            | TokenKind::Char
556            | TokenKind::Float
557            | TokenKind::Double
558            | TokenKind::Boolean
559    )
560}
561
562/// Check if a token kind can start a type.
563pub fn can_start_type(kind: &TokenKind) -> bool {
564    is_primitive_type(kind)
565        || matches!(
566            kind,
567            TokenKind::Ident(_)
568                | TokenKind::At // annotation
569                | TokenKind::Void
570                // contextual keywords that can be types (when used as identifiers in certain contexts)
571                | TokenKind::Var
572                | TokenKind::Record
573                | TokenKind::Sealed
574                | TokenKind::Yield
575        )
576}