Skip to main content

bock_lexer/
token.rs

//! Token types for the Bock lexer.
2
3use bock_source::Span;
4use std::fmt;
5
/// Classification of a single lexical token.
///
/// Variants carry no payload; any associated text lives in the token that
/// wraps the kind.
///
/// # `Shr` versus `Compose`
/// The right-shift operator and the function-composition operator are both
/// spelled `>>` in source. The lexer always produces `Shr`; the parser
/// re-interprets that token as `Compose` when it appears in an expression
/// context where the shift reading makes no sense.
#[must_use]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // ── Literals ────────────────────────────────────────────────────────────
    /// Integer literal, e.g. `42`, `0xFF`, `0o77`, `0b1010`.
    IntLiteral,
    /// Floating-point literal, e.g. `3.14`, `1.0e10`.
    FloatLiteral,
    /// Ordinary string literal, e.g. `"hello"`.
    StringLiteral,
    /// Raw string literal, e.g. `r"no escapes"`.
    RawStringLiteral,
    /// Multi-line string literal, written `"""..."""`.
    MultiLineStringLiteral,
    /// Raw multi-line string literal, written `r"""..."""`.
    RawMultiLineStringLiteral,
    /// Character literal, e.g. `'a'`.
    CharLiteral,
    /// Boolean literal `true` / `false`; both spellings are mapped to this
    /// kind by the keyword table.
    BoolLiteral,

    // ── String interpolation (produced by the string sub-lexer, P1.3) ──────
    /// Literal text lying between interpolation expressions.
    StringLiteralPart,
    /// `${` — opens an interpolated expression inside a string.
    InterpolationStart,
    /// `}` — closes an interpolated expression inside a string.
    InterpolationEnd,

    // ── Identifiers ─────────────────────────────────────────────────────────
    /// Lowercase or underscore-led identifier, e.g. `foo`, `_bar`.
    Ident,
    /// Type identifier (leading uppercase letter), e.g. `Foo`, `MyType`.
    TypeIdent,

    // ── Keywords ────────────────────────────────────────────────────────────
    /// `fn`
    Fn,
    /// `let`
    Let,
    /// `mut`
    Mut,
    /// `const`
    Const,
    /// `if`
    If,
    /// `else`
    Else,
    /// `match`
    Match,
    /// `for`
    For,
    /// `in`
    In,
    /// `while`
    While,
    /// `loop`
    Loop,
    /// `break`
    Break,
    /// `continue`
    Continue,
    /// `return`
    Return,
    /// `guard`
    Guard,
    /// `with`
    With,
    /// `handling`
    Handling,
    /// `handle`
    Handle,
    /// `record`
    Record,
    /// `enum`
    Enum,
    /// `class`
    Class,
    /// `trait`
    Trait,
    /// `impl`
    Impl,
    /// `self` (lowercase spelling)
    SelfLower,
    /// `Self` (uppercase spelling)
    SelfUpper,
    /// `module`
    Module,
    /// `use`
    Use,
    /// `public`
    Public,
    /// `internal`
    Internal,
    /// `native`
    Native,
    /// `async`
    Async,
    /// `await`
    Await,
    /// `effect`
    Effect,
    /// `platform`
    Platform,
    /// `where`
    Where,
    /// `type`
    Type,
    /// `Ok` — standard result variant keyword.
    Ok_,
    /// `Err` — standard result variant keyword.
    Err_,
    /// `Some` — standard option variant keyword.
    Some_,
    /// `None` — standard option variant keyword.
    None_,
    /// `property`
    Property,
    /// `forall`
    Forall,
    /// `unreachable`
    Unreachable,
    /// `is` — type-test / pattern keyword.
    Is,

    // ── Arithmetic operators ─────────────────────────────────────────────────
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,
    /// `**`
    Power,

    // ── Comparison operators ─────────────────────────────────────────────────
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Lte,
    /// `>=`
    Gte,

    // ── Logical / bitwise operators ──────────────────────────────────────────
    /// `&&`
    And,
    /// `||`
    Or,
    /// `!`
    Not,
    /// `&`
    BitAnd,
    /// `|`
    BitOr,
    /// `^`
    BitXor,
    /// `~`
    BitNot,
    /// `<<`
    Shl,
    /// `>>` — the only kind the lexer produces for this spelling; the parser
    /// may re-interpret it as `Compose`.
    Shr,
    /// `>>` read as function composition. Never emitted by the lexer
    /// directly; the parser re-interprets `Shr` as `Compose` in expression
    /// position.
    Compose,

    // ── Assignment operators ─────────────────────────────────────────────────
    /// `=`
    Assign,
    /// `+=`
    PlusEq,
    /// `-=`
    MinusEq,
    /// `*=`
    StarEq,
    /// `/=`
    SlashEq,
    /// `%=`
    PercentEq,

    // ── Special operators / punctuation ──────────────────────────────────────
    /// `|>` — pipe operator.
    Pipe,
    /// `=>` — fat arrow (match arms, lambdas).
    FatArrow,
    /// `->` — thin arrow (return-type annotation).
    ThinArrow,
    /// `?` — error propagation / optional chaining.
    Question,
    /// `..` — exclusive range.
    DotDot,
    /// `..=` — inclusive range.
    DotDotEq,
    /// `.`
    Dot,
    /// `_` — wildcard / placeholder.
    Underscore,
    /// `#` — attribute sigil.
    Hash,
    /// `@`
    At,

    // ── Delimiters ───────────────────────────────────────────────────────────
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `;`
    Semicolon,

    // ── Special ──────────────────────────────────────────────────────────────
    /// Significant newline (statement terminator).
    Newline,
    /// `///` doc comment.
    DocComment,
    /// `//!` module doc comment.
    ModuleDocComment,
    /// End of file.
    Eof,
    /// Lexer error token.
    Error,
}
220
221impl fmt::Display for TokenKind {
222    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
223        let s = match self {
224            TokenKind::IntLiteral => "<int>",
225            TokenKind::FloatLiteral => "<float>",
226            TokenKind::StringLiteral => "<string>",
227            TokenKind::RawStringLiteral => "<raw-string>",
228            TokenKind::MultiLineStringLiteral => "<multiline-string>",
229            TokenKind::RawMultiLineStringLiteral => "<raw-multiline-string>",
230            TokenKind::CharLiteral => "<char>",
231            TokenKind::BoolLiteral => "<bool>",
232            TokenKind::StringLiteralPart => "<string-part>",
233            TokenKind::InterpolationStart => "${",
234            TokenKind::InterpolationEnd => "}",
235            TokenKind::Ident => "<ident>",
236            TokenKind::TypeIdent => "<type-ident>",
237            TokenKind::Fn => "fn",
238            TokenKind::Let => "let",
239            TokenKind::Mut => "mut",
240            TokenKind::Const => "const",
241            TokenKind::If => "if",
242            TokenKind::Else => "else",
243            TokenKind::Match => "match",
244            TokenKind::For => "for",
245            TokenKind::In => "in",
246            TokenKind::While => "while",
247            TokenKind::Loop => "loop",
248            TokenKind::Break => "break",
249            TokenKind::Continue => "continue",
250            TokenKind::Return => "return",
251            TokenKind::Guard => "guard",
252            TokenKind::With => "with",
253            TokenKind::Handling => "handling",
254            TokenKind::Handle => "handle",
255            TokenKind::Record => "record",
256            TokenKind::Enum => "enum",
257            TokenKind::Class => "class",
258            TokenKind::Trait => "trait",
259            TokenKind::Impl => "impl",
260            TokenKind::SelfLower => "self",
261            TokenKind::SelfUpper => "Self",
262            TokenKind::Module => "module",
263            TokenKind::Use => "use",
264            TokenKind::Public => "public",
265            TokenKind::Internal => "internal",
266            TokenKind::Native => "native",
267            TokenKind::Async => "async",
268            TokenKind::Await => "await",
269            TokenKind::Effect => "effect",
270            TokenKind::Platform => "platform",
271            TokenKind::Where => "where",
272            TokenKind::Type => "type",
273            TokenKind::Ok_ => "Ok",
274            TokenKind::Err_ => "Err",
275            TokenKind::Some_ => "Some",
276            TokenKind::None_ => "None",
277            TokenKind::Property => "property",
278            TokenKind::Forall => "forall",
279            TokenKind::Unreachable => "unreachable",
280            TokenKind::Is => "is",
281            TokenKind::Plus => "+",
282            TokenKind::Minus => "-",
283            TokenKind::Star => "*",
284            TokenKind::Slash => "/",
285            TokenKind::Percent => "%",
286            TokenKind::Power => "**",
287            TokenKind::Eq => "==",
288            TokenKind::Neq => "!=",
289            TokenKind::Lt => "<",
290            TokenKind::Gt => ">",
291            TokenKind::Lte => "<=",
292            TokenKind::Gte => ">=",
293            TokenKind::And => "&&",
294            TokenKind::Or => "||",
295            TokenKind::Not => "!",
296            TokenKind::BitAnd => "&",
297            TokenKind::BitOr => "|",
298            TokenKind::BitXor => "^",
299            TokenKind::BitNot => "~",
300            TokenKind::Shl => "<<",
301            TokenKind::Shr => ">>",
302            TokenKind::Compose => ">>",
303            TokenKind::Assign => "=",
304            TokenKind::PlusEq => "+=",
305            TokenKind::MinusEq => "-=",
306            TokenKind::StarEq => "*=",
307            TokenKind::SlashEq => "/=",
308            TokenKind::PercentEq => "%=",
309            TokenKind::Pipe => "|>",
310            TokenKind::FatArrow => "=>",
311            TokenKind::ThinArrow => "->",
312            TokenKind::Question => "?",
313            TokenKind::DotDot => "..",
314            TokenKind::DotDotEq => "..=",
315            TokenKind::Dot => ".",
316            TokenKind::Underscore => "_",
317            TokenKind::Hash => "#",
318            TokenKind::At => "@",
319            TokenKind::LParen => "(",
320            TokenKind::RParen => ")",
321            TokenKind::LBracket => "[",
322            TokenKind::RBracket => "]",
323            TokenKind::LBrace => "{",
324            TokenKind::RBrace => "}",
325            TokenKind::Comma => ",",
326            TokenKind::Colon => ":",
327            TokenKind::Semicolon => ";",
328            TokenKind::Newline => "<newline>",
329            TokenKind::DocComment => "///",
330            TokenKind::ModuleDocComment => "//!",
331            TokenKind::Eof => "<eof>",
332            TokenKind::Error => "<error>",
333        };
334        f.write_str(s)
335    }
336}
337
338/// A single lexical token with its kind, source span, and optional literal text.
339#[derive(Debug, Clone, PartialEq, Eq)]
340#[must_use]
341pub struct Token {
342    /// The syntactic kind of this token.
343    pub kind: TokenKind,
344    /// Source location.
345    pub span: Span,
346    /// Raw literal content for tokens where the text is significant
347    /// (string content, numeric literals, identifiers, comments, errors).
348    pub literal: Option<String>,
349}
350
351impl Token {
352    /// Construct a new token.
353    pub fn new(kind: TokenKind, span: Span, literal: Option<String>) -> Self {
354        Self {
355            kind,
356            span,
357            literal,
358        }
359    }
360}
361
362/// Map a source identifier string to its keyword [`TokenKind`], if any.
363///
364/// Returns `None` for ordinary identifiers.
365#[must_use]
366pub fn keyword_lookup(ident: &str) -> Option<TokenKind> {
367    let kind = match ident {
368        "fn" => TokenKind::Fn,
369        "let" => TokenKind::Let,
370        "mut" => TokenKind::Mut,
371        "const" => TokenKind::Const,
372        "if" => TokenKind::If,
373        "else" => TokenKind::Else,
374        "match" => TokenKind::Match,
375        "for" => TokenKind::For,
376        "in" => TokenKind::In,
377        "while" => TokenKind::While,
378        "loop" => TokenKind::Loop,
379        "break" => TokenKind::Break,
380        "continue" => TokenKind::Continue,
381        "return" => TokenKind::Return,
382        "guard" => TokenKind::Guard,
383        "with" => TokenKind::With,
384        "handling" => TokenKind::Handling,
385        "handle" => TokenKind::Handle,
386        "record" => TokenKind::Record,
387        "enum" => TokenKind::Enum,
388        "class" => TokenKind::Class,
389        "trait" => TokenKind::Trait,
390        "impl" => TokenKind::Impl,
391        "self" => TokenKind::SelfLower,
392        "Self" => TokenKind::SelfUpper,
393        "module" => TokenKind::Module,
394        "use" => TokenKind::Use,
395        "public" => TokenKind::Public,
396        "internal" => TokenKind::Internal,
397        "native" => TokenKind::Native,
398        "async" => TokenKind::Async,
399        "await" => TokenKind::Await,
400        "effect" => TokenKind::Effect,
401        "platform" => TokenKind::Platform,
402        "where" => TokenKind::Where,
403        "type" => TokenKind::Type,
404        "true" => TokenKind::BoolLiteral,
405        "false" => TokenKind::BoolLiteral,
406        "Ok" => TokenKind::Ok_,
407        "Err" => TokenKind::Err_,
408        "Some" => TokenKind::Some_,
409        "None" => TokenKind::None_,
410        "property" => TokenKind::Property,
411        "forall" => TokenKind::Forall,
412        "unreachable" => TokenKind::Unreachable,
413        "is" => TokenKind::Is,
414        _ => return None,
415    };
416    Some(kind)
417}
418
/// Unit tests for the keyword table, `Display` output, and `Token`
/// construction.
#[cfg(test)]
mod tests {
    use super::*;
    // Take `Span` from the same crate the parent module imports it from, so
    // the tests use exactly the type stored in `Token::span`.
    use bock_source::Span;

    /// Placeholder span for tests that do not care about source locations.
    fn dummy_span() -> Span {
        Span::dummy()
    }

    #[test]
    fn keyword_lookup_known() {
        assert_eq!(keyword_lookup("fn"), Some(TokenKind::Fn));
        assert_eq!(keyword_lookup("let"), Some(TokenKind::Let));
        assert_eq!(keyword_lookup("mut"), Some(TokenKind::Mut));
        assert_eq!(keyword_lookup("true"), Some(TokenKind::BoolLiteral));
        assert_eq!(keyword_lookup("false"), Some(TokenKind::BoolLiteral));
        assert_eq!(keyword_lookup("self"), Some(TokenKind::SelfLower));
        assert_eq!(keyword_lookup("Self"), Some(TokenKind::SelfUpper));
        assert_eq!(keyword_lookup("Ok"), Some(TokenKind::Ok_));
        assert_eq!(keyword_lookup("Err"), Some(TokenKind::Err_));
        assert_eq!(keyword_lookup("Some"), Some(TokenKind::Some_));
        assert_eq!(keyword_lookup("None"), Some(TokenKind::None_));
        assert_eq!(keyword_lookup("is"), Some(TokenKind::Is));
        assert_eq!(keyword_lookup("forall"), Some(TokenKind::Forall));
        assert_eq!(keyword_lookup("unreachable"), Some(TokenKind::Unreachable));
    }

    #[test]
    fn keyword_lookup_unknown() {
        assert_eq!(keyword_lookup("foo"), None);
        assert_eq!(keyword_lookup("Foo"), None);
        assert_eq!(keyword_lookup(""), None);
        // Lookup is case-sensitive.
        assert_eq!(keyword_lookup("FN"), None);
        assert_eq!(keyword_lookup("Fn"), None);
    }

    #[test]
    fn display_operators() {
        assert_eq!(TokenKind::FatArrow.to_string(), "=>");
        assert_eq!(TokenKind::ThinArrow.to_string(), "->");
        assert_eq!(TokenKind::Pipe.to_string(), "|>");
        assert_eq!(TokenKind::Power.to_string(), "**");
        assert_eq!(TokenKind::Shr.to_string(), ">>");
        assert_eq!(TokenKind::DotDotEq.to_string(), "..=");
    }

    #[test]
    fn display_keywords() {
        assert_eq!(TokenKind::Fn.to_string(), "fn");
        assert_eq!(TokenKind::SelfLower.to_string(), "self");
        assert_eq!(TokenKind::SelfUpper.to_string(), "Self");
        assert_eq!(TokenKind::Ok_.to_string(), "Ok");
        assert_eq!(TokenKind::BoolLiteral.to_string(), "<bool>");
    }

    #[test]
    fn display_special() {
        assert_eq!(TokenKind::Eof.to_string(), "<eof>");
        assert_eq!(TokenKind::Newline.to_string(), "<newline>");
        assert_eq!(TokenKind::Error.to_string(), "<error>");
        assert_eq!(TokenKind::InterpolationStart.to_string(), "${");
        assert_eq!(TokenKind::InterpolationEnd.to_string(), "}");
    }

    #[test]
    fn token_construction() {
        let span = dummy_span();
        let tok = Token::new(TokenKind::Ident, span, Some("hello".into()));
        assert_eq!(tok.kind, TokenKind::Ident);
        assert_eq!(tok.literal.as_deref(), Some("hello"));
    }

    #[test]
    fn all_keywords_round_trip() {
        // Every keyword's Display output should re-lookup to itself.
        // `BoolLiteral` is deliberately absent: it displays as `<bool>`,
        // which is not a keyword spelling.
        let keywords = [
            TokenKind::Fn,
            TokenKind::Let,
            TokenKind::Mut,
            TokenKind::Const,
            TokenKind::If,
            TokenKind::Else,
            TokenKind::Match,
            TokenKind::For,
            TokenKind::In,
            TokenKind::While,
            TokenKind::Loop,
            TokenKind::Break,
            TokenKind::Continue,
            TokenKind::Return,
            TokenKind::Guard,
            TokenKind::With,
            TokenKind::Handling,
            TokenKind::Handle,
            TokenKind::Record,
            TokenKind::Enum,
            TokenKind::Class,
            TokenKind::Trait,
            TokenKind::Impl,
            TokenKind::SelfLower,
            TokenKind::SelfUpper,
            TokenKind::Module,
            TokenKind::Use,
            TokenKind::Public,
            TokenKind::Internal,
            TokenKind::Native,
            TokenKind::Async,
            TokenKind::Await,
            TokenKind::Effect,
            TokenKind::Platform,
            TokenKind::Where,
            TokenKind::Type,
            TokenKind::Ok_,
            TokenKind::Err_,
            TokenKind::Some_,
            TokenKind::None_,
            TokenKind::Property,
            TokenKind::Forall,
            TokenKind::Unreachable,
            TokenKind::Is,
        ];
        for kw in &keywords {
            let text = kw.to_string();
            assert_eq!(
                keyword_lookup(&text).as_ref(),
                Some(kw),
                "round-trip failed for {kw:?} (display = {text:?})"
            );
        }
    }
}