Skip to main content

specl_syntax/
token.rs

1//! Token types and source span tracking for the Specl lexer.
2
3use std::fmt;
4
5/// A span in the source code, tracking byte offsets and line/column.
6#[derive(Clone, Copy, PartialEq, Eq, Default)]
7pub struct Span {
8    /// Start byte offset (inclusive).
9    pub start: usize,
10    /// End byte offset (exclusive).
11    pub end: usize,
12    /// Line number (1-indexed).
13    pub line: u32,
14    /// Column number (1-indexed, in characters not bytes).
15    pub column: u32,
16}
17
18impl Span {
19    /// Create a new span.
20    pub fn new(start: usize, end: usize, line: u32, column: u32) -> Self {
21        Self {
22            start,
23            end,
24            line,
25            column,
26        }
27    }
28
29    /// Create a dummy span for generated code.
30    pub fn dummy() -> Self {
31        Self::default()
32    }
33
34    /// Merge two spans into one that covers both.
35    pub fn merge(self, other: Self) -> Self {
36        Self {
37            start: self.start.min(other.start),
38            end: self.end.max(other.end),
39            line: self.line.min(other.line),
40            column: if self.line <= other.line {
41                self.column
42            } else {
43                other.column
44            },
45        }
46    }
47
48    /// Length in bytes.
49    pub fn len(&self) -> usize {
50        self.end - self.start
51    }
52
53    /// Check if span is empty.
54    pub fn is_empty(&self) -> bool {
55        self.start == self.end
56    }
57}
58
59impl fmt::Debug for Span {
60    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
61        write!(f, "{}:{}", self.line, self.column)
62    }
63}
64
65impl fmt::Display for Span {
66    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
67        write!(f, "{}:{}", self.line, self.column)
68    }
69}
70
71/// The kind of token.
72#[derive(Clone, Debug, PartialEq)]
73pub enum TokenKind {
74    // === Keywords ===
75    /// `module`
76    Module,
77    /// `use`
78    Use,
79    /// `const`
80    Const,
81    /// `var`
82    Var,
83    /// `type`
84    Type,
85    /// `init`
86    Init,
87    /// `action`
88    Action,
89    /// `invariant`
90    Invariant,
91    /// `property`
92    Property,
93    /// `fairness`
94    Fairness,
95    /// `func` - user-defined helper function
96    Func,
97
98    // === Logical keywords ===
99    /// `and`
100    And,
101    /// `or`
102    Or,
103    /// `not`
104    Not,
105    /// `implies`
106    Implies,
107    /// `iff`
108    Iff,
109
110    // === Quantifiers ===
111    /// `all` (universal quantifier, Python-like)
112    All,
113    /// `any` (existential quantifier, Python-like)
114    Any,
115    /// `choose` (deterministic selection)
116    Choose,
117
118    // === Control flow ===
119    /// `in`
120    In,
121    /// `for`
122    For,
123    /// `if`
124    If,
125    /// `then`
126    Then,
127    /// `else`
128    Else,
129    /// `let`
130    Let,
131    /// `with`
132    With,
133    /// `require`
134    Require,
135
136    // === Frame keywords ===
137    /// `changes`
138    Changes,
139
140    // === Temporal keywords ===
141    /// `always`
142    Always,
143    /// `eventually`
144    Eventually,
145    /// `leads_to`
146    LeadsTo,
147    /// `enabled`
148    Enabled,
149
150    // === Fairness keywords ===
151    /// `weak_fair`
152    WeakFair,
153    /// `strong_fair`
154    StrongFair,
155
156    // === Boolean literals ===
157    /// `true`
158    True,
159    /// `false`
160    False,
161
162    // === Built-in types ===
163    /// `Nat`
164    Nat,
165    /// `Int`
166    Int,
167    /// `Bool`
168    Bool,
169    /// `String`
170    String_,
171    /// `Set`
172    Set,
173    /// `Seq`
174    Seq,
175    /// `dict` - Python-like dictionary type (lowercase only)
176    Dict,
177    /// `Option`
178    Option_,
179    /// `Some`
180    Some_,
181    /// `None`
182    None_,
183
184    // === Punctuation ===
185    /// `(`
186    LParen,
187    /// `)`
188    RParen,
189    /// `{`
190    LBrace,
191    /// `}`
192    RBrace,
193    /// `[`
194    LBracket,
195    /// `]`
196    RBracket,
197    /// `,`
198    Comma,
199    /// `:`
200    Colon,
201    /// `;`
202    Semicolon,
203    /// `.`
204    Dot,
205    /// `..`
206    DotDot,
207    /// `->`
208    Arrow,
209    /// `=>`
210    FatArrow,
211    /// `'` (prime for next-state variables)
212    Prime,
213    /// `|`
214    Pipe,
215
216    // === Comparison operators ===
217    /// `==`
218    Eq,
219    /// `!=`
220    Ne,
221    /// `<`
222    Lt,
223    /// `<=`
224    Le,
225    /// `>`
226    Gt,
227    /// `>=`
228    Ge,
229
230    // === Arithmetic operators ===
231    /// `+`
232    Plus,
233    /// `-`
234    Minus,
235    /// `*`
236    Star,
237    /// `/`
238    Slash,
239    /// `%`
240    Percent,
241
242    // === Set operators ===
243    /// `union`
244    Union,
245    /// `intersect`
246    Intersect,
247    /// `diff`
248    Diff,
249    /// `subset_of`
250    SubsetOf,
251
252    // === Sequence operators ===
253    /// `++`
254    PlusPlus,
255    /// `head` - get first element of sequence
256    Head,
257    /// `tail` - get all but first element of sequence
258    Tail,
259    /// `len` - get length of sequence/set/function
260    Len,
261
262    // === Function/Map operators ===
263    /// `keys` - get domain of function as set
264    Keys,
265    /// `values` - get range of function as set
266    Values,
267    /// `powerset` - get set of all subsets
268    Powerset,
269    /// `union_all` - distributed union (union of all sets in set)
270    UnionAll,
271
272    // === Other operators ===
273    /// `&`
274    Ampersand,
275    /// `=`
276    Assign,
277
278    // === Literals ===
279    /// Integer literal
280    Integer(i64),
281    /// String literal (without quotes)
282    StringLit(std::string::String),
283    /// Identifier
284    Ident(std::string::String),
285
286    // === Comments ===
287    /// Single-line comment `// ...`
288    Comment(std::string::String),
289    /// Documentation comment `/// ...`
290    DocComment(std::string::String),
291
292    // === Special ===
293    /// End of file
294    Eof,
295    /// Lexer error
296    Error(std::string::String),
297}
298
299impl TokenKind {
300    /// Check if this token is a keyword.
301    pub fn is_keyword(&self) -> bool {
302        matches!(
303            self,
304            TokenKind::Module
305                | TokenKind::Use
306                | TokenKind::Const
307                | TokenKind::Var
308                | TokenKind::Type
309                | TokenKind::Init
310                | TokenKind::Action
311                | TokenKind::Invariant
312                | TokenKind::Property
313                | TokenKind::Fairness
314                | TokenKind::Func
315                | TokenKind::And
316                | TokenKind::Or
317                | TokenKind::Not
318                | TokenKind::Implies
319                | TokenKind::Iff
320                | TokenKind::All
321                | TokenKind::Any
322                | TokenKind::Choose
323                | TokenKind::In
324                | TokenKind::For
325                | TokenKind::If
326                | TokenKind::Then
327                | TokenKind::Else
328                | TokenKind::Let
329                | TokenKind::With
330                | TokenKind::Require
331                | TokenKind::Changes
332                | TokenKind::Always
333                | TokenKind::Eventually
334                | TokenKind::LeadsTo
335                | TokenKind::Enabled
336                | TokenKind::WeakFair
337                | TokenKind::StrongFair
338                | TokenKind::True
339                | TokenKind::False
340                | TokenKind::Nat
341                | TokenKind::Int
342                | TokenKind::Bool
343                | TokenKind::String_
344                | TokenKind::Set
345                | TokenKind::Seq
346                | TokenKind::Dict
347                | TokenKind::Option_
348                | TokenKind::Some_
349                | TokenKind::None_
350                | TokenKind::Union
351                | TokenKind::Intersect
352                | TokenKind::Diff
353                | TokenKind::SubsetOf
354                | TokenKind::Head
355                | TokenKind::Tail
356                | TokenKind::Len
357                | TokenKind::Keys
358                | TokenKind::Values
359                | TokenKind::Powerset
360                | TokenKind::UnionAll
361        )
362    }
363
364    /// Get the keyword for a given identifier, if any.
365    pub fn keyword(ident: &str) -> Option<TokenKind> {
366        Some(match ident {
367            "module" => TokenKind::Module,
368            "use" => TokenKind::Use,
369            "const" => TokenKind::Const,
370            "var" => TokenKind::Var,
371            "type" => TokenKind::Type,
372            "init" => TokenKind::Init,
373            "action" => TokenKind::Action,
374            "invariant" => TokenKind::Invariant,
375            "property" => TokenKind::Property,
376            "fairness" => TokenKind::Fairness,
377            "func" => TokenKind::Func,
378            "and" => TokenKind::And,
379            "or" => TokenKind::Or,
380            "not" => TokenKind::Not,
381            "implies" => TokenKind::Implies,
382            "iff" => TokenKind::Iff,
383            "all" => TokenKind::All,
384            "any" => TokenKind::Any,
385            "fix" => TokenKind::Choose,
386            "in" => TokenKind::In,
387            "for" => TokenKind::For,
388            "if" => TokenKind::If,
389            "then" => TokenKind::Then,
390            "else" => TokenKind::Else,
391            "let" => TokenKind::Let,
392            "with" => TokenKind::With,
393            "require" => TokenKind::Require,
394            "changes" => TokenKind::Changes,
395            "always" => TokenKind::Always,
396            "eventually" => TokenKind::Eventually,
397            "leads_to" => TokenKind::LeadsTo,
398            "enabled" => TokenKind::Enabled,
399            "weak_fair" => TokenKind::WeakFair,
400            "strong_fair" => TokenKind::StrongFair,
401            "true" => TokenKind::True,
402            "false" => TokenKind::False,
403            "Nat" => TokenKind::Nat,
404            "Int" => TokenKind::Int,
405            "Bool" => TokenKind::Bool,
406            "String" => TokenKind::String_,
407            "Set" => TokenKind::Set,
408            "Seq" => TokenKind::Seq,
409            "Dict" => TokenKind::Dict,
410            "Option" => TokenKind::Option_,
411            "Some" => TokenKind::Some_,
412            "None" => TokenKind::None_,
413            "union" => TokenKind::Union,
414            "intersect" => TokenKind::Intersect,
415            "diff" => TokenKind::Diff,
416            "subset_of" => TokenKind::SubsetOf,
417            "head" => TokenKind::Head,
418            "tail" => TokenKind::Tail,
419            "len" => TokenKind::Len,
420            "keys" => TokenKind::Keys,
421            "values" => TokenKind::Values,
422            "powerset" => TokenKind::Powerset,
423            "union_all" => TokenKind::UnionAll,
424            _ => return None,
425        })
426    }
427
428    /// Check if this is a trivia token (comment, whitespace).
429    pub fn is_trivia(&self) -> bool {
430        matches!(self, TokenKind::Comment(_) | TokenKind::DocComment(_))
431    }
432}
433
434impl fmt::Display for TokenKind {
435    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
436        match self {
437            TokenKind::Module => write!(f, "module"),
438            TokenKind::Use => write!(f, "use"),
439            TokenKind::Const => write!(f, "const"),
440            TokenKind::Var => write!(f, "var"),
441            TokenKind::Type => write!(f, "type"),
442            TokenKind::Init => write!(f, "init"),
443            TokenKind::Action => write!(f, "action"),
444            TokenKind::Invariant => write!(f, "invariant"),
445            TokenKind::Property => write!(f, "property"),
446            TokenKind::Fairness => write!(f, "fairness"),
447            TokenKind::Func => write!(f, "func"),
448            TokenKind::And => write!(f, "and"),
449            TokenKind::Or => write!(f, "or"),
450            TokenKind::Not => write!(f, "not"),
451            TokenKind::Implies => write!(f, "implies"),
452            TokenKind::Iff => write!(f, "iff"),
453            TokenKind::All => write!(f, "all"),
454            TokenKind::Any => write!(f, "any"),
455            TokenKind::Choose => write!(f, "choose"),
456            TokenKind::In => write!(f, "in"),
457            TokenKind::For => write!(f, "for"),
458            TokenKind::If => write!(f, "if"),
459            TokenKind::Then => write!(f, "then"),
460            TokenKind::Else => write!(f, "else"),
461            TokenKind::Let => write!(f, "let"),
462            TokenKind::With => write!(f, "with"),
463            TokenKind::Require => write!(f, "require"),
464            TokenKind::Changes => write!(f, "changes"),
465            TokenKind::Always => write!(f, "always"),
466            TokenKind::Eventually => write!(f, "eventually"),
467            TokenKind::LeadsTo => write!(f, "leads_to"),
468            TokenKind::Enabled => write!(f, "enabled"),
469            TokenKind::WeakFair => write!(f, "weak_fair"),
470            TokenKind::StrongFair => write!(f, "strong_fair"),
471            TokenKind::True => write!(f, "true"),
472            TokenKind::False => write!(f, "false"),
473            TokenKind::Nat => write!(f, "Nat"),
474            TokenKind::Int => write!(f, "Int"),
475            TokenKind::Bool => write!(f, "Bool"),
476            TokenKind::String_ => write!(f, "String"),
477            TokenKind::Set => write!(f, "Set"),
478            TokenKind::Seq => write!(f, "Seq"),
479            TokenKind::Dict => write!(f, "Dict"),
480            TokenKind::Option_ => write!(f, "Option"),
481            TokenKind::Some_ => write!(f, "Some"),
482            TokenKind::None_ => write!(f, "None"),
483            TokenKind::LParen => write!(f, "("),
484            TokenKind::RParen => write!(f, ")"),
485            TokenKind::LBrace => write!(f, "{{"),
486            TokenKind::RBrace => write!(f, "}}"),
487            TokenKind::LBracket => write!(f, "["),
488            TokenKind::RBracket => write!(f, "]"),
489            TokenKind::Comma => write!(f, ","),
490            TokenKind::Colon => write!(f, ":"),
491            TokenKind::Semicolon => write!(f, ";"),
492            TokenKind::Dot => write!(f, "."),
493            TokenKind::DotDot => write!(f, ".."),
494            TokenKind::Arrow => write!(f, "->"),
495            TokenKind::FatArrow => write!(f, "=>"),
496            TokenKind::Prime => write!(f, "'"),
497            TokenKind::Pipe => write!(f, "|"),
498            TokenKind::Eq => write!(f, "=="),
499            TokenKind::Ne => write!(f, "!="),
500            TokenKind::Lt => write!(f, "<"),
501            TokenKind::Le => write!(f, "<="),
502            TokenKind::Gt => write!(f, ">"),
503            TokenKind::Ge => write!(f, ">="),
504            TokenKind::Plus => write!(f, "+"),
505            TokenKind::Minus => write!(f, "-"),
506            TokenKind::Star => write!(f, "*"),
507            TokenKind::Slash => write!(f, "/"),
508            TokenKind::Percent => write!(f, "%"),
509            TokenKind::Union => write!(f, "union"),
510            TokenKind::Intersect => write!(f, "intersect"),
511            TokenKind::Diff => write!(f, "diff"),
512            TokenKind::SubsetOf => write!(f, "subset_of"),
513            TokenKind::PlusPlus => write!(f, "++"),
514            TokenKind::Head => write!(f, "head"),
515            TokenKind::Tail => write!(f, "tail"),
516            TokenKind::Len => write!(f, "len"),
517            TokenKind::Keys => write!(f, "keys"),
518            TokenKind::Values => write!(f, "values"),
519            TokenKind::Powerset => write!(f, "powerset"),
520            TokenKind::UnionAll => write!(f, "union_all"),
521            TokenKind::Ampersand => write!(f, "&"),
522            TokenKind::Assign => write!(f, "="),
523            TokenKind::Integer(n) => write!(f, "{}", n),
524            TokenKind::StringLit(s) => write!(f, "\"{}\"", s),
525            TokenKind::Ident(s) => write!(f, "{}", s),
526            TokenKind::Comment(s) => write!(f, "// {}", s),
527            TokenKind::DocComment(s) => write!(f, "/// {}", s),
528            TokenKind::Eof => write!(f, "EOF"),
529            TokenKind::Error(msg) => write!(f, "ERROR: {}", msg),
530        }
531    }
532}
533
534/// A token with its span in the source code.
535#[derive(Clone, Debug, PartialEq)]
536pub struct Token {
537    /// The kind of token.
538    pub kind: TokenKind,
539    /// The span in the source code.
540    pub span: Span,
541}
542
543impl Token {
544    /// Create a new token.
545    pub fn new(kind: TokenKind, span: Span) -> Self {
546        Self { kind, span }
547    }
548
549    /// Check if this is the end of file.
550    pub fn is_eof(&self) -> bool {
551        matches!(self.kind, TokenKind::Eof)
552    }
553
554    /// Check if this is an error token.
555    pub fn is_error(&self) -> bool {
556        matches!(self.kind, TokenKind::Error(_))
557    }
558}
559
560#[cfg(test)]
561mod tests {
562    use super::*;
563
564    #[test]
565    fn test_span_merge() {
566        let s1 = Span::new(0, 5, 1, 1);
567        let s2 = Span::new(10, 15, 1, 11);
568        let merged = s1.merge(s2);
569        assert_eq!(merged.start, 0);
570        assert_eq!(merged.end, 15);
571    }
572
573    #[test]
574    fn test_keyword_lookup() {
575        assert_eq!(TokenKind::keyword("module"), Some(TokenKind::Module));
576        assert_eq!(TokenKind::keyword("and"), Some(TokenKind::And));
577        assert_eq!(TokenKind::keyword("Nat"), Some(TokenKind::Nat));
578        assert_eq!(TokenKind::keyword("foo"), None);
579    }
580
581    #[test]
582    fn test_is_keyword() {
583        assert!(TokenKind::Module.is_keyword());
584        assert!(TokenKind::And.is_keyword());
585        assert!(!TokenKind::LParen.is_keyword());
586        assert!(!TokenKind::Integer(42).is_keyword());
587    }
588}