Skip to main content

panproto_expr_parser/
token.rs

//! Token types for the Haskell-style surface syntax.
2
3use logos::Logos;
4use std::fmt;
5
/// Half-open byte range `[start, end)` locating a piece of source text.
///
/// Both fields are byte offsets, not character or line/column positions.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the first byte covered by the span (inclusive).
    pub start: usize,
    /// Byte offset one past the last byte covered by the span (exclusive).
    pub end: usize,
}
14
15/// A token with its source span.
16#[derive(Debug, Clone, PartialEq)]
17pub struct Spanned {
18    /// The token kind.
19    pub token: Token,
20    /// Source span.
21    pub span: Span,
22}
23
24/// Token kinds produced by the lexer.
25///
26/// Keywords are recognized during lexing; identifiers that happen to match
27/// a keyword are emitted as the keyword token, not as `Ident`.
28#[derive(Logos, Debug, Clone, PartialEq)]
29#[logos(skip r"[ \t]+")]
30#[logos(skip(r"--[^\n]*", allow_greedy = true))]
31pub enum Token {
32    // ── Keywords ──────────────────────────────────────────────────
33    /// `do` keyword (layout block).
34    #[token("do")]
35    Do,
36    /// `let` keyword (layout block).
37    #[token("let")]
38    Let,
39    /// `in` keyword.
40    #[token("in")]
41    In,
42    /// `where` keyword (layout block).
43    #[token("where")]
44    Where,
45    /// `if` keyword.
46    #[token("if")]
47    If,
48    /// `then` keyword.
49    #[token("then")]
50    Then,
51    /// `else` keyword.
52    #[token("else")]
53    Else,
54    /// `case` keyword.
55    #[token("case")]
56    Case,
57    /// `of` keyword (layout block).
58    #[token("of")]
59    Of,
60    /// `guard` keyword.
61    #[token("guard")]
62    Guard,
63    /// `not` keyword (logical negation).
64    #[token("not")]
65    Not,
66    /// `mod` keyword (modulo).
67    #[token("mod")]
68    ModKw,
69    /// `div` keyword (integer division).
70    #[token("div")]
71    DivKw,
72    /// `otherwise` keyword (catch-all guard).
73    #[token("otherwise")]
74    Otherwise,
75
76    // ── Literals ──────────────────────────────────────────────────
77    /// Boolean literal `True`.
78    #[token("True")]
79    True,
80    /// Boolean literal `False`.
81    #[token("False")]
82    False,
83    /// `Nothing` literal (absent value).
84    #[token("Nothing")]
85    Nothing,
86
87    /// Integer literal (decimal or `0x` hex).
88    #[regex(r"0x[0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16).ok())]
89    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok(), priority = 2)]
90    Int(i64),
91
92    /// Floating-point literal.
93    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
94    Float(f64),
95
96    /// String literal (double-quoted, backslash escapes).
97    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
98        let s = lex.slice();
99        Some(s[1..s.len()-1].to_string())
100    })]
101    Str(String),
102
103    // ── Identifiers ──────────────────────────────────────────────
104    /// Lower-case identifier (variables, fields).
105    #[regex(r"[a-z_][a-zA-Z0-9_']*", |lex| lex.slice().to_string(), priority = 1)]
106    Ident(String),
107
108    /// Upper-case identifier (constructors, types).
109    #[regex(r"[A-Z][a-zA-Z0-9_']*", |lex| lex.slice().to_string())]
110    UpperIdent(String),
111
112    // ── Operators ────────────────────────────────────────────────
113    /// `->` arrow (function type, edge traversal).
114    #[token("->")]
115    Arrow,
116    /// `<-` left arrow (generators, monadic bind).
117    #[token("<-")]
118    LeftArrow,
119    /// `=>` fat arrow (constraints, pattern clauses).
120    #[token("=>")]
121    FatArrow,
122    /// `::` type annotation.
123    #[token("::")]
124    DoubleColon,
125    /// `..` range operator.
126    #[token("..")]
127    DotDot,
128    /// `==` equality.
129    #[token("==")]
130    EqEq,
131    /// `/=` inequality.
132    #[token("/=")]
133    Neq,
134    /// `<=` less-than-or-equal.
135    #[token("<=")]
136    Lte,
137    /// `>=` greater-than-or-equal.
138    #[token(">=")]
139    Gte,
140    /// `&&` logical and.
141    #[token("&&")]
142    AndAnd,
143    /// `||` logical or.
144    #[token("||")]
145    OrOr,
146    /// `++` list concatenation.
147    #[token("++")]
148    PlusPlus,
149    /// `+` addition.
150    #[token("+")]
151    Plus,
152    /// `-` subtraction / negation.
153    #[token("-")]
154    Minus,
155    /// `*` multiplication.
156    #[token("*")]
157    Star,
158    /// `/` division.
159    #[token("/")]
160    Slash,
161    /// `%` modulo.
162    #[token("%")]
163    Percent,
164    /// `<` less-than.
165    #[token("<")]
166    Lt,
167    /// `>` greater-than.
168    #[token(">")]
169    Gt,
170    /// `=` binding / definition.
171    #[token("=")]
172    Eq,
173    /// `.` field access / composition.
174    #[token(".")]
175    Dot,
176    /// `,` separator.
177    #[token(",")]
178    Comma,
179    /// `:` cons / type annotation.
180    #[token(":")]
181    Colon,
182    /// `|` guard / comprehension separator.
183    #[token("|")]
184    Pipe,
185    /// `\` lambda introducer.
186    #[token("\\")]
187    Backslash,
188    /// `@` as-pattern.
189    #[token("@")]
190    At,
191    /// `&` reference.
192    #[token("&")]
193    Ampersand,
194    /// `` ` `` infix function application.
195    #[token("`")]
196    Backtick,
197    /// `!` strict application.
198    #[token("!")]
199    Bang,
200
201    // ── Delimiters ───────────────────────────────────────────────
202    /// `(` open parenthesis.
203    #[token("(")]
204    LParen,
205    /// `)` close parenthesis.
206    #[token(")")]
207    RParen,
208    /// `[` open bracket.
209    #[token("[")]
210    LBracket,
211    /// `]` close bracket.
212    #[token("]")]
213    RBracket,
214    /// `{` open brace.
215    #[token("{")]
216    LBrace,
217    /// `}` close brace.
218    #[token("}")]
219    RBrace,
220
221    // ── Layout (virtual tokens inserted by the layout pass) ──────
222    /// Indentation increased (opens a block).
223    Indent,
224    /// Indentation decreased (closes a block).
225    Dedent,
226    /// Newline at the same indentation level (separates declarations).
227    Newline,
228
229    // ── Special ──────────────────────────────────────────────────
230    /// End of input.
231    Eof,
232}
233
234impl fmt::Display for Token {
235    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236        match self {
237            Self::Ident(s) | Self::UpperIdent(s) | Self::Str(s) => write!(f, "{s}"),
238            Self::Int(n) => write!(f, "{n}"),
239            Self::Float(n) => write!(f, "{n}"),
240            Self::Arrow => write!(f, "->"),
241            Self::LeftArrow => write!(f, "<-"),
242            Self::FatArrow => write!(f, "=>"),
243            Self::EqEq => write!(f, "=="),
244            Self::Neq => write!(f, "/="),
245            Self::AndAnd => write!(f, "&&"),
246            Self::OrOr => write!(f, "||"),
247            Self::PlusPlus => write!(f, "++"),
248            Self::DoubleColon => write!(f, "::"),
249            Self::DotDot => write!(f, ".."),
250            _ => write!(f, "{self:?}"),
251        }
252    }
253}