panproto_expr_parser/token.rs
1//! Token types for the Haskell-style surface syntax.
2
3use logos::Logos;
4use std::fmt;
5
/// A half-open byte range `[start, end)` into the source text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span {
    /// Byte offset of the first byte covered by the span (inclusive).
    pub start: usize,
    /// Byte offset one past the last byte covered by the span (exclusive).
    pub end: usize,
}
14
15/// A token with its source span.
16#[derive(Debug, Clone, PartialEq)]
17pub struct Spanned {
18 /// The token kind.
19 pub token: Token,
20 /// Source span.
21 pub span: Span,
22}
23
/// Token kinds produced by the lexer.
///
/// The `Logos` derive builds the lexer from the `#[token]` / `#[regex]`
/// attributes on the variants. Two skip rules apply before any token is
/// matched: runs of spaces and tabs, and `--` line comments (from `--` up to,
/// but not including, the next `\n`).
///
/// Keywords are recognized during lexing; identifiers that happen to match
/// a keyword are emitted as the keyword token, not as `Ident`.
///
/// `Indent`, `Dedent`, `Newline` and `Eof` carry no lexer attribute, so the
/// generated lexer never yields them by itself; they have to be produced by
/// other code (the layout pass, according to the section comment below).
///
/// NOTE(review): `\n` and `\r` are not part of any skip rule or token
/// pattern here; presumably line breaks are consumed by the caller before or
/// around lexing. Confirm against the lexer driver.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t]+")]
#[logos(skip(r"--[^\n]*", allow_greedy = true))]
pub enum Token {
    // ── Keywords ──────────────────────────────────────────────────
    /// `do` keyword (layout block).
    #[token("do")]
    Do,
    /// `let` keyword (layout block).
    #[token("let")]
    Let,
    /// `in` keyword.
    #[token("in")]
    In,
    /// `where` keyword (layout block).
    #[token("where")]
    Where,
    /// `if` keyword.
    #[token("if")]
    If,
    /// `then` keyword.
    #[token("then")]
    Then,
    /// `else` keyword.
    #[token("else")]
    Else,
    /// `case` keyword.
    #[token("case")]
    Case,
    /// `of` keyword (layout block).
    #[token("of")]
    Of,
    /// `guard` keyword.
    #[token("guard")]
    Guard,
    /// `not` keyword (logical negation).
    #[token("not")]
    Not,
    /// `mod` keyword (modulo). Named `ModKw` rather than after its spelling.
    #[token("mod")]
    ModKw,
    /// `div` keyword (integer division). Named `DivKw` rather than after its
    /// spelling.
    #[token("div")]
    DivKw,
    /// `otherwise` keyword (catch-all guard).
    #[token("otherwise")]
    Otherwise,

    // ── Literals ──────────────────────────────────────────────────
    /// Boolean literal `True`.
    #[token("True")]
    True,
    /// Boolean literal `False`.
    #[token("False")]
    False,
    /// `Nothing` literal (absent value).
    #[token("Nothing")]
    Nothing,

    /// Integer literal (decimal or `0x` hex).
    ///
    /// The payload is the parsed value. Hex literals are parsed from the
    /// digits after the `0x` prefix (lower-case `x` only). Both callbacks
    /// return `None` when the text does not parse as an `i64` (for example
    /// on overflow), which logos reports as a lexing error for that slice.
    /// There is no sign in either pattern; a leading `-` is lexed separately
    /// as `Minus`.
    #[regex(r"0x[0-9a-fA-F]+", |lex| i64::from_str_radix(&lex.slice()[2..], 16).ok())]
    #[regex(r"[0-9]+", |lex| lex.slice().parse::<i64>().ok(), priority = 2)]
    Int(i64),

    /// Floating-point literal.
    ///
    /// The pattern is digits, a dot, digits: at least one digit is required
    /// on both sides of the dot, and there is no exponent or sign syntax.
    #[regex(r"[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    Float(f64),

    /// String literal (double-quoted, backslash escapes).
    ///
    /// The payload is the text between the quotes exactly as written in the
    /// source: the surrounding quotes are stripped, but escape sequences are
    /// NOT decoded here (a `\n` in the source stays as backslash + `n`).
    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        // Drop the opening and closing quote; both are single bytes, so the
        // slice boundaries are valid.
        Some(s[1..s.len()-1].to_string())
    })]
    Str(String),

    // ── Identifiers ──────────────────────────────────────────────
    /// Lower-case identifier (variables, fields).
    ///
    /// Starts with an ASCII lower-case letter or `_`, followed by any number
    /// of ASCII letters, digits, `_` or `'`. A lone `_` therefore lexes as
    /// an `Ident`.
    #[regex(r"[a-z_][a-zA-Z0-9_']*", |lex| lex.slice().to_string(), priority = 1)]
    Ident(String),

    /// Upper-case identifier (constructors, types).
    ///
    /// Starts with an ASCII upper-case letter, followed by any number of
    /// ASCII letters, digits, `_` or `'`.
    #[regex(r"[A-Z][a-zA-Z0-9_']*", |lex| lex.slice().to_string())]
    UpperIdent(String),

    // ── Operators ────────────────────────────────────────────────
    /// `->` arrow (function type, edge traversal).
    #[token("->")]
    Arrow,
    /// `<-` left arrow (generators, monadic bind).
    #[token("<-")]
    LeftArrow,
    /// `=>` fat arrow (constraints, pattern clauses).
    #[token("=>")]
    FatArrow,
    /// `::` type annotation.
    #[token("::")]
    DoubleColon,
    /// `..` range operator.
    #[token("..")]
    DotDot,
    /// `==` equality.
    #[token("==")]
    EqEq,
    /// `/=` inequality.
    #[token("/=")]
    Neq,
    /// `<=` less-than-or-equal.
    #[token("<=")]
    Lte,
    /// `>=` greater-than-or-equal.
    #[token(">=")]
    Gte,
    /// `&&` logical and.
    #[token("&&")]
    AndAnd,
    /// `||` logical or.
    #[token("||")]
    OrOr,
    /// `++` list concatenation.
    #[token("++")]
    PlusPlus,
    /// `+` addition.
    #[token("+")]
    Plus,
    /// `-` subtraction / negation.
    #[token("-")]
    Minus,
    /// `*` multiplication.
    #[token("*")]
    Star,
    /// `/` division.
    #[token("/")]
    Slash,
    /// `%` modulo.
    #[token("%")]
    Percent,
    /// `<` less-than.
    #[token("<")]
    Lt,
    /// `>` greater-than.
    #[token(">")]
    Gt,
    /// `=` binding / definition.
    #[token("=")]
    Eq,
    /// `.` field access / composition.
    #[token(".")]
    Dot,
    /// `,` separator.
    #[token(",")]
    Comma,
    /// `:` cons / type annotation.
    #[token(":")]
    Colon,
    /// `|` guard / comprehension separator.
    #[token("|")]
    Pipe,
    /// `\` lambda introducer.
    #[token("\\")]
    Backslash,
    /// `@` as-pattern.
    #[token("@")]
    At,
    /// `&` reference.
    #[token("&")]
    Ampersand,
    /// `` ` `` infix function application.
    #[token("`")]
    Backtick,
    /// `!` strict application.
    #[token("!")]
    Bang,

    // ── Delimiters ───────────────────────────────────────────────
    /// `(` open parenthesis.
    #[token("(")]
    LParen,
    /// `)` close parenthesis.
    #[token(")")]
    RParen,
    /// `[` open bracket.
    #[token("[")]
    LBracket,
    /// `]` close bracket.
    #[token("]")]
    RBracket,
    /// `{` open brace.
    #[token("{")]
    LBrace,
    /// `}` close brace.
    #[token("}")]
    RBrace,

    // ── Layout (virtual tokens inserted by the layout pass) ──────
    /// Indentation increased (opens a block). Has no lexer rule.
    Indent,
    /// Indentation decreased (closes a block). Has no lexer rule.
    Dedent,
    /// Newline at the same indentation level (separates declarations).
    /// Has no lexer rule.
    Newline,

    // ── Special ──────────────────────────────────────────────────
    /// End of input. Has no lexer rule, so it must be appended by whatever
    /// drives the lexer.
    Eof,
}
233
234impl fmt::Display for Token {
235 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
236 match self {
237 Self::Ident(s) | Self::UpperIdent(s) | Self::Str(s) => write!(f, "{s}"),
238 Self::Int(n) => write!(f, "{n}"),
239 Self::Float(n) => write!(f, "{n}"),
240 Self::Arrow => write!(f, "->"),
241 Self::LeftArrow => write!(f, "<-"),
242 Self::FatArrow => write!(f, "=>"),
243 Self::EqEq => write!(f, "=="),
244 Self::Neq => write!(f, "/="),
245 Self::AndAnd => write!(f, "&&"),
246 Self::OrOr => write!(f, "||"),
247 Self::PlusPlus => write!(f, "++"),
248 Self::DoubleColon => write!(f, "::"),
249 Self::DotDot => write!(f, ".."),
250 _ => write!(f, "{self:?}"),
251 }
252 }
253}