Skip to main content

microcad_syntax/tokens/
mod.rs

1// Copyright © 2026 The µcad authors <info@microcad.xyz>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4use crate::Span;
5use crate::tokens::from_logos::from_logos;
6use crate::tokens::logos::NormalToken;
7use ::logos::Lexer;
8use std::borrow::Cow;
9use std::fmt::{Display, Formatter};
10use thiserror::Error;
11
12mod from_logos;
13mod logos;
14
/// A source token with attached span
///
/// Generic over the token type `T`; the span is stored alongside the token
/// and both fields are publicly accessible.
#[derive(Debug, PartialEq, Clone)]
pub struct SpannedToken<T> {
    /// The span of the token in the source
    pub span: Span,
    /// The token itself
    pub token: T,
}
23
24impl SpannedToken<Token<'_>> {
25    /// Create an owned version of the token
26    pub fn into_owned(self) -> SpannedToken<Token<'static>> {
27        SpannedToken {
28            span: self.span,
29            token: self.token.into_owned(),
30        }
31    }
32}
33
34impl<T> SpannedToken<T> {
35    /// Create a [`SpannedToken`] from [`Span`] and token
36    pub fn new(span: Span, token: T) -> Self {
37        SpannedToken { span, token }
38    }
39}
40
41impl<T: PartialEq> PartialEq<T> for SpannedToken<T> {
42    fn eq(&self, other: &T) -> bool {
43        self.token.eq(other)
44    }
45}
46
/// Possible errors encountered while tokenizing
///
/// The `Default` value is [`LexerError::NoValidToken`]. Variants describing
/// unclosed strings carry the [`Span`] associated with the error.
#[derive(Debug, Default, Clone, PartialEq, Error)]
pub enum LexerError {
    /// No valid token was found for the character
    #[default]
    #[error("No valid token")]
    NoValidToken,
    /// A format string was encountered that wasn't closed correctly
    #[error("Unclosed format string")]
    UnclosedStringFormat(Span),
    /// A string was encountered that wasn't closed correctly
    #[error("Unclosed string")]
    UnclosedString(Span),
}
61
62impl LexerError {
63    /// Get a descriptive name of the error type
64    pub fn kind(&self) -> &'static str {
65        match self {
66            LexerError::NoValidToken => "no valid token",
67            LexerError::UnclosedStringFormat(_) => "unclosed format string",
68            LexerError::UnclosedString(_) => "unclosed string",
69        }
70    }
71}
72
73impl LexerError {
74    /// Get the span of the error
75    pub fn span(&self) -> Option<Span> {
76        match self {
77            LexerError::UnclosedStringFormat(span) => Some(span.clone()),
78            LexerError::UnclosedString(span) => Some(span.clone()),
79            _ => None,
80        }
81    }
82}
83
84/// Tokenize a µcad source string into an iterator of tokens.
85pub fn lex<'a>(input: &'a str) -> impl Iterator<Item = SpannedToken<Token<'a>>> {
86    from_logos(Lexer::<NormalToken>::new(input).spanned())
87}
88
/// Source token for µcad files
///
/// Variants that carry text hold it as a `Cow<'a, str>`, so a token may either
/// borrow its text for the lifetime `'a` or own it.
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'a> {
    /// Whitespace
    Whitespace(Cow<'a, str>),

    /// A single-line comment, starting with `//`
    SingleLineComment(Cow<'a, str>),
    /// A multi-line comment, starting with `/*` and ending with `*/`
    MultiLineComment(Cow<'a, str>),
    /// A doc-comment, starting with `///`
    DocComment(Cow<'a, str>),
    /// An inner doc-comment, starting with `//!`
    InnerDocComment(Cow<'a, str>),

    /// The `mod` keyword
    KeywordMod,
    /// The `part` keyword
    KeywordPart,
    /// The `sketch` keyword
    KeywordSketch,
    /// The `op` keyword
    KeywordOp,
    /// The `fn` keyword
    KeywordFn,
    /// The `if` keyword
    KeywordIf,
    /// The `else` keyword
    KeywordElse,
    /// The `use` keyword
    KeywordUse,
    /// The `as` keyword
    KeywordAs,
    /// The `return` keyword
    KeywordReturn,
    /// The `pub` keyword
    KeywordPub,
    /// The `const` keyword
    KeywordConst,
    /// The `prop` keyword
    KeywordProp,
    /// The `init` keyword
    KeywordInit,
    /// The `__plugin` keyword
    KeywordPlugin,
    /// The `assembly` keyword
    KeywordAssembly,
    /// The `material` keyword
    KeywordMaterial,
    /// The `unit` keyword
    KeywordUnit,
    /// The `enum` keyword
    KeywordEnum,
    /// The `struct` keyword
    KeywordStruct,
    /// The `match` keyword
    KeywordMatch,
    /// The `type` keyword
    KeywordType,

    /// An identifier, alphanumeric, starting with either an alpha character or an underscore
    Identifier(Cow<'a, str>),
    /// A unit identifier
    Unit(Cow<'a, str>),

    /// An integer literal
    LiteralInt(Cow<'a, str>),
    /// A float literal
    LiteralFloat(Cow<'a, str>),
    /// A boolean literal
    LiteralBool(bool),
    /// A string literal
    LiteralString(Cow<'a, str>),

    /// Double-quote indicating the start of a format string
    FormatStringStart,
    /// Double-quote indicating the end of a format string
    FormatStringEnd,
    /// Literal string content of a format string
    StringContent(Cow<'a, str>),
    /// Escaped character inside a format string
    Character(char),
    /// The start of the format expression inside a format string
    StringFormatOpen,
    /// The end of the format expression inside a format string
    StringFormatClose,
    /// The precision specification of the format expression inside a format string
    StringFormatPrecision(Cow<'a, str>),
    /// The width specification of the format expression inside a format string
    StringFormatWidth(Cow<'a, str>),

    /// The `:` symbol
    SigilColon,
    /// The `;` symbol
    SigilSemiColon,
    /// The `::` symbol
    SigilDoubleColon,
    /// The `(` symbol
    SigilOpenBracket,
    /// The `)` symbol
    SigilCloseBracket,
    /// The `[` symbol
    SigilOpenSquareBracket,
    /// The `]` symbol
    SigilCloseSquareBracket,
    /// The `{` symbol
    SigilOpenCurlyBracket,
    /// The `}` symbol
    SigilCloseCurlyBracket,
    /// The `#` symbol
    SigilHash,
    /// The `.` symbol
    SigilDot,
    /// The `,` symbol
    SigilComma,
    /// The `..` symbol
    SigilDoubleDot,
    /// The `@` symbol
    SigilAt,
    /// The `->` symbol
    SigilSingleArrow,
    /// The `"` symbol
    SigilQuote,

    /// Add operator: `+`
    OperatorAdd,
    /// Subtract operator: `-`
    OperatorSubtract,
    /// Multiply operator: `*`
    OperatorMultiply,
    /// Divide operator: `/`
    OperatorDivide,
    /// Union operator: `|`
    OperatorUnion,
    /// Intersect operator: `&`
    OperatorIntersect,
    /// Xor operator: `^`
    OperatorPowerXor,
    /// Greater-than operator: `>`
    OperatorGreaterThan,
    /// Less-than operator: `<`
    OperatorLessThan,
    /// Greater-or-equal operator: `>=`
    OperatorGreaterEqual,
    /// Less-or-equal operator: `<=`
    OperatorLessEqual,
    /// Near operator: `~`
    OperatorNear,
    /// Equal operator: `==`
    OperatorEqual,
    /// Not-equal operator: `!=`
    OperatorNotEqual,
    /// And operator: `and`
    OperatorAnd,
    /// Or operator: `or`
    OperatorOr,
    /// Xor operator: `xor`
    OperatorXor,
    /// Not operator: `!`
    OperatorNot,
    /// Assignment operator: `=`
    OperatorAssignment,

    /// A lexer failure
    ///
    /// When encountering an error, the lexer attempts to recover and continue emitting further tokens
    Error(LexerError),
}
257
258impl Display for Token<'_> {
259    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
260        write!(f, "{}", self.kind())
261    }
262}
263
264impl Token<'_> {
265    /// Create an owned version of the token
266    pub fn into_owned(self) -> Token<'static> {
267        match self {
268            Token::Whitespace(c) => Token::Whitespace(c.into_owned().into()),
269            Token::SingleLineComment(c) => Token::SingleLineComment(c.into_owned().into()),
270            Token::MultiLineComment(c) => Token::MultiLineComment(c.into_owned().into()),
271            Token::DocComment(c) => Token::DocComment(c.into_owned().into()),
272            Token::InnerDocComment(c) => Token::InnerDocComment(c.into_owned().into()),
273            Token::Identifier(s) => Token::Identifier(s.into_owned().into()),
274            Token::Unit(s) => Token::Unit(s.into_owned().into()),
275            Token::LiteralInt(s) => Token::LiteralInt(s.into_owned().into()),
276            Token::LiteralFloat(s) => Token::LiteralFloat(s.into_owned().into()),
277            Token::LiteralString(s) => Token::LiteralString(s.into_owned().into()),
278
279            Token::KeywordMod => Token::KeywordMod,
280            Token::KeywordPart => Token::KeywordPart,
281            Token::KeywordSketch => Token::KeywordSketch,
282            Token::KeywordOp => Token::KeywordOp,
283            Token::KeywordFn => Token::KeywordFn,
284            Token::KeywordIf => Token::KeywordIf,
285            Token::KeywordElse => Token::KeywordElse,
286            Token::KeywordUse => Token::KeywordUse,
287            Token::KeywordAs => Token::KeywordAs,
288            Token::KeywordReturn => Token::KeywordReturn,
289            Token::KeywordPub => Token::KeywordPub,
290            Token::KeywordConst => Token::KeywordConst,
291            Token::KeywordProp => Token::KeywordProp,
292            Token::KeywordInit => Token::KeywordInit,
293            Token::KeywordAssembly => Token::KeywordAssembly,
294            Token::KeywordPlugin => Token::KeywordPlugin,
295            Token::KeywordMaterial => Token::KeywordMaterial,
296            Token::KeywordUnit => Token::KeywordUnit,
297            Token::KeywordEnum => Token::KeywordEnum,
298            Token::KeywordStruct => Token::KeywordStruct,
299            Token::KeywordMatch => Token::KeywordMatch,
300            Token::KeywordType => Token::KeywordType,
301            Token::LiteralBool(l) => Token::LiteralBool(l),
302            Token::SigilColon => Token::SigilColon,
303            Token::SigilSemiColon => Token::SigilSemiColon,
304            Token::SigilDoubleColon => Token::SigilDoubleColon,
305            Token::SigilOpenBracket => Token::SigilOpenBracket,
306            Token::SigilCloseBracket => Token::SigilCloseBracket,
307            Token::SigilOpenSquareBracket => Token::SigilOpenSquareBracket,
308            Token::SigilCloseSquareBracket => Token::SigilCloseSquareBracket,
309            Token::SigilOpenCurlyBracket => Token::SigilOpenCurlyBracket,
310            Token::SigilCloseCurlyBracket => Token::SigilCloseCurlyBracket,
311            Token::SigilHash => Token::SigilHash,
312            Token::SigilDot => Token::SigilDot,
313            Token::SigilComma => Token::SigilComma,
314            Token::SigilDoubleDot => Token::SigilDoubleDot,
315            Token::SigilAt => Token::SigilAt,
316            Token::SigilSingleArrow => Token::SigilSingleArrow,
317            Token::OperatorAdd => Token::OperatorAdd,
318            Token::OperatorSubtract => Token::OperatorSubtract,
319            Token::OperatorMultiply => Token::OperatorMultiply,
320            Token::OperatorDivide => Token::OperatorDivide,
321            Token::OperatorUnion => Token::OperatorUnion,
322            Token::OperatorIntersect => Token::OperatorIntersect,
323            Token::OperatorPowerXor => Token::OperatorPowerXor,
324            Token::OperatorGreaterThan => Token::OperatorGreaterThan,
325            Token::OperatorLessThan => Token::OperatorLessThan,
326            Token::OperatorGreaterEqual => Token::OperatorGreaterEqual,
327            Token::OperatorLessEqual => Token::OperatorLessEqual,
328            Token::OperatorNear => Token::OperatorNear,
329            Token::OperatorEqual => Token::OperatorEqual,
330            Token::OperatorNotEqual => Token::OperatorNotEqual,
331            Token::OperatorAnd => Token::OperatorAnd,
332            Token::OperatorOr => Token::OperatorOr,
333            Token::OperatorXor => Token::OperatorXor,
334            Token::OperatorNot => Token::OperatorNot,
335            Token::OperatorAssignment => Token::OperatorAssignment,
336            Token::FormatStringStart => Token::FormatStringStart,
337            Token::FormatStringEnd => Token::FormatStringEnd,
338            Token::StringContent(s) => Token::StringContent(s.into_owned().into()),
339            Token::Character(c) => Token::Character(c),
340            Token::StringFormatOpen => Token::StringFormatOpen,
341            Token::StringFormatClose => Token::StringFormatClose,
342            Token::SigilQuote => Token::SigilQuote,
343            Token::Error(e) => Token::Error(e),
344            Token::StringFormatPrecision(c) => Token::StringFormatPrecision(c.into_owned().into()),
345            Token::StringFormatWidth(c) => Token::StringFormatWidth(c.into_owned().into()),
346        }
347    }
348
349    /// Get a descriptive name or symbol for the token type
350    pub fn kind(&self) -> &'static str {
351        match self {
352            Token::Whitespace(_) => "whitespace",
353            Token::SingleLineComment(_) => "single-line comment",
354            Token::MultiLineComment(_) => "multi-line comment",
355            Token::DocComment(_) => "doc comment",
356            Token::InnerDocComment(_) => "inner doc comment",
357            Token::KeywordMod => "mod",
358            Token::KeywordPart => "part",
359            Token::KeywordSketch => "sketch",
360            Token::KeywordOp => "op",
361            Token::KeywordFn => "fn",
362            Token::KeywordIf => "if",
363            Token::KeywordElse => "else",
364            Token::KeywordUse => "use",
365            Token::KeywordAs => "as",
366            Token::KeywordReturn => "return",
367            Token::KeywordPub => "pub",
368            Token::KeywordConst => "const",
369            Token::KeywordProp => "prop",
370            Token::KeywordInit => "init",
371            Token::KeywordPlugin => "__plugin",
372            Token::KeywordAssembly => "assembly",
373            Token::KeywordMaterial => "material",
374            Token::KeywordUnit => "unit",
375            Token::KeywordEnum => "enum",
376            Token::KeywordStruct => "struct",
377            Token::KeywordMatch => "match",
378            Token::KeywordType => "type",
379            Token::Identifier(_) => "identifier",
380            Token::Unit(_) => "unit",
381            Token::LiteralInt(_) => "integer literal",
382            Token::LiteralFloat(_) => "float literal",
383            Token::LiteralBool(_) => "boolean literal",
384            Token::LiteralString(_) => "string literal",
385            Token::FormatStringStart => "start of string",
386            Token::FormatStringEnd => "end of string",
387            Token::StringContent(_) => "string content",
388            Token::Character(_) => "escaped character",
389            Token::StringFormatOpen => "string format start",
390            Token::StringFormatClose => "string format end",
391            Token::SigilColon => ":",
392            Token::SigilSemiColon => ";",
393            Token::SigilDoubleColon => "::",
394            Token::SigilOpenBracket => "(",
395            Token::SigilCloseBracket => ")",
396            Token::SigilOpenSquareBracket => "[",
397            Token::SigilCloseSquareBracket => "]",
398            Token::SigilOpenCurlyBracket => "{",
399            Token::SigilCloseCurlyBracket => "}",
400            Token::SigilHash => "#",
401            Token::SigilDot => ".",
402            Token::SigilComma => ",",
403            Token::SigilDoubleDot => "..",
404            Token::SigilAt => "@",
405            Token::SigilSingleArrow => "->",
406            Token::SigilQuote => "\"",
407            Token::OperatorAdd => "+",
408            Token::OperatorSubtract => "-",
409            Token::OperatorMultiply => "*",
410            Token::OperatorDivide => "/",
411            Token::OperatorUnion => "|",
412            Token::OperatorIntersect => "&",
413            Token::OperatorPowerXor => "^",
414            Token::OperatorGreaterThan => ">",
415            Token::OperatorLessThan => "<",
416            Token::OperatorGreaterEqual => ">=",
417            Token::OperatorLessEqual => "<=",
418            Token::OperatorNear => "!",
419            Token::OperatorEqual => "==",
420            Token::OperatorNotEqual => "!=",
421            Token::OperatorAnd => "and",
422            Token::OperatorOr => "or",
423            Token::OperatorXor => "xor",
424            Token::OperatorNot => "not",
425            Token::OperatorAssignment => "=",
426            Token::StringFormatPrecision(_) => "format precision",
427            Token::StringFormatWidth(_) => "format width",
428            Token::Error(e) => e.kind(),
429        }
430    }
431
432    /// Check if the token is an error
433    pub fn is_error(&self) -> bool {
434        matches!(self, Token::Error(_))
435    }
436}