Skip to main content

microcad_lang_parse/tokens/
mod.rs

1// Copyright © 2026 The µcad authors <info@microcad.xyz>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4use crate::tokens::{from_logos::from_logos, logos::NormalToken};
5
6use ::logos::Lexer;
7use microcad_lang_base::Span;
8use std::{
9    borrow::Cow,
10    fmt::{Display, Formatter},
11};
12use thiserror::Error;
13
14mod from_logos;
15mod logos;
16
/// A source token with attached span
///
/// Generic over the token type `T`, so the same wrapper can carry any
/// token representation together with its [`Span`].
#[derive(Debug, PartialEq, Clone)]
pub struct SpannedToken<T> {
    /// the span of the token
    pub span: Span,
    /// the token
    pub token: T,
}
25
26impl SpannedToken<Token<'_>> {
27    /// Create an owned version of the token
28    pub fn into_owned(self) -> SpannedToken<Token<'static>> {
29        SpannedToken {
30            span: self.span,
31            token: self.token.into_owned(),
32        }
33    }
34}
35
36impl<T> SpannedToken<T> {
37    /// Create a [`SpannedToken`] from [`Span`] and token
38    pub fn new(span: Span, token: T) -> Self {
39        SpannedToken { span, token }
40    }
41}
42
43impl<T: PartialEq> PartialEq<T> for SpannedToken<T> {
44    fn eq(&self, other: &T) -> bool {
45        self.token.eq(other)
46    }
47}
48
/// Possible errors encountered while tokenizing
///
/// [`LexerError::NoValidToken`] is the [`Default`] variant. The two
/// "unclosed" variants carry a [`Span`], which [`LexerError::span`] exposes.
#[derive(Debug, Default, Clone, PartialEq, Error)]
pub enum LexerError {
    /// No valid token was found for the character
    #[default]
    #[error("No valid token")]
    NoValidToken,
    /// A format string was encountered that wasn't closed correctly
    // NOTE(review): the span presumably covers the unterminated format string — confirm in the lexer.
    #[error("Unclosed format string")]
    UnclosedStringFormat(Span),
    /// A string was encountered that wasn't closed correctly
    // NOTE(review): the span presumably covers the unterminated string — confirm in the lexer.
    #[error("Unclosed string")]
    UnclosedString(Span),
}
63
64impl LexerError {
65    /// Get a descriptive name of the error type
66    pub fn kind(&self) -> &'static str {
67        match self {
68            LexerError::NoValidToken => "no valid token",
69            LexerError::UnclosedStringFormat(_) => "unclosed format string",
70            LexerError::UnclosedString(_) => "unclosed string",
71        }
72    }
73}
74
75impl LexerError {
76    /// Get the span of the error
77    pub fn span(&self) -> Option<Span> {
78        match self {
79            LexerError::UnclosedStringFormat(span) => Some(span.clone()),
80            LexerError::UnclosedString(span) => Some(span.clone()),
81            _ => None,
82        }
83    }
84}
85
86/// Tokenize a µcad source string into an iterator of tokens.
87pub fn lex<'a>(input: &'a str) -> impl Iterator<Item = SpannedToken<Token<'a>>> {
88    from_logos(Lexer::<NormalToken>::new(input).spanned())
89}
90
/// Source token for µcad files
///
/// Variants that carry source text hold it as `Cow<'a, str>`, so a token may
/// borrow from the input for the lifetime `'a`; use [`Token::into_owned`] to
/// obtain a `Token<'static>`.
#[derive(Debug, PartialEq, Clone)]
pub enum Token<'a> {
    /// Whitespace
    Whitespace(Cow<'a, str>),

    /// A single-line comment, starting with `//`
    SingleLineComment(Cow<'a, str>),
    /// A multi-line comment, starting with `/*` and ending with `*/`
    MultiLineComment(Cow<'a, str>),
    /// A doc-comment, starting with `///`
    DocComment(Cow<'a, str>),
    /// An inner doc-comment, starting with `//!`
    InnerDocComment(Cow<'a, str>),

    /// The `mod` keyword
    KeywordMod,
    /// The `part` keyword
    KeywordPart,
    /// The `sketch` keyword
    KeywordSketch,
    /// The `op` keyword
    KeywordOp,
    /// The `fn` keyword
    KeywordFn,
    /// The `if` keyword
    KeywordIf,
    /// The `else` keyword
    KeywordElse,
    /// The `use` keyword
    KeywordUse,
    /// The `as` keyword
    KeywordAs,
    /// The `return` keyword
    KeywordReturn,
    /// The `pub` keyword
    KeywordPub,
    /// The `const` keyword
    KeywordConst,
    /// The `prop` keyword
    KeywordProp,
    /// The `init` keyword
    KeywordInit,
    /// The `__plugin` keyword
    KeywordPlugin,
    /// The `assembly` keyword
    KeywordAssembly,
    /// The `material` keyword
    KeywordMaterial,
    /// The `unit` keyword
    KeywordUnit,
    /// The `enum` keyword
    KeywordEnum,
    /// The `struct` keyword
    KeywordStruct,
    /// The `match` keyword
    KeywordMatch,
    /// The `type` keyword
    KeywordType,
    /// The `extern` keyword
    KeywordExtern,

    /// An identifier, alphanumeric, starting with either an alpha character or an underscore
    Identifier(Cow<'a, str>),
    /// A unit identifier
    Unit(Cow<'a, str>),

    /// An integer literal, kept as its source text
    LiteralInt(Cow<'a, str>),
    /// A float literal, kept as its source text
    LiteralFloat(Cow<'a, str>),
    /// A boolean literal
    LiteralBool(bool),
    /// A string literal
    LiteralString(Cow<'a, str>),

    /// Double-quote indicating the start of a format string
    FormatStringStart,
    /// Double-quote indicating the end of a format string
    FormatStringEnd,
    /// Literal string content of a format string
    StringContent(Cow<'a, str>),
    /// Escaped character inside a format string
    Character(char),
    /// The start of the format expression inside a format string
    StringFormatOpen,
    /// The end of the format expression inside a format string
    StringFormatClose,
    /// The precision specification of the format expression inside a format string
    StringFormatPrecision(Cow<'a, str>),
    /// The width specification of the format expression inside a format string
    StringFormatWidth(Cow<'a, str>),

    /// The `:` symbol
    SigilColon,
    /// The `;` symbol
    SigilSemiColon,
    /// The `::` symbol
    SigilDoubleColon,
    /// The `(` symbol
    SigilOpenBracket,
    /// The `)` symbol
    SigilCloseBracket,
    /// The `[` symbol
    SigilOpenSquareBracket,
    /// The `]` symbol
    SigilCloseSquareBracket,
    /// The `{` symbol
    SigilOpenCurlyBracket,
    /// The `}` symbol
    SigilCloseCurlyBracket,
    /// The `#` symbol
    SigilHash,
    /// The `.` symbol
    SigilDot,
    /// The `,` symbol
    SigilComma,
    /// The `..` symbol
    SigilDoubleDot,
    /// The `@` symbol
    SigilAt,
    /// The `->` symbol
    SigilSingleArrow,
    /// The `"` symbol
    SigilQuote,

    /// Add operator: `+`
    OperatorAdd,
    /// Subtract operator: `-`
    OperatorSubtract,
    /// Multiply operator: `*`
    OperatorMultiply,
    /// Divide operator: `/`
    OperatorDivide,
    /// Union operator: `|`
    OperatorUnion,
    /// Intersect operator: `&`
    OperatorIntersect,
    /// Caret operator: `^`
    OperatorPowerXor,
    /// Greater-than operator: `>`
    OperatorGreaterThan,
    /// Less-than operator: `<`
    OperatorLessThan,
    /// Greater-or-equal operator: `>=`
    OperatorGreaterEqual,
    /// Less-or-equal operator: `<=`
    OperatorLessEqual,
    /// Near operator: `~`
    OperatorNear,
    /// Equal operator: `==`
    OperatorEqual,
    /// Not-equal operator: `!=`
    OperatorNotEqual,
    /// And operator: `and`
    OperatorAnd,
    /// Or operator: `or`
    OperatorOr,
    /// Xor operator: `xor`
    OperatorXor,
    /// Not operator: `!`
    OperatorNot,
    /// Assignment operator: `=`
    OperatorAssignment,

    /// A lexer failure
    ///
    /// When encountering an error, the lexer attempts to recover and continue emitting further tokens
    Error(LexerError),
}
261
262impl Display for Token<'_> {
263    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
264        write!(f, "{}", self.kind())
265    }
266}
267
268impl Token<'_> {
269    /// Create an owned version of the token
270    pub fn into_owned(self) -> Token<'static> {
271        match self {
272            Token::Whitespace(c) => Token::Whitespace(c.into_owned().into()),
273            Token::SingleLineComment(c) => Token::SingleLineComment(c.into_owned().into()),
274            Token::MultiLineComment(c) => Token::MultiLineComment(c.into_owned().into()),
275            Token::DocComment(c) => Token::DocComment(c.into_owned().into()),
276            Token::InnerDocComment(c) => Token::InnerDocComment(c.into_owned().into()),
277            Token::Identifier(s) => Token::Identifier(s.into_owned().into()),
278            Token::Unit(s) => Token::Unit(s.into_owned().into()),
279            Token::LiteralInt(s) => Token::LiteralInt(s.into_owned().into()),
280            Token::LiteralFloat(s) => Token::LiteralFloat(s.into_owned().into()),
281            Token::LiteralString(s) => Token::LiteralString(s.into_owned().into()),
282
283            Token::KeywordMod => Token::KeywordMod,
284            Token::KeywordPart => Token::KeywordPart,
285            Token::KeywordSketch => Token::KeywordSketch,
286            Token::KeywordOp => Token::KeywordOp,
287            Token::KeywordFn => Token::KeywordFn,
288            Token::KeywordIf => Token::KeywordIf,
289            Token::KeywordElse => Token::KeywordElse,
290            Token::KeywordUse => Token::KeywordUse,
291            Token::KeywordAs => Token::KeywordAs,
292            Token::KeywordReturn => Token::KeywordReturn,
293            Token::KeywordPub => Token::KeywordPub,
294            Token::KeywordConst => Token::KeywordConst,
295            Token::KeywordProp => Token::KeywordProp,
296            Token::KeywordInit => Token::KeywordInit,
297            Token::KeywordAssembly => Token::KeywordAssembly,
298            Token::KeywordPlugin => Token::KeywordPlugin,
299            Token::KeywordMaterial => Token::KeywordMaterial,
300            Token::KeywordUnit => Token::KeywordUnit,
301            Token::KeywordEnum => Token::KeywordEnum,
302            Token::KeywordStruct => Token::KeywordStruct,
303            Token::KeywordMatch => Token::KeywordMatch,
304            Token::KeywordType => Token::KeywordType,
305            Token::KeywordExtern => Token::KeywordExtern,
306            Token::LiteralBool(l) => Token::LiteralBool(l),
307            Token::SigilColon => Token::SigilColon,
308            Token::SigilSemiColon => Token::SigilSemiColon,
309            Token::SigilDoubleColon => Token::SigilDoubleColon,
310            Token::SigilOpenBracket => Token::SigilOpenBracket,
311            Token::SigilCloseBracket => Token::SigilCloseBracket,
312            Token::SigilOpenSquareBracket => Token::SigilOpenSquareBracket,
313            Token::SigilCloseSquareBracket => Token::SigilCloseSquareBracket,
314            Token::SigilOpenCurlyBracket => Token::SigilOpenCurlyBracket,
315            Token::SigilCloseCurlyBracket => Token::SigilCloseCurlyBracket,
316            Token::SigilHash => Token::SigilHash,
317            Token::SigilDot => Token::SigilDot,
318            Token::SigilComma => Token::SigilComma,
319            Token::SigilDoubleDot => Token::SigilDoubleDot,
320            Token::SigilAt => Token::SigilAt,
321            Token::SigilSingleArrow => Token::SigilSingleArrow,
322            Token::OperatorAdd => Token::OperatorAdd,
323            Token::OperatorSubtract => Token::OperatorSubtract,
324            Token::OperatorMultiply => Token::OperatorMultiply,
325            Token::OperatorDivide => Token::OperatorDivide,
326            Token::OperatorUnion => Token::OperatorUnion,
327            Token::OperatorIntersect => Token::OperatorIntersect,
328            Token::OperatorPowerXor => Token::OperatorPowerXor,
329            Token::OperatorGreaterThan => Token::OperatorGreaterThan,
330            Token::OperatorLessThan => Token::OperatorLessThan,
331            Token::OperatorGreaterEqual => Token::OperatorGreaterEqual,
332            Token::OperatorLessEqual => Token::OperatorLessEqual,
333            Token::OperatorNear => Token::OperatorNear,
334            Token::OperatorEqual => Token::OperatorEqual,
335            Token::OperatorNotEqual => Token::OperatorNotEqual,
336            Token::OperatorAnd => Token::OperatorAnd,
337            Token::OperatorOr => Token::OperatorOr,
338            Token::OperatorXor => Token::OperatorXor,
339            Token::OperatorNot => Token::OperatorNot,
340            Token::OperatorAssignment => Token::OperatorAssignment,
341            Token::FormatStringStart => Token::FormatStringStart,
342            Token::FormatStringEnd => Token::FormatStringEnd,
343            Token::StringContent(s) => Token::StringContent(s.into_owned().into()),
344            Token::Character(c) => Token::Character(c),
345            Token::StringFormatOpen => Token::StringFormatOpen,
346            Token::StringFormatClose => Token::StringFormatClose,
347            Token::SigilQuote => Token::SigilQuote,
348            Token::Error(e) => Token::Error(e),
349            Token::StringFormatPrecision(c) => Token::StringFormatPrecision(c.into_owned().into()),
350            Token::StringFormatWidth(c) => Token::StringFormatWidth(c.into_owned().into()),
351        }
352    }
353
354    /// Get a descriptive name or symbol for the token type
355    pub fn kind(&self) -> &'static str {
356        match self {
357            Token::Whitespace(_) => "whitespace",
358            Token::SingleLineComment(_) => "single-line comment",
359            Token::MultiLineComment(_) => "multi-line comment",
360            Token::DocComment(_) => "doc comment",
361            Token::InnerDocComment(_) => "inner doc comment",
362            Token::KeywordMod => "mod",
363            Token::KeywordPart => "part",
364            Token::KeywordSketch => "sketch",
365            Token::KeywordOp => "op",
366            Token::KeywordFn => "fn",
367            Token::KeywordIf => "if",
368            Token::KeywordElse => "else",
369            Token::KeywordUse => "use",
370            Token::KeywordAs => "as",
371            Token::KeywordReturn => "return",
372            Token::KeywordPub => "pub",
373            Token::KeywordConst => "const",
374            Token::KeywordProp => "prop",
375            Token::KeywordInit => "init",
376            Token::KeywordPlugin => "__plugin",
377            Token::KeywordAssembly => "assembly",
378            Token::KeywordMaterial => "material",
379            Token::KeywordUnit => "unit",
380            Token::KeywordEnum => "enum",
381            Token::KeywordStruct => "struct",
382            Token::KeywordMatch => "match",
383            Token::KeywordType => "type",
384            Token::KeywordExtern => "extern",
385            Token::Identifier(_) => "identifier",
386            Token::Unit(_) => "unit",
387            Token::LiteralInt(_) => "integer literal",
388            Token::LiteralFloat(_) => "float literal",
389            Token::LiteralBool(_) => "boolean literal",
390            Token::LiteralString(_) => "string literal",
391            Token::FormatStringStart => "start of string",
392            Token::FormatStringEnd => "end of string",
393            Token::StringContent(_) => "string content",
394            Token::Character(_) => "escaped character",
395            Token::StringFormatOpen => "string format start",
396            Token::StringFormatClose => "string format end",
397            Token::SigilColon => ":",
398            Token::SigilSemiColon => ";",
399            Token::SigilDoubleColon => "::",
400            Token::SigilOpenBracket => "(",
401            Token::SigilCloseBracket => ")",
402            Token::SigilOpenSquareBracket => "[",
403            Token::SigilCloseSquareBracket => "]",
404            Token::SigilOpenCurlyBracket => "{",
405            Token::SigilCloseCurlyBracket => "}",
406            Token::SigilHash => "#",
407            Token::SigilDot => ".",
408            Token::SigilComma => ",",
409            Token::SigilDoubleDot => "..",
410            Token::SigilAt => "@",
411            Token::SigilSingleArrow => "->",
412            Token::SigilQuote => "\"",
413            Token::OperatorAdd => "+",
414            Token::OperatorSubtract => "-",
415            Token::OperatorMultiply => "*",
416            Token::OperatorDivide => "/",
417            Token::OperatorUnion => "|",
418            Token::OperatorIntersect => "&",
419            Token::OperatorPowerXor => "^",
420            Token::OperatorGreaterThan => ">",
421            Token::OperatorLessThan => "<",
422            Token::OperatorGreaterEqual => ">=",
423            Token::OperatorLessEqual => "<=",
424            Token::OperatorNear => "!",
425            Token::OperatorEqual => "==",
426            Token::OperatorNotEqual => "!=",
427            Token::OperatorAnd => "and",
428            Token::OperatorOr => "or",
429            Token::OperatorXor => "xor",
430            Token::OperatorNot => "not",
431            Token::OperatorAssignment => "=",
432            Token::StringFormatPrecision(_) => "format precision",
433            Token::StringFormatWidth(_) => "format width",
434            Token::Error(e) => e.kind(),
435        }
436    }
437
438    /// Check if the token is an error
439    pub fn is_error(&self) -> bool {
440        matches!(self, Token::Error(_))
441    }
442}