Skip to main content

microcad_syntax/tokens/
mod.rs

1// Copyright © 2026 The µcad authors <info@ucad.xyz>
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
4use crate::Span;
5use crate::tokens::from_logos::from_logos;
6use crate::tokens::logos::NormalToken;
7use ::logos::Lexer;
8use std::borrow::Cow;
9use std::fmt::{Display, Formatter};
10use thiserror::Error;
11
12mod from_logos;
13mod logos;
14
15/// A source token with attached span
16#[derive(Debug, PartialEq, Clone)]
17pub struct SpannedToken<T> {
18    /// the span of the token
19    pub span: Span,
20    /// the token
21    pub token: T,
22}
23
24impl SpannedToken<Token<'_>> {
25    /// Create an owned version of the token
26    pub fn into_owned(self) -> SpannedToken<Token<'static>> {
27        SpannedToken {
28            span: self.span,
29            token: self.token.into_owned(),
30        }
31    }
32}
33
34impl<T> SpannedToken<T> {
35    /// Create a [`SpannedToken`] from [`Span`] and token
36    pub fn new(span: Span, token: T) -> Self {
37        SpannedToken { span, token }
38    }
39}
40
41impl<T: PartialEq> PartialEq<T> for SpannedToken<T> {
42    fn eq(&self, other: &T) -> bool {
43        self.token.eq(other)
44    }
45}
46
47/// Possible errors encountered while tokenizing
48#[derive(Debug, Default, Clone, PartialEq, Error)]
49pub enum LexerError {
50    /// No valid token was found for the character
51    #[default]
52    #[error("No valid token")]
53    NoValidToken,
54    /// A format string was encountered that wasn't closed correctly
55    #[error("Unclosed format string")]
56    UnclosedStringFormat(Span),
57    /// A string was encountered that wasn't closed correctly
58    #[error("Unclosed string")]
59    UnclosedString(Span),
60}
61
62impl LexerError {
63    /// Get a descriptive name of the error type
64    pub fn kind(&self) -> &'static str {
65        match self {
66            LexerError::NoValidToken => "no valid token",
67            LexerError::UnclosedStringFormat(_) => "unclosed format string",
68            LexerError::UnclosedString(_) => "unclosed string",
69        }
70    }
71}
72
73impl LexerError {
74    /// Get the span of the error
75    pub fn span(&self) -> Option<Span> {
76        match self {
77            LexerError::UnclosedStringFormat(span) => Some(span.clone()),
78            LexerError::UnclosedString(span) => Some(span.clone()),
79            _ => None,
80        }
81    }
82}
83
84/// Tokenize a µcad source string into an iterator of tokens.
85pub fn lex<'a>(input: &'a str) -> impl Iterator<Item = SpannedToken<Token<'a>>> {
86    from_logos(Lexer::<NormalToken>::new(input).spanned())
87}
88
89/// Source token for µcad files
90#[derive(Debug, PartialEq, Clone)]
91pub enum Token<'a> {
92    /// A single-line comment, starting with `//`
93    SingleLineComment(Cow<'a, str>),
94    /// A multi-line comment, starting with `/*` and ending with `*/`
95    MultiLineComment(Cow<'a, str>),
96    /// A doc-comment, starting with `///`
97    DocComment(Cow<'a, str>),
98
99    /// The `mod` keyword
100    KeywordMod,
101    /// The `part` keyword
102    KeywordPart,
103    /// The `sketch` keyword
104    KeywordSketch,
105    /// The `op` keyword
106    KeywordOp,
107    /// The `fn` keyword
108    KeywordFn,
109    /// The `if` keyword
110    KeywordIf,
111    /// The `else` keyword
112    KeywordElse,
113    /// The `use` keyword
114    KeywordUse,
115    /// The `as` keyword
116    KeywordAs,
117    /// The `return` keyword
118    KeywordReturn,
119    /// The `pub` keyword
120    KeywordPub,
121    /// The `const` keyword
122    KeywordConst,
123    /// The `prop` keyword
124    KeywordProp,
125    /// The `init` keyword
126    KeywordInit,
127
128    /// An identifier, alphanumeric, starting with either an alpha character or a underscore
129    Identifier(Cow<'a, str>),
130    /// A unit identifier
131    Unit(Cow<'a, str>),
132
133    /// An integer literal
134    LiteralInt(Cow<'a, str>),
135    /// A float literal
136    LiteralFloat(Cow<'a, str>),
137    /// A boolean literal
138    LiteralBool(bool),
139    /// A string literal
140    LiteralString(Cow<'a, str>),
141
142    /// Double-quote indicating the start of a format string
143    FormatStringStart,
144    /// Double-quote indicating the end of a format string
145    FormatStringEnd,
146    /// Literal string content of a format string
147    StringContent(Cow<'a, str>),
148    /// Escaped character inside a format string
149    Character(char),
150    /// The start of the format expression inside a format string
151    StringFormatOpen,
152    /// The end of the format expression inside a format string
153    StringFormatClose,
154    /// The precision specification of the format expression inside a format string
155    StringFormatPrecision(Cow<'a, str>),
156    /// The width specification of the format expression inside a format string
157    StringFormatWidth(Cow<'a, str>),
158
159    /// The `:` symbol
160    SigilColon,
161    /// The `;` symbol
162    SigilSemiColon,
163    /// The `::` symbol
164    SigilDoubleColon,
165    /// The `(` symbol
166    SigilOpenBracket,
167    /// The `)` symbol
168    SigilCloseBracket,
169    /// The `[` symbol
170    SigilOpenSquareBracket,
171    /// The `]` symbol
172    SigilCloseSquareBracket,
173    /// The `{` symbol
174    SigilOpenCurlyBracket,
175    /// The `}` symbol
176    SigilCloseCurlyBracket,
177    /// The `#` symbol
178    SigilHash,
179    /// The `.` symbol
180    SigilDot,
181    /// The `,` symbol
182    SigilComma,
183    /// The `..` symbol
184    SigilDoubleDot,
185    /// The `@` symbol
186    SigilAt,
187    /// The `->` symbol
188    SigilSingleArrow,
189    /// The `"` symbol
190    SigilQuote,
191
192    /// Add operator: `+`
193    OperatorAdd,
194    /// Subtract operator: `-`
195    OperatorSubtract,
196    /// Multiply operator: `-`
197    OperatorMultiply,
198    /// Divide operator: `/`
199    OperatorDivide,
200    /// Union operator: `|`
201    OperatorUnion,
202    /// Intersect operator: `&`
203    OperatorIntersect,
204    /// xor operator: `^`
205    OperatorPowerXor,
206    /// Greater-than operator: `>`
207    OperatorGreaterThan,
208    /// Less-than operator: `<`
209    OperatorLessThan,
210    /// Greater-or-equal operator: `>=`
211    OperatorGreaterEqual,
212    /// Less-or-equal operator: `<=`
213    OperatorLessEqual,
214    /// Near operator: `~`
215    OperatorNear,
216    /// Equal operator: `==`
217    OperatorEqual,
218    /// Not-equal operator: `!=`
219    OperatorNotEqual,
220    /// And operator: `and`
221    OperatorAnd,
222    /// Or operator: `or`
223    OperatorOr,
224    /// Xor operator: `xor'
225    OperatorXor,
226    /// Not operator: `!`
227    OperatorNot,
228    /// Assignment operator: `=`
229    OperatorAssignment,
230
231    /// A lexer failure
232    ///
233    /// When encountering an error, the lexer attempts to recover and continue emitting further tokens
234    Error(LexerError),
235}
236
237impl Display for Token<'_> {
238    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
239        write!(f, "{}", self.kind())
240    }
241}
242
243impl Token<'_> {
244    /// Create an owned version of the token
245    pub fn into_owned(self) -> Token<'static> {
246        match self {
247            Token::SingleLineComment(c) => Token::SingleLineComment(c.into_owned().into()),
248            Token::MultiLineComment(c) => Token::MultiLineComment(c.into_owned().into()),
249            Token::DocComment(c) => Token::DocComment(c.into_owned().into()),
250            Token::Identifier(s) => Token::Identifier(s.into_owned().into()),
251            Token::Unit(s) => Token::Unit(s.into_owned().into()),
252            Token::LiteralInt(s) => Token::LiteralInt(s.into_owned().into()),
253            Token::LiteralFloat(s) => Token::LiteralFloat(s.into_owned().into()),
254            Token::LiteralString(s) => Token::LiteralString(s.into_owned().into()),
255
256            Token::KeywordMod => Token::KeywordMod,
257            Token::KeywordPart => Token::KeywordPart,
258            Token::KeywordSketch => Token::KeywordSketch,
259            Token::KeywordOp => Token::KeywordOp,
260            Token::KeywordFn => Token::KeywordFn,
261            Token::KeywordIf => Token::KeywordIf,
262            Token::KeywordElse => Token::KeywordElse,
263            Token::KeywordUse => Token::KeywordUse,
264            Token::KeywordAs => Token::KeywordAs,
265            Token::KeywordReturn => Token::KeywordReturn,
266            Token::KeywordPub => Token::KeywordPub,
267            Token::KeywordConst => Token::KeywordConst,
268            Token::KeywordProp => Token::KeywordProp,
269            Token::KeywordInit => Token::KeywordInit,
270            Token::LiteralBool(l) => Token::LiteralBool(l),
271            Token::SigilColon => Token::SigilColon,
272            Token::SigilSemiColon => Token::SigilSemiColon,
273            Token::SigilDoubleColon => Token::SigilDoubleColon,
274            Token::SigilOpenBracket => Token::SigilOpenBracket,
275            Token::SigilCloseBracket => Token::SigilCloseBracket,
276            Token::SigilOpenSquareBracket => Token::SigilOpenSquareBracket,
277            Token::SigilCloseSquareBracket => Token::SigilCloseSquareBracket,
278            Token::SigilOpenCurlyBracket => Token::SigilOpenCurlyBracket,
279            Token::SigilCloseCurlyBracket => Token::SigilCloseCurlyBracket,
280            Token::SigilHash => Token::SigilHash,
281            Token::SigilDot => Token::SigilDot,
282            Token::SigilComma => Token::SigilComma,
283            Token::SigilDoubleDot => Token::SigilDoubleDot,
284            Token::SigilAt => Token::SigilAt,
285            Token::SigilSingleArrow => Token::SigilSingleArrow,
286            Token::OperatorAdd => Token::OperatorAdd,
287            Token::OperatorSubtract => Token::OperatorSubtract,
288            Token::OperatorMultiply => Token::OperatorMultiply,
289            Token::OperatorDivide => Token::OperatorDivide,
290            Token::OperatorUnion => Token::OperatorUnion,
291            Token::OperatorIntersect => Token::OperatorIntersect,
292            Token::OperatorPowerXor => Token::OperatorPowerXor,
293            Token::OperatorGreaterThan => Token::OperatorGreaterThan,
294            Token::OperatorLessThan => Token::OperatorLessThan,
295            Token::OperatorGreaterEqual => Token::OperatorGreaterEqual,
296            Token::OperatorLessEqual => Token::OperatorLessEqual,
297            Token::OperatorNear => Token::OperatorNear,
298            Token::OperatorEqual => Token::OperatorEqual,
299            Token::OperatorNotEqual => Token::OperatorNotEqual,
300            Token::OperatorAnd => Token::OperatorAnd,
301            Token::OperatorOr => Token::OperatorOr,
302            Token::OperatorXor => Token::OperatorXor,
303            Token::OperatorNot => Token::OperatorNot,
304            Token::OperatorAssignment => Token::OperatorAssignment,
305            Token::FormatStringStart => Token::FormatStringStart,
306            Token::FormatStringEnd => Token::FormatStringEnd,
307            Token::StringContent(s) => Token::StringContent(s.into_owned().into()),
308            Token::Character(c) => Token::Character(c),
309            Token::StringFormatOpen => Token::StringFormatOpen,
310            Token::StringFormatClose => Token::StringFormatClose,
311            Token::SigilQuote => Token::SigilQuote,
312            Token::Error(e) => Token::Error(e),
313            Token::StringFormatPrecision(c) => Token::StringFormatPrecision(c.into_owned().into()),
314            Token::StringFormatWidth(c) => Token::StringFormatWidth(c.into_owned().into()),
315        }
316    }
317
318    /// Get a descriptive name or symbol for the token type
319    pub fn kind(&self) -> &'static str {
320        match self {
321            Token::SingleLineComment(_) => "single-line comment",
322            Token::MultiLineComment(_) => "multi-line comment",
323            Token::DocComment(_) => "doc comment",
324            Token::KeywordMod => "mod",
325            Token::KeywordPart => "part",
326            Token::KeywordSketch => "sketch",
327            Token::KeywordOp => "op",
328            Token::KeywordFn => "fn",
329            Token::KeywordIf => "if",
330            Token::KeywordElse => "else",
331            Token::KeywordUse => "use",
332            Token::KeywordAs => "as",
333            Token::KeywordReturn => "return",
334            Token::KeywordPub => "pub",
335            Token::KeywordConst => "const",
336            Token::KeywordProp => "prop",
337            Token::KeywordInit => "init",
338            Token::Identifier(_) => "identifier",
339            Token::Unit(_) => "unit",
340            Token::LiteralInt(_) => "integer literal",
341            Token::LiteralFloat(_) => "float literal",
342            Token::LiteralBool(_) => "boolean literal",
343            Token::LiteralString(_) => "string literal",
344            Token::FormatStringStart => "start of string",
345            Token::FormatStringEnd => "end of string",
346            Token::StringContent(_) => "string content",
347            Token::Character(_) => "escaped character",
348            Token::StringFormatOpen => "string format start",
349            Token::StringFormatClose => "string format end",
350            Token::SigilColon => ":",
351            Token::SigilSemiColon => ";",
352            Token::SigilDoubleColon => "::",
353            Token::SigilOpenBracket => "(",
354            Token::SigilCloseBracket => ")",
355            Token::SigilOpenSquareBracket => "[",
356            Token::SigilCloseSquareBracket => "]",
357            Token::SigilOpenCurlyBracket => "{",
358            Token::SigilCloseCurlyBracket => "}",
359            Token::SigilHash => "#",
360            Token::SigilDot => ".",
361            Token::SigilComma => ",",
362            Token::SigilDoubleDot => "..",
363            Token::SigilAt => "@",
364            Token::SigilSingleArrow => "->",
365            Token::SigilQuote => "\"",
366            Token::OperatorAdd => "+",
367            Token::OperatorSubtract => "-",
368            Token::OperatorMultiply => "*",
369            Token::OperatorDivide => "/",
370            Token::OperatorUnion => "|",
371            Token::OperatorIntersect => "&",
372            Token::OperatorPowerXor => "^",
373            Token::OperatorGreaterThan => ">",
374            Token::OperatorLessThan => "<",
375            Token::OperatorGreaterEqual => ">=",
376            Token::OperatorLessEqual => "<=",
377            Token::OperatorNear => "!",
378            Token::OperatorEqual => "==",
379            Token::OperatorNotEqual => "!=",
380            Token::OperatorAnd => "and",
381            Token::OperatorOr => "or",
382            Token::OperatorXor => "xor",
383            Token::OperatorNot => "not",
384            Token::OperatorAssignment => "=",
385            Token::StringFormatPrecision(_) => "format precision",
386            Token::StringFormatWidth(_) => "format width",
387            Token::Error(e) => e.kind(),
388        }
389    }
390
391    /// Check if the token is an error
392    pub fn is_error(&self) -> bool {
393        matches!(self, Token::Error(_))
394    }
395}