// rbx_rsml — lexer.rs

1use crate::lazy_collection;
2use crate::string_clip::StringClip;
3use enum_kinds::EnumKind;
4use logos::{Lexer as LogosLexer, Logos, SpannedIter};
5use ropey::Rope;
6use std::{
7    collections::{HashMap, HashSet},
8    mem::{Discriminant, discriminant},
9    sync::LazyLock,
10};
11
/// Streaming RSML lexer: pairs a spanned logos token iterator with a rope
/// copy of the source text (the rope is exposed for position lookups by
/// consumers).
pub struct RsmlLexer<'a> {
    token_stream: SpannedIter<'a, Token<'a>>,
    pub rope: Rope,
}
16
17impl<'a> RsmlLexer<'a> {
18    pub fn new(source: &'a str) -> Self {
19        Self {
20            token_stream: Token::lexer(source).spanned(),
21            rope: Rope::from_str(source),
22        }
23    }
24
25    pub fn slice(&self) -> &'a str {
26        self.token_stream.slice()
27    }
28}
29
30impl<'a> Iterator for RsmlLexer<'a> {
31    type Item = SpannedToken<'a>;
32
33    fn next(&mut self) -> Option<Self::Item> {
34        let Some((token, span)) = self.token_stream.next() else {
35            return None;
36        };
37
38        match token {
39            Ok(token) => match token {
40                Token::CommentMulti(MultilineString { span, .. })
41                | Token::StringMulti(MultilineString { span, .. }) => {
42                    Some(SpannedToken::new(span.0, token, span.1))
43                }
44
45                _ => Some(SpannedToken::new(span.start, token, span.end)),
46            },
47
48            Err(_) => Some(SpannedToken::new(span.start, Token::Error, span.end)),
49        }
50    }
51}
52
/// A token together with its byte span in the source: `(start, token, end)`.
#[derive(Debug, Clone)]
pub struct SpannedToken<'a>(pub usize, pub Token<'a>, pub usize);
55
56impl<'a> SpannedToken<'a> {
57    pub fn new(start: usize, value: Token<'a>, end: usize) -> Self {
58        Self(start, value, end)
59    }
60
61    #[inline(always)]
62    pub fn start(&self) -> usize {
63        self.0
64    }
65
66    #[inline(always)]
67    pub fn value(&self) -> &Token<'a> {
68        &self.1
69    }
70
71    #[inline(always)]
72    pub fn end(&self) -> usize {
73        self.2
74    }
75
76    #[inline(always)]
77    pub fn span(&self) -> (usize, usize) {
78        (self.0, self.2)
79    }
80}
81
/// Maps the empty string to `None` and any non-empty slice to `Some(slice)`.
fn str_to_option(str: &str) -> Option<&str> {
    // `is_empty` is the idiomatic (clippy-clean) form of `len() == 0`.
    if str.is_empty() { None } else { Some(str) }
}
85
/// Every token the RSML lexer can produce.
///
/// `EnumKind` derives the payload-free [`TokenKind`] mirror enum used for
/// hashing and comparing token variants without their payloads.
#[derive(Logos, Clone, Debug, PartialEq, EnumKind)]
#[enum_kind(TokenKind, derive(Hash))]
#[logos(skip r"[ \t\n\r\f]+")]
// `ident`: plain identifiers, plus kebab-case identifiers (`foo-bar`).
#[logos(subpattern ident = r"[_A-Za-z][_A-Za-z\d]*|[_A-Za-z]+(-[A-Za-z\d_]+)+")]
// `numsect`: a run of digits optionally padded with `_` separators.
#[logos(subpattern numsect = r"_*[\d]+_*")]
// `num`: integers, decimals (`1.5`) and leading-dot decimals (`.5`).
#[logos(subpattern num = r"((?&numsect)+\.)?(?&numsect)+|\.(?&numsect)")]
pub enum Token<'a> {
    /// `--!text` directive; payload is the text after `--!`, trimmed.
    #[regex(r"\-\-!.*", priority = 100, callback = |lex| lex.slice()[3..].trim())]
    Directive(&'a str),

    /// `--[[ ... ]]` (or `--[=[ ... ]=]`, etc.) multiline comment.
    #[regex(r"\-\-\[=*\[", priority = 99, callback = |lex| multiline_string_block_callback(lex, 2))]
    CommentMulti(MultilineString<'a>),

    /// `-- text` single-line comment; `None` when the comment body is empty.
    // NOTE(review): the class `[^(\[\[)]` negates the characters `(`, `[`
    // and `)` individually, not the two-character sequence `[[` — confirm
    // this is the intended way to avoid matching multiline openers.
    #[regex(r"\-\-[^(\[\[)].*", priority = 1, callback = |lex| str_to_option(&lex.slice().clip(2, 0)))]
    #[regex(r"\-\-", priority = 1, callback = |_| None::<&str>)]
    CommentSingle(Option<&'a str>),

    // When adding a new declaration make sure to
    // update the `DECLARATIONS` array located above.
    #[token("@derive")]
    DeriveDeclaration,

    #[token("@macro")]
    MacroDeclaration,

    #[token("@priority")]
    PriorityDeclaration,

    #[token("@tween")]
    TweenDeclaration,

    /// `@name` selector; payload omits the leading `@`.
    #[regex(r"@(?&ident)", callback = |lex| str_to_option(&lex.slice()[1..]))]
    QuerySelector(&'a str),

    /// `$!name`; payload omits the leading `$!`.
    #[regex(r"\$!(?&ident)?", callback = |lex| str_to_option(&lex.slice()[2..]))]
    StaticTokenIdentifier(&'a str),

    /// `$name`; payload omits the leading `$`.
    #[regex(r"\$(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    TokenIdentifier(&'a str),

    /// A bare identifier.
    #[regex(r"(?&ident)")]
    Identifier(&'a str),

    /// `&name` macro argument; `None` when the name is missing.
    #[regex(r"&(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    MacroArgIdentifier(Option<&'a str>),

    /// `name!` macro call; payload omits the trailing `!`.
    #[regex(r"(?&ident)!", callback = |lex| str_to_option(&lex.slice().clip(0, 1)))]
    MacroCallIdentifier(Option<&'a str>),

    #[token("=")]
    Equals,

    #[token(",")]
    Comma,

    #[token(";")]
    SemiColon,

    /// `#name` selector; payload omits the leading `#`.
    #[regex(r"#(?&ident)", callback = |lex| str_to_option(&lex.slice()[1..]))]
    NameSelector(&'a str),

    /// `.name` — tag selector or enum part; payload omits the leading `.`.
    #[regex(r"\.(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    TagSelectorOrEnumPart(Option<&'a str>),

    /// `:name` — state selector or enum part; payload omits the leading `:`.
    #[regex(r":(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    StateSelectorOrEnumPart(Option<&'a str>),

    /// `::name` pseudo selector; payload omits the leading `::`.
    #[regex(r"::(?&ident)", callback = |lex| str_to_option(&lex.slice()[2..]))]
    PseudoSelector(&'a str),

    #[token("->")]
    ReturnArrow,

    #[token(">")]
    ChildrenSelector,

    #[token(">>")]
    DescendantsSelector,

    #[token("{")]
    ScopeOpen,

    #[token("}")]
    ScopeClose,

    #[token("(")]
    ParensOpen,

    #[token(")")]
    ParensClose,

    // Arithmetic operators (precedence lives in
    // `TOKEN_KIND_OPERATOR_PRECEDENCE`).
    #[token("/")]
    OpDiv,

    #[token("//")]
    OpFloorDiv,

    #[token("%")]
    OpMod,

    #[token("*")]
    OpMult,

    #[token("^")]
    OpPow,

    #[token("+")]
    OpAdd,

    #[token("-")]
    OpSub,

    /// `[[ ... ]]` (or `[=[ ... ]=]`, etc.) multiline string.
    #[regex(r"\[=*\[", priority = 98, callback = |lex| multiline_string_block_callback(lex, 0))]
    StringMulti(MultilineString<'a>),

    /// Single-line string in double or single quotes; payload excludes the
    /// quote characters.
    #[regex(r#""[^\"\n\t]*""#, callback = |lex| lex.slice().clip(1, 1))]
    #[regex(r#"'[^\'\n\t]*'"#, callback = |lex| lex.slice().clip(1, 1))]
    StringSingle(&'a str),

    #[regex(r"(?&num)", priority = 99)]
    Number(&'a str),

    /// `<num>%` — UDim scale component (suffix included in payload).
    #[regex(r"(?&num)%", priority = 99)]
    NumberScale(&'a str),

    /// `<num>px` — UDim offset component (suffix included in payload).
    #[regex(r"(?&num)px", priority = 99)]
    NumberOffset(&'a str),

    #[token("true")]
    #[token("false")]
    Boolean(&'a str),

    #[token("nil")]
    Nil,

    /// Tailwind palette color, e.g. `tw:red:500` (case-insensitive).
    #[regex(r"(?i)tw:[a-z]+(:\d+)?")]
    ColorTailwind(&'a str),

    /// Skin palette color, e.g. `skin:name:500` (case-insensitive).
    #[regex(r"(?i)skin:[a-z]+(:\d+)?")]
    ColorSkin(&'a str),

    /// BrickColor name, e.g. `bc:white` (case-insensitive).
    #[regex(r"(?i)bc:[a-z]+")]
    ColorBrick(&'a str),

    /// CSS color name, e.g. `css:red` (case-insensitive).
    #[regex(r"(?i)css:[a-z]+")]
    ColorCss(&'a str),

    /// Hex color literal, e.g. `#ff00aa` (leading `#` included in payload).
    #[regex(r"#[\da-fA-F]+", priority = 99)]
    ColorHex(&'a str),

    /// Roblox asset URI (whole URI is the payload).
    #[regex(r"rbxassetid://\d*")]
    #[regex(r"(rbxasset|rbxthumb|rbxgameasset|rbxhttp|rbxtemp|https?)://[^) ]*")]
    RbxAsset(&'a str),

    #[regex(r"contentid://\d*", priority = 999)]
    RbxContent(&'a str),

    #[token("Enum")]
    EnumKeyword,

    /// Not produced by logos itself — `RsmlLexer::next` substitutes this
    /// variant whenever the underlying lexer reports an error.
    Error,

    /// Not produced by logos (no pattern attached); constructed manually by
    /// consumers of this enum.
    None,
}
250
251impl<'a> Token<'a> {
252    #[inline(always)]
253    pub fn discriminant(&self) -> Discriminant<TokenKind> {
254        discriminant(&TokenKind::from(self))
255    }
256
257    #[inline(always)]
258    pub fn kind(&self) -> TokenKind {
259        TokenKind::from(self)
260    }
261}
262
263impl TokenKind {
264    pub fn name(&self) -> &'static str {
265        TOKEN_KIND_STRING_MAP
266            .get(self)
267            .map(|x| *x)
268            .unwrap_or_else(|| "**error**")
269    }
270}
271
/// Token set used while scanning the interior of a multiline string/comment
/// block; only the closing delimiter is meaningful, everything else lexes as
/// an error and is skipped by the caller.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\r\f]+")]
enum MultilineStringToken {
    /// Closing delimiter: `]`, any number of `=`, `]`.
    #[regex(r"\]=*\]")]
    ExitMultilineString,
}
278
/// Payload of a multiline string or comment token.
#[derive(Clone, Debug, PartialEq)]
pub struct MultilineString<'a> {
    // Number of `=` signs in the delimiter: `Ok` when the block was properly
    // closed, `Err` when it ran unterminated to the end of input.
    pub nestedness: Result<usize, usize>,
    // Raw text between the opening and closing delimiters.
    pub content: &'a str,
    // `(start, end)` byte span of the whole block, delimiters included.
    pub span: (usize, usize),
}
285
/// Shared logos callback for `CommentMulti`/`StringMulti`: scans forward from
/// the just-matched opening `[=*[` delimiter until a closing `]=*]` with the
/// same number of `=` signs is found, then morphs the lexer back so normal
/// tokenization resumes after the block.
///
/// `sub_amount` is the number of leading characters before the first `[`
/// (2 for the `--` of a multiline comment, 0 for a plain multiline string).
fn multiline_string_block_callback<'a>(
    lexer: &mut LogosLexer<'a, Token<'a>>,
    sub_amount: usize,
) -> MultilineString<'a> {
    // Switch token sets without losing position; the sub-lexer only knows
    // how to spot closing delimiters.
    let mut sub_lexer = lexer.clone().morph::<MultilineStringToken>();

    // Subtracts by `sub_amount` to account for leading characters (typically `--` for multi-line comments).
    // Subtracts by 2 to account for `[` either side of the equal signs.
    let open_nestedness = sub_lexer.slice().len() - sub_amount - 2;
    let open_span_start = sub_lexer.span().start;

    // NOTE(review): this skips exactly 2 bytes, so for `--[[` or `[=[`
    // openings the content slice still begins inside the opening delimiter —
    // confirm whether callers compensate or whether this should be
    // `open_span_start + sub_amount + 2 + open_nestedness`.
    let content_span_start = open_span_start + 2;

    while let Some(token) = sub_lexer.next() {
        match token {
            Ok(MultilineStringToken::ExitMultilineString) => {
                let close_span = sub_lexer.span();
                // Subtracts by 2 to account for `]` either side of the equal signs.
                let close_nestedness = sub_lexer.slice().len() - 2;

                // Only a closer with a matching number of `=` signs ends the
                // block; mismatched closers are part of the content.
                if open_nestedness == close_nestedness {
                    let data = MultilineString {
                        nestedness: Ok(open_nestedness),
                        content: &sub_lexer.source()[content_span_start..close_span.start],
                        span: (open_span_start, close_span.end),
                    };

                    // Hand the advanced position back to the main lexer.
                    *lexer = sub_lexer.morph();

                    return data;
                }
            }
            // Skip errors (any non-delimiter text inside the block).
            _ => {}
        }
    }

    // Unterminated block: content runs to end of input and `nestedness` is
    // reported as `Err` so the parser can diagnose it.
    let data = MultilineString {
        nestedness: Err(open_nestedness),
        content: sub_lexer.source().clip(content_span_start, 0),
        span: (open_span_start, sub_lexer.source().len()),
    };

    *lexer = sub_lexer.morph();

    data
}
332
333pub const TOKEN_KIND_CONSTRUCT_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
334    TokenKind::ParensClose,
335    TokenKind::ScopeClose,
336    TokenKind::SemiColon,
337
338    TokenKind::DeriveDeclaration,
339    TokenKind::MacroDeclaration,
340    TokenKind::PriorityDeclaration,
341    TokenKind::TweenDeclaration
342};
343
344pub const TOKEN_KIND_MACRO_CALL_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
345    TokenKind::ParensClose,
346    TokenKind::ScopeClose,
347    TokenKind::ScopeOpen,
348    TokenKind::SemiColon,
349
350    TokenKind::DeriveDeclaration,
351    TokenKind::MacroDeclaration,
352    TokenKind::PriorityDeclaration,
353    TokenKind::TweenDeclaration
354};
355
356pub const TOKEN_KIND_INSIDE_PARENS_CONSTRUCT_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
357    TokenKind::ParensClose,
358};
359
360pub const TOKEN_KIND_ADD_SUB_PRECEDENCE: usize = 0;
361
362pub const TOKEN_KIND_OPERATOR_PRECEDENCE: LazyLock<HashMap<TokenKind, usize>> = lazy_collection! {
363    TokenKind::OpDiv => 1,
364    TokenKind::OpFloorDiv => 1,
365    TokenKind::OpMod => 1,
366    TokenKind::OpMult => 1,
367    TokenKind::OpPow => 1,
368    TokenKind::OpAdd => TOKEN_KIND_ADD_SUB_PRECEDENCE,
369    TokenKind::OpSub => TOKEN_KIND_ADD_SUB_PRECEDENCE,
370};
371
372const TOKEN_KIND_STRING_MAP: LazyLock<HashMap<TokenKind, &'static str>> = lazy_collection! {
373    TokenKind::CommentMulti => "`comment`",
374    TokenKind::CommentSingle => "`comment`",
375    TokenKind::Directive => "`directive`",
376    TokenKind::DeriveDeclaration => "\"@derive\"",
377    TokenKind::MacroDeclaration => "\"@macro\"",
378    TokenKind::PriorityDeclaration => "\"@priority\"",
379    TokenKind::TweenDeclaration => "\"@tween\"",
380    TokenKind::QuerySelector => "`query selector`",
381    TokenKind::Identifier => "`identifer`",
382    TokenKind::MacroArgIdentifier => "`macro argument`",
383    TokenKind::MacroCallIdentifier => "`macro call`",
384    TokenKind::Equals => "\"=\"",
385    TokenKind::Comma => "\",\"",
386    TokenKind::SemiColon => "\";\"",
387    TokenKind::NameSelector => "`name selector`",
388    TokenKind::TagSelectorOrEnumPart => "`tag selector`",
389    TokenKind::StateSelectorOrEnumPart => "`state selector`",
390    TokenKind::PseudoSelector => "`pseudo selector`",
391    TokenKind::ReturnArrow => "\"->\"",
392    TokenKind::ChildrenSelector => "\">\"",
393    TokenKind::DescendantsSelector => "\">>\"",
394    TokenKind::ScopeOpen => "\"{\"",
395    TokenKind::ScopeClose => "\"}\"",
396    TokenKind::ParensOpen => "\"(\"",
397    TokenKind::ParensClose => "\")\"",
398    TokenKind::StringMulti => "`string`",
399    TokenKind::StringSingle => "`string`",
400    TokenKind::Number => "`number`",
401    TokenKind::NumberScale => "`udim scale`",
402    TokenKind::NumberOffset => "`udim offset`",
403    TokenKind::ColorTailwind => "`tailwind color`",
404    TokenKind::ColorBrick => "`brick color`",
405    TokenKind::ColorCss => "`css color`",
406    TokenKind::ColorHex => "`hex color`",
407};