1use crate::lazy_collection;
2use crate::string_clip::StringClip;
3use enum_kinds::EnumKind;
4use logos::{Lexer as LogosLexer, Logos, SpannedIter};
5use ropey::Rope;
6use std::{
7 collections::{HashMap, HashSet},
8 mem::{Discriminant, discriminant},
9 sync::LazyLock,
10};
11
/// Streaming lexer over RSML source text: wraps a spanned logos token
/// iterator and keeps a `Rope` copy of the same source for position queries.
pub struct RsmlLexer<'a> {
    // Spanned logos iterator borrowing the original `&'a str` source.
    token_stream: SpannedIter<'a, Token<'a>>,
    // Rope built once from the same source string in `new`.
    pub rope: Rope,
}
16
17impl<'a> RsmlLexer<'a> {
18 pub fn new(source: &'a str) -> Self {
19 Self {
20 token_stream: Token::lexer(source).spanned(),
21 rope: Rope::from_str(source),
22 }
23 }
24
25 pub fn slice(&self) -> &'a str {
26 self.token_stream.slice()
27 }
28}
29
30impl<'a> Iterator for RsmlLexer<'a> {
31 type Item = SpannedToken<'a>;
32
33 fn next(&mut self) -> Option<Self::Item> {
34 let Some((token, span)) = self.token_stream.next() else {
35 return None;
36 };
37
38 match token {
39 Ok(token) => match token {
40 Token::CommentMulti(MultilineString { span, .. })
41 | Token::StringMulti(MultilineString { span, .. }) => {
42 Some(SpannedToken::new(span.0, token, span.1))
43 }
44
45 _ => Some(SpannedToken::new(span.start, token, span.end)),
46 },
47
48 Err(_) => Some(SpannedToken::new(span.start, Token::Error, span.end)),
49 }
50 }
51}
52
/// A token plus its absolute byte span: `(start, token, end)`.
#[derive(Debug, Clone)]
pub struct SpannedToken<'a>(pub usize, pub Token<'a>, pub usize);
55
56impl<'a> SpannedToken<'a> {
57 pub fn new(start: usize, value: Token<'a>, end: usize) -> Self {
58 Self(start, value, end)
59 }
60
61 #[inline(always)]
62 pub fn start(&self) -> usize {
63 self.0
64 }
65
66 #[inline(always)]
67 pub fn value(&self) -> &Token<'a> {
68 &self.1
69 }
70
71 #[inline(always)]
72 pub fn end(&self) -> usize {
73 self.2
74 }
75
76 #[inline(always)]
77 pub fn span(&self) -> (usize, usize) {
78 (self.0, self.2)
79 }
80}
81
/// Maps an empty string slice to `None`, any other slice to `Some(s)`.
/// Used by token callbacks whose captured payload may be empty.
fn str_to_option(s: &str) -> Option<&str> {
    // `is_empty` over `len() == 0` (clippy `len_zero`).
    if s.is_empty() { None } else { Some(s) }
}
85
/// Every lexical token RSML recognises. `&'a str` payloads borrow from the
/// source; `EnumKind` generates the payload-free mirror enum `TokenKind`.
#[derive(Logos, Clone, Debug, PartialEq, EnumKind)]
#[enum_kind(TokenKind, derive(Hash))]
#[logos(skip r"[ \t\n\r\f]+")]
// `ident`: plain identifiers, or kebab-case identifiers (`foo-bar-1`).
#[logos(subpattern ident = r"[_A-Za-z][_A-Za-z\d]*|[_A-Za-z]+(-[A-Za-z\d_]+)+")]
// `numsect`: digit run with optional `_` separators; `num`: int/float forms.
#[logos(subpattern numsect = r"_*[\d]+_*")]
#[logos(subpattern num = r"((?&numsect)+\.)?(?&numsect)+|\.(?&numsect)")]
pub enum Token<'a> {
    /// `--! …` directive; payload is the trimmed text after `--!`.
    #[regex(r"\-\-!.*", priority = 100, callback = |lex| lex.slice()[3..].trim())]
    Directive(&'a str),

    /// `--[[ … ]]` (optionally `--[=[ … ]=]`) multiline comment.
    #[regex(r"\-\-\[=*\[", priority = 99, callback = |lex| multiline_string_block_callback(lex, 2))]
    CommentMulti(MultilineString<'a>),

    /// `-- …` single-line comment; payload `None` when the comment is empty.
    // NOTE(review): `[^(\[\[)]` is a character class excluding `(`, `[`, `)`,
    // not a "not followed by `[[`" lookahead — confirm this is intended.
    #[regex(r"\-\-[^(\[\[)].*", priority = 1, callback = |lex| str_to_option(&lex.slice().clip(2, 0)))]
    #[regex(r"\-\-", priority = 1, callback = |_| None::<&str>)]
    CommentSingle(Option<&'a str>),

    #[token("@derive")]
    DeriveDeclaration,

    #[token("@macro")]
    MacroDeclaration,

    #[token("@priority")]
    PriorityDeclaration,

    #[token("@tween")]
    TweenDeclaration,

    /// `@name` — payload excludes the leading `@`.
    #[regex(r"@(?&ident)", callback = |lex| str_to_option(&lex.slice()[1..]))]
    QuerySelector(&'a str),

    /// `$!name` — payload excludes the `$!` prefix.
    #[regex(r"\$!(?&ident)?", callback = |lex| str_to_option(&lex.slice()[2..]))]
    StaticTokenIdentifier(&'a str),

    /// `$name` — payload excludes the `$` prefix.
    #[regex(r"\$(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    TokenIdentifier(&'a str),

    #[regex(r"(?&ident)")]
    Identifier(&'a str),

    /// `&name`; `None` when only `&` was written.
    #[regex(r"&(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    MacroArgIdentifier(Option<&'a str>),

    /// `name!` — payload is the identifier with the trailing `!` clipped.
    #[regex(r"(?&ident)!", callback = |lex| str_to_option(&lex.slice().clip(0, 1)))]
    MacroCallIdentifier(Option<&'a str>),

    #[token("=")]
    Equals,

    #[token(",")]
    Comma,

    #[token(";")]
    SemiColon,

    /// `#name` selector (distinct from `#hex` colors via the ident pattern).
    #[regex(r"#(?&ident)", callback = |lex| str_to_option(&lex.slice()[1..]))]
    NameSelector(&'a str),

    /// `.name`; `None` when only `.` was written.
    #[regex(r"\.(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    TagSelectorOrEnumPart(Option<&'a str>),

    /// `:name`; `None` when only `:` was written.
    #[regex(r":(?&ident)?", callback = |lex| str_to_option(&lex.slice()[1..]))]
    StateSelectorOrEnumPart(Option<&'a str>),

    /// `::name` — payload excludes the `::` prefix.
    #[regex(r"::(?&ident)", callback = |lex| str_to_option(&lex.slice()[2..]))]
    PseudoSelector(&'a str),

    #[token("->")]
    ReturnArrow,

    #[token(">")]
    ChildrenSelector,

    #[token(">>")]
    DescendantsSelector,

    #[token("{")]
    ScopeOpen,

    #[token("}")]
    ScopeClose,

    #[token("(")]
    ParensOpen,

    #[token(")")]
    ParensClose,

    #[token("/")]
    OpDiv,

    #[token("//")]
    OpFloorDiv,

    #[token("%")]
    OpMod,

    #[token("*")]
    OpMult,

    #[token("^")]
    OpPow,

    #[token("+")]
    OpAdd,

    #[token("-")]
    OpSub,

    /// `[[ … ]]` (optionally `[=[ … ]=]`) multiline string.
    #[regex(r"\[=*\[", priority = 98, callback = |lex| multiline_string_block_callback(lex, 0))]
    StringMulti(MultilineString<'a>),

    /// Single-line quoted string; payload has the surrounding quotes clipped.
    #[regex(r#""[^\"\n\t]*""#, callback = |lex| lex.slice().clip(1, 1))]
    #[regex(r#"'[^\'\n\t]*'"#, callback = |lex| lex.slice().clip(1, 1))]
    StringSingle(&'a str),

    #[regex(r"(?&num)", priority = 99)]
    Number(&'a str),

    /// Number with a `%` suffix (UDim scale); payload keeps the suffix.
    #[regex(r"(?&num)%", priority = 99)]
    NumberScale(&'a str),

    /// Number with a `px` suffix (UDim offset); payload keeps the suffix.
    #[regex(r"(?&num)px", priority = 99)]
    NumberOffset(&'a str),

    #[token("true")]
    #[token("false")]
    Boolean(&'a str),

    #[token("nil")]
    Nil,

    /// `tw:<name>[:<shade>]`, case-insensitive.
    #[regex(r"(?i)tw:[a-z]+(:\d+)?")]
    ColorTailwind(&'a str),

    /// `skin:<name>[:<shade>]`, case-insensitive.
    #[regex(r"(?i)skin:[a-z]+(:\d+)?")]
    ColorSkin(&'a str),

    #[regex(r"(?i)bc:[a-z]+")]
    ColorBrick(&'a str),

    #[regex(r"(?i)css:[a-z]+")]
    ColorCss(&'a str),

    /// `#rrggbb`-style hex color; digit count is not validated here.
    #[regex(r"#[\da-fA-F]+", priority = 99)]
    ColorHex(&'a str),

    /// Roblox asset URLs; payload is the full matched URL.
    #[regex(r"rbxassetid://\d*")]
    #[regex(r"(rbxasset|rbxthumb|rbxgameasset|rbxhttp|rbxtemp|https?)://[^) ]*")]
    RbxAsset(&'a str),

    #[regex(r"contentid://\d*", priority = 999)]
    RbxContent(&'a str),

    #[token("Enum")]
    EnumKeyword,

    // Never produced by logos itself: emitted by the `Iterator` wrapper for
    // unrecognised input.
    Error,

    // Sentinel variant, not produced by the lexer.
    None,
}
250
251impl<'a> Token<'a> {
252 #[inline(always)]
253 pub fn discriminant(&self) -> Discriminant<TokenKind> {
254 discriminant(&TokenKind::from(self))
255 }
256
257 #[inline(always)]
258 pub fn kind(&self) -> TokenKind {
259 TokenKind::from(self)
260 }
261}
262
263impl TokenKind {
264 pub fn name(&self) -> &'static str {
265 TOKEN_KIND_STRING_MAP
266 .get(self)
267 .map(|x| *x)
268 .unwrap_or_else(|| "**error**")
269 }
270}
271
/// Sub-lexer used while inside a multiline block: the only token it cares
/// about is the closing `]=*]` marker; whitespace is skipped and any other
/// input lexes as an error the caller ignores.
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(skip r"[ \t\n\r\f]+")]
enum MultilineStringToken {
    /// Closing marker: `]]`, `]=]`, `]==]`, …
    #[regex(r"\]=*\]")]
    ExitMultilineString,
}
278
/// Payload of a multiline string/comment token.
#[derive(Clone, Debug, PartialEq)]
pub struct MultilineString<'a> {
    /// `Ok(n)` when a closing `]=*]` with `n` equals signs matched the opener;
    /// `Err(n)` when the block hit end-of-source unterminated.
    pub nestedness: Result<usize, usize>,
    /// Source slice from two bytes past the block start up to the closing
    /// marker (or end-of-source when unterminated).
    pub content: &'a str,
    /// Absolute `(start, end)` byte span covering the whole block.
    pub span: (usize, usize),
}
285
/// Shared logos callback for `--[[ … ]]` comments and `[[ … ]]` strings.
///
/// Morphs a clone of the main lexer into a sub-lexer that only recognises
/// `]=*]` close markers, scans forward until one matches the opener's
/// `=`-count, then morphs the advanced position back into the main lexer.
///
/// `sub_amount` is the number of prefix bytes before the first `[` of the
/// opening marker: 2 for a comment's `--`, 0 for a plain string.
fn multiline_string_block_callback<'a>(
    lexer: &mut LogosLexer<'a, Token<'a>>,
    sub_amount: usize,
) -> MultilineString<'a> {
    let mut sub_lexer = lexer.clone().morph::<MultilineStringToken>();

    // Count of `=` signs in the opener: slice length minus the prefix
    // (`--` or nothing) minus the two bracket characters.
    let open_nestedness = sub_lexer.slice().len() - sub_amount - 2;
    let open_span_start = sub_lexer.span().start;

    // NOTE(review): this skips only 2 bytes, so for comments (`--[[`) and for
    // `=`-nested openers (`[=[`) the content still includes part of the
    // opening marker — confirm downstream clips it, or this is an off-by.
    let content_span_start = open_span_start + 2;

    // Scan forward; anything that is not a close marker lexes as an error
    // (or is skipped as whitespace) and is deliberately ignored.
    while let Some(token) = sub_lexer.next() {
        match token {
            Ok(MultilineStringToken::ExitMultilineString) => {
                let close_span = sub_lexer.span();
                // `=` count of this candidate `]=*]` marker.
                let close_nestedness = sub_lexer.slice().len() - 2;

                // Only a marker with matching nestedness closes the block.
                if open_nestedness == close_nestedness {
                    let data = MultilineString {
                        nestedness: Ok(open_nestedness),
                        content: &sub_lexer.source()[content_span_start..close_span.start],
                        span: (open_span_start, close_span.end),
                    };

                    // Hand the consumed position back to the outer lexer.
                    *lexer = sub_lexer.morph();

                    return data;
                }
            }
            _ => {}
        }
    }

    // Unterminated block: consume to end-of-source and report the expected
    // nestedness through `Err` so the parser can diagnose it.
    let data = MultilineString {
        nestedness: Err(open_nestedness),
        content: sub_lexer.source().clip(content_span_start, 0),
        span: (open_span_start, sub_lexer.source().len()),
    };

    *lexer = sub_lexer.morph();

    data
}
332
333pub const TOKEN_KIND_CONSTRUCT_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
334 TokenKind::ParensClose,
335 TokenKind::ScopeClose,
336 TokenKind::SemiColon,
337
338 TokenKind::DeriveDeclaration,
339 TokenKind::MacroDeclaration,
340 TokenKind::PriorityDeclaration,
341 TokenKind::TweenDeclaration
342};
343
344pub const TOKEN_KIND_MACRO_CALL_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
345 TokenKind::ParensClose,
346 TokenKind::ScopeClose,
347 TokenKind::ScopeOpen,
348 TokenKind::SemiColon,
349
350 TokenKind::DeriveDeclaration,
351 TokenKind::MacroDeclaration,
352 TokenKind::PriorityDeclaration,
353 TokenKind::TweenDeclaration
354};
355
356pub const TOKEN_KIND_INSIDE_PARENS_CONSTRUCT_DELIMITERS: LazyLock<HashSet<TokenKind>> = lazy_collection! {
357 TokenKind::ParensClose,
358};
359
360pub const TOKEN_KIND_ADD_SUB_PRECEDENCE: usize = 0;
361
362pub const TOKEN_KIND_OPERATOR_PRECEDENCE: LazyLock<HashMap<TokenKind, usize>> = lazy_collection! {
363 TokenKind::OpDiv => 1,
364 TokenKind::OpFloorDiv => 1,
365 TokenKind::OpMod => 1,
366 TokenKind::OpMult => 1,
367 TokenKind::OpPow => 1,
368 TokenKind::OpAdd => TOKEN_KIND_ADD_SUB_PRECEDENCE,
369 TokenKind::OpSub => TOKEN_KIND_ADD_SUB_PRECEDENCE,
370};
371
372const TOKEN_KIND_STRING_MAP: LazyLock<HashMap<TokenKind, &'static str>> = lazy_collection! {
373 TokenKind::CommentMulti => "`comment`",
374 TokenKind::CommentSingle => "`comment`",
375 TokenKind::Directive => "`directive`",
376 TokenKind::DeriveDeclaration => "\"@derive\"",
377 TokenKind::MacroDeclaration => "\"@macro\"",
378 TokenKind::PriorityDeclaration => "\"@priority\"",
379 TokenKind::TweenDeclaration => "\"@tween\"",
380 TokenKind::QuerySelector => "`query selector`",
381 TokenKind::Identifier => "`identifer`",
382 TokenKind::MacroArgIdentifier => "`macro argument`",
383 TokenKind::MacroCallIdentifier => "`macro call`",
384 TokenKind::Equals => "\"=\"",
385 TokenKind::Comma => "\",\"",
386 TokenKind::SemiColon => "\";\"",
387 TokenKind::NameSelector => "`name selector`",
388 TokenKind::TagSelectorOrEnumPart => "`tag selector`",
389 TokenKind::StateSelectorOrEnumPart => "`state selector`",
390 TokenKind::PseudoSelector => "`pseudo selector`",
391 TokenKind::ReturnArrow => "\"->\"",
392 TokenKind::ChildrenSelector => "\">\"",
393 TokenKind::DescendantsSelector => "\">>\"",
394 TokenKind::ScopeOpen => "\"{\"",
395 TokenKind::ScopeClose => "\"}\"",
396 TokenKind::ParensOpen => "\"(\"",
397 TokenKind::ParensClose => "\")\"",
398 TokenKind::StringMulti => "`string`",
399 TokenKind::StringSingle => "`string`",
400 TokenKind::Number => "`number`",
401 TokenKind::NumberScale => "`udim scale`",
402 TokenKind::NumberOffset => "`udim offset`",
403 TokenKind::ColorTailwind => "`tailwind color`",
404 TokenKind::ColorBrick => "`brick color`",
405 TokenKind::ColorCss => "`css color`",
406 TokenKind::ColorHex => "`hex color`",
407};