1use crate::parser::SyntaxKind;
10
/// Mutable state threaded through the lexing rules while tokenizing.
#[derive(Default)]
pub struct LexState {
    // One entry per template string currently being lexed. Each entry counts
    // the unbalanced `{` seen inside that template's embedded `\{ ... }`
    // expression, so the matching `}` can resume string lexing.
    template_string_stack: Vec<u32>,
}
17
/// A lexing rule tries to match the start of some input text.
pub trait LexingRule {
    /// Attempt to match the beginning of `text`. Returns the length of the
    /// matched token in bytes, or 0 when the rule does not apply. `state`
    /// carries lexer state across tokens (e.g. template-string nesting).
    fn lex(&self, text: &str, state: &mut LexState) -> usize;
}
24
25impl LexingRule for &str {
26 #[inline]
27 fn lex(&self, text: &str, _: &mut LexState) -> usize {
28 if text.starts_with(*self) { self.len() } else { 0 }
29 }
30}
31
/// Any function or closure with the right signature can be used as a rule.
impl<F: Fn(&str, &mut LexState) -> usize> LexingRule for F {
    #[inline]
    fn lex(&self, text: &str, state: &mut LexState) -> usize {
        (self)(text, state)
    }
}
38
39pub fn lex_whitespace(text: &str, _: &mut LexState) -> usize {
40 let mut len = 0;
41 let chars = text.chars();
42 for c in chars {
43 if !c.is_whitespace() && !['\u{0002}', '\u{0003}'].contains(&c) {
44 break;
45 }
46 len += c.len_utf8();
47 }
48 len
49}
50
/// Lex a `//` line comment or a `/* ... */` block comment. Block comments
/// nest, like in Rust. Returns the comment's length in bytes, or 0 when
/// `text` does not start with a comment — including unterminated block
/// comments, which are rejected as a whole.
pub fn lex_comment(text: &str, _: &mut LexState) -> usize {
    if text.starts_with("//") {
        // Line comment: everything up to (not including) the line break.
        return text.find(&['\n', '\r'] as &[_]).unwrap_or(text.len());
    }
    if text.starts_with("/*") {
        let mut nested = 0;
        let mut offset = 2;
        let bytes = text.as_bytes();
        while offset < bytes.len() {
            // Both delimiters of interest (`/*` and `*/`) contain a '*', so
            // scan star-to-star.
            if let Some(star) = bytes[offset..].iter().position(|c| *c == b'*') {
                let star = star + offset;
                // `star > offset` guards against reading a byte that was
                // already consumed by a previous `*/` (e.g. in `.. */* ..`).
                if star > offset && bytes[star - 1] == b'/' {
                    // `/*`: a nested block comment opens.
                    nested += 1;
                    offset = star + 1;
                } else if star < bytes.len() - 1 && bytes[star + 1] == b'/' {
                    // `*/`: the innermost open comment closes.
                    if nested == 0 {
                        return star + 2;
                    }
                    nested -= 1;
                    offset = star + 2;
                } else {
                    // Lone '*': keep scanning.
                    offset = star + 1;
                }
            } else {
                // No further '*': the block comment is unterminated.
                return 0;
            }
        }
        return 0;
    }

    0
}
86
/// Lex a string literal, including template strings with embedded
/// `\{ expression }` fragments.
///
/// When `\{` is found, the token ends right after it and a brace counter
/// (starting at 0) is pushed on `state.template_string_stack`. While that
/// stack is non-empty, a `{` or `}` seen here only adjusts the counter and
/// returns 0 so the brace is lexed as a normal brace token elsewhere; a `}`
/// at counter 0 pops the stack and resumes string lexing, with the `}`
/// included in the resumed string token.
pub fn lex_string(text: &str, state: &mut LexState) -> usize {
    if let Some(brace_level) = state.template_string_stack.last_mut() {
        if text.starts_with('{') {
            // Brace inside an embedded expression: track it, but let the
            // regular brace rule produce the token.
            *brace_level += 1;
            return 0;
        } else if text.starts_with('}') {
            if *brace_level > 0 {
                *brace_level -= 1;
                return 0;
            } else {
                // This '}' terminates the embedded expression: resume lexing
                // the surrounding template string (falls through below).
                state.template_string_stack.pop();
            }
        } else if !text.starts_with('"') {
            return 0;
        }
    } else if !text.starts_with('"') {
        return 0;
    }
    let text_len = text.len();
    // Skip the opening '"' (or the resuming '}').
    let mut end = 1;
    loop {
        // Jump to the next significant character: closing quote or escape.
        let stop = match text[end..].find(&['"', '\\'][..]) {
            Some(stop) => end + stop,
            // Unterminated string literal.
            None => return 0,
        };
        match text.as_bytes()[stop] {
            b'"' => {
                return stop + 1;
            }
            b'\\' => {
                if text_len <= stop + 1 {
                    // Escape at the very end of the input: unterminated.
                    return 0;
                }
                if text.as_bytes()[stop + 1] == b'{' {
                    // `\{` starts an embedded expression; the token ends here.
                    state.template_string_stack.push(0);
                    return stop + 2;
                }
                // Skip over the escaped character (may be multi-byte).
                end = stop + 1 + text[stop + 1..].chars().next().map_or(0, |c| c.len_utf8())
            }
            _ => unreachable!(),
        }
    }
}
132
133pub fn lex_number(text: &str, _: &mut LexState) -> usize {
134 let mut len = 0;
135 let mut chars = text.chars();
136 let mut had_period = false;
137 while let Some(c) = chars.next() {
138 if !c.is_ascii_digit() {
139 if !had_period && c == '.' && len > 0 {
140 had_period = true;
141 } else {
142 if len > 0 {
143 if c == '%' {
144 return len + 1;
145 }
146 if c.is_ascii_alphabetic() {
147 len += c.len_utf8();
148 for c in chars {
150 if !c.is_ascii_alphabetic() {
151 return len;
152 }
153 len += c.len_utf8();
154 }
155 }
156 }
157 break;
158 }
159 }
160 len += c.len_utf8();
161 }
162 len
163}
164
165pub fn lex_color(text: &str, _: &mut LexState) -> usize {
166 if !text.starts_with('#') {
167 return 0;
168 }
169 let mut len = 1;
170 let chars = text[1..].chars();
171 for c in chars {
172 if !c.is_ascii_alphanumeric() {
173 break;
174 }
175 len += c.len_utf8();
176 }
177 len
178}
179
180pub fn lex_identifier(text: &str, _: &mut LexState) -> usize {
181 let mut len = 0;
182 let chars = text.chars();
183 for c in chars {
184 if !c.is_alphanumeric() && c != '_' && (c != '-' || len == 0) {
185 break;
186 }
187 len += c.len_utf8();
188 }
189 len
190}
191
192#[allow(clippy::needless_update)] pub fn lex(mut source: &str) -> Vec<crate::parser::Token> {
194 let mut result = Vec::new();
195 let mut offset = 0;
196 let mut state = LexState::default();
197 if source.starts_with("\u{FEFF}") {
198 result.push(crate::parser::Token {
200 kind: SyntaxKind::Whitespace,
201 text: source[..3].into(),
202 offset: 0,
203 length: 3,
204 ..Default::default()
205 });
206 source = &source[3..];
207 offset += 3;
208 }
209 while !source.is_empty() {
210 let (len, kind) = crate::parser::lex_next_token(source, &mut state).unwrap_or_else(|| {
211 let length = source.chars().next().map(char::len_utf8).unwrap_or_else(|| source.len());
221 (length, SyntaxKind::Error)
222 });
223 result.push(crate::parser::Token {
224 kind,
225 text: source[..len].into(),
226 offset,
227 length: len,
228 ..Default::default()
229 });
230 offset += len;
231 source = &source[len..];
232 }
233 result
234}
235
#[test]
fn basic_lexer_test() {
    // Lex `source` and compare the resulting (kind, text) pairs against
    // `expected`.
    fn compare(source: &str, expected: &[(SyntaxKind, &str)]) {
        let actual = lex(source);
        let actual =
            actual.iter().map(|token| (token.kind, token.text.as_str())).collect::<Vec<_>>();
        assert_eq!(actual.as_slice(), expected);
    }

    // Numbers, nested block comments, and a plain string literal.
    compare(
        r#"45 /*hi/*_*/ho*/ "string""#,
        &[
            (SyntaxKind::NumberLiteral, "45"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::Comment, "/*hi/*_*/ho*/"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::StringLiteral, r#""string""#),
        ],
    );

    // Number suffixes (unit, '%'), decimals, and compound operators.
    compare(
        r#"12px+5.2+=0.7%"#,
        &[
            (SyntaxKind::NumberLiteral, "12px"),
            (SyntaxKind::Plus, "+"),
            (SyntaxKind::NumberLiteral, "5.2"),
            (SyntaxKind::PlusEqual, "+="),
            (SyntaxKind::NumberLiteral, "0.7%"),
        ],
    );
    // Identifiers separated by punctuation.
    compare(
        r#"aa_a.b1,c"#,
        &[
            (SyntaxKind::Identifier, "aa_a"),
            (SyntaxKind::Dot, "."),
            (SyntaxKind::Identifier, "b1"),
            (SyntaxKind::Comma, ","),
            (SyntaxKind::Identifier, "c"),
        ],
    );
    // Adjacent block comments; a trailing lone '*' is not a comment.
    compare(
        r#"/*/**/*//**/*"#,
        &[
            (SyntaxKind::Comment, "/*/**/*/"),
            (SyntaxKind::Comment, "/**/"),
            (SyntaxKind::Star, "*"),
        ],
    );
    // Line comments end before '\n' and '\r\n'; a comment may end the input.
    compare(
        "a//x\nb//y\r\nc//z",
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::Comment, "//x"),
            (SyntaxKind::Whitespace, "\n"),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::Comment, "//y"),
            (SyntaxKind::Whitespace, "\r\n"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::Comment, "//z"),
        ],
    );
    // Minimal string, and escaped quote / backslash inside a string.
    compare(r#""x""#, &[(SyntaxKind::StringLiteral, r#""x""#)]);
    compare(
        r#"a"\"\\"x"#,
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::StringLiteral, r#""\"\\""#),
            (SyntaxKind::Identifier, "x"),
        ],
    );
    // Template strings: `\{` splits the string token, braces inside the
    // embedded expression are tracked, and `}` resumes the string.
    compare(
        r#""a\{b{c}d"e\{f}g"h}i"j"#,
        &[
            (SyntaxKind::StringLiteral, r#""a\{"#),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::LBrace, "{"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::RBrace, "}"),
            (SyntaxKind::Identifier, "d"),
            (SyntaxKind::StringLiteral, r#""e\{"#),
            (SyntaxKind::Identifier, "f"),
            (SyntaxKind::StringLiteral, r#"}g""#),
            (SyntaxKind::Identifier, "h"),
            (SyntaxKind::StringLiteral, r#"}i""#),
            (SyntaxKind::Identifier, "j"),
        ],
    );

    // Error recovery: unterminated comments/strings fall back to
    // single-character tokens instead of aborting the lexer.
    compare(r#"/**"#, &[(SyntaxKind::Div, "/"), (SyntaxKind::Star, "*"), (SyntaxKind::Star, "*")]);
    compare(r#""\"#, &[(SyntaxKind::Error, "\""), (SyntaxKind::Error, "\\")]);
    compare(
        r#""\ޱ"#,
        &[(SyntaxKind::Error, "\""), (SyntaxKind::Error, "\\"), (SyntaxKind::Identifier, "ޱ")],
    );
}
332
/// Locate every `slint!{...}` / `slint!(...)` / `slint![...]` macro
/// invocation in `rust_source`, yielding the byte range of each macro body
/// (between, and not including, the delimiters).
///
/// Occurrences of "slint" preceded on the same line by `\`, `/`, or `"` are
/// skipped, as a heuristic for macro names appearing inside comments or
/// string literals.
pub fn locate_slint_macro(rust_source: &str) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
    let mut begin = 0;
    std::iter::from_fn(move || {
        let (open, close) = loop {
            if let Some(m) = rust_source[begin..].find("slint") {
                // If a comment/string/escape marker occurs between `begin` and
                // the match without an intervening newline, skip to the end of
                // this line and retry.
                if let Some(x) = rust_source[begin..(begin + m)].rfind(['\\', '\n', '/', '\"'])
                    && rust_source.as_bytes()[begin + x] != b'\n'
                {
                    begin += m + 5;
                    begin += rust_source[begin..].find(['\n']).unwrap_or(0);
                    continue;
                }
                // Move past "slint" (5 bytes), then accept optional spaces,
                // a mandatory '!', more optional spaces, and a delimiter.
                begin += m + 5;
                while rust_source[begin..].starts_with(' ') {
                    begin += 1;
                }
                if !rust_source[begin..].starts_with('!') {
                    continue;
                }
                begin += 1;
                while rust_source[begin..].starts_with(' ') {
                    begin += 1;
                }
                let Some(open) = rust_source.as_bytes().get(begin) else { continue };
                match open {
                    b'{' => break (SyntaxKind::LBrace, SyntaxKind::RBrace),
                    b'[' => break (SyntaxKind::LBracket, SyntaxKind::RBracket),
                    b'(' => break (SyntaxKind::LParent, SyntaxKind::RParent),
                    _ => continue,
                }
            } else {
                // No further "slint" in the source: iteration ends.
                return None;
            }
        };

        // Step past the opening delimiter; the body starts here.
        begin += 1;

        // Lex the body with the slint lexer so delimiters inside strings and
        // comments do not confuse the bracket matching.
        let mut state = LexState::default();
        let start = begin;
        let mut end = begin;
        let mut level = 0;
        while !rust_source[end..].is_empty() {
            let len = match crate::parser::lex_next_token(&rust_source[end..], &mut state) {
                Some((len, x)) if x == open => {
                    level += 1;
                    len
                }
                Some((_, x)) if x == close && level == 0 => {
                    // The macro's own closing delimiter: body ends here.
                    break;
                }
                Some((len, x)) if x == close => {
                    level -= 1;
                    len
                }
                Some((len, _)) => len,
                None => {
                    break;
                }
            };
            if len == 0 {
                // Defensive: never loop forever if the lexer stops advancing.
                break;
            }
            end += len;
        }
        begin = end;
        Some(start..end)
    })
}
408
#[test]
fn test_locate_rust_macro() {
    // Assert that `locate_slint_macro` yields exactly the bodies in `captures`.
    #[track_caller]
    fn do_test(source: &str, captures: &[&str]) {
        let result = locate_slint_macro(source).map(|r| &source[r]).collect::<Vec<_>>();
        assert_eq!(&result, captures);
    }

    // Not a macro invocation: no '!' between "slint" and the brace.
    do_test("\nslint{!{}}", &[]);
    // Invocations inside line/block comments are skipped; all three delimiter
    // kinds and spaces around '!' are accepted.
    do_test(
        "//slint!(123)\nslint!(456)\nslint ![789]\n/*slint!{abc}*/\nslint! {def}",
        &["456", "789", "def"],
    );
    // A nested `slint!` inside a body stays part of that body; empty bodies work.
    do_test("slint!(slint!(abc))slint!()", &["slint!(abc)", ""]);
}
424
425pub fn extract_rust_macro(rust_source: String) -> Option<String> {
437 let core::ops::Range { start, end } = locate_slint_macro(&rust_source).next()?;
438 let mut bytes = rust_source.into_bytes();
439 for c in &mut bytes[..start] {
440 if *c != b'\n' {
441 *c = b' '
442 }
443 }
444
445 if start > 0 {
446 bytes[start - 1] = 2;
447 }
448 if end < bytes.len() {
449 bytes[end] = 3;
450
451 for c in &mut bytes[end + 1..] {
452 if *c != b'\n' {
453 *c = b' '
454 }
455 }
456 }
457 Some(String::from_utf8(bytes).expect("We just added spaces"))
458}
459
#[test]
fn test_extract_rust_macro() {
    // Not a macro invocation (no '!'), so nothing to extract.
    assert_eq!(extract_rust_macro("\nslint{!{}}".into()), None);
    // Code before/after the body becomes spaces (newlines preserved); the
    // delimiters become \u{2}/\u{3}; braces inside strings are ignored.
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! {x \" \\\" }🦀\" { () {}\n {} }xx =}- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            " \n \n \u{2}x \" \\\" }🦀\" { () {}\n {} }xx =\u{3} \n \n \n".into(),
        )
    );

    // `path::slint!` also matches; a body may contain balanced braces.
    assert_eq!(
        extract_rust_macro("xx\nabcd::slint!{abc{}efg".into()),
        Some(" \n \u{2}abc{}efg".into())
    );
    // An unterminated macro body extends to the end of the input.
    assert_eq!(
        extract_rust_macro("slint!\nnot.\nslint!{\nunterminated\nxxx".into()),
        Some(" \n \n \u{2}\nunterminated\nxxx".into())
    );
    // Invocations inside comments or strings are not extracted.
    assert_eq!(extract_rust_macro("foo\n/* slint! { hello }\n".into()), None);
    assert_eq!(extract_rust_macro("foo\n/* slint::slint! { hello }\n".into()), None);
    assert_eq!(
        extract_rust_macro("foo\n// slint! { hello }\nslint!{world}\na".into()),
        Some(" \n \n \u{2}world\u{3}\n ".into())
    );
    assert_eq!(extract_rust_macro("foo\n\" slint! { hello }\"\n".into()), None);
    // Parenthesis-delimited invocation; `)` inside a comment doesn't close it.
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! (x /* \\\" )🦀*/ { () {}\n {} }xx =)- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            " \n \n \u{2}x /* \\\" )🦀*/ { () {}\n {} }xx =\u{3} \n \n \n".into(),
        )
    );
    // Bracket-delimited invocation; a nested `slint!` stays in the body.
    assert_eq!(
        extract_rust_macro("abc slint![x slint!() [{[]}] s] abc".into()),
        Some(" \u{0002}x slint!() [{[]}] s\u{0003} ".into()),
    );
}