1use crate::parser::SyntaxKind;
10
/// Mutable state shared by the lexing rules while a source text is tokenized.
#[derive(Default)]
pub struct LexState {
    /// One entry per string template (`\{ ... }`) that is currently open, innermost last.
    /// Each entry counts the `{` seen inside that template that have not been closed yet.
    template_string_stack: Vec<u32>,
}
17
/// A rule that tries to recognize a single token at the very start of a text.
pub trait LexingRule {
    /// Returns the length in bytes of the token found at the start of `text`, or 0 when the
    /// rule does not match. `state` is the lexer state shared between rules.
    fn lex(&self, text: &str, state: &mut LexState) -> usize;
}
24
25impl LexingRule for &str {
26 #[inline]
27 fn lex(&self, text: &str, _: &mut LexState) -> usize {
28 if text.starts_with(*self) { self.len() } else { 0 }
29 }
30}
31
32impl<F: Fn(&str, &mut LexState) -> usize> LexingRule for F {
33 #[inline]
34 fn lex(&self, text: &str, state: &mut LexState) -> usize {
35 (self)(text, state)
36 }
37}
38
39pub fn lex_whitespace(text: &str, _: &mut LexState) -> usize {
40 let mut len = 0;
41 let chars = text.chars();
42 for c in chars {
43 if !c.is_whitespace() && !['\u{0002}', '\u{0003}'].contains(&c) {
44 break;
45 }
46 len += c.len_utf8();
47 }
48 len
49}
50
51pub fn lex_comment(text: &str, _: &mut LexState) -> usize {
52 if text.starts_with("//") {
54 return text.find(&['\n', '\r'] as &[_]).unwrap_or(text.len());
55 }
56 if text.starts_with("/*") {
57 let mut nested = 0;
58 let mut offset = 2;
59 let bytes = text.as_bytes();
60 while offset < bytes.len() {
61 if let Some(star) = bytes[offset..].iter().position(|c| *c == b'*') {
62 let star = star + offset;
63 if star > offset && bytes[star - 1] == b'/' {
64 nested += 1;
65 offset = star + 1;
66 } else if star < bytes.len() - 1 && bytes[star + 1] == b'/' {
67 if nested == 0 {
68 return star + 2;
69 }
70 nested -= 1;
71 offset = star + 2;
72 } else {
73 offset = star + 1;
74 }
75 } else {
76 return 0;
78 }
79 }
80 return 0;
82 }
83
84 0
85}
86
87pub fn lex_string(text: &str, state: &mut LexState) -> usize {
88 if let Some(brace_level) = state.template_string_stack.last_mut() {
89 if text.starts_with('{') {
90 *brace_level += 1;
91 return 0;
92 } else if text.starts_with('}') {
93 if *brace_level > 0 {
94 *brace_level -= 1;
95 return 0;
96 } else {
97 state.template_string_stack.pop();
98 }
99 } else if !text.starts_with('"') {
100 return 0;
101 }
102 } else if !text.starts_with('"') {
103 return 0;
104 }
105 let text_len = text.len();
106 let mut end = 1; loop {
108 let stop = match text[end..].find(&['"', '\\'][..]) {
109 Some(stop) => end + stop,
110 None => return 0,
112 };
113 match text.as_bytes()[stop] {
114 b'"' => {
115 return stop + 1;
116 }
117 b'\\' => {
118 if text_len <= stop + 1 {
119 return 0;
121 }
122 if text.as_bytes()[stop + 1] == b'{' {
123 state.template_string_stack.push(0);
124 return stop + 2;
125 }
126 end = stop + 1 + text[stop + 1..].chars().next().map_or(0, |c| c.len_utf8())
127 }
128 _ => unreachable!(),
129 }
130 }
131}
132
133pub fn lex_number(text: &str, _: &mut LexState) -> usize {
134 let mut len = 0;
135 let mut chars = text.chars();
136 let mut had_period = false;
137 while let Some(c) = chars.next() {
138 if !c.is_ascii_digit() {
139 if !had_period && c == '.' && len > 0 {
140 had_period = true;
141 } else {
142 if len > 0 {
143 if c == '%' {
144 return len + 1;
145 }
146 if c.is_ascii_alphabetic() {
147 len += c.len_utf8();
148 for c in chars {
150 if !c.is_ascii_alphabetic() {
151 return len;
152 }
153 len += c.len_utf8();
154 }
155 }
156 }
157 break;
158 }
159 }
160 len += c.len_utf8();
161 }
162 len
163}
164
165pub fn lex_color(text: &str, _: &mut LexState) -> usize {
166 if !text.starts_with('#') {
167 return 0;
168 }
169 let mut len = 1;
170 let chars = text[1..].chars();
171 for c in chars {
172 if !c.is_ascii_alphanumeric() {
173 break;
174 }
175 len += c.len_utf8();
176 }
177 len
178}
179
180pub fn lex_identifier(text: &str, _: &mut LexState) -> usize {
181 let mut len = 0;
182 let chars = text.chars();
183 for c in chars {
184 if !c.is_alphanumeric() && c != '_' && (c != '-' || len == 0) {
185 break;
186 }
187 len += c.len_utf8();
188 }
189 len
190}
191
192#[allow(clippy::needless_update)] pub fn lex(mut source: &str) -> Vec<crate::parser::Token> {
194 let mut result = Vec::new();
195 let mut offset = 0;
196 let mut state = LexState::default();
197 if source.starts_with("\u{FEFF}") {
198 result.push(crate::parser::Token {
200 kind: SyntaxKind::Whitespace,
201 text: source[..3].into(),
202 offset: 0,
203 length: 3,
204 ..Default::default()
205 });
206 source = &source[3..];
207 offset += 3;
208 }
209 while !source.is_empty() {
210 let (len, kind) = crate::parser::lex_next_token(source, &mut state).unwrap_or_else(|| {
211 (source.ceil_char_boundary(1), SyntaxKind::Error)
217 });
218 result.push(crate::parser::Token {
219 kind,
220 text: source[..len].into(),
221 offset,
222 length: len,
223 ..Default::default()
224 });
225 offset += len;
226 source = &source[len..];
227 }
228 result
229}
230
/// Checks the token kinds and texts produced by `lex` for a set of small inputs.
#[test]
fn basic_lexer_test() {
    // Lexes `source` and compares the (kind, text) pairs with `expected`.
    fn compare(source: &str, expected: &[(SyntaxKind, &str)]) {
        let actual = lex(source);
        let actual =
            actual.iter().map(|token| (token.kind, token.text.as_str())).collect::<Vec<_>>();
        assert_eq!(actual.as_slice(), expected);
    }

    // Numbers, whitespace, a nested block comment and a plain string.
    compare(
        r#"45 /*hi/*_*/ho*/ "string""#,
        &[
            (SyntaxKind::NumberLiteral, "45"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::Comment, "/*hi/*_*/ho*/"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::StringLiteral, r#""string""#),
        ],
    );

    // Numbers with units and a percent sign, next to operators.
    compare(
        r#"12px+5.2+=0.7%"#,
        &[
            (SyntaxKind::NumberLiteral, "12px"),
            (SyntaxKind::Plus, "+"),
            (SyntaxKind::NumberLiteral, "5.2"),
            (SyntaxKind::PlusEqual, "+="),
            (SyntaxKind::NumberLiteral, "0.7%"),
        ],
    );
    // Identifiers separated by punctuation.
    compare(
        r#"aa_a.b1,c"#,
        &[
            (SyntaxKind::Identifier, "aa_a"),
            (SyntaxKind::Dot, "."),
            (SyntaxKind::Identifier, "b1"),
            (SyntaxKind::Comma, ","),
            (SyntaxKind::Identifier, "c"),
        ],
    );
    // Block comments whose delimiters share characters.
    compare(
        r#"/*/**/*//**/*"#,
        &[
            (SyntaxKind::Comment, "/*/**/*/"),
            (SyntaxKind::Comment, "/**/"),
            (SyntaxKind::Star, "*"),
        ],
    );
    // Line comments stop before `\n` and before `\r\n`.
    compare(
        "a//x\nb//y\r\nc//z",
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::Comment, "//x"),
            (SyntaxKind::Whitespace, "\n"),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::Comment, "//y"),
            (SyntaxKind::Whitespace, "\r\n"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::Comment, "//z"),
        ],
    );
    compare(r#""x""#, &[(SyntaxKind::StringLiteral, r#""x""#)]);
    // Escaped quote and escaped backslash inside a string.
    compare(
        r#"a"\"\\"x"#,
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::StringLiteral, r#""\"\\""#),
            (SyntaxKind::Identifier, "x"),
        ],
    );
    // String templates: `\{` suspends the string, the matching `}` resumes it, and braces
    // nested inside the template are ordinary tokens.
    compare(
        r#""a\{b{c}d"e\{f}g"h}i"j"#,
        &[
            (SyntaxKind::StringLiteral, r#""a\{"#),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::LBrace, "{"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::RBrace, "}"),
            (SyntaxKind::Identifier, "d"),
            (SyntaxKind::StringLiteral, r#""e\{"#),
            (SyntaxKind::Identifier, "f"),
            (SyntaxKind::StringLiteral, r#"}g""#),
            (SyntaxKind::Identifier, "h"),
            (SyntaxKind::StringLiteral, r#"}i""#),
            (SyntaxKind::Identifier, "j"),
        ],
    );

    // Unterminated comment and unterminated strings fall back to other tokens or errors.
    compare(r#"/**"#, &[(SyntaxKind::Div, "/"), (SyntaxKind::Star, "*"), (SyntaxKind::Star, "*")]);
    compare(r#""\"#, &[(SyntaxKind::Error, "\""), (SyntaxKind::Error, "\\")]);
    compare(
        r#""\ޱ"#,
        &[(SyntaxKind::Error, "\""), (SyntaxKind::Error, "\\"), (SyntaxKind::Identifier, "ޱ")],
    );
}
327
328pub fn locate_slint_macro(rust_source: &str) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
331 let mut begin = 0;
332 std::iter::from_fn(move || {
333 let (open, close) = loop {
334 if let Some(m) = rust_source[begin..].find("slint") {
335 if let Some(x) = rust_source[begin..(begin + m)].rfind(['\\', '\n', '/', '\"'])
337 && rust_source.as_bytes()[begin + x] != b'\n'
338 {
339 begin += m + 5;
340 begin += rust_source[begin..].find(['\n']).unwrap_or(0);
341 continue;
342 }
343 begin += m + 5;
344 while rust_source[begin..].starts_with(' ') {
345 begin += 1;
346 }
347 if !rust_source[begin..].starts_with('!') {
348 continue;
349 }
350 begin += 1;
351 while rust_source[begin..].starts_with(' ') {
352 begin += 1;
353 }
354 let Some(open) = rust_source.as_bytes().get(begin) else { continue };
355 match open {
356 b'{' => break (SyntaxKind::LBrace, SyntaxKind::RBrace),
357 b'[' => break (SyntaxKind::LBracket, SyntaxKind::RBracket),
358 b'(' => break (SyntaxKind::LParent, SyntaxKind::RParent),
359 _ => continue,
360 }
361 } else {
362 return None;
364 }
365 };
366
367 begin += 1;
368
369 let mut state = LexState::default();
372 let start = begin;
373 let mut end = begin;
374 let mut level = 0;
375 while !rust_source[end..].is_empty() {
376 let len = match crate::parser::lex_next_token(&rust_source[end..], &mut state) {
377 Some((len, x)) if x == open => {
378 level += 1;
379 len
380 }
381 Some((_, x)) if x == close && level == 0 => {
382 break;
383 }
384 Some((len, x)) if x == close => {
385 level -= 1;
386 len
387 }
388 Some((len, _)) => len,
389 None => {
390 break;
392 }
393 };
394 if len == 0 {
395 break; }
397 end += len;
398 }
399 begin = end;
400 Some(start..end)
401 })
402}
403
/// Checks which macro bodies `locate_slint_macro` reports for a few inputs.
#[test]
fn test_locate_rust_macro() {
    // Collects the text of every located macro body and compares it with `captures`.
    #[track_caller]
    fn do_test(source: &str, captures: &[&str]) {
        let result = locate_slint_macro(source).map(|r| &source[r]).collect::<Vec<_>>();
        assert_eq!(&result, captures);
    }

    // `slint` not followed by `!` is not a macro invocation.
    do_test("\nslint{!{}}", &[]);
    // Invocations inside `//` and `/* */` comments are skipped; all three delimiters work.
    do_test(
        "//slint!(123)\nslint!(456)\nslint ![789]\n/*slint!{abc}*/\nslint! {def}",
        &["456", "789", "def"],
    );
    // A nested invocation is part of the outer body; an empty body yields an empty range.
    do_test("slint!(slint!(abc))slint!()", &["slint!(abc)", ""]);
}
419
420pub fn extract_rust_macro(rust_source: String) -> Option<String> {
432 let core::ops::Range { start, end } = locate_slint_macro(&rust_source).next()?;
433 let mut bytes = rust_source.into_bytes();
434 for c in &mut bytes[..start] {
435 if *c != b'\n' {
436 *c = b' '
437 }
438 }
439
440 if start > 0 {
441 bytes[start - 1] = 2;
442 }
443 if end < bytes.len() {
444 bytes[end] = 3;
445
446 for c in &mut bytes[end + 1..] {
447 if *c != b'\n' {
448 *c = b' '
449 }
450 }
451 }
452 Some(String::from_utf8(bytes).expect("We just added spaces"))
453}
454
/// Checks `extract_rust_macro` on inputs with comments, strings, nesting and multi-byte
/// characters.
///
/// The function replaces bytes one-for-one, so every expected string has exactly as many
/// bytes as its input: each blanked byte (including each byte of a multi-byte character)
/// shows up as one space.
#[test]
fn test_extract_rust_macro() {
    // `slint` not followed by `!` is not a macro invocation.
    assert_eq!(extract_rust_macro("\nslint{!{}}".into()), None);
    // Braces inside a string literal and nested braces do not end the body.
    // Blanked: "abc" (3), "€" (3), "slint ! " (8) before the body; "- ;}" (4), " xxx " (5)
    // and " yyy {}" (7) after it.
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! {x \" \\\" }🦀\" { () {}\n {} }xx =}- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            "   \n   \n        \u{2}x \" \\\" }🦀\" { () {}\n {} }xx =\u{3}    \n     \n       \n"
                .into(),
        )
    );

    // Unterminated body: no end marker. Blanked: "xx" (2) and "abcd::slint!" (12).
    assert_eq!(
        extract_rust_macro("xx\nabcd::slint!{abc{}efg".into()),
        Some("  \n            \u{2}abc{}efg".into())
    );
    // `slint!` without a delimiter is skipped. Blanked: "slint!" (6), "not." (4), "slint!" (6).
    assert_eq!(
        extract_rust_macro("slint!\nnot.\nslint!{\nunterminated\nxxx".into()),
        Some("      \n    \n      \u{2}\nunterminated\nxxx".into())
    );
    // Invocations inside comments are ignored.
    assert_eq!(extract_rust_macro("foo\n/* slint! { hello }\n".into()), None);
    assert_eq!(extract_rust_macro("foo\n/* slint::slint! { hello }\n".into()), None);
    // Blanked: "foo" (3), the 19-byte comment line, "slint!" (6) and the trailing "a" (1).
    assert_eq!(
        extract_rust_macro("foo\n// slint! { hello }\nslint!{world}\na".into()),
        Some("   \n                   \n      \u{2}world\u{3}\n ".into())
    );
    // Invocations inside string literals are ignored.
    assert_eq!(extract_rust_macro("foo\n\" slint! { hello }\"\n".into()), None);
    // A closing parenthesis inside a comment does not end the body.
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! (x /* \\\" )🦀*/ { () {}\n {} }xx =)- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            "   \n   \n        \u{2}x /* \\\" )🦀*/ { () {}\n {} }xx =\u{3}    \n     \n       \n"
                .into(),
        )
    );
    // A nested invocation stays inside the outer body. Blanked: "abc slint!" (10) before
    // the body and " abc" (4) after it.
    assert_eq!(
        extract_rust_macro("abc slint![x slint!() [{[]}] s] abc".into()),
        Some("          \u{0002}x slint!() [{[]}] s\u{0003}    ".into()),
    );
}