use crate::parser::SyntaxKind;

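/// State carried across the individual lexing rules. `template_string_stack`
/// records, for each currently open `\{ ... }` string interpolation, how many
/// unmatched `{` have been seen since it was opened, so that `lex_string` can
/// resume the string literal at the matching `}`.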
#[derive(Default)]
pub struct LexState {
    template_string_stack: Vec<u32>,
}

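/// A lexing rule returns the length in bytes of the token it matches at the
/// start of `text`, or 0 if it does not match. It is implemented for plain
/// string prefixes and for functions/closures with the same signature as `lex`.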
pub trait LexingRule {
    fn lex(&self, text: &str, state: &mut LexState) -> usize;
}

impl LexingRule for &str {
    #[inline]
    fn lex(&self, text: &str, _: &mut LexState) -> usize {
        if text.starts_with(*self) {
            self.len()
        } else {
            0
        }
    }
}

impl<F: Fn(&str, &mut LexState) -> usize> LexingRule for F {
    #[inline]
    fn lex(&self, text: &str, state: &mut LexState) -> usize {
        (self)(text, state)
    }
}

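/// Matches a run of whitespace. The control characters U+0002 and U+0003 are
/// also accepted, since `extract_rust_macro` uses them to mark the boundaries
/// of the `slint!` macro content.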
pub fn lex_whitespace(text: &str, _: &mut LexState) -> usize {
    let mut len = 0;
    let chars = text.chars();
    for c in chars {
        if !c.is_whitespace() && !['\u{0002}', '\u{0003}'].contains(&c) {
            break;
        }
        len += c.len_utf8();
    }
    len
}

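/// Matches a `//` line comment (up to, but not including, the line break) or a
/// `/* ... */` block comment, taking nesting into account. Returns 0 for an
/// unterminated block comment.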
pub fn lex_comment(text: &str, _: &mut LexState) -> usize {
    if text.starts_with("//") {
        return text.find(&['\n', '\r'] as &[_]).unwrap_or(text.len());
    }
    if text.starts_with("/*") {
        let mut nested = 0;
        let mut offset = 2;
        let bytes = text.as_bytes();
        while offset < bytes.len() {
            if let Some(star) = bytes[offset..].iter().position(|c| *c == b'*') {
                let star = star + offset;
                if star > offset && bytes[star - 1] == b'/' {
                    nested += 1;
                    offset = star + 1;
                } else if star < bytes.len() - 1 && bytes[star + 1] == b'/' {
                    if nested == 0 {
                        return star + 2;
                    }
                    nested -= 1;
                    offset = star + 2;
                } else {
                    offset = star + 1;
                }
            } else {
                return 0;
            }
        }
        return 0;
    }

    0
}

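/// Matches a string literal, including the partial literals produced by
/// `\{ ... }` template expressions: the token ends right after a `\{`, and the
/// closing `}` of the expression starts a new string token that continues the
/// literal.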
pub fn lex_string(text: &str, state: &mut LexState) -> usize {
    if let Some(brace_level) = state.template_string_stack.last_mut() {
        if text.starts_with('{') {
            *brace_level += 1;
            return 0;
        } else if text.starts_with('}') {
            if *brace_level > 0 {
                *brace_level -= 1;
                return 0;
            } else {
                state.template_string_stack.pop();
            }
        } else if !text.starts_with('"') {
            return 0;
        }
    } else if !text.starts_with('"') {
        return 0;
    }
    let text_len = text.as_bytes().len();
    let mut end = 1; // skip the opening quote (or the `}` that resumes a template string)
    loop {
        let stop = match text[end..].find(&['"', '\\'][..]) {
            Some(stop) => end + stop,
            None => return 0,
        };
        match text.as_bytes()[stop] {
            b'"' => {
                return stop + 1;
            }
            b'\\' => {
                if text_len <= stop + 1 {
                    return 0;
                }
                if text.as_bytes()[stop + 1] == b'{' {
                    // `\{` opens a template expression: end the string token here
                    // and remember the brace nesting level on the stack.
                    state.template_string_stack.push(0);
                    return stop + 2;
                }
                // skip the escaped character
                end = stop + 1 + text[stop + 1..].chars().next().map_or(0, |c| c.len_utf8())
            }
            _ => unreachable!(),
        }
    }
}

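/// Matches a number literal with an optional fractional part and an optional
/// unit suffix, either `%` or a run of ASCII letters (e.g. `5.2`, `0.7%`, `12px`).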
pub fn lex_number(text: &str, _: &mut LexState) -> usize {
    let mut len = 0;
    let mut chars = text.chars();
    let mut had_period = false;
    while let Some(c) = chars.next() {
        if !c.is_ascii_digit() {
            if !had_period && c == '.' && len > 0 {
                had_period = true;
            } else {
                if len > 0 {
                    if c == '%' {
                        return len + 1;
                    }
                    if c.is_ascii_alphabetic() {
                        len += c.len_utf8();
                        for c in chars {
                            if !c.is_ascii_alphabetic() {
                                return len;
                            }
                            len += c.len_utf8();
                        }
                    }
                }
                break;
            }
        }
        len += c.len_utf8();
    }
    len
}

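/// Matches a color literal: `#` followed by ASCII alphanumeric characters.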
pub fn lex_color(text: &str, _: &mut LexState) -> usize {
    if !text.starts_with('#') {
        return 0;
    }
    let mut len = 1;
    let chars = text[1..].chars();
    for c in chars {
        if !c.is_ascii_alphanumeric() {
            break;
        }
        len += c.len_utf8();
    }
    len
}

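/// Matches an identifier: alphanumeric characters, `_`, and `-` (the latter
/// only after the first character).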
pub fn lex_identifier(text: &str, _: &mut LexState) -> usize {
    let mut len = 0;
    let chars = text.chars();
    for c in chars {
        if !c.is_alphanumeric() && c != '_' && (c != '-' || len == 0) {
            break;
        }
        len += c.len_utf8();
    }
    len
}

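/// Lexes the entire `source` into a vector of tokens. A leading UTF-8 BOM is
/// emitted as a whitespace token; if no rule matches, the remainder of the
/// input becomes a single `Error` token.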
#[allow(clippy::needless_update)]
pub fn lex(mut source: &str) -> Vec<crate::parser::Token> {
    let mut result = vec![];
    let mut offset = 0;
    let mut state = LexState::default();
    if source.starts_with("\u{FEFF}") {
        result.push(crate::parser::Token {
            kind: SyntaxKind::Whitespace,
            text: source[..3].into(),
            offset: 0,
            ..Default::default()
        });
        source = &source[3..];
        offset += 3;
    }
    while !source.is_empty() {
        if let Some((len, kind)) = crate::parser::lex_next_token(source, &mut state) {
            result.push(crate::parser::Token {
                kind,
                text: source[..len].into(),
                offset,
                ..Default::default()
            });
            offset += len;
            source = &source[len..];
        } else {
            result.push(crate::parser::Token {
                kind: SyntaxKind::Error,
                text: source.into(),
                offset,
                ..Default::default()
            });
            break;
        }
    }
    result
}

#[test]
fn basic_lexer_test() {
    fn compare(source: &str, expected: &[(SyntaxKind, &str)]) {
        let actual = lex(source);
        let actual =
            actual.iter().map(|token| (token.kind, token.text.as_str())).collect::<Vec<_>>();
        assert_eq!(actual.as_slice(), expected);
    }

    compare(
        r#"45 /*hi/*_*/ho*/ "string""#,
        &[
            (SyntaxKind::NumberLiteral, "45"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::Comment, "/*hi/*_*/ho*/"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::StringLiteral, r#""string""#),
        ],
    );

    compare(
        r#"12px+5.2+=0.7%"#,
        &[
            (SyntaxKind::NumberLiteral, "12px"),
            (SyntaxKind::Plus, "+"),
            (SyntaxKind::NumberLiteral, "5.2"),
            (SyntaxKind::PlusEqual, "+="),
            (SyntaxKind::NumberLiteral, "0.7%"),
        ],
    );
    compare(
        r#"aa_a.b1,c"#,
        &[
            (SyntaxKind::Identifier, "aa_a"),
            (SyntaxKind::Dot, "."),
            (SyntaxKind::Identifier, "b1"),
            (SyntaxKind::Comma, ","),
            (SyntaxKind::Identifier, "c"),
        ],
    );
    compare(
        r#"/*/**/*//**/*"#,
        &[
            (SyntaxKind::Comment, "/*/**/*/"),
            (SyntaxKind::Comment, "/**/"),
            (SyntaxKind::Star, "*"),
        ],
    );
    compare(
        "a//x\nb//y\r\nc//z",
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::Comment, "//x"),
            (SyntaxKind::Whitespace, "\n"),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::Comment, "//y"),
            (SyntaxKind::Whitespace, "\r\n"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::Comment, "//z"),
        ],
    );
    compare(r#""x""#, &[(SyntaxKind::StringLiteral, r#""x""#)]);
    compare(
        r#"a"\"\\"x"#,
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::StringLiteral, r#""\"\\""#),
            (SyntaxKind::Identifier, "x"),
        ],
    );
    compare(
        r#""a\{b{c}d"e\{f}g"h}i"j"#,
        &[
            (SyntaxKind::StringLiteral, r#""a\{"#),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::LBrace, "{"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::RBrace, "}"),
            (SyntaxKind::Identifier, "d"),
            (SyntaxKind::StringLiteral, r#""e\{"#),
            (SyntaxKind::Identifier, "f"),
            (SyntaxKind::StringLiteral, r#"}g""#),
            (SyntaxKind::Identifier, "h"),
            (SyntaxKind::StringLiteral, r#"}i""#),
            (SyntaxKind::Identifier, "j"),
        ],
    );

    compare(
        r#"/**"#,
        &[(SyntaxKind::Div, "/"), (SyntaxKind::Star, "*"), (SyntaxKind::Star, "*")],
    );
    compare(r#""\"#, &[(SyntaxKind::Error, "\"\\")]);
    compare(r#""\ޱ"#, &[(SyntaxKind::Error, "\"\\ޱ")]);
}

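/// Returns an iterator over the byte ranges of the contents of each `slint!`
/// macro invocation in `rust_source` (the delimiters themselves are excluded).
/// Occurrences of `slint` preceded on the same line by `/`, `"` or `\` are
/// skipped, as a heuristic for macros that appear inside comments or strings.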
pub fn locate_slint_macro(rust_source: &str) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
    let mut begin = 0;
    std::iter::from_fn(move || {
        let (open, close) = loop {
            if let Some(m) = rust_source[begin..].find("slint") {
                if let Some(x) = rust_source[begin..(begin + m)].rfind(['\\', '\n', '/', '\"']) {
                    if rust_source.as_bytes()[begin + x] != b'\n' {
                        begin += m + 5;
                        begin += rust_source[begin..].find(['\n']).unwrap_or(0);
                        continue;
                    }
                }
                begin += m + 5;
                while rust_source[begin..].starts_with(' ') {
                    begin += 1;
                }
                if !rust_source[begin..].starts_with('!') {
                    continue;
                }
                begin += 1;
                while rust_source[begin..].starts_with(' ') {
                    begin += 1;
                }
                let Some(open) = rust_source.as_bytes().get(begin) else { continue };
                match open {
                    b'{' => break (SyntaxKind::LBrace, SyntaxKind::RBrace),
                    b'[' => break (SyntaxKind::LBracket, SyntaxKind::RBracket),
                    b'(' => break (SyntaxKind::LParent, SyntaxKind::RParent),
                    _ => continue,
                }
            } else {
                return None;
            }
        };

        begin += 1;

        let mut state = LexState::default();
        let start = begin;
        let mut end = begin;
        let mut level = 0;
        while !rust_source[end..].is_empty() {
            let len = match crate::parser::lex_next_token(&rust_source[end..], &mut state) {
                Some((len, x)) if x == open => {
                    level += 1;
                    len
                }
                Some((_, x)) if x == close && level == 0 => {
                    break;
                }
                Some((len, x)) if x == close => {
                    level -= 1;
                    len
                }
                Some((len, _)) => len,
                None => {
                    break;
                }
            };
            if len == 0 {
                break;
            }
            end += len;
        }
        begin = end;
        Some(start..end)
    })
}

#[test]
fn test_locate_rust_macro() {
    #[track_caller]
    fn do_test(source: &str, captures: &[&str]) {
        let result = locate_slint_macro(source).map(|r| &source[r]).collect::<Vec<_>>();
        assert_eq!(&result, captures);
    }

    do_test("\nslint{!{}}", &[]);
    do_test(
        "//slint!(123)\nslint!(456)\nslint ![789]\n/*slint!{abc}*/\nslint! {def}",
        &["456", "789", "def"],
    );
    do_test("slint!(slint!(abc))slint!()", &["slint!(abc)", ""]);
}

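/// Blanks out everything around the first `slint!` macro in `rust_source` with
/// spaces (newlines are kept so that line numbers stay stable) and marks the
/// macro boundaries with U+0002/U+0003. Returns `None` when no macro is found.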
pub fn extract_rust_macro(rust_source: String) -> Option<String> {
    let core::ops::Range { start, end } = locate_slint_macro(&rust_source).next()?;
    let mut bytes = rust_source.into_bytes();
    for c in &mut bytes[..start] {
        if *c != b'\n' {
            *c = b' '
        }
    }

    if start > 0 {
        // Mark the start of the macro content with U+0002 (STX).
        bytes[start - 1] = 2;
    }
    if end < bytes.len() {
        // Mark the end of the macro content with U+0003 (ETX).
        bytes[end] = 3;

        for c in &mut bytes[end + 1..] {
            if *c != b'\n' {
                *c = b' '
            }
        }
    }
    Some(String::from_utf8(bytes).expect("We just added spaces"))
}

#[test]
fn test_extract_rust_macro() {
    assert_eq!(extract_rust_macro("\nslint{!{}}".into()), None);
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! {x \" \\\" }🦀\" { () {}\n {} }xx =}- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            " \n \n \u{2}x \" \\\" }🦀\" { () {}\n {} }xx =\u{3} \n \n \n".into(),
        )
    );

    assert_eq!(
        extract_rust_macro("xx\nabcd::slint!{abc{}efg".into()),
        Some(" \n \u{2}abc{}efg".into())
    );
    assert_eq!(
        extract_rust_macro("slint!\nnot.\nslint!{\nunterminated\nxxx".into()),
        Some(" \n \n \u{2}\nunterminated\nxxx".into())
    );
    assert_eq!(extract_rust_macro("foo\n/* slint! { hello }\n".into()), None);
    assert_eq!(extract_rust_macro("foo\n/* slint::slint! { hello }\n".into()), None);
    assert_eq!(
        extract_rust_macro("foo\n// slint! { hello }\nslint!{world}\na".into()),
        Some(" \n \n \u{2}world\u{3}\n ".into())
    );
    assert_eq!(extract_rust_macro("foo\n\" slint! { hello }\"\n".into()), None);
    assert_eq!(
        extract_rust_macro(
            "abc\n€\nslint ! (x /* \\\" )🦀*/ { () {}\n {} }xx =)- ;}\n xxx \n yyy {}\n".into(),
        ),
        Some(
            " \n \n \u{2}x /* \\\" )🦀*/ { () {}\n {} }xx =\u{3} \n \n \n".into(),
        )
    );
    assert_eq!(
        extract_rust_macro("abc slint![x slint!() [{[]}] s] abc".into()),
        Some(" \u{0002}x slint!() [{[]}] s\u{0003} ".into()),
    );
}