/// Mutable state that is threaded through the lexing rules from one token to
/// the next.
#[derive(Default, Debug, Clone)]
pub struct LexState {
    /// One entry per string template that is currently open, i.e. entered
    /// through a `\{` inside a string literal. Each entry counts the `{` seen
    /// inside that template which have not yet been matched by a `}`.
    template_string_stack: Vec<u32>,
}
15
16pub trait LexingRule {
19 fn lex(&self, text: &str, state: &mut LexState) -> usize;
21}
22
23impl<'a> LexingRule for &'a str {
24 #[inline]
25 fn lex(&self, text: &str, _: &mut LexState) -> usize {
26 if text.starts_with(*self) {
27 self.len()
28 } else {
29 0
30 }
31 }
32}
33
34impl<F: Fn(&str, &mut LexState) -> usize> LexingRule for F {
35 #[inline]
36 fn lex(&self, text: &str, state: &mut LexState) -> usize {
37 (self)(text, state)
38 }
39}
40
41pub fn lex_whitespace(text: &str, _: &mut LexState) -> usize {
42 let mut len = 0;
43 let chars = text.chars();
44 for c in chars {
45 if !c.is_whitespace() {
46 break;
47 }
48 len += c.len_utf8();
49 }
50 len
51}
52
53pub fn lex_comment(text: &str, _: &mut LexState) -> usize {
54 if text.starts_with("//") {
56 return text.find(&['\n', '\r'] as &[_]).unwrap_or(text.len());
57 }
58 if text.starts_with("/*") {
59 let mut nested = 0;
60 let mut offset = 2;
61 let bytes = text.as_bytes();
62 while offset < bytes.len() {
63 if let Some(star) = bytes[offset..].iter().position(|c| *c == b'*') {
64 let star = star + offset;
65 if star > offset && bytes[star - 1] == b'/' {
66 nested += 1;
67 offset = star + 1;
68 } else if star < bytes.len() - 1 && bytes[star + 1] == b'/' {
69 if nested == 0 {
70 return star + 2;
71 }
72 nested -= 1;
73 offset = star + 2;
74 } else {
75 offset = star + 1;
76 }
77 } else {
78 return 0;
80 }
81 }
82 return 0;
84 }
85
86 0
87}
88
89pub fn lex_string(text: &str, state: &mut LexState) -> usize {
90 if let Some(brace_level) = state.template_string_stack.last_mut() {
91 if text.starts_with('{') {
92 *brace_level += 1;
93 return 0;
94 } else if text.starts_with('}') {
95 if *brace_level > 0 {
96 *brace_level -= 1;
97 return 0;
98 } else {
99 state.template_string_stack.pop();
100 }
101 } else if !text.starts_with('"') {
102 return 0;
103 }
104 } else if !text.starts_with('"') {
105 return 0;
106 }
107 let text_len = text.as_bytes().len();
108 let mut end = 1; loop {
110 let stop = match text[end..].find(&['"', '\\'][..]) {
111 Some(stop) => end + stop,
112 None => return 0,
114 };
115 match text.as_bytes()[stop] {
116 b'"' => {
117 return stop + 1;
118 }
119 b'\\' => {
120 if text_len <= stop + 1 {
121 return 0;
123 }
124 if text.as_bytes()[stop + 1] == b'{' {
125 state.template_string_stack.push(0);
126 return stop + 2;
127 }
128 end = stop + 1 + text[stop + 1..].chars().next().map_or(0, |c| c.len_utf8())
129 }
130 _ => unreachable!(),
131 }
132 }
133}
134
135pub fn lex_number(text: &str, _: &mut LexState) -> usize {
136 let mut len = 0;
137 let mut chars = text.chars();
138 let mut had_period = false;
139 while let Some(c) = chars.next() {
140 if !c.is_ascii_digit() {
141 if !had_period && c == '.' && len > 0 {
142 had_period = true;
143 } else {
144 if len > 0 {
145 if c == '%' {
146 return len + 1;
147 }
148 if c.is_ascii_alphabetic() {
149 len += c.len_utf8();
150 for c in chars {
152 if !c.is_ascii_alphabetic() {
153 return len;
154 }
155 len += c.len_utf8();
156 }
157 }
158 }
159 break;
160 }
161 }
162 len += c.len_utf8();
163 }
164 len
165}
166
167pub fn lex_color(text: &str, _: &mut LexState) -> usize {
168 if !text.starts_with('#') {
169 return 0;
170 }
171 let mut len = 1;
172 let chars = text[1..].chars();
173 for c in chars {
174 if !c.is_ascii_alphanumeric() {
175 break;
176 }
177 len += c.len_utf8();
178 }
179 len
180}
181
182pub fn lex_identifier(text: &str, _: &mut LexState) -> usize {
183 let mut len = 0;
184 let chars = text.chars();
185 for c in chars {
186 if !c.is_alphanumeric() && c != '_' && (c != '-' || len == 0) {
187 break;
188 }
189 len += c.len_utf8();
190 }
191 len
192}
193
194#[allow(clippy::needless_update)] pub fn lex(mut source: &str) -> Vec<crate::parser::Token> {
196 let mut result = vec![];
197 let mut offset = 0;
198 let mut state = LexState::default();
199 while !source.is_empty() {
200 if let Some((len, kind)) = crate::parser::lex_next_token(source, &mut state) {
201 result.push(crate::parser::Token {
202 kind,
203 text: source[..len].into(),
204 offset,
205 ..Default::default()
206 });
207 offset += len;
208 source = &source[len..];
209 } else {
210 result.push(crate::parser::Token {
212 kind: crate::parser::SyntaxKind::Error,
213 text: source.into(),
214 offset,
215 ..Default::default()
216 });
217 break;
219 }
220 }
221 result
222}
223
/// Runs `lex` over a handful of inputs and checks the kind and text of every
/// token produced: numbers with suffixes, nested comments, line comments,
/// escaped strings, string templates and unterminated constructs.
#[test]
fn basic_lexer_test() {
    use crate::parser::SyntaxKind;

    /// Lexes `source` and asserts that the `(kind, text)` pairs match `expected`.
    fn check(source: &str, expected: &[(SyntaxKind, &str)]) {
        let tokens = lex(source);
        let actual: Vec<(SyntaxKind, &str)> =
            tokens.iter().map(|token| (token.kind, token.text.as_str())).collect();
        assert_eq!(actual.as_slice(), expected);
    }

    // Number, nested block comment and string separated by whitespace.
    check(
        r#"45 /*hi/*_*/ho*/ "string""#,
        &[
            (SyntaxKind::NumberLiteral, "45"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::Comment, "/*hi/*_*/ho*/"),
            (SyntaxKind::Whitespace, " "),
            (SyntaxKind::StringLiteral, r#""string""#),
        ],
    );

    // Numbers with a unit, a fractional part and a percent sign.
    check(
        r#"12px+5.2+=0.7%"#,
        &[
            (SyntaxKind::NumberLiteral, "12px"),
            (SyntaxKind::Plus, "+"),
            (SyntaxKind::NumberLiteral, "5.2"),
            (SyntaxKind::PlusEqual, "+="),
            (SyntaxKind::NumberLiteral, "0.7%"),
        ],
    );

    // Identifiers and punctuation.
    check(
        r#"aa_a.b1,c"#,
        &[
            (SyntaxKind::Identifier, "aa_a"),
            (SyntaxKind::Dot, "."),
            (SyntaxKind::Identifier, "b1"),
            (SyntaxKind::Comma, ","),
            (SyntaxKind::Identifier, "c"),
        ],
    );

    // Adjacent block comments, the first one containing a nested comment.
    check(
        r#"/*/**/*//**/*"#,
        &[
            (SyntaxKind::Comment, "/*/**/*/"),
            (SyntaxKind::Comment, "/**/"),
            (SyntaxKind::Star, "*"),
        ],
    );

    // Line comments stop before `\n` and before `\r\n`.
    check(
        "a//x\nb//y\r\nc//z",
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::Comment, "//x"),
            (SyntaxKind::Whitespace, "\n"),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::Comment, "//y"),
            (SyntaxKind::Whitespace, "\r\n"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::Comment, "//z"),
        ],
    );

    // Plain string, then a string containing escaped quote and backslash.
    check(r#""x""#, &[(SyntaxKind::StringLiteral, r#""x""#)]);
    check(
        r#"a"\"\\"x"#,
        &[
            (SyntaxKind::Identifier, "a"),
            (SyntaxKind::StringLiteral, r#""\"\\""#),
            (SyntaxKind::Identifier, "x"),
        ],
    );

    // Nested string templates with braces inside the template expression.
    check(
        r#""a\{b{c}d"e\{f}g"h}i"j"#,
        &[
            (SyntaxKind::StringLiteral, r#""a\{"#),
            (SyntaxKind::Identifier, "b"),
            (SyntaxKind::LBrace, "{"),
            (SyntaxKind::Identifier, "c"),
            (SyntaxKind::RBrace, "}"),
            (SyntaxKind::Identifier, "d"),
            (SyntaxKind::StringLiteral, r#""e\{"#),
            (SyntaxKind::Identifier, "f"),
            (SyntaxKind::StringLiteral, r#"}g""#),
            (SyntaxKind::Identifier, "h"),
            (SyntaxKind::StringLiteral, r#"}i""#),
            (SyntaxKind::Identifier, "j"),
        ],
    );

    // An unterminated block comment is not a comment at all.
    check(
        r#"/**"#,
        &[
            (SyntaxKind::Div, "/"),
            (SyntaxKind::Star, "*"),
            (SyntaxKind::Star, "*"),
        ],
    );

    // Unterminated strings ending in an escape produce a single error token.
    check(r#""\"#, &[(SyntaxKind::Error, "\"\\")]);
    check(r#""\ޱ"#, &[(SyntaxKind::Error, "\"\\ޱ")]);
}