text_scanner/ext/css.rs
1use crate::{Scanner, ScannerResult};
2
3/// [`Scanner`] extension for scanning CSS tokens.
4///
5/// See also [`ScssScannerExt`].
6///
7/// [`ScssScannerExt`]: super::ScssScannerExt
pub trait CssScannerExt<'text>: crate::private::Sealed {
    /// Scans a single [CSS block comment].
    ///
    /// **Note:** CSS block comments do **not** allow nested block comments.
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   /* Block Comment */
    ///
    ///   /* Multi
    ///    // Line
    ///    /* Block
    ///    Comment */
    ///
    ///   /* Unterminated Block Comment
    /// "#;
    ///
    /// let comments = [
    ///     (3..22, "/* Block Comment */"),
    ///     (26..71, "/* Multi\n   // Line\n   /* Block\n   Comment */"),
    ///     (75..105, "/* Unterminated Block Comment\n"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for comment in comments {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_block_comment(), Ok(comment));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS block comment]: https://www.w3.org/TR/css-syntax-3/#comment-diagram
    fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a single [CSS identifier].
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   foo
    ///   foo_bar
    ///   foo-bar
    ///   --foo
    /// "#;
    ///
    /// let idents = [
    ///     (3..6, "foo"),
    ///     (9..16, "foo_bar"),
    ///     (19..26, "foo-bar"),
    ///     (29..34, "--foo"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for ident in idents {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_identifier(), Ok(ident));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS identifier]: https://www.w3.org/TR/css-syntax-3/#ident-token-diagram
    fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a single [CSS at-keyword], i.e. an `@` immediately
    /// followed by a CSS identifier.
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   @media
    ///   @import
    ///   @-webkit-keyframes
    ///   @--custom
    /// "#;
    ///
    /// let at_keywords = [
    ///     (3..9, "@media"),
    ///     (12..19, "@import"),
    ///     (22..40, "@-webkit-keyframes"),
    ///     (43..52, "@--custom"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for at_keyword in at_keywords {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_at_keyword(), Ok(at_keyword));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS at-keyword]: https://www.w3.org/TR/css-syntax-3/#at-keyword-token-diagram
    fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a single [CSS hash], i.e. a `#` immediately followed by
    /// one or more alphanumeric characters, `_` or `-`.
    ///
    /// **Note:** As opposed to identifiers, the name following the `#`
    /// is allowed to start with a digit.
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   #fff
    ///   #main-header
    ///   #00ff7f
    ///   #_private
    /// "#;
    ///
    /// let hashes = [
    ///     (3..7, "#fff"),
    ///     (10..22, "#main-header"),
    ///     (25..32, "#00ff7f"),
    ///     (35..44, "#_private"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for hash in hashes {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_hash(), Ok(hash));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS hash]: https://www.w3.org/TR/css-syntax-3/#hash-token-diagram
    fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a single [CSS string].
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   "Hello World"
    ///   'Hello World'
    ///
    ///   "Hello ' \" World"
    ///   'Hello \' " World'
    ///
    ///   "Unterminated String
    /// "#;
    ///
    /// let strings = [
    ///     (3..16, r#""Hello World""#),
    ///     (19..32, r#"'Hello World'"#),
    ///     (36..54, r#""Hello ' \" World""#),
    ///     (57..75, r#"'Hello \' " World'"#),
    ///     (79..100, "\"Unterminated String\n"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for string in strings {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_string(), Ok(string));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS string]: https://www.w3.org/TR/css-syntax-3/#string-token-diagram
    fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a single [CSS number].
    ///
    /// **Note:** CSS numbers allow a unary `+` or `-` before the number,
    /// as opposed to other languages separating those into two different
    /// tokens.
    ///
    /// **Note:** This has the same lifetime as the original `text`,
    /// so the scanner can continue to be used while this exists.
    ///
    /// # Example
    ///
    /// ```rust
    /// use text_scanner::{ext::CssScannerExt, Scanner};
    ///
    /// let text = r#"
    ///   1
    ///   -2
    ///   +3
    ///   3.1415
    ///   +10.5E+100
    /// "#;
    ///
    /// let numbers = [
    ///     (3..4, "1"),
    ///     (7..9, "-2"),
    ///     (12..14, "+3"),
    ///     (17..23, "3.1415"),
    ///     (26..36, "+10.5E+100"),
    /// ];
    ///
    /// let mut scanner = Scanner::new(text);
    /// for num in numbers {
    ///     scanner.skip_whitespace();
    ///     assert_eq!(scanner.scan_css_number(), Ok(num));
    /// }
    ///
    /// # scanner.skip_whitespace();
    /// # assert_eq!(scanner.remaining_text(), "");
    /// ```
    ///
    /// [CSS number]: https://www.w3.org/TR/css-syntax-3/#number-token-diagram
    fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str>;
}
176
177impl<'text> CssScannerExt<'text> for Scanner<'text> {
178 // Reference: https://www.w3.org/TR/css-syntax-3/#comment-diagram
179 fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str> {
180 self.scan_with(|scanner| {
181 scanner.accept_str("/*")?;
182
183 loop {
184 let (r, _) = scanner.skip_until_char('*');
185 if r.is_empty() {
186 break;
187 }
188
189 // Safe to ignore as it is guaranteed to be `Ok`
190 _ = scanner.accept_char('*');
191
192 if scanner.accept_char('/').is_ok() {
193 break;
194 }
195 }
196
197 Ok(())
198 })
199 }
200
201 // Reference: https://www.w3.org/TR/css-syntax-3/#ident-token-diagram
202 fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str> {
203 self.scan_with(|scanner| {
204 if scanner.accept_char('-').is_ok() {
205 if scanner.accept_char('-').is_ok() {
206 } else {
207 scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
208 }
209
210 scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
211 } else {
212 scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
213 scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
214 }
215
216 Ok(())
217 })
218 }
219
220 // Reference: https://www.w3.org/TR/css-syntax-3/#at-keyword-token-diagram
221 fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str> {
222 self.scan_with(|scanner| {
223 scanner.accept_char('@')?;
224 scanner.scan_css_identifier()?;
225 Ok(())
226 })
227 }
228
229 // Reference: https://www.w3.org/TR/css-syntax-3/#hash-token-diagram
230 fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str> {
231 self.scan_with(|scanner| {
232 scanner.accept_char('#')?;
233 scanner.accept_if(|c| c.is_alphanumeric() || matches!(c, '_' | '-'))?;
234 scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
235 Ok(())
236 })
237 }
238
239 // Reference: https://www.w3.org/TR/css-syntax-3/#string-token-diagram
240 fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str> {
241 self.scan_with(|scanner| {
242 let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
243
244 loop {
245 scanner.skip_until(|c| (c == quote) || (c == '\\'));
246 match scanner.next() {
247 Ok((_r, c)) if c == quote => break,
248 Ok((_r, '\\')) => {
249 // Skip the next character as it is escaped
250 _ = scanner.next();
251 }
252 Ok(_) => unreachable!(),
253 Err(_) => break,
254 }
255 }
256
257 Ok(())
258 })
259 }
260
261 // Reference: https://www.w3.org/TR/css-syntax-3/#number-token-diagram
262 fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str> {
263 self.scan_with(|scanner| {
264 _ = scanner.accept_char_any(&['+', '-']);
265
266 if scanner.accept_char('.').is_ok() {
267 scanner.accept_if_ext(char::is_ascii_digit)?;
268 scanner.skip_while_ext(char::is_ascii_digit);
269 } else {
270 scanner.accept_if_ext(char::is_ascii_digit)?;
271 scanner.skip_while_ext(char::is_ascii_digit);
272
273 if scanner.accept_char('.').is_ok() {
274 scanner.accept_if_ext(char::is_ascii_digit)?;
275 scanner.skip_while_ext(char::is_ascii_digit);
276 }
277 }
278
279 if scanner.accept_char_any(&['E', 'e']).is_ok() {
280 _ = scanner.accept_char_any(&['+', '-']);
281 scanner.accept_if_ext(char::is_ascii_digit)?;
282 scanner.skip_while_ext(char::is_ascii_digit);
283 }
284
285 Ok(())
286 })
287 }
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans a CSS identifier at the start of `text` and checks both the
    /// scan result and the text left in the scanner afterwards.
    fn check_identifier<'text>(
        text: &'text str,
        expected: ScannerResult<'text, &'text str>,
        remaining: &str,
    ) {
        let mut scanner = Scanner::new(text);
        assert_eq!(scanner.scan_css_identifier(), expected);
        assert_eq!(scanner.remaining_text(), remaining);
    }

    /// Scans a CSS number at the start of `text` and checks both the
    /// scan result and the text left in the scanner afterwards.
    fn check_number<'text>(
        text: &'text str,
        expected: ScannerResult<'text, &'text str>,
        remaining: &str,
    ) {
        let mut scanner = Scanner::new(text);
        assert_eq!(scanner.scan_css_number(), expected);
        assert_eq!(scanner.remaining_text(), remaining);
    }

    #[test]
    fn test_css_ident() {
        let cases = [
            ("x", Ok((0..1, "x")), ""),
            ("foo", Ok((0..3, "foo")), ""),
            ("foo123", Ok((0..6, "foo123")), ""),
            ("foo_123", Ok((0..7, "foo_123")), ""),
            ("foo-123", Ok((0..7, "foo-123")), ""),
            ("foo__123_", Ok((0..9, "foo__123_")), ""),
            ("foo--123-", Ok((0..9, "foo--123-")), ""),
            // Leading underscores and dashes
            ("_", Ok((0..1, "_")), ""),
            ("__", Ok((0..2, "__")), ""),
            ("_x", Ok((0..2, "_x")), ""),
            ("_1", Ok((0..2, "_1")), ""),
            ("--", Ok((0..2, "--")), ""),
            ("_-", Ok((0..2, "_-")), ""),
            ("-_", Ok((0..2, "-_")), ""),
            ("-x", Ok((0..2, "-x")), ""),
            ("--x", Ok((0..3, "--x")), ""),
            ("_foo", Ok((0..4, "_foo")), ""),
            ("__foo", Ok((0..5, "__foo")), ""),
            ("-foo", Ok((0..4, "-foo")), ""),
            ("--foo", Ok((0..5, "--foo")), ""),
            // Digits directly after a double dash
            ("--1", Ok((0..3, "--1")), ""),
            ("--1x", Ok((0..4, "--1x")), ""),
            ("--1+", Ok((0..3, "--1")), "+"),
            ("---1", Ok((0..4, "---1")), ""),
            ("---1x", Ok((0..5, "---1x")), ""),
            // Non-ASCII letters
            ("æøå", Ok((0..6, "æøå")), ""),
            ("-æøå", Ok((0..7, "-æøå")), ""),
            ("--æøå", Ok((0..8, "--æøå")), ""),
            // Trailing text is left in the scanner
            ("x ", Ok((0..1, "x")), " "),
            ("_ ", Ok((0..1, "_")), " "),
            ("__ ", Ok((0..2, "__")), " "),
            ("-- ", Ok((0..2, "--")), " "),
            ("_- ", Ok((0..2, "_-")), " "),
            ("-_ ", Ok((0..2, "-_")), " "),
        ];

        for (input, want, rest) in cases {
            check_identifier(input, want, rest);
        }
    }

    #[test]
    fn test_css_ident_invalid() {
        let cases = [
            ("", Err((0..0, "")), ""),
            (" ", Err((0..0, "")), " "),
            ("-", Err((0..1, "-")), "-"),
            ("- ", Err((0..1, "-")), "- "),
            ("-1", Err((0..1, "-")), "-1"),
            ("-1x", Err((0..1, "-")), "-1x"),
            ("-1+", Err((0..1, "-")), "-1+"),
        ];

        for (input, want, rest) in cases {
            check_identifier(input, want, rest);
        }
    }

    #[test]
    fn test_css_num() {
        let cases = [
            ("", Err((0..0, "")), ""),
            (" ", Err((0..0, "")), " "),
            ("+", Err((0..1, "+")), "+"),
            ("-", Err((0..1, "-")), "-"),
            ("+ ", Err((0..1, "+")), "+ "),
            ("- ", Err((0..1, "-")), "- "),
            // Integers
            ("1", Ok((0..1, "1")), ""),
            ("+1", Ok((0..2, "+1")), ""),
            ("-1", Ok((0..2, "-1")), ""),
            // Integer and fraction
            ("1.2", Ok((0..3, "1.2")), ""),
            ("+1.2", Ok((0..4, "+1.2")), ""),
            ("-1.2", Ok((0..4, "-1.2")), ""),
            // Fraction only
            (".1", Ok((0..2, ".1")), ""),
            ("+.1", Ok((0..3, "+.1")), ""),
            ("-.1", Ok((0..3, "-.1")), ""),
            // Only a single sign is allowed
            ("++", Err((0..1, "+")), "++"),
            ("--", Err((0..1, "-")), "--"),
            ("+-", Err((0..1, "+")), "+-"),
            ("-+", Err((0..1, "-")), "-+"),
            //
            ("++1", Err((0..1, "+")), "++1"),
            ("--1", Err((0..1, "-")), "--1"),
            ("+-1", Err((0..1, "+")), "+-1"),
            ("-+1", Err((0..1, "-")), "-+1"),
            // Exponent without digits
            ("1E", Err((0..2, "1E")), "1E"),
            ("1EE", Err((0..2, "1E")), "1EE"),
            ("1E*", Err((0..2, "1E")), "1E*"),
            ("1E+", Err((0..3, "1E+")), "1E+"),
            ("1E+X", Err((0..3, "1E+")), "1E+X"),
        ];

        // Every case as-is
        for (input, want, rest) in cases.clone() {
            check_number(input, want, rest);
        }

        // Every valid case again, with each kind of exponent appended
        for (input, want, rest) in cases {
            let (range, scanned) = match want {
                Ok(item) => item,
                Err(_) => continue,
            };

            for e in ['E', 'e'] {
                for sign in ["", "+", "-"] {
                    let exponent = format!("{e}{sign}1");
                    let input = format!("{input}{exponent}");
                    let scanned = format!("{scanned}{exponent}");
                    let range = range.start..(range.end + exponent.len());

                    check_number(&input, Ok((range, scanned.as_str())), rest);
                }
            }
        }
    }

    #[test]
    fn test_css_num_invalid() {
        let cases = [
            ("1E", Err((0..2, "1E")), "1E"),
            ("1E ", Err((0..2, "1E")), "1E "),
            ("1EE", Err((0..2, "1E")), "1EE"),
            ("1E*", Err((0..2, "1E")), "1E*"),
            ("1E+", Err((0..3, "1E+")), "1E+"),
            ("1E+ ", Err((0..3, "1E+")), "1E+ "),
            ("1E+X", Err((0..3, "1E+")), "1E+X"),
        ];

        for (input, want, rest) in cases {
            check_number(input, want, rest);
        }
    }
}