text_scanner/ext/
css.rs

1use crate::{Scanner, ScannerResult};
2
3/// [`Scanner`] extension for scanning CSS tokens.
4///
5/// See also [`ScssScannerExt`].
6///
7/// [`ScssScannerExt`]: super::ScssScannerExt
8pub trait CssScannerExt<'text>: crate::private::Sealed {
9    /// Scans a single [CSS block comment].
10    ///
11    /// **Note:** CSS block comments do **not** allow nested block comments.
12    ///
13    /// **Note:** This has the same lifetime as the original `text`,
14    /// so the scanner can continue to be used while this exists.
15    ///
16    /// # Example
17    ///
18    /// ```rust
19    /// use text_scanner::{ext::CssScannerExt, Scanner};
20    ///
21    /// let text = r#"
22    ///   /* Block Comment */
23    ///
24    ///   /* Multi
25    ///   // Line
26    ///   /* Block
27    ///      Comment */
28    ///
29    ///   /* Unterminated Block Comment
30    /// "#;
31    ///
32    /// let comments = [
33    ///     (3..22,  "/* Block Comment */"),
34    ///     (26..71, "/* Multi\n  // Line\n  /* Block\n     Comment */"),
35    ///     (75..105, "/* Unterminated Block Comment\n"),
36    /// ];
37    ///
38    /// let mut scanner = Scanner::new(text);
39    /// for comment in comments {
40    ///     scanner.skip_whitespace();
41    ///     assert_eq!(scanner.scan_css_block_comment(), Ok(comment));
42    /// }
43    ///
44    /// # scanner.skip_whitespace();
45    /// # assert_eq!(scanner.remaining_text(), "");
46    /// ```
47    ///
48    /// [CSS block comment]: https://www.w3.org/TR/css-syntax-3/#comment-diagram
49    fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str>;
50
51    /// Scans a single [CSS identifier].
52    ///
53    /// **Note:** This has the same lifetime as the original `text`,
54    /// so the scanner can continue to be used while this exists.
55    ///
56    /// # Example
57    ///
58    /// ```rust
59    /// use text_scanner::{ext::CssScannerExt, Scanner};
60    ///
61    /// let text = r#"
62    ///   foo
63    ///   foo_bar
64    ///   foo-bar
65    ///   --foo
66    /// "#;
67    ///
68    /// let idents = [
69    ///     (3..6,   "foo"),
70    ///     (9..16,  "foo_bar"),
71    ///     (19..26, "foo-bar"),
72    ///     (29..34, "--foo"),
73    /// ];
74    ///
75    /// let mut scanner = Scanner::new(text);
76    /// for ident in idents {
77    ///     scanner.skip_whitespace();
78    ///     assert_eq!(scanner.scan_css_identifier(), Ok(ident));
79    /// }
80    ///
81    /// # scanner.skip_whitespace();
82    /// # assert_eq!(scanner.remaining_text(), "");
83    /// ```
84    ///
85    /// [CSS identifier]: https://www.w3.org/TR/css-syntax-3/#ident-token-diagram
86    fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str>;
87
88    fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str>;
89
90    fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str>;
91
92    /// Scans a single [CSS string].
93    ///
94    /// **Note:** This has the same lifetime as the original `text`,
95    /// so the scanner can continue to be used while this exists.
96    ///
97    /// # Example
98    ///
99    /// ```rust
100    /// use text_scanner::{ext::CssScannerExt, Scanner};
101    ///
102    /// let text = r#"
103    ///   "Hello World"
104    ///   'Hello World'
105    ///
106    ///   "Hello ' \" World"
107    ///   'Hello \' " World'
108    ///
109    ///   "Unterminated String
110    /// "#;
111    ///
112    /// let strings = [
113    ///     (3..16,  r#""Hello World""#),
114    ///     (19..32, r#"'Hello World'"#),
115    ///     (36..54, r#""Hello ' \" World""#),
116    ///     (57..75, r#"'Hello \' " World'"#),
117    ///     (79..100, "\"Unterminated String\n"),
118    /// ];
119    ///
120    /// let mut scanner = Scanner::new(text);
121    /// for string in strings {
122    ///     scanner.skip_whitespace();
123    ///     assert_eq!(scanner.scan_css_string(), Ok(string));
124    /// }
125    ///
126    /// # scanner.skip_whitespace();
127    /// # assert_eq!(scanner.remaining_text(), "");
128    /// ```
129    ///
130    /// [CSS string]: https://www.w3.org/TR/css-syntax-3/#string-token-diagram
131    fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str>;
132
133    /// Scans a single [CSS number].
134    ///
135    /// **Note:** CSS numbers allow a unary `+` or `-` before the number,
136    /// as opposed to other languages separating those into two different
137    /// tokens.
138    ///
139    /// **Note:** This has the same lifetime as the original `text`,
140    /// so the scanner can continue to be used while this exists.
141    ///
142    /// # Example
143    ///
144    /// ```rust
145    /// use text_scanner::{ext::CssScannerExt, Scanner};
146    ///
147    /// let text = r#"
148    ///   1
149    ///   -2
150    ///   +3
151    ///   3.1415
152    ///   +10.5E+100
153    /// "#;
154    ///
155    /// let numbers = [
156    ///     (3..4,   "1"),
157    ///     (7..9,   "-2"),
158    ///     (12..14, "+3"),
159    ///     (17..23, "3.1415"),
160    ///     (26..36, "+10.5E+100"),
161    /// ];
162    ///
163    /// let mut scanner = Scanner::new(text);
164    /// for num in numbers {
165    ///     scanner.skip_whitespace();
166    ///     assert_eq!(scanner.scan_css_number(), Ok(num));
167    /// }
168    ///
169    /// # scanner.skip_whitespace();
170    /// # assert_eq!(scanner.remaining_text(), "");
171    /// ```
172    ///
173    /// [CSS number]: https://www.w3.org/TR/css-syntax-3/#number-token-diagram
174    fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str>;
175}
176
177impl<'text> CssScannerExt<'text> for Scanner<'text> {
178    // Reference: https://www.w3.org/TR/css-syntax-3/#comment-diagram
179    fn scan_css_block_comment(&mut self) -> ScannerResult<'text, &'text str> {
180        self.scan_with(|scanner| {
181            scanner.accept_str("/*")?;
182
183            loop {
184                let (r, _) = scanner.skip_until_char('*');
185                if r.is_empty() {
186                    break;
187                }
188
189                // Safe to ignore as it is guaranteed to be `Ok`
190                _ = scanner.accept_char('*');
191
192                if scanner.accept_char('/').is_ok() {
193                    break;
194                }
195            }
196
197            Ok(())
198        })
199    }
200
201    // Reference: https://www.w3.org/TR/css-syntax-3/#ident-token-diagram
202    fn scan_css_identifier(&mut self) -> ScannerResult<'text, &'text str> {
203        self.scan_with(|scanner| {
204            if scanner.accept_char('-').is_ok() {
205                if scanner.accept_char('-').is_ok() {
206                } else {
207                    scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
208                }
209
210                scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
211            } else {
212                scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
213                scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
214            }
215
216            Ok(())
217        })
218    }
219
220    // Reference: https://www.w3.org/TR/css-syntax-3/#at-keyword-token-diagram
221    fn scan_css_at_keyword(&mut self) -> ScannerResult<'text, &'text str> {
222        self.scan_with(|scanner| {
223            scanner.accept_char('@')?;
224            scanner.scan_css_identifier()?;
225            Ok(())
226        })
227    }
228
229    // Reference: https://www.w3.org/TR/css-syntax-3/#hash-token-diagram
230    fn scan_css_hash(&mut self) -> ScannerResult<'text, &'text str> {
231        self.scan_with(|scanner| {
232            scanner.accept_char('#')?;
233            scanner.accept_if(|c| c.is_alphanumeric() || matches!(c, '_' | '-'))?;
234            scanner.skip_while(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
235            Ok(())
236        })
237    }
238
239    // Reference: https://www.w3.org/TR/css-syntax-3/#string-token-diagram
240    fn scan_css_string(&mut self) -> ScannerResult<'text, &'text str> {
241        self.scan_with(|scanner| {
242            let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
243
244            loop {
245                scanner.skip_until(|c| (c == quote) || (c == '\\'));
246                match scanner.next() {
247                    Ok((_r, c)) if c == quote => break,
248                    Ok((_r, '\\')) => {
249                        // Skip the next character as it is escaped
250                        _ = scanner.next();
251                    }
252                    Ok(_) => unreachable!(),
253                    Err(_) => break,
254                }
255            }
256
257            Ok(())
258        })
259    }
260
261    // Reference: https://www.w3.org/TR/css-syntax-3/#number-token-diagram
262    fn scan_css_number(&mut self) -> ScannerResult<'text, &'text str> {
263        self.scan_with(|scanner| {
264            _ = scanner.accept_char_any(&['+', '-']);
265
266            if scanner.accept_char('.').is_ok() {
267                scanner.accept_if_ext(char::is_ascii_digit)?;
268                scanner.skip_while_ext(char::is_ascii_digit);
269            } else {
270                scanner.accept_if_ext(char::is_ascii_digit)?;
271                scanner.skip_while_ext(char::is_ascii_digit);
272
273                if scanner.accept_char('.').is_ok() {
274                    scanner.accept_if_ext(char::is_ascii_digit)?;
275                    scanner.skip_while_ext(char::is_ascii_digit);
276                }
277            }
278
279            if scanner.accept_char_any(&['E', 'e']).is_ok() {
280                _ = scanner.accept_char_any(&['+', '-']);
281                scanner.accept_if_ext(char::is_ascii_digit)?;
282                scanner.skip_while_ext(char::is_ascii_digit);
283            }
284
285            Ok(())
286        })
287    }
288}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `scan` on a fresh [`Scanner`] for each `(input, expected, remaining)`
    /// case, asserting both the scan result and the text left in the scanner.
    fn check_cases<'text>(
        cases: impl IntoIterator<Item = (&'text str, ScannerResult<'text, &'text str>, &'text str)>,
        scan: impl Fn(&mut Scanner<'text>) -> ScannerResult<'text, &'text str>,
    ) {
        for (input, want, rest) in cases {
            let mut scanner = Scanner::new(input);
            assert_eq!(scan(&mut scanner), want);
            assert_eq!(scanner.remaining_text(), rest);
        }
    }

    #[test]
    fn test_css_ident() {
        let cases = [
            ("x", Ok((0..1, "x")), ""),
            ("foo", Ok((0..3, "foo")), ""),
            ("foo123", Ok((0..6, "foo123")), ""),
            ("foo_123", Ok((0..7, "foo_123")), ""),
            ("foo-123", Ok((0..7, "foo-123")), ""),
            ("foo__123_", Ok((0..9, "foo__123_")), ""),
            ("foo--123-", Ok((0..9, "foo--123-")), ""),
            // Leading underscores and dashes
            ("_", Ok((0..1, "_")), ""),
            ("__", Ok((0..2, "__")), ""),
            ("_x", Ok((0..2, "_x")), ""),
            ("_1", Ok((0..2, "_1")), ""),
            ("--", Ok((0..2, "--")), ""),
            ("_-", Ok((0..2, "_-")), ""),
            ("-_", Ok((0..2, "-_")), ""),
            ("-x", Ok((0..2, "-x")), ""),
            ("--x", Ok((0..3, "--x")), ""),
            ("_foo", Ok((0..4, "_foo")), ""),
            ("__foo", Ok((0..5, "__foo")), ""),
            ("-foo", Ok((0..4, "-foo")), ""),
            ("--foo", Ok((0..5, "--foo")), ""),
            // Digits directly after a double dash
            ("--1", Ok((0..3, "--1")), ""),
            ("--1x", Ok((0..4, "--1x")), ""),
            ("--1+", Ok((0..3, "--1")), "+"),
            ("---1", Ok((0..4, "---1")), ""),
            ("---1x", Ok((0..5, "---1x")), ""),
            // Non-ASCII letters
            ("æøå", Ok((0..6, "æøå")), ""),
            ("-æøå", Ok((0..7, "-æøå")), ""),
            ("--æøå", Ok((0..8, "--æøå")), ""),
            // Trailing text is left in the scanner
            ("x ", Ok((0..1, "x")), " "),
            ("_ ", Ok((0..1, "_")), " "),
            ("__ ", Ok((0..2, "__")), " "),
            ("-- ", Ok((0..2, "--")), " "),
            ("_- ", Ok((0..2, "_-")), " "),
            ("-_ ", Ok((0..2, "-_")), " "),
        ];

        check_cases(cases, |scanner| scanner.scan_css_identifier());
    }

    #[test]
    fn test_css_ident_invalid() {
        let cases = [
            ("", Err((0..0, "")), ""),
            (" ", Err((0..0, "")), " "),
            ("-", Err((0..1, "-")), "-"),
            ("- ", Err((0..1, "-")), "- "),
            ("-1", Err((0..1, "-")), "-1"),
            ("-1x", Err((0..1, "-")), "-1x"),
            ("-1+", Err((0..1, "-")), "-1+"),
        ];

        check_cases(cases, |scanner| scanner.scan_css_identifier());
    }

    #[test]
    fn test_css_num() {
        let cases = [
            ("", Err((0..0, "")), ""),
            (" ", Err((0..0, "")), " "),
            ("+", Err((0..1, "+")), "+"),
            ("-", Err((0..1, "-")), "-"),
            ("+ ", Err((0..1, "+")), "+ "),
            ("- ", Err((0..1, "-")), "- "),
            // Integers
            ("1", Ok((0..1, "1")), ""),
            ("+1", Ok((0..2, "+1")), ""),
            ("-1", Ok((0..2, "-1")), ""),
            // Integer and fractional part
            ("1.2", Ok((0..3, "1.2")), ""),
            ("+1.2", Ok((0..4, "+1.2")), ""),
            ("-1.2", Ok((0..4, "-1.2")), ""),
            // Fractional part only
            (".1", Ok((0..2, ".1")), ""),
            ("+.1", Ok((0..3, "+.1")), ""),
            ("-.1", Ok((0..3, "-.1")), ""),
            // Only a single sign is allowed
            ("++", Err((0..1, "+")), "++"),
            ("--", Err((0..1, "-")), "--"),
            ("+-", Err((0..1, "+")), "+-"),
            ("-+", Err((0..1, "-")), "-+"),
            //
            ("++1", Err((0..1, "+")), "++1"),
            ("--1", Err((0..1, "-")), "--1"),
            ("+-1", Err((0..1, "+")), "+-1"),
            ("-+1", Err((0..1, "-")), "-+1"),
            // Exponent without digits
            ("1E", Err((0..2, "1E")), "1E"),
            ("1EE", Err((0..2, "1E")), "1EE"),
            ("1E*", Err((0..2, "1E")), "1E*"),
            ("1E+", Err((0..3, "1E+")), "1E+"),
            ("1E+X", Err((0..3, "1E+")), "1E+X"),
        ];

        check_cases(cases.iter().cloned(), |scanner| scanner.scan_css_number());

        // Every valid number must remain valid, when an exponent is appended
        for (base, outcome, remaining) in cases {
            if let Ok((base_range, base_lexeme)) = outcome {
                for e in ['E', 'e'] {
                    for sign in ["", "+", "-"] {
                        let exponent = format!("{e}{sign}1");
                        let input = format!("{base}{exponent}");

                        let range = base_range.start..(base_range.end + exponent.len());
                        let lexeme = format!("{base_lexeme}{exponent}");

                        let mut scanner = Scanner::new(&input);
                        assert_eq!(scanner.scan_css_number(), Ok((range, lexeme.as_str())));
                        assert_eq!(scanner.remaining_text(), remaining);
                    }
                }
            }
        }
    }

    #[test]
    fn test_css_num_invalid() {
        let cases = [
            ("1E", Err((0..2, "1E")), "1E"),
            ("1E ", Err((0..2, "1E")), "1E "),
            ("1EE", Err((0..2, "1E")), "1EE"),
            ("1E*", Err((0..2, "1E")), "1E*"),
            ("1E+", Err((0..3, "1E+")), "1E+"),
            ("1E+ ", Err((0..3, "1E+")), "1E+ "),
            ("1E+X", Err((0..3, "1E+")), "1E+X"),
        ];

        check_cases(cases, |scanner| scanner.scan_css_number());
    }
}
449}