cssparser/
serializer.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use crate::match_byte;
6use dtoa_short::Notation;
7use std::fmt::{self, Write};
8use std::str;
9
10use super::Token;
11
/// Trait for things that can serialize themselves in CSS syntax.
pub trait ToCss {
    /// Write the CSS serialization of `self` into `dest`.
    ///
    /// Any error reported by `dest` is returned to the caller.
    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
    where
        W: fmt::Write;

    /// Serialize `self` in CSS syntax into a freshly allocated `String`.
    ///
    /// This is a convenience wrapper around `to_css` and should normally
    /// not be overridden.
    ///
    /// # Panics
    ///
    /// Panics if `to_css` returns an error.
    #[inline]
    fn to_css_string(&self) -> String {
        let mut css = String::new();
        let outcome = self.to_css(&mut css);
        outcome.unwrap();
        css
    }
}
29
30#[inline]
31fn write_numeric<W>(value: f32, int_value: Option<i32>, has_sign: bool, dest: &mut W) -> fmt::Result
32where
33    W: fmt::Write,
34{
35    // `value.value >= 0` is true for negative 0.
36    if has_sign && value.is_sign_positive() {
37        dest.write_str("+")?;
38    }
39
40    let notation = if value == 0.0 && value.is_sign_negative() {
41        // Negative zero. Work around #20596.
42        dest.write_str("-0")?;
43        Notation {
44            decimal_point: false,
45            scientific: false,
46        }
47    } else {
48        dtoa_short::write(dest, value)?
49    };
50
51    if int_value.is_none() && value.fract() == 0. && !notation.decimal_point && !notation.scientific
52    {
53        dest.write_str(".0")?;
54    }
55    Ok(())
56}
57
58impl ToCss for Token<'_> {
59    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
60    where
61        W: fmt::Write,
62    {
63        match *self {
64            Token::Ident(ref value) => serialize_identifier(value, dest)?,
65            Token::AtKeyword(ref value) => {
66                dest.write_str("@")?;
67                serialize_identifier(value, dest)?;
68            }
69            Token::Hash(ref value) => {
70                dest.write_str("#")?;
71                serialize_name(value, dest)?;
72            }
73            Token::IDHash(ref value) => {
74                dest.write_str("#")?;
75                serialize_identifier(value, dest)?;
76            }
77            Token::QuotedString(ref value) => serialize_string(value, dest)?,
78            Token::UnquotedUrl(ref value) => {
79                dest.write_str("url(")?;
80                serialize_unquoted_url(value, dest)?;
81                dest.write_str(")")?;
82            }
83            Token::Delim(value) => dest.write_char(value)?,
84
85            Token::Number {
86                value,
87                int_value,
88                has_sign,
89            } => write_numeric(value, int_value, has_sign, dest)?,
90            Token::Percentage {
91                unit_value,
92                int_value,
93                has_sign,
94            } => {
95                write_numeric(unit_value * 100., int_value, has_sign, dest)?;
96                dest.write_str("%")?;
97            }
98            Token::Dimension {
99                value,
100                int_value,
101                has_sign,
102                ref unit,
103            } => {
104                write_numeric(value, int_value, has_sign, dest)?;
105                // Disambiguate with scientific notation.
106                let unit = &**unit;
107                // TODO(emilio): This doesn't handle e.g. 100E1m, which gets us
108                // an unit of "E1m"...
109                if unit == "e" || unit == "E" || unit.starts_with("e-") || unit.starts_with("E-") {
110                    dest.write_str("\\65 ")?;
111                    serialize_name(&unit[1..], dest)?;
112                } else {
113                    serialize_identifier(unit, dest)?;
114                }
115            }
116
117            Token::WhiteSpace(content) => dest.write_str(content)?,
118            Token::Comment(content) => {
119                dest.write_str("/*")?;
120                dest.write_str(content)?;
121                dest.write_str("*/")?
122            }
123            Token::Colon => dest.write_str(":")?,
124            Token::Semicolon => dest.write_str(";")?,
125            Token::Comma => dest.write_str(",")?,
126            Token::IncludeMatch => dest.write_str("~=")?,
127            Token::DashMatch => dest.write_str("|=")?,
128            Token::PrefixMatch => dest.write_str("^=")?,
129            Token::SuffixMatch => dest.write_str("$=")?,
130            Token::SubstringMatch => dest.write_str("*=")?,
131            Token::CDO => dest.write_str("<!--")?,
132            Token::CDC => dest.write_str("-->")?,
133
134            Token::Function(ref name) => {
135                serialize_identifier(name, dest)?;
136                dest.write_str("(")?;
137            }
138            Token::ParenthesisBlock => dest.write_str("(")?,
139            Token::SquareBracketBlock => dest.write_str("[")?,
140            Token::CurlyBracketBlock => dest.write_str("{")?,
141
142            Token::BadUrl(ref contents) => {
143                dest.write_str("url(")?;
144                dest.write_str(contents)?;
145                dest.write_char(')')?;
146            }
147            Token::BadString(ref value) => {
148                // During tokenization, an unescaped newline after a quote causes
149                // the token to be a BadString instead of a QuotedString.
150                // The BadString token ends just before the newline
151                // (which is in a separate WhiteSpace token),
152                // and therefore does not have a closing quote.
153                dest.write_char('"')?;
154                CssStringWriter::new(dest).write_str(value)?;
155            }
156            Token::CloseParenthesis => dest.write_str(")")?,
157            Token::CloseSquareBracket => dest.write_str("]")?,
158            Token::CloseCurlyBracket => dest.write_str("}")?,
159        }
160        Ok(())
161    }
162}
163
/// Write `ascii_byte` as a CSS hexadecimal escape: a backslash, one or two
/// lowercase hex digits (no leading zero), and a terminating space.
fn hex_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    const LOWER_HEX: &[u8; 16] = b"0123456789abcdef";

    let high_nibble = usize::from(ascii_byte >> 4);
    let low_nibble = usize::from(ascii_byte & 0x0F);

    let mut buf = [b'\\', 0, 0, 0];
    let len = if high_nibble == 0 {
        // Single hex digit: `\d `.
        buf[1] = LOWER_HEX[low_nibble];
        buf[2] = b' ';
        3
    } else {
        // Two hex digits: `\dd `.
        buf[1] = LOWER_HEX[high_nibble];
        buf[2] = LOWER_HEX[low_nibble];
        buf[3] = b' ';
        4
    };

    // SAFETY: `buf[..len]` contains only a backslash, bytes taken from
    // `LOWER_HEX`, and a space, all of which are ASCII and thus valid UTF-8.
    dest.write_str(unsafe { str::from_utf8_unchecked(&buf[..len]) })
}
182
/// Write `ascii_byte` preceded by a backslash, in a single `write_str` call.
///
/// `ascii_byte` must be an ASCII byte; the callers in this file only pass
/// bytes that have already been matched as ASCII.
fn char_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    let escaped = [b'\\', ascii_byte];
    // SAFETY: a backslash followed by an ASCII byte is valid UTF-8. This
    // relies on the caller upholding the ASCII requirement stated above.
    let text = unsafe { str::from_utf8_unchecked(&escaped) };
    dest.write_str(text)
}
190
191/// Write a CSS identifier, escaping characters as necessary.
192pub fn serialize_identifier<W>(mut value: &str, dest: &mut W) -> fmt::Result
193where
194    W: fmt::Write,
195{
196    if value.is_empty() {
197        return Ok(());
198    }
199
200    if let Some(value) = value.strip_prefix("--") {
201        dest.write_str("--")?;
202        serialize_name(value, dest)
203    } else if value == "-" {
204        dest.write_str("\\-")
205    } else {
206        if value.as_bytes()[0] == b'-' {
207            dest.write_str("-")?;
208            value = &value[1..];
209        }
210        if let digit @ b'0'..=b'9' = value.as_bytes()[0] {
211            hex_escape(digit, dest)?;
212            value = &value[1..];
213        }
214        serialize_name(value, dest)
215    }
216}
217
218/// Write a CSS name, like a custom property name.
219///
220/// You should only use this when you know what you're doing, when in doubt,
221/// consider using `serialize_identifier`.
222pub fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result
223where
224    W: fmt::Write,
225{
226    let mut chunk_start = 0;
227    for (i, b) in value.bytes().enumerate() {
228        let escaped = match_byte! { b,
229            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' => continue,
230            b'\0' => Some("\u{FFFD}"),
231            b => {
232                if !b.is_ascii() {
233                    continue;
234                }
235                None
236            },
237        };
238        dest.write_str(&value[chunk_start..i])?;
239        if let Some(escaped) = escaped {
240            dest.write_str(escaped)?;
241        } else if (b'\x01'..=b'\x1F').contains(&b) || b == b'\x7F' {
242            hex_escape(b, dest)?;
243        } else {
244            char_escape(b, dest)?;
245        }
246        chunk_start = i + 1;
247    }
248    dest.write_str(&value[chunk_start..])
249}
250
251fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result
252where
253    W: fmt::Write,
254{
255    let mut chunk_start = 0;
256    for (i, b) in value.bytes().enumerate() {
257        let hex = match_byte! { b,
258            b'\0'..=b' ' | b'\x7F' => true,
259            b'(' | b')' | b'"' | b'\'' | b'\\' => false,
260            _ => continue,
261        };
262        dest.write_str(&value[chunk_start..i])?;
263        if hex {
264            hex_escape(b, dest)?;
265        } else {
266            char_escape(b, dest)?;
267        }
268        chunk_start = i + 1;
269    }
270    dest.write_str(&value[chunk_start..])
271}
272
273/// Write a double-quoted CSS string token, escaping content as necessary.
274pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result
275where
276    W: fmt::Write,
277{
278    dest.write_str("\"")?;
279    CssStringWriter::new(dest).write_str(value)?;
280    dest.write_str("\"")?;
281    Ok(())
282}
283
284/// A `fmt::Write` adapter that escapes text for writing as a double-quoted CSS string.
285/// Quotes are not included.
286///
287/// Typical usage:
288///
289/// ```{rust,ignore}
290/// fn write_foo<W>(foo: &Foo, dest: &mut W) -> fmt::Result where W: fmt::Write {
291///     dest.write_str("\"")?;
292///     {
293///         let mut string_dest = CssStringWriter::new(dest);
294///         // Write into string_dest...
295///     }
296///     dest.write_str("\"")?;
297///     Ok(())
298/// }
299/// ```
300pub struct CssStringWriter<'a, W> {
301    inner: &'a mut W,
302}
303
304impl<'a, W> CssStringWriter<'a, W>
305where
306    W: fmt::Write,
307{
308    /// Wrap a text writer to create a `CssStringWriter`.
309    pub fn new(inner: &'a mut W) -> CssStringWriter<'a, W> {
310        CssStringWriter { inner }
311    }
312}
313
314impl<W> fmt::Write for CssStringWriter<'_, W>
315where
316    W: fmt::Write,
317{
318    fn write_str(&mut self, s: &str) -> fmt::Result {
319        let mut chunk_start = 0;
320        for (i, b) in s.bytes().enumerate() {
321            let escaped = match_byte! { b,
322                b'"' => Some("\\\""),
323                b'\\' => Some("\\\\"),
324                b'\0' => Some("\u{FFFD}"),
325                b'\x01'..=b'\x1F' | b'\x7F' => None,
326                _ => continue,
327            };
328            self.inner.write_str(&s[chunk_start..i])?;
329            match escaped {
330                Some(x) => self.inner.write_str(x)?,
331                None => hex_escape(b, self.inner)?,
332            };
333            chunk_start = i + 1;
334        }
335        self.inner.write_str(&s[chunk_start..])
336    }
337}
338
339macro_rules! impl_tocss_for_int {
340    ($T: ty) => {
341        impl ToCss for $T {
342            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
343            where
344                W: fmt::Write,
345            {
346                let mut buf = itoa::Buffer::new();
347                dest.write_str(buf.format(*self))
348            }
349        }
350    };
351}
352
353impl_tocss_for_int!(i8);
354impl_tocss_for_int!(u8);
355impl_tocss_for_int!(i16);
356impl_tocss_for_int!(u16);
357impl_tocss_for_int!(i32);
358impl_tocss_for_int!(u32);
359impl_tocss_for_int!(i64);
360impl_tocss_for_int!(u64);
361
362macro_rules! impl_tocss_for_float {
363    ($T: ty) => {
364        impl ToCss for $T {
365            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
366            where
367                W: fmt::Write,
368            {
369                dtoa_short::write(dest, *self).map(|_| ())
370            }
371        }
372    };
373}
374
375impl_tocss_for_float!(f32);
376impl_tocss_for_float!(f64);
377
/// A category of token. See the `needs_separator_when_before` method.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub enum TokenSerializationType {
    /// No token serialization type. This is the `Default` value.
    #[default]
    Nothing,

    /// The [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram)
    /// type.
    WhiteSpace,

    /// The [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram)
    /// type, the "[`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with
    /// the type flag set to 'unrestricted'" type, or the
    /// "[`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) with the type
    /// flag set to 'id'" type.
    AtKeywordOrHash,

    /// The [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram) type.
    Number,

    /// The [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram)
    /// type.
    Dimension,

    /// The [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram)
    /// type.
    Percentage,

    /// The [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or
    /// `<bad-url-token>` type.
    UrlOrBadUrl,

    /// The [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram) type.
    Function,

    /// The [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram) type.
    Ident,

    /// The `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram) type.
    CDC,

    /// The `|=`
    /// [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram) type.
    DashMatch,

    /// The `*=`
    /// [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram)
    /// type.
    SubstringMatch,

    /// The `<(-token>` type.
    OpenParen,

    /// The `#` `<delim-token>` type.
    DelimHash,

    /// The `@` `<delim-token>` type.
    DelimAt,

    /// The `.` or `+` `<delim-token>` type.
    DelimDotOrPlus,

    /// The `-` `<delim-token>` type.
    DelimMinus,

    /// The `?` `<delim-token>` type.
    DelimQuestion,

    /// The `$`, `^`, or `~` `<delim-token>` type.
    DelimAssorted,

    /// The `=` `<delim-token>` type.
    DelimEquals,

    /// The `|` `<delim-token>` type.
    DelimBar,

    /// The `/` `<delim-token>` type.
    DelimSlash,

    /// The `*` `<delim-token>` type.
    DelimAsterisk,

    /// The `%` `<delim-token>` type.
    DelimPercent,

    /// A type indicating any other token.
    Other,
}

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(TokenSerializationType);

impl TokenSerializationType {
    /// Return a value that represents the absence of a token, e.g. before the start of the input.
    #[deprecated(
        since = "0.32.1",
        note = "use TokenSerializationType::Nothing or TokenSerializationType::default() instead"
    )]
    pub fn nothing() -> TokenSerializationType {
        TokenSerializationType::Nothing
    }

    /// If this value is `TokenSerializationType::Nothing`, set it to the given value instead.
    /// Any other value is left untouched.
    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
        if *self == TokenSerializationType::Nothing {
            *self = new_value;
        }
    }

    /// Return true if, when a token of category `self` is serialized just before
    /// a token of category `other` with no whitespace in between,
    /// an empty comment `/**/` needs to be inserted between them
    /// so that they are not re-parsed as a single token.
    ///
    /// See <https://drafts.csswg.org/css-syntax/#serialization>
    ///
    /// See <https://github.com/w3c/csswg-drafts/issues/4088> for the
    /// `DelimPercent` bits.
    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
        use self::TokenSerializationType::*;

        // Two groups of following tokens recur across several rows of the
        // table, so they are computed once up front.
        let next_is_ident_like = matches!(other, Ident | Function | UrlOrBadUrl | DelimMinus);
        let next_is_numeric = matches!(other, Number | Percentage | Dimension);

        match self {
            Ident => next_is_ident_like || next_is_numeric || matches!(other, CDC | OpenParen),
            AtKeywordOrHash | Dimension => next_is_ident_like || next_is_numeric || other == CDC,
            DelimHash | DelimMinus => next_is_ident_like || next_is_numeric,
            Number => next_is_ident_like || next_is_numeric || other == DelimPercent,
            DelimAt => next_is_ident_like,
            DelimDotOrPlus => next_is_numeric,
            DelimAssorted | DelimAsterisk => other == DelimEquals,
            DelimBar => matches!(other, DelimEquals | DelimBar | DashMatch),
            DelimSlash => matches!(other, DelimAsterisk | SubstringMatch),
            // These never need a separator after them.
            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen
            | DashMatch | SubstringMatch | DelimQuestion | DelimEquals | DelimPercent | Other => {
                false
            }
        }
    }
}
544
545impl Token<'_> {
546    /// Categorize a token into a type that determines when `/**/` needs to be inserted
547    /// between two tokens when serialized next to each other without whitespace in between.
548    ///
549    /// See the `TokenSerializationType::needs_separator_when_before` method.
550    pub fn serialization_type(&self) -> TokenSerializationType {
551        use self::TokenSerializationType::*;
552        match self {
553            Token::Ident(_) => Ident,
554            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
555            Token::UnquotedUrl(_) | Token::BadUrl(_) => UrlOrBadUrl,
556            Token::Delim('#') => DelimHash,
557            Token::Delim('@') => DelimAt,
558            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
559            Token::Delim('-') => DelimMinus,
560            Token::Delim('?') => DelimQuestion,
561            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
562            Token::Delim('%') => DelimPercent,
563            Token::Delim('=') => DelimEquals,
564            Token::Delim('|') => DelimBar,
565            Token::Delim('/') => DelimSlash,
566            Token::Delim('*') => DelimAsterisk,
567            Token::Number { .. } => Number,
568            Token::Percentage { .. } => Percentage,
569            Token::Dimension { .. } => Dimension,
570            Token::WhiteSpace(_) => WhiteSpace,
571            Token::Comment(_) => DelimSlash,
572            Token::DashMatch => DashMatch,
573            Token::SubstringMatch => SubstringMatch,
574            Token::CDC => CDC,
575            Token::Function(_) => Function,
576            Token::ParenthesisBlock => OpenParen,
577            Token::SquareBracketBlock
578            | Token::CurlyBracketBlock
579            | Token::CloseParenthesis
580            | Token::CloseSquareBracket
581            | Token::CloseCurlyBracket
582            | Token::QuotedString(_)
583            | Token::BadString(_)
584            | Token::Delim(_)
585            | Token::Colon
586            | Token::Semicolon
587            | Token::Comma
588            | Token::CDO
589            | Token::IncludeMatch
590            | Token::PrefixMatch
591            | Token::SuffixMatch => Other,
592        }
593    }
594}