Skip to main content

cssparser/
serializer.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4
5use std::fmt::{self, Write};
6use std::str;
7
8#[cfg(feature = "fast_match_byte")]
9pub use crate::match_byte;
10
11use super::Token;
12
/// Trait for values that can serialize themselves in CSS syntax.
pub trait ToCss {
    /// Write the CSS serialization of `self` into `dest`.
    ///
    /// Any error reported by `dest` is returned to the caller.
    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
    where
        W: fmt::Write;

    /// Return the CSS serialization of `self` as a freshly allocated string.
    ///
    /// This is a convenience wrapper around `to_css` and should normally not
    /// be overridden.
    ///
    /// # Panics
    ///
    /// Panics if `to_css` returns an error.
    #[inline]
    fn to_css_string(&self) -> String {
        let mut css = String::new();
        self.to_css(&mut css).unwrap();
        css
    }
}
30
31#[inline]
32fn write_numeric<W>(value: f32, int_value: Option<i32>, has_sign: bool, dest: &mut W) -> fmt::Result
33where
34    W: fmt::Write,
35{
36    if value == 0.0 && value.is_sign_negative() {
37        // Negative zero. Work around #20596.
38        return dest.write_str("-0");
39    }
40    // NOTE: `value.value >= 0` is true for negative 0 but we've dealt with it above.
41    if has_sign && value >= 0.0 {
42        dest.write_str("+")?;
43    }
44
45    if let Some(v) = int_value {
46        return write!(dest, "{}", v);
47    }
48
49    let notation = dtoa_short::write(dest, value)?;
50    if value.fract() == 0. && !notation.decimal_point && !notation.scientific {
51        dest.write_str(".0")?;
52    }
53    Ok(())
54}
55
56impl ToCss for Token<'_> {
57    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
58    where
59        W: fmt::Write,
60    {
61        match *self {
62            Token::Ident(ref value) => serialize_identifier(value, dest)?,
63            Token::AtKeyword(ref value) => {
64                dest.write_str("@")?;
65                serialize_identifier(value, dest)?;
66            }
67            Token::Hash(ref value) => {
68                dest.write_str("#")?;
69                serialize_name(value, dest)?;
70            }
71            Token::IDHash(ref value) => {
72                dest.write_str("#")?;
73                serialize_identifier(value, dest)?;
74            }
75            Token::QuotedString(ref value) => serialize_string(value, dest)?,
76            Token::UnquotedUrl(ref value) => {
77                dest.write_str("url(")?;
78                serialize_unquoted_url(value, dest)?;
79                dest.write_str(")")?;
80            }
81            Token::Delim(value) => dest.write_char(value)?,
82
83            Token::Number {
84                value,
85                int_value,
86                has_sign,
87            } => write_numeric(value, int_value, has_sign, dest)?,
88            Token::Percentage {
89                unit_value,
90                int_value,
91                has_sign,
92            } => {
93                write_numeric(unit_value * 100., int_value, has_sign, dest)?;
94                dest.write_str("%")?;
95            }
96            Token::Dimension {
97                value,
98                int_value,
99                has_sign,
100                ref unit,
101            } => {
102                write_numeric(value, int_value, has_sign, dest)?;
103                // Disambiguate with scientific notation.
104                let unit = &**unit;
105                // TODO(emilio): This doesn't handle e.g. 100E1m, which gets us
106                // an unit of "E1m"...
107                if unit == "e" || unit == "E" || unit.starts_with("e-") || unit.starts_with("E-") {
108                    dest.write_str("\\65 ")?;
109                    serialize_name(&unit[1..], dest)?;
110                } else {
111                    serialize_identifier(unit, dest)?;
112                }
113            }
114
115            Token::WhiteSpace(content) => dest.write_str(content)?,
116            Token::Comment(content) => {
117                dest.write_str("/*")?;
118                dest.write_str(content)?;
119                dest.write_str("*/")?
120            }
121            Token::Colon => dest.write_str(":")?,
122            Token::Semicolon => dest.write_str(";")?,
123            Token::Comma => dest.write_str(",")?,
124            Token::IncludeMatch => dest.write_str("~=")?,
125            Token::DashMatch => dest.write_str("|=")?,
126            Token::PrefixMatch => dest.write_str("^=")?,
127            Token::SuffixMatch => dest.write_str("$=")?,
128            Token::SubstringMatch => dest.write_str("*=")?,
129            Token::CDO => dest.write_str("<!--")?,
130            Token::CDC => dest.write_str("-->")?,
131
132            Token::Function(ref name) => {
133                serialize_identifier(name, dest)?;
134                dest.write_str("(")?;
135            }
136            Token::ParenthesisBlock => dest.write_str("(")?,
137            Token::SquareBracketBlock => dest.write_str("[")?,
138            Token::CurlyBracketBlock => dest.write_str("{")?,
139
140            Token::BadUrl(ref contents) => {
141                dest.write_str("url(")?;
142                dest.write_str(contents)?;
143                dest.write_char(')')?;
144            }
145            Token::BadString(ref value) => {
146                // During tokenization, an unescaped newline after a quote causes
147                // the token to be a BadString instead of a QuotedString.
148                // The BadString token ends just before the newline
149                // (which is in a separate WhiteSpace token),
150                // and therefore does not have a closing quote.
151                dest.write_char('"')?;
152                CssStringWriter::new(dest).write_str(value)?;
153            }
154            Token::CloseParenthesis => dest.write_str(")")?,
155            Token::CloseSquareBracket => dest.write_str("]")?,
156            Token::CloseCurlyBracket => dest.write_str("}")?,
157        }
158        Ok(())
159    }
160}
161
/// Write `ascii_byte` as a CSS hexadecimal escape: a backslash, one or two
/// lowercase hex digits, and a terminating space.
///
/// Values up to `0x0F` use a single digit (`\f `); larger values use two
/// digits (`\7f `).
fn hex_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    const HEX_DIGITS: &[u8; 16] = b"0123456789abcdef";

    let high = HEX_DIGITS[usize::from(ascii_byte >> 4)];
    let low = HEX_DIGITS[usize::from(ascii_byte & 0x0F)];
    let two_digits = [b'\\', high, low, b' '];
    let one_digit = [b'\\', low, b' '];
    let bytes: &[u8] = if ascii_byte <= 0x0F {
        &one_digit
    } else {
        &two_digits
    };
    // SAFETY: every byte in either buffer is a backslash, a space, or an entry
    // of `HEX_DIGITS`, all of which are ASCII, so `bytes` is valid UTF-8.
    dest.write_str(unsafe { str::from_utf8_unchecked(bytes) })
}
180
/// Write `ascii_byte` preceded by a backslash.
///
/// The byte is expected to be ASCII. A non-ASCII byte cannot be written as a
/// standalone character, so it is rejected with `fmt::Error` and nothing is
/// written to `dest`.
fn char_escape<W>(ascii_byte: u8, dest: &mut W) -> fmt::Result
where
    W: fmt::Write,
{
    let bytes = [b'\\', ascii_byte];
    // Checked conversion: a backslash followed by a byte >= 0x80 is not valid
    // UTF-8, and building a `&str` from it without checking would be
    // undefined behavior.
    let escaped = str::from_utf8(&bytes).map_err(|_| fmt::Error)?;
    dest.write_str(escaped)
}
188
189/// Write a CSS identifier, escaping characters as necessary.
190pub fn serialize_identifier<W>(mut value: &str, dest: &mut W) -> fmt::Result
191where
192    W: fmt::Write,
193{
194    if value.is_empty() {
195        return Ok(());
196    }
197
198    if let Some(value) = value.strip_prefix("--") {
199        dest.write_str("--")?;
200        serialize_name(value, dest)
201    } else if value == "-" {
202        dest.write_str("\\-")
203    } else {
204        if value.as_bytes()[0] == b'-' {
205            dest.write_str("-")?;
206            value = &value[1..];
207        }
208        if let digit @ b'0'..=b'9' = value.as_bytes()[0] {
209            hex_escape(digit, dest)?;
210            value = &value[1..];
211        }
212        serialize_name(value, dest)
213    }
214}
215
216/// Write a CSS name, like a custom property name.
217///
218/// You should only use this when you know what you're doing, when in doubt,
219/// consider using `serialize_identifier`.
220pub fn serialize_name<W>(value: &str, dest: &mut W) -> fmt::Result
221where
222    W: fmt::Write,
223{
224    let mut chunk_start = 0;
225    for (i, b) in value.bytes().enumerate() {
226        let escaped = match_byte! { b,
227            b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_' | b'-' => continue,
228            b'\0' => Some("\u{FFFD}"),
229            b => {
230                if !b.is_ascii() {
231                    continue;
232                }
233                None
234            },
235        };
236        dest.write_str(&value[chunk_start..i])?;
237        if let Some(escaped) = escaped {
238            dest.write_str(escaped)?;
239        } else if (b'\x01'..=b'\x1F').contains(&b) || b == b'\x7F' {
240            hex_escape(b, dest)?;
241        } else {
242            char_escape(b, dest)?;
243        }
244        chunk_start = i + 1;
245    }
246    dest.write_str(&value[chunk_start..])
247}
248
249fn serialize_unquoted_url<W>(value: &str, dest: &mut W) -> fmt::Result
250where
251    W: fmt::Write,
252{
253    let mut chunk_start = 0;
254    for (i, b) in value.bytes().enumerate() {
255        let hex = match_byte! { b,
256            b'\0'..=b' ' | b'\x7F' => true,
257            b'(' | b')' | b'"' | b'\'' | b'\\' => false,
258            _ => continue,
259        };
260        dest.write_str(&value[chunk_start..i])?;
261        if hex {
262            hex_escape(b, dest)?;
263        } else {
264            char_escape(b, dest)?;
265        }
266        chunk_start = i + 1;
267    }
268    dest.write_str(&value[chunk_start..])
269}
270
271/// Write a double-quoted CSS string token, escaping content as necessary.
272pub fn serialize_string<W>(value: &str, dest: &mut W) -> fmt::Result
273where
274    W: fmt::Write,
275{
276    dest.write_str("\"")?;
277    CssStringWriter::new(dest).write_str(value)?;
278    dest.write_str("\"")?;
279    Ok(())
280}
281
/// A `fmt::Write` adapter that escapes everything written through it so the
/// text can sit inside a double-quoted CSS string.
///
/// The surrounding quotes are not written by the adapter; the caller is
/// responsible for them.
///
/// Typical usage:
///
/// ```rust,ignore
/// fn write_foo<W>(foo: &Foo, dest: &mut W) -> fmt::Result where W: fmt::Write {
///     dest.write_str("\"")?;
///     {
///         let mut string_dest = CssStringWriter::new(dest);
///         // Write into string_dest...
///     }
///     dest.write_str("\"")?;
///     Ok(())
/// }
/// ```
pub struct CssStringWriter<'a, W> {
    /// The underlying writer that receives the escaped text.
    inner: &'a mut W,
}

impl<'a, W> CssStringWriter<'a, W>
where
    W: fmt::Write,
{
    /// Create a `CssStringWriter` that forwards escaped text to `inner`.
    pub fn new(inner: &'a mut W) -> Self {
        Self { inner }
    }
}
311
312impl<W> fmt::Write for CssStringWriter<'_, W>
313where
314    W: fmt::Write,
315{
316    fn write_str(&mut self, s: &str) -> fmt::Result {
317        let mut chunk_start = 0;
318        for (i, b) in s.bytes().enumerate() {
319            let escaped = match_byte! { b,
320                b'"' => Some("\\\""),
321                b'\\' => Some("\\\\"),
322                b'\0' => Some("\u{FFFD}"),
323                b'\x01'..=b'\x1F' | b'\x7F' => None,
324                _ => continue,
325            };
326            self.inner.write_str(&s[chunk_start..i])?;
327            match escaped {
328                Some(x) => self.inner.write_str(x)?,
329                None => hex_escape(b, self.inner)?,
330            };
331            chunk_start = i + 1;
332        }
333        self.inner.write_str(&s[chunk_start..])
334    }
335}
336
337macro_rules! impl_tocss_for_int {
338    ($T: ty) => {
339        impl ToCss for $T {
340            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
341            where
342                W: fmt::Write,
343            {
344                let mut buf = itoa::Buffer::new();
345                dest.write_str(buf.format(*self))
346            }
347        }
348    };
349}
350
351impl_tocss_for_int!(i8);
352impl_tocss_for_int!(u8);
353impl_tocss_for_int!(i16);
354impl_tocss_for_int!(u16);
355impl_tocss_for_int!(i32);
356impl_tocss_for_int!(u32);
357impl_tocss_for_int!(i64);
358impl_tocss_for_int!(u64);
359
360macro_rules! impl_tocss_for_float {
361    ($T: ty) => {
362        impl ToCss for $T {
363            fn to_css<W>(&self, dest: &mut W) -> fmt::Result
364            where
365                W: fmt::Write,
366            {
367                dtoa_short::write(dest, *self).map(|_| ())
368            }
369        }
370    };
371}
372
373impl_tocss_for_float!(f32);
374impl_tocss_for_float!(f64);
375
/// The category a token falls into for serialization purposes.
///
/// See the `needs_separator_when_before` method.
#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)]
pub enum TokenSerializationType {
    /// No token at all; this is the default value.
    #[default]
    Nothing,

    /// A [`<whitespace-token>`](https://drafts.csswg.org/css-syntax/#whitespace-token-diagram).
    WhiteSpace,

    /// An [`<at-keyword-token>`](https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram),
    /// or a [`<hash-token>`](https://drafts.csswg.org/css-syntax/#hash-token-diagram) whose type
    /// flag is either 'unrestricted' or 'id'.
    AtKeywordOrHash,

    /// A [`<number-token>`](https://drafts.csswg.org/css-syntax/#number-token-diagram).
    Number,

    /// A [`<dimension-token>`](https://drafts.csswg.org/css-syntax/#dimension-token-diagram).
    Dimension,

    /// A [`<percentage-token>`](https://drafts.csswg.org/css-syntax/#percentage-token-diagram).
    Percentage,

    /// A [`<url-token>`](https://drafts.csswg.org/css-syntax/#url-token-diagram) or a
    /// `<bad-url-token>`.
    UrlOrBadUrl,

    /// A [`<function-token>`](https://drafts.csswg.org/css-syntax/#function-token-diagram).
    Function,

    /// An [`<ident-token>`](https://drafts.csswg.org/css-syntax/#ident-token-diagram).
    Ident,

    /// The `-->` [`<CDC-token>`](https://drafts.csswg.org/css-syntax/#CDC-token-diagram).
    CDC,

    /// The `|=`
    /// [`<dash-match-token>`](https://drafts.csswg.org/css-syntax/#dash-match-token-diagram).
    DashMatch,

    /// The `*=`
    /// [`<substring-match-token>`](https://drafts.csswg.org/css-syntax/#substring-match-token-diagram).
    SubstringMatch,

    /// The `<(-token>`.
    OpenParen,

    /// The `#` `<delim-token>`.
    DelimHash,

    /// The `@` `<delim-token>`.
    DelimAt,

    /// The `.` or `+` `<delim-token>`.
    DelimDotOrPlus,

    /// The `-` `<delim-token>`.
    DelimMinus,

    /// The `?` `<delim-token>`.
    DelimQuestion,

    /// The `$`, `^`, or `~` `<delim-token>`.
    DelimAssorted,

    /// The `=` `<delim-token>`.
    DelimEquals,

    /// The `|` `<delim-token>`.
    DelimBar,

    /// The `/` `<delim-token>`.
    DelimSlash,

    /// The `*` `<delim-token>`.
    DelimAsterisk,

    /// The `%` `<delim-token>`.
    DelimPercent,

    /// Any token not covered by the other variants.
    Other,
}

#[cfg(feature = "malloc_size_of")]
malloc_size_of::malloc_size_of_is_0!(TokenSerializationType);

impl TokenSerializationType {
    /// Return the value that stands for "no token", e.g. before the start of the input.
    #[deprecated(
        since = "0.32.1",
        note = "use TokenSerializationType::Nothing or TokenSerializationType::default() instead"
    )]
    pub fn nothing() -> TokenSerializationType {
        TokenSerializationType::Nothing
    }

    /// Replace `self` with `new_value`, but only if `self` is currently
    /// `TokenSerializationType::Nothing`; any other value is left untouched.
    pub fn set_if_nothing(&mut self, new_value: TokenSerializationType) {
        if *self == TokenSerializationType::Nothing {
            *self = new_value;
        }
    }

    /// Return true if an empty comment `/**/` must be written between a token
    /// of category `self` and an immediately following token of category
    /// `other` (no whitespace in between), so that the pair is not re-parsed
    /// as a single token.
    ///
    /// See https://drafts.csswg.org/css-syntax/#serialization
    ///
    /// See https://github.com/w3c/csswg-drafts/issues/4088 for the
    /// `DelimPercent` bits.
    pub fn needs_separator_when_before(self, other: TokenSerializationType) -> bool {
        use self::TokenSerializationType::*;

        // Two groups of following tokens that recur across several rows of
        // the table below.
        let ident_like = matches!(other, Ident | Function | UrlOrBadUrl | DelimMinus);
        let numeric = matches!(other, Number | Percentage | Dimension);

        match self {
            Ident => ident_like || numeric || matches!(other, CDC | OpenParen),
            AtKeywordOrHash | Dimension => ident_like || numeric || other == CDC,
            DelimHash | DelimMinus => ident_like || numeric,
            Number => ident_like || numeric || other == DelimPercent,
            DelimAt => ident_like,
            DelimDotOrPlus => numeric,
            DelimAssorted | DelimAsterisk => other == DelimEquals,
            DelimBar => matches!(other, DelimEquals | DelimBar | DashMatch),
            DelimSlash => matches!(other, DelimAsterisk | SubstringMatch),
            Nothing | WhiteSpace | Percentage | UrlOrBadUrl | Function | CDC | OpenParen
            | DashMatch | SubstringMatch | DelimQuestion | DelimEquals | DelimPercent | Other => {
                false
            }
        }
    }
}
542
543impl Token<'_> {
544    /// Categorize a token into a type that determines when `/**/` needs to be inserted
545    /// between two tokens when serialized next to each other without whitespace in between.
546    ///
547    /// See the `TokenSerializationType::needs_separator_when_before` method.
548    pub fn serialization_type(&self) -> TokenSerializationType {
549        use self::TokenSerializationType::*;
550        match self {
551            Token::Ident(_) => Ident,
552            Token::AtKeyword(_) | Token::Hash(_) | Token::IDHash(_) => AtKeywordOrHash,
553            Token::UnquotedUrl(_) | Token::BadUrl(_) => UrlOrBadUrl,
554            Token::Delim('#') => DelimHash,
555            Token::Delim('@') => DelimAt,
556            Token::Delim('.') | Token::Delim('+') => DelimDotOrPlus,
557            Token::Delim('-') => DelimMinus,
558            Token::Delim('?') => DelimQuestion,
559            Token::Delim('$') | Token::Delim('^') | Token::Delim('~') => DelimAssorted,
560            Token::Delim('%') => DelimPercent,
561            Token::Delim('=') => DelimEquals,
562            Token::Delim('|') => DelimBar,
563            Token::Delim('/') => DelimSlash,
564            Token::Delim('*') => DelimAsterisk,
565            Token::Number { .. } => Number,
566            Token::Percentage { .. } => Percentage,
567            Token::Dimension { .. } => Dimension,
568            Token::WhiteSpace(_) => WhiteSpace,
569            Token::Comment(_) => DelimSlash,
570            Token::DashMatch => DashMatch,
571            Token::SubstringMatch => SubstringMatch,
572            Token::CDC => CDC,
573            Token::Function(_) => Function,
574            Token::ParenthesisBlock => OpenParen,
575            Token::SquareBracketBlock
576            | Token::CurlyBracketBlock
577            | Token::CloseParenthesis
578            | Token::CloseSquareBracket
579            | Token::CloseCurlyBracket
580            | Token::QuotedString(_)
581            | Token::BadString(_)
582            | Token::Delim(_)
583            | Token::Colon
584            | Token::Semicolon
585            | Token::Comma
586            | Token::CDO
587            | Token::IncludeMatch
588            | Token::PrefixMatch
589            | Token::SuffixMatch => Other,
590        }
591    }
592}