Skip to main content

contextual_encoder/
css.rs

//! CSS contextual output encoders.
//!
//! provides two encoding contexts:
//!
//! - [`for_css_string`] — safe for CSS string values (inside quotes)
//! - [`for_css_url`] — safe for CSS `url()` values
//!
//! both use CSS hex escape syntax: a backslash followed by the code point in
//! hex (e.g. `\22` for `"`), with a trailing space appended when the next
//! character could otherwise be parsed as part of the hex value.
//!
//! # security notes
//!
//! - CSS string values **must** be quoted. these encoders produce output safe
//!   inside `"..."` or `'...'` delimiters.
//! - these encoders do not validate CSS property names, selectors, or
//!   expressions. encoding cannot make arbitrary CSS safe — validate the
//!   structure separately.
//! - for `url()` values, the URL itself must be validated (scheme allowlist,
//!   etc.) before encoding. encoding only prevents syntax breakout.
20
21use std::fmt;
22
23use crate::engine::{encode_loop, is_unicode_noncharacter};
24
// ---------------------------------------------------------------------------
// for_css_string — safe for quoted CSS string values
// ---------------------------------------------------------------------------
28
29/// encodes `input` for safe embedding in a quoted CSS string value.
30///
31/// uses CSS hex escape syntax (`\XX`) with shortest hex representation.
32/// a trailing space is appended after the hex escape when the next character
33/// is a hex digit or whitespace, to prevent ambiguous parsing.
34///
35/// unicode non-characters are replaced with `_`.
36///
37/// # encoded characters
38///
39/// C0 controls (U+0000-U+001F), `"`, `'`, `\`, `<`, `&`, `(`, `)`, `/`,
40/// `>`, DEL (U+007F), U+2028, U+2029.
41///
42/// # examples
43///
44/// ```
45/// use contextual_encoder::for_css_string;
46///
47/// assert_eq!(for_css_string("background"), "background");
48/// assert_eq!(for_css_string(r#"a"b"#), r"a\22 b");
49/// // z is not a hex digit, so no trailing space
50/// assert_eq!(for_css_string("a'z"), r"a\27z");
51/// ```
52pub fn for_css_string(input: &str) -> String {
53    let mut out = String::with_capacity(input.len());
54    write_css_string(&mut out, input).expect("writing to string cannot fail");
55    out
56}
57
58/// writes the CSS-string-encoded form of `input` to `out`.
59///
60/// see [`for_css_string`] for encoding rules.
61pub fn write_css_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
62    encode_loop(out, input, needs_css_string_encoding, write_css_encoded)
63}
64
65fn needs_css_string_encoding(c: char) -> bool {
66    needs_css_common_encoding(c) || matches!(c, '(' | ')')
67}
68
// ---------------------------------------------------------------------------
// for_css_url — safe for CSS url() values
// ---------------------------------------------------------------------------
72
73/// encodes `input` for safe embedding in a CSS `url()` value.
74///
75/// identical to [`for_css_string`] except parentheses `(` and `)` are
76/// **not** encoded (they are part of the `url()` syntax, not the value).
77///
78/// the URL **must be validated** before encoding (e.g., ensure the scheme
79/// is allowed). encoding only prevents syntax breakout, not malicious URLs.
80///
81/// # examples
82///
83/// ```
84/// use contextual_encoder::for_css_url;
85///
86/// assert_eq!(for_css_url("image.png"), "image.png");
87/// // b is a hex digit, so trailing space after \27
88/// assert_eq!(for_css_url("a'b"), r"a\27 b");
89/// assert_eq!(for_css_url("a(b)"), "a(b)");
90/// ```
91pub fn for_css_url(input: &str) -> String {
92    let mut out = String::with_capacity(input.len());
93    write_css_url(&mut out, input).expect("writing to string cannot fail");
94    out
95}
96
97/// writes the CSS-url-encoded form of `input` to `out`.
98///
99/// see [`for_css_url`] for encoding rules.
100pub fn write_css_url<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
101    encode_loop(out, input, needs_css_url_encoding, write_css_encoded)
102}
103
104fn needs_css_url_encoding(c: char) -> bool {
105    needs_css_common_encoding(c)
106    // parentheses NOT encoded in url context
107}
108
// ---------------------------------------------------------------------------
// shared implementation
// ---------------------------------------------------------------------------
112
113fn needs_css_common_encoding(c: char) -> bool {
114    let cp = c as u32;
115    cp <= 0x1F
116        || matches!(c, '"' | '\'' | '\\' | '<' | '&' | '/' | '>')
117        || cp == 0x7F
118        || cp == 0x2028
119        || cp == 0x2029
120        || is_unicode_noncharacter(cp)
121}
122
123fn write_css_encoded<W: fmt::Write>(out: &mut W, c: char, next: Option<char>) -> fmt::Result {
124    let cp = c as u32;
125
126    // non-characters → underscore
127    if is_unicode_noncharacter(cp) {
128        return out.write_char('_');
129    }
130
131    // hex escape: shortest representation, no zero-padding
132    write!(out, "\\{:x}", cp)?;
133
134    // append a space if the next character could extend the hex value
135    if needs_css_separator(next) {
136        out.write_char(' ')?;
137    }
138
139    Ok(())
140}
141
/// returns true if a trailing space is needed after a CSS hex escape
/// to prevent ambiguous parsing with the next character.
///
/// a separator is needed when `next` is an ASCII hex digit (it would extend
/// the escape's hex value) or one of space, tab, line feed, form feed, or
/// carriage return (a CSS escape swallows one whitespace character that
/// follows it). `None` — nothing follows — never needs one.
fn needs_css_separator(next: Option<char>) -> bool {
    matches!(
        next,
        Some(c) if c.is_ascii_hexdigit() || matches!(c, ' ' | '\t' | '\n' | '\x0C' | '\r')
    )
}
150
#[cfg(test)]
mod tests {
    //! unit tests for the CSS string and `url()` encoders.

    use super::*;

    // -- for_css_string --

    #[test]
    fn css_string_no_encoding_needed() {
        assert_eq!(for_css_string("hello"), "hello");
        assert_eq!(for_css_string(""), "");
    }

    #[test]
    fn css_string_encodes_double_quote() {
        // " (0x22) → \22, followed by space because 'b' is a hex digit
        assert_eq!(for_css_string(r#"a"b"#), r"a\22 b");
        // " at end → no trailing space
        assert_eq!(for_css_string(r#"a""#), r"a\22");
    }

    #[test]
    fn css_string_encodes_single_quote() {
        // ' (0x27) → \27, 'z' is not a hex digit → no space
        assert_eq!(for_css_string("a'z"), r"a\27z");
        // ' (0x27) → \27, '1' is a hex digit → space
        assert_eq!(for_css_string("a'1"), r"a\27 1");
    }

    #[test]
    fn css_string_encodes_backslash() {
        assert_eq!(for_css_string(r"a\b"), r"a\5c b");
    }

    #[test]
    fn css_string_encodes_angle_brackets() {
        // x is not a hex digit, so no trailing space after \3c
        assert_eq!(for_css_string("<x>"), r"\3cx\3e");
    }

    #[test]
    fn css_string_encodes_ampersand() {
        assert_eq!(for_css_string("a&b"), r"a\26 b");
    }

    #[test]
    fn css_string_encodes_parens() {
        assert_eq!(for_css_string("a(b)"), r"a\28 b\29");
    }

    #[test]
    fn css_string_encodes_slash() {
        assert_eq!(for_css_string("a/b"), r"a\2f b");
    }

    #[test]
    fn css_string_encodes_control_chars() {
        assert_eq!(for_css_string("\x00"), r"\0");
        assert_eq!(for_css_string("\x01x"), r"\1x");
        assert_eq!(for_css_string("\x1F"), r"\1f");
    }

    #[test]
    fn css_string_encodes_del() {
        assert_eq!(for_css_string("\x7F"), r"\7f");
    }

    #[test]
    fn css_string_encodes_line_separators() {
        assert_eq!(for_css_string("\u{2028}"), r"\2028");
        assert_eq!(for_css_string("\u{2029}"), r"\2029");
    }

    #[test]
    fn css_string_replaces_nonchars_with_underscore() {
        assert_eq!(for_css_string("\u{FDD0}"), "_");
        assert_eq!(for_css_string("\u{FFFE}"), "_");
        assert_eq!(for_css_string("\u{FFFF}"), "_");
    }

    #[test]
    fn css_string_separator_before_whitespace() {
        // \27 followed by space → needs separator → \27 + space + space
        // first space is the separator, second is the content space
        assert_eq!(for_css_string("' "), r"\27  ");
    }

    #[test]
    fn css_string_preserves_non_ascii() {
        assert_eq!(for_css_string("café"), "café");
    }

    #[test]
    fn css_string_writer_variant() {
        let mut out = String::new();
        // b is a hex digit, so trailing space after \27
        write_css_string(&mut out, "a'b").unwrap();
        assert_eq!(out, r"a\27 b");
    }

    // -- for_css_url --

    #[test]
    fn css_url_does_not_encode_parens() {
        assert_eq!(for_css_url("a(b)c"), "a(b)c");
    }

    #[test]
    fn css_url_encodes_quotes() {
        // b is a hex digit, so trailing space after \27
        assert_eq!(for_css_url("a'b"), r"a\27 b");
    }

    #[test]
    fn css_url_encodes_backslash() {
        assert_eq!(for_css_url(r"a\b"), r"a\5c b");
    }

    #[test]
    fn css_url_encodes_slash() {
        // '/' is in the shared set, so it is escaped in url context too
        assert_eq!(for_css_url("a/b"), r"a\2f b");
    }

    #[test]
    fn css_url_writer_variant() {
        let mut out = String::new();
        // b is a hex digit, so trailing space after \27
        write_css_url(&mut out, "a'b").unwrap();
        assert_eq!(out, r"a\27 b");
    }
}
263    #[test]
264    fn css_url_encodes_backslash() {
265        assert_eq!(for_css_url(r"a\b"), r"a\5c b");
266    }
267}