contextual_encoder/css.rs
1//! CSS contextual output encoders.
2//!
3//! provides two encoding contexts:
4//!
5//! - [`for_css_string`] — safe for CSS string values (inside quotes)
6//! - [`for_css_url`] — safe for CSS `url()` values
7//!
8//! both use CSS hex escape syntax (`\XX`) with a trailing space appended
9//! when the next character could be misinterpreted as part of the hex value.
10//!
11//! # security notes
12//!
13//! - CSS string values **must** be quoted. these encoders produce output safe
14//! inside `"..."` or `'...'` delimiters.
15//! - these encoders do not validate CSS property names, selectors, or
16//! expressions. encoding cannot make arbitrary CSS safe — validate the
17//! structure separately.
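//! - for `url()` values, the URL itself must be validated (scheme whitelist,
//!   etc.) before encoding. encoding only prevents syntax breakout, and
//!   because [`for_css_url`] leaves `(` and `)` unescaped, the encoded value
//!   should be placed inside a quoted `url("...")`.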
20
21use std::fmt;
22
23use crate::engine::{encode_loop, is_unicode_noncharacter};
24
25// ---------------------------------------------------------------------------
26// for_css_string — safe for quoted CSS string values
27// ---------------------------------------------------------------------------
28
29/// encodes `input` for safe embedding in a quoted CSS string value.
30///
31/// uses CSS hex escape syntax (`\XX`) with shortest hex representation.
32/// a trailing space is appended after the hex escape when the next character
33/// is a hex digit or whitespace, to prevent ambiguous parsing.
34///
35/// unicode non-characters are replaced with `_`.
36///
37/// # encoded characters
38///
39/// C0 controls (U+0000-U+001F), `"`, `'`, `\`, `<`, `&`, `(`, `)`, `/`,
40/// `>`, DEL (U+007F), U+2028, U+2029.
41///
42/// # examples
43///
44/// ```
45/// use contextual_encoder::for_css_string;
46///
47/// assert_eq!(for_css_string("background"), "background");
48/// assert_eq!(for_css_string(r#"a"b"#), r"a\22 b");
49/// // z is not a hex digit, so no trailing space
50/// assert_eq!(for_css_string("a'z"), r"a\27z");
51/// ```
52pub fn for_css_string(input: &str) -> String {
53 let mut out = String::with_capacity(input.len());
54 write_css_string(&mut out, input).expect("writing to string cannot fail");
55 out
56}
57
58/// writes the CSS-string-encoded form of `input` to `out`.
59///
60/// see [`for_css_string`] for encoding rules.
61pub fn write_css_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
62 encode_loop(out, input, needs_css_string_encoding, write_css_encoded)
63}
64
65fn needs_css_string_encoding(c: char) -> bool {
66 needs_css_common_encoding(c) || matches!(c, '(' | ')')
67}
68
69// ---------------------------------------------------------------------------
70// for_css_url — safe for CSS url() values
71// ---------------------------------------------------------------------------
72
73/// encodes `input` for safe embedding in a CSS `url()` value.
74///
75/// identical to [`for_css_string`] except parentheses `(` and `)` are
76/// **not** encoded (they are part of the `url()` syntax, not the value).
77///
78/// the URL **must be validated** before encoding (e.g., ensure the scheme
79/// is allowed). encoding only prevents syntax breakout, not malicious URLs.
80///
81/// # examples
82///
83/// ```
84/// use contextual_encoder::for_css_url;
85///
86/// assert_eq!(for_css_url("image.png"), "image.png");
87/// // b is a hex digit, so trailing space after \27
88/// assert_eq!(for_css_url("a'b"), r"a\27 b");
89/// assert_eq!(for_css_url("a(b)"), "a(b)");
90/// ```
91pub fn for_css_url(input: &str) -> String {
92 let mut out = String::with_capacity(input.len());
93 write_css_url(&mut out, input).expect("writing to string cannot fail");
94 out
95}
96
97/// writes the CSS-url-encoded form of `input` to `out`.
98///
99/// see [`for_css_url`] for encoding rules.
100pub fn write_css_url<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
101 encode_loop(out, input, needs_css_url_encoding, write_css_encoded)
102}
103
104fn needs_css_url_encoding(c: char) -> bool {
105 needs_css_common_encoding(c)
106 // parentheses NOT encoded in url context
107}
108
109// ---------------------------------------------------------------------------
110// shared implementation
111// ---------------------------------------------------------------------------
112
113fn needs_css_common_encoding(c: char) -> bool {
114 let cp = c as u32;
115 cp <= 0x1F
116 || matches!(c, '"' | '\'' | '\\' | '<' | '&' | '/' | '>')
117 || cp == 0x7F
118 || cp == 0x2028
119 || cp == 0x2029
120 || is_unicode_noncharacter(cp)
121}
122
123fn write_css_encoded<W: fmt::Write>(out: &mut W, c: char, next: Option<char>) -> fmt::Result {
124 let cp = c as u32;
125
126 // non-characters → underscore
127 if is_unicode_noncharacter(cp) {
128 return out.write_char('_');
129 }
130
131 // hex escape: shortest representation, no zero-padding
132 write!(out, "\\{:x}", cp)?;
133
134 // append a space if the next character could extend the hex value
135 if needs_css_separator(next) {
136 out.write_char(' ')?;
137 }
138
139 Ok(())
140}
141
/// reports whether a space must follow a CSS hex escape.
///
/// the separator is required when `next` is an ASCII hex digit (it would
/// otherwise read as part of the escape) or one of the whitespace characters
/// space, tab, line feed, form feed, carriage return. with no following
/// character (`None`) no separator is needed.
fn needs_css_separator(next: Option<char>) -> bool {
    matches!(
        next,
        Some(following)
            if following.is_ascii_hexdigit()
                || matches!(following, ' ' | '\t' | '\n' | '\x0C' | '\r')
    )
}
150
#[cfg(test)]
mod tests {
    //! unit tests for the CSS string and CSS url encoders.

    use super::*;

    // -- for_css_string --

    #[test]
    fn css_string_no_encoding_needed() {
        assert_eq!(for_css_string("hello"), "hello");
        assert_eq!(for_css_string(""), "");
    }

    #[test]
    fn css_string_encodes_double_quote() {
        // " (0x22) → \22, followed by space because 'b' is a hex digit
        assert_eq!(for_css_string(r#"a"b"#), r"a\22 b");
        // " at end → no trailing space
        assert_eq!(for_css_string(r#"a""#), r"a\22");
    }

    #[test]
    fn css_string_encodes_single_quote() {
        // ' (0x27) → \27, 'z' is not a hex digit → no space
        assert_eq!(for_css_string("a'z"), r"a\27z");
        // ' (0x27) → \27, '1' is a hex digit → space
        assert_eq!(for_css_string("a'1"), r"a\27 1");
    }

    #[test]
    fn css_string_encodes_backslash() {
        assert_eq!(for_css_string(r"a\b"), r"a\5c b");
    }

    #[test]
    fn css_string_encodes_angle_brackets() {
        // x is not a hex digit, so no trailing space after \3c
        assert_eq!(for_css_string("<x>"), r"\3cx\3e");
    }

    #[test]
    fn css_string_encodes_ampersand() {
        assert_eq!(for_css_string("a&b"), r"a\26 b");
    }

    #[test]
    fn css_string_encodes_parens() {
        assert_eq!(for_css_string("a(b)"), r"a\28 b\29");
    }

    #[test]
    fn css_string_encodes_slash() {
        assert_eq!(for_css_string("a/b"), r"a\2f b");
    }

    #[test]
    fn css_string_encodes_control_chars() {
        assert_eq!(for_css_string("\x00"), r"\0");
        assert_eq!(for_css_string("\x01x"), r"\1x");
        assert_eq!(for_css_string("\x1F"), r"\1f");
    }

    #[test]
    fn css_string_encodes_del() {
        assert_eq!(for_css_string("\x7F"), r"\7f");
    }

    #[test]
    fn css_string_encodes_line_separators() {
        assert_eq!(for_css_string("\u{2028}"), r"\2028");
        assert_eq!(for_css_string("\u{2029}"), r"\2029");
    }

    #[test]
    fn css_string_replaces_nonchars_with_underscore() {
        assert_eq!(for_css_string("\u{FDD0}"), "_");
        assert_eq!(for_css_string("\u{FFFE}"), "_");
        assert_eq!(for_css_string("\u{FFFF}"), "_");
    }

    #[test]
    fn css_string_separator_before_whitespace() {
        // \27 followed by space → needs separator → \27 + space + space.
        // the first space is the separator, the second is the content space.
        // both are spelled as \u{20} so the expected count of two cannot be
        // lost to whitespace normalisation or misread in review.
        assert_eq!(for_css_string("' "), "\\27\u{20}\u{20}");
    }

    #[test]
    fn css_string_preserves_non_ascii() {
        assert_eq!(for_css_string("café"), "café");
    }

    #[test]
    fn css_string_writer_variant() {
        let mut out = String::new();
        // b is a hex digit, so trailing space after \27
        write_css_string(&mut out, "a'b").unwrap();
        assert_eq!(out, r"a\27 b");
    }

    // -- for_css_url --

    #[test]
    fn css_url_does_not_encode_parens() {
        assert_eq!(for_css_url("a(b)c"), "a(b)c");
    }

    #[test]
    fn css_url_encodes_quotes() {
        // b is a hex digit, so trailing space after \27
        assert_eq!(for_css_url("a'b"), r"a\27 b");
    }

    #[test]
    fn css_url_encodes_backslash() {
        assert_eq!(for_css_url(r"a\b"), r"a\5c b");
    }
}