Skip to main content

contextual_encoder/
lib.rs

1#![forbid(unsafe_code)]
2
3//! contextual output encoding for XSS defense and safe literal embedding.
4//!
5//! this crate provides context-aware encoding functions inspired by the
6//! [OWASP Java Encoder](https://owasp.org/owasp-java-encoder/). each function
7//! encodes input for safe embedding in a specific output context — web contexts
8//! (HTML, XML, JavaScript, CSS, URI) and literal contexts (JSON, Java, Go,
9//! Rust, Ruby, Python, SQL).
10//!
11//! **disclaimer:** contextual-encoder is an independent Rust crate. its API and security model
12//! are inspired by the OWASP Java Encoder, but this project is not affiliated with,
13//! endorsed by, or maintained by the OWASP Foundation.
14//!
15//! # quick start
16//!
17//! ```
18//! use contextual_encoder::{for_html, for_javascript, for_css_string, for_uri_component};
19//!
20//! let user_input = "<script>alert('xss')</script>";
21//!
22//! // safe for HTML text content and quoted attributes
23//! let html_safe = for_html(user_input);
24//! assert!(html_safe.contains("&lt;script&gt;"));
25//!
26//! // safe for javascript string literals (universal)
27//! let js_safe = for_javascript(user_input);
28//! assert!(js_safe.contains(r"<\/script>"));
29//!
30//! // safe for quoted CSS string values
31//! let css_safe = for_css_string(user_input);
32//! assert!(css_safe.contains(r"\3c"));
33//!
34//! // safe as a URI query parameter value
35//! let uri_safe = for_uri_component(user_input);
36//! assert!(uri_safe.contains("%3C"));
37//! ```
38//!
39//! # available contexts
40//!
41//! ## HTML
42//!
43//! | function | safe for |
44//! |----------|----------|
45//! | [`for_html`] | text content + quoted attributes |
46//! | [`for_html_content`] | text content only |
47//! | [`for_html_attribute`] | quoted attributes only |
48//! | [`for_html_unquoted_attribute`] | unquoted attribute values |
49//!
50//! ## XML
51//!
52//! | function | safe for |
53//! |----------|----------|
54//! | [`for_xml`] | XML text content + quoted attributes (alias for `for_html`) |
55//! | [`for_xml_content`] | XML text content only (alias for `for_html_content`) |
56//! | [`for_xml_attribute`] | quoted XML attributes only (alias for `for_html_attribute`) |
57//! | [`for_xml_comment`] | XML comment content |
58//! | [`for_cdata`] | CDATA section content |
59//!
60//! ## XML 1.1
61//!
62//! | function | safe for |
63//! |----------|----------|
64//! | [`for_xml11`] | XML 1.1 content + quoted attributes |
65//! | [`for_xml11_content`] | XML 1.1 content only |
66//! | [`for_xml11_attribute`] | XML 1.1 quoted attributes only |
67//!
68//! ## JavaScript
69//!
70//! | function | safe for |
71//! |----------|----------|
72//! | [`for_javascript`] | general JS string contexts |
73//! | [`for_javascript_attribute`] | HTML event attributes |
74//! | [`for_javascript_block`] | `<script>` blocks |
75//! | [`for_javascript_source`] | standalone .js files |
76//! | [`for_js_template`] | ES6 template literal content (`` `...` ``) |
77//!
78//! ## CSS
79//!
80//! | function | safe for |
81//! |----------|----------|
82//! | [`for_css_string`] | quoted CSS string values |
83//! | [`for_css_url`] | CSS `url()` values |
84//!
85//! ## URI
86//!
87//! | function | safe for |
88//! |----------|----------|
89//! | [`for_uri_component`] | URI components (query params, path segments) |
90//!
91//! ## additional literal contexts
92//!
93//! these encoders are not part of the OWASP Java Encoder's scope. they encode
94//! untrusted strings for safe embedding in source code literals.
95//!
96//! | function | safe for |
97//! |----------|----------|
98//! | [`for_json`] | JSON string values |
99//! | [`for_java`] | Java string / char literals |
100//! | [`for_go_string`] | Go interpreted string literals (`"..."`) |
101//! | [`for_go_char`] | Go rune literals (`'...'`) |
102//! | [`for_go_byte_string`] | Go byte-explicit string literals (`[]byte("...")`) |
103//! | [`for_rust_string`] | Rust string literals (`"..."`) |
104//! | [`for_rust_char`] | Rust char literals (`'...'`) |
105//! | [`for_rust_byte_string`] | Rust byte string literals (`b"..."`) |
106//! | [`for_ruby_string`] | Ruby double-quoted string literals (`"..."`) |
107//! | [`for_python_string`] | Python string literals (`"..."` or `'...'`) |
108//! | [`for_python_bytes`] | Python bytes literals (`b"..."` or `b'...'`) |
109//! | [`for_python_raw_string`] | Python raw string literals (`r"..."` or `r'...'`) |
110//! | [`for_sql`] | Standard SQL string literals (`'...'`) |
111//! | [`for_sql_backslash`] | MySQL/MariaDB string literals with backslash escaping (`'...'`) |
112//!
113//! # security model
114//!
115//! this is a **contextual output encoder**, not a sanitizer. it prevents
116//! cross-site scripting by encoding output for specific contexts, but it
117//! does not validate or sanitize input.
118//!
119//! **important caveats:**
120//!
121//! - **encoding is not sanitization.** encoding `<script>` as `&lt;script&gt;`
122//!   makes it display safely in HTML, but does not remove it. if you need to
123//!   allow a subset of HTML, use a dedicated sanitizer.
124//! - **context matters.** using the wrong encoder for a context can leave
125//!   you vulnerable. `for_html_content` output is not safe in attributes.
126//! - **tag and attribute names cannot be encoded.** never pass untrusted data
127//!   as a tag name, attribute name, or event handler name. validate these
128//!   against a whitelist.
129//! - **full URLs must be validated separately.** `for_uri_component` encodes
130//!   a component, not a full URL. to embed an untrusted URL, validate its
131//!   scheme and structure first, then encode for the final sink.
132//! - **template literals.** the string literal JavaScript encoders do not
133//!   encode backticks. use [`for_js_template`] to embed data directly in
134//!   ES2015+ template literals.
135//! - **grave accent.** unpatched Internet Explorer treats `` ` `` as an
136//!   attribute delimiter. `for_html_unquoted_attribute` encodes it, but
137//!   numeric entities decode back to the original character, so this is
138//!   not a complete fix. avoid unquoted attributes.
139//! - **HTML comments.** no HTML comment encoder is provided because HTML
140//!   comments have vendor-specific extensions (e.g., conditional comments)
141//!   that make safe encoding impractical. [`for_xml_comment`] is for XML
142//!   comments only.
143//!
144//! # writer-based API
145//!
146//! every `for_*` function has a corresponding `write_*` function that writes
147//! to any `std::fmt::Write` implementor, avoiding allocation when writing to
148//! an existing buffer:
149//!
150//! ```
151//! use contextual_encoder::write_html;
152//!
153//! let mut buf = String::new();
154//! write_html(&mut buf, "safe & sound").unwrap();
155//! assert_eq!(buf, "safe &amp; sound");
156//! ```
157//!
158//! # display wrappers
159//!
160//! every `for_*` function also has a corresponding `display_*` function that
161//! returns a zero-allocation [`Display`](std::fmt::Display) wrapper. use these
162//! when embedding encoded output inline in `format!` or `write!`:
163//!
164//! ```
165//! use contextual_encoder::display_html;
166//!
167//! let user_input = "<script>alert('xss')</script>";
168//! // one allocation (the final String), zero intermediate allocations
169//! let safe = format!("<p>{}</p>", display_html(user_input));
170//! assert!(safe.contains("&lt;script&gt;"));
171//! ```
172
173pub mod css;
174pub mod display;
175pub mod go;
176pub mod html;
177pub mod java;
178pub mod javascript;
179pub mod json;
180pub mod python;
181pub mod ruby;
182pub mod rust;
183pub mod sql;
184pub mod uri;
185pub mod xml;
186
187mod engine;
188
189// convenience re-exports — users can `use contextual_encoder::for_html` directly
190pub use css::{for_css_string, for_css_url, write_css_string, write_css_url};
191pub use display::{
192    display_cdata, display_css_string, display_css_url, display_go_byte_string, display_go_char,
193    display_go_string, display_html, display_html_attribute, display_html_content,
194    display_html_unquoted_attribute, display_java, display_javascript,
195    display_javascript_attribute, display_javascript_block, display_javascript_source,
196    display_js_template, display_json, display_python_bytes, display_python_raw_string,
197    display_python_string, display_ruby_string, display_rust_byte_string, display_rust_char,
198    display_rust_string, display_sql, display_sql_backslash, display_uri_component, display_xml,
199    display_xml11, display_xml11_attribute, display_xml11_content, display_xml_attribute,
200    display_xml_comment, display_xml_content,
201};
202pub use go::{
203    for_go_byte_string, for_go_char, for_go_string, write_go_byte_string, write_go_char,
204    write_go_string,
205};
206pub use html::{
207    for_html, for_html_attribute, for_html_content, for_html_unquoted_attribute, write_html,
208    write_html_attribute, write_html_content, write_html_unquoted_attribute,
209};
210pub use java::{for_java, write_java};
211pub use javascript::{
212    for_javascript, for_javascript_attribute, for_javascript_block, for_javascript_source,
213    for_js_template, write_javascript, write_javascript_attribute, write_javascript_block,
214    write_javascript_source, write_js_template,
215};
216pub use json::{for_json, write_json};
217pub use python::{
218    for_python_bytes, for_python_raw_string, for_python_string, write_python_bytes,
219    write_python_raw_string, write_python_string,
220};
221pub use ruby::{for_ruby_string, write_ruby_string};
222pub use rust::{
223    for_rust_byte_string, for_rust_char, for_rust_string, write_rust_byte_string, write_rust_char,
224    write_rust_string,
225};
226pub use sql::{for_sql, for_sql_backslash, write_sql, write_sql_backslash};
227pub use uri::{for_uri_component, write_uri_component};
228pub use xml::{
229    for_cdata, for_xml, for_xml11, for_xml11_attribute, for_xml11_content, for_xml_attribute,
230    for_xml_comment, for_xml_content, write_cdata, write_xml, write_xml11, write_xml11_attribute,
231    write_xml11_content, write_xml_attribute, write_xml_comment, write_xml_content,
232};
233
#[cfg(test)]
mod tests {
    use super::*;

    // checks that each listed encoder turns "" into "". the encoder name is
    // included in the failure message so a regression is easy to pin down.
    macro_rules! assert_empty_stays_empty {
        ($($encoder:ident),+ $(,)?) => {
            $(
                assert_eq!(
                    $encoder(""),
                    "",
                    "{} did not return an empty string",
                    stringify!($encoder)
                );
            )+
        };
    }

    // checks a table of `input => expected` pairs against a single encoder.
    macro_rules! assert_encodes {
        ($encoder:ident { $($input:literal => $expected:literal),+ $(,)? }) => {
            $(
                assert_eq!(
                    $encoder($input),
                    $expected,
                    "{}({:?})",
                    stringify!($encoder),
                    $input
                );
            )+
        };
    }

    #[test]
    fn empty_string_returns_empty() {
        assert_empty_stays_empty!(
            for_html,
            for_html_content,
            for_html_attribute,
            for_html_unquoted_attribute,
            for_javascript,
            for_javascript_attribute,
            for_javascript_block,
            for_javascript_source,
            for_css_string,
            for_css_url,
            for_uri_component,
            for_xml,
            for_xml_content,
            for_xml_attribute,
            for_xml_comment,
            for_cdata,
            for_xml11,
            for_xml11_content,
            for_xml11_attribute,
            for_java,
            for_json,
            for_go_string,
            for_go_char,
            for_go_byte_string,
            for_rust_string,
            for_rust_char,
            for_rust_byte_string,
            for_ruby_string,
            for_python_string,
            for_python_bytes,
            for_python_raw_string,
            for_js_template,
            for_sql,
            for_sql_backslash,
        );
    }

    #[test]
    fn empty_string_writer_variants() {
        // each writer gets its own sink, so no output can carry over between checks
        let mut html_out = String::new();
        write_html(&mut html_out, "").unwrap();
        assert_eq!(html_out, "");

        let mut js_out = String::new();
        write_javascript(&mut js_out, "").unwrap();
        assert_eq!(js_out, "");

        let mut css_out = String::new();
        write_css_string(&mut css_out, "").unwrap();
        assert_eq!(css_out, "");

        let mut uri_out = String::new();
        write_uri_component(&mut uri_out, "").unwrap();
        assert_eq!(uri_out, "");
    }

    // sample characters used below, grouped by UTF-8 encoded width:
    //   2 bytes: é (U+00E9), ñ (U+00F1)
    //   3 bytes: 世 (U+4E16), € (U+20AC)
    //   4 bytes: 😀 (U+1F600), 𐍈 (U+10348)

    #[test]
    fn multibyte_utf8_html() {
        assert_encodes!(for_html {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
            "é<世>&😀" => "é&lt;世&gt;&amp;😀",
        });
    }

    #[test]
    fn multibyte_utf8_javascript() {
        assert_encodes!(for_javascript {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_css_string() {
        assert_encodes!(for_css_string {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_uri_component() {
        assert_encodes!(for_uri_component {
            "é" => "%C3%A9",
            "世" => "%E4%B8%96",
            "😀" => "%F0%9F%98%80",
            "café" => "caf%C3%A9",
        });
    }

    #[test]
    fn multibyte_utf8_go_string_passthrough() {
        assert_encodes!(for_go_string {
            "caf\u{00e9}" => "caf\u{00e9}",
            "\u{4e16}\u{754c}" => "\u{4e16}\u{754c}",
            "\u{1F600}" => "\u{1F600}",
        });
    }

    #[test]
    fn multibyte_utf8_go_byte_string() {
        assert_encodes!(for_go_byte_string {
            "\u{00e9}" => r"\xc3\xa9",
            "\u{4e16}" => r"\xe4\xb8\x96",
            "\u{1F600}" => r"\xf0\x9f\x98\x80",
        });
    }

    #[test]
    fn multibyte_utf8_rust_byte_string() {
        assert_encodes!(for_rust_byte_string {
            "é" => r"\xc3\xa9",
            "世" => r"\xe4\xb8\x96",
            "😀" => r"\xf0\x9f\x98\x80",
        });
    }

    #[test]
    fn multibyte_utf8_rust_string_passthrough() {
        assert_encodes!(for_rust_string {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_ruby_string_passthrough() {
        assert_encodes!(for_ruby_string {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_python_string_passthrough() {
        assert_encodes!(for_python_string {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_python_bytes() {
        assert_encodes!(for_python_bytes {
            "\u{00e9}" => r"\xc3\xa9",
            "\u{4e16}" => r"\xe4\xb8\x96",
            "\u{1F600}" => r"\xf0\x9f\x98\x80",
        });
    }

    #[test]
    fn multibyte_utf8_python_raw_string_passthrough() {
        assert_encodes!(for_python_raw_string {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_json() {
        assert_encodes!(for_json {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_java() {
        // the emoji is expected as a pair of \u escapes rather than passed through
        assert_encodes!(for_java {
            "café" => "café",
            "世界" => "世界",
            "😀" => "\\ud83d\\ude00",
        });
    }

    #[test]
    fn multibyte_utf8_sql() {
        assert_encodes!(for_sql {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_sql_backslash() {
        assert_encodes!(for_sql_backslash {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }

    #[test]
    fn multibyte_utf8_xml() {
        assert_encodes!(for_xml {
            "café" => "café",
            "世界" => "世界",
            "😀" => "😀",
        });
    }
}