contextual_encoder/lib.rs
1//! contextual output encoding for XSS defense.
2//!
3//! this crate provides context-aware encoding functions inspired by the
4//! [OWASP Java Encoder](https://owasp.org/owasp-java-encoder/). each function
5//! encodes input for safe embedding in a specific output context (HTML, XML,
6//! JavaScript, CSS, URI, Java, or Rust).
7//!
8//! **disclaimer:** contextual-encoder is an independent Rust crate. its API and security model
9//! are inspired by the OWASP Java Encoder, but this project is not affiliated with,
10//! endorsed by, or maintained by the OWASP Foundation.
11//!
12//! # quick start
13//!
14//! ```
15//! use contextual_encoder::{for_html, for_javascript, for_css_string, for_uri_component};
16//!
17//! let user_input = "<script>alert('xss')</script>";
18//!
19//! // safe for HTML text content and quoted attributes
20//! let html_safe = for_html(user_input);
21//!
22//! // safe for JavaScript string literals (universal)
23//! let js_safe = for_javascript(user_input);
24//!
25//! // safe for quoted CSS string values
26//! let css_safe = for_css_string(user_input);
27//!
28//! // safe as a URI query parameter value
29//! let uri_safe = for_uri_component(user_input);
30//! ```
31//!
32//! # available contexts
33//!
34//! ## HTML / XML
35//!
36//! | function | safe for |
37//! |----------|----------|
38//! | [`for_html`] | text content + quoted attributes |
39//! | [`for_html_content`] | text content only |
40//! | [`for_html_attribute`] | quoted attributes only |
41//! | [`for_html_unquoted_attribute`] | unquoted attribute values |
42//!
43//! ## XML
44//!
45//! | function | safe for |
46//! |----------|----------|
47//! | [`for_xml`] | XML text content + quoted attributes (alias for `for_html`) |
48//! | [`for_xml_content`] | XML text content only (alias for `for_html_content`) |
49//! | [`for_xml_attribute`] | quoted XML attributes only (alias for `for_html_attribute`) |
50//! | [`for_xml_comment`] | XML comment content |
51//! | [`for_cdata`] | CDATA section content |
52//!
53//! ## XML 1.1
54//!
55//! | function | safe for |
56//! |----------|----------|
57//! | [`for_xml11`] | XML 1.1 content + quoted attributes |
58//! | [`for_xml11_content`] | XML 1.1 content only |
59//! | [`for_xml11_attribute`] | XML 1.1 quoted attributes only |
60//!
61//! ## JavaScript
62//!
63//! | function | safe for |
64//! |----------|----------|
65//! | [`for_javascript`] | all JS contexts (universal) |
66//! | [`for_javascript_attribute`] | HTML event attributes |
67//! | [`for_javascript_block`] | `<script>` blocks |
68//! | [`for_javascript_source`] | standalone .js files |
69//!
70//! ## CSS
71//!
72//! | function | safe for |
73//! |----------|----------|
74//! | [`for_css_string`] | quoted CSS string values |
75//! | [`for_css_url`] | CSS `url()` values |
76//!
77//! ## URI
78//!
79//! | function | safe for |
80//! |----------|----------|
81//! | [`for_uri_component`] | URI components (query params, path segments) |
82//!
83//! ## Java
84//!
85//! | function | safe for |
86//! |----------|----------|
87//! | [`for_java`] | Java string / char literals |
88//!
89//! ## Rust
90//!
91//! | function | safe for |
92//! |----------|----------|
93//! | [`for_rust_string`] | Rust string literals (`"..."`) |
94//! | [`for_rust_char`] | Rust char literals (`'...'`) |
95//! | [`for_rust_byte_string`] | Rust byte string literals (`b"..."`) |
96//!
97//! # security model
98//!
99//! this is a **contextual output encoder**, not a sanitizer. it prevents
100//! cross-site scripting by encoding output for specific contexts, but it
101//! does not validate or sanitize input.
102//!
103//! **important caveats:**
104//!
105//! - **encoding is not sanitization.** encoding `<script>` as `&lt;script&gt;`
106//! makes it display safely in HTML, but does not remove it. if you need to
107//! allow a subset of HTML, use a dedicated sanitizer.
108//! - **context matters.** using the wrong encoder for a context can leave
109//! you vulnerable. `for_html_content` output is not safe in attributes.
110//! - **tag and attribute names cannot be encoded.** never pass untrusted data
111//! as a tag name, attribute name, or event handler name. validate these
112//! against a whitelist.
113//! - **full URLs must be validated separately.** `for_uri_component` encodes
114//! a component, not a full URL. a `javascript:` URL will be encoded but
115//! still execute. always validate the scheme.
116//! - **template literals.** the JavaScript encoders do not encode backticks.
117//! never embed untrusted data directly in ES2015+ template literals.
118//! - **grave accent.** unpatched Internet Explorer treats `` ` `` as an
119//! attribute delimiter. `for_html_unquoted_attribute` encodes it, but
120//! numeric entities decode back to the original character, so this is
121//! not a complete fix. avoid unquoted attributes.
122//! - **HTML comments.** no HTML comment encoder is provided because HTML
123//! comments have vendor-specific extensions (e.g., conditional comments)
124//! that make safe encoding impractical. [`for_xml_comment`] is for XML
125//! comments only.
126//!
127//! # writer-based API
128//!
129//! every `for_*` function has a corresponding `write_*` function that writes
130//! to any `std::fmt::Write` implementor, avoiding allocation when writing to
131//! an existing buffer:
132//!
133//! ```
134//! use contextual_encoder::write_html;
135//!
136//! let mut buf = String::new();
137//! write_html(&mut buf, "safe & sound").unwrap();
138//! assert_eq!(buf, "safe &amp; sound");
139//! ```
140
141pub mod css;
142pub mod html;
143pub mod java;
144pub mod javascript;
145pub mod rust;
146pub mod uri;
147pub mod xml;
148
149mod engine;
150
151// convenience re-exports — users can `use contextual_encoder::for_html` directly
152pub use css::{for_css_string, for_css_url, write_css_string, write_css_url};
153pub use html::{
154 for_html, for_html_attribute, for_html_content, for_html_unquoted_attribute, write_html,
155 write_html_attribute, write_html_content, write_html_unquoted_attribute,
156};
157pub use java::{for_java, write_java};
158pub use javascript::{
159 for_javascript, for_javascript_attribute, for_javascript_block, for_javascript_source,
160 write_javascript, write_javascript_attribute, write_javascript_block, write_javascript_source,
161};
162pub use rust::{
163 for_rust_byte_string, for_rust_char, for_rust_string, write_rust_byte_string, write_rust_char,
164 write_rust_string,
165};
166pub use uri::{for_uri_component, write_uri_component};
167pub use xml::{
168 for_cdata, for_xml, for_xml11, for_xml11_attribute, for_xml11_content, for_xml_attribute,
169 for_xml_comment, for_xml_content, write_cdata, write_xml, write_xml11, write_xml11_attribute,
170 write_xml11_content, write_xml_attribute, write_xml_comment, write_xml_content,
171};