Skip to main content

contextual_encoder/
lib.rs

1//! contextual output encoding for XSS defense.
2//!
3//! this crate provides context-aware encoding functions inspired by the
4//! [OWASP Java Encoder](https://owasp.org/owasp-java-encoder/). each function
5//! encodes input for safe embedding in a specific output context (HTML, JavaScript,
6//! CSS, or URI).
7//!
8//! **disclaimer:** contextual-encoder is an independent Rust crate. its API and security model
9//! are inspired by the OWASP Java Encoder, but this project is not affiliated with,
10//! endorsed by, or maintained by the OWASP Foundation.
11//!
12//! # quick start
13//!
14//! ```
15//! use contextual_encoder::{for_html, for_javascript, for_css_string, for_uri_component};
16//!
17//! let user_input = "<script>alert('xss')</script>";
18//!
19//! // safe for HTML text content and quoted attributes
20//! let html_safe = for_html(user_input);
21//!
22//! // safe for JavaScript string literals (universal)
23//! let js_safe = for_javascript(user_input);
24//!
25//! // safe for quoted CSS string values
26//! let css_safe = for_css_string(user_input);
27//!
28//! // safe as a URI query parameter value
29//! let uri_safe = for_uri_component(user_input);
30//! ```
31//!
32//! # available contexts
33//!
34//! ## HTML / XML
35//!
36//! | function | safe for |
37//! |----------|----------|
38//! | [`for_html`] | text content + quoted attributes |
39//! | [`for_html_content`] | text content only |
40//! | [`for_html_attribute`] | quoted attributes only |
41//! | [`for_html_unquoted_attribute`] | unquoted attribute values |
42//!
43//! ## JavaScript
44//!
45//! | function | safe for |
46//! |----------|----------|
47//! | [`for_javascript`] | all JS contexts (universal) |
48//! | [`for_javascript_attribute`] | HTML event attributes |
49//! | [`for_javascript_block`] | `<script>` blocks |
50//! | [`for_javascript_source`] | standalone .js files |
51//!
52//! ## CSS
53//!
54//! | function | safe for |
55//! |----------|----------|
56//! | [`for_css_string`] | quoted CSS string values |
57//! | [`for_css_url`] | CSS `url()` values |
58//!
59//! ## URI
60//!
61//! | function | safe for |
62//! |----------|----------|
63//! | [`for_uri_component`] | URI components (query params, path segments) |
64//!
65//! # security model
66//!
67//! this is a **contextual output encoder**, not a sanitizer. it prevents
68//! cross-site scripting by encoding output for specific contexts, but it
69//! does not validate or sanitize input.
70//!
71//! **important caveats:**
72//!
73//! - **encoding is not sanitization.** encoding `<script>` as `&lt;script&gt;`
74//!   makes it display safely in HTML, but does not remove it. if you need to
75//!   allow a subset of HTML, use a dedicated sanitizer.
76//! - **context matters.** using the wrong encoder for a context can leave
77//!   you vulnerable. `for_html_content` output is not safe in attributes.
78//! - **tag and attribute names cannot be encoded.** never pass untrusted data
79//!   as a tag name, attribute name, or event handler name. validate these
80//!   against a whitelist.
81//! - **full URLs must be validated separately.** `for_uri_component` encodes
82//!   a component, not a full URL. a `javascript:` URL will be encoded but
83//!   still execute. always validate the scheme.
84//! - **template literals.** the JavaScript encoders do not encode backticks.
85//!   never embed untrusted data directly in ES2015+ template literals.
86//! - **grave accent.** unpatched Internet Explorer treats `` ` `` as an
87//!   attribute delimiter. `for_html_unquoted_attribute` encodes it, but
88//!   numeric entities decode back to the original character, so this is
89//!   not a complete fix. avoid unquoted attributes.
90//! - **HTML comments.** no HTML comment encoder is provided because HTML
91//!   comments have vendor-specific extensions (e.g., conditional comments)
92//!   that make safe encoding impractical.
93//!
94//! # writer-based API
95//!
96//! every `for_*` function has a corresponding `write_*` function that writes
97//! to any `std::fmt::Write` implementor, avoiding allocation when writing to
98//! an existing buffer:
99//!
100//! ```
101//! use contextual_encoder::write_html;
102//!
103//! let mut buf = String::new();
104//! write_html(&mut buf, "safe & sound").unwrap();
105//! assert_eq!(buf, "safe &amp; sound");
106//! ```
107
108pub mod css;
109pub mod html;
110pub mod javascript;
111pub mod uri;
112
113mod engine;
114
115// convenience re-exports — users can `use contextual_encoder::for_html` directly
116pub use css::{for_css_string, for_css_url, write_css_string, write_css_url};
117pub use html::{
118    for_html, for_html_attribute, for_html_content, for_html_unquoted_attribute, write_html,
119    write_html_attribute, write_html_content, write_html_unquoted_attribute,
120};
121pub use javascript::{
122    for_javascript, for_javascript_attribute, for_javascript_block, for_javascript_source,
123    write_javascript, write_javascript_attribute, write_javascript_block, write_javascript_source,
124};
125pub use uri::{for_uri_component, write_uri_component};