Skip to main content

for_java

Function for_java 

Source
pub fn for_java(input: &str) -> String
Expand description

encodes input for safe embedding in a java string literal.

produces output suitable for embedding between double quotes in java source code. also safe for char literals (single quotes are escaped).

§encoding rules

| input | output |
|---|---|
| C0 named (`\b`, `\t`, `\n`, `\f`, `\r`) | named escape |
| `"`, `'`, `\` | `\"`, `\'`, `\\` |
| other C0 controls, DEL | octal escape |
| U+2028, U+2029 | `\u2028`, `\u2029` |
| supplementary plane (U+10000+) | surrogate pair `\uHHHH\uHHHH` |
| unicode non-characters | space |

octal escapes use the shortest form (\0 for NUL) unless the next character is an octal digit, in which case the 3-digit form is used (\000) to prevent ambiguity.

§examples

use contextual_encoder::for_java;

assert_eq!(for_java(r#"he said "hello""#), r#"he said \"hello\""#);
assert_eq!(for_java("line\nbreak"), r"line\nbreak");
assert_eq!(for_java("null\x00byte"), r"null\0byte");
assert_eq!(for_java("\x007"), r"\0007");
Examples found in repository?
examples/contexts.rs (line 119)
17fn main() {
18    let input = r#"<script>alert("xss")</script>"#;
19
20    println!("input: {input}");
21    println!();
22
23    // -----------------------------------------------------------------------
24    // comparison: same input across all encoders
25    // -----------------------------------------------------------------------
26
27    // html text content AND quoted attributes (safe default when unsure)
28    println!("--- html ---");
29    println!("  for_html:                     {}", for_html(input));
30
31    // html text nodes only — does NOT encode quotes, so never use in attributes
32    println!(
33        "  for_html_content:             {}",
34        for_html_content(input)
35    );
36
37    // quoted attribute values only — does NOT encode >, slightly more minimal
38    println!(
39        "  for_html_attribute:           {}",
40        for_html_attribute(input)
41    );
42
43    // unquoted attribute values — most aggressive, encodes whitespace/grave/etc.
44    println!(
45        "  for_html_unquoted_attribute:  {}",
46        for_html_unquoted_attribute(input)
47    );
48    println!();
49
50    // universal js encoder — safe in event attrs, <script> blocks, and .js files
51    println!("--- javascript ---");
52    println!("  for_javascript:               {}", for_javascript(input));
53
54    // html event attributes (onclick="...") — does not escape /
55    println!(
56        "  for_javascript_attribute:     {}",
57        for_javascript_attribute(input)
58    );
59
60    // <script> blocks — uses \" and \' (not safe in html attributes)
61    println!(
62        "  for_javascript_block:         {}",
63        for_javascript_block(input)
64    );
65
66    // standalone .js / json files — minimal, NOT safe in any html context
67    println!(
68        "  for_javascript_source:        {}",
69        for_javascript_source(input)
70    );
71    println!();
72
73    // quoted css string values, e.g., content: "..." or font-family: "..."
74    println!("--- css ---");
75    println!("  for_css_string:               {}", for_css_string(input));
76
77    // css url() values — like for_css_string but parens pass through
78    println!("  for_css_url:                  {}", for_css_url(input));
79    println!();
80
81    // uri component (query params, path segments) — NOT for full urls
82    println!("--- uri ---");
83    println!(
84        "  for_uri_component:            {}",
85        for_uri_component(input)
86    );
87    println!();
88
89    // xml 1.0 aliases — identical to the html encoders
90    println!("--- xml 1.0 ---");
91    println!("  for_xml:                      {}", for_xml(input));
92    println!("  for_xml_content:              {}", for_xml_content(input));
93    println!(
94        "  for_xml_attribute:            {}",
95        for_xml_attribute(input)
96    );
97
98    // xml-only contexts
99    println!("  for_xml_comment:              {}", for_xml_comment(input));
100    println!("  for_cdata:                    {}", for_cdata(input));
101    println!();
102
103    // xml 1.1 — restricted chars get &#xHH; instead of space
104    println!("--- xml 1.1 ---");
105    let xml11_input = "a\x01b<c>";
106    println!("  for_xml11:                    {}", for_xml11(xml11_input));
107    println!(
108        "  for_xml11_content:            {}",
109        for_xml11_content(xml11_input)
110    );
111    println!(
112        "  for_xml11_attribute:          {}",
113        for_xml11_attribute(xml11_input)
114    );
115    println!();
116
117    // java string literal — octal escapes, surrogate pairs
118    println!("--- java ---");
119    println!("  for_java:                     {}", for_java(input));
120    println!();
121
122    // rust literals — \xHH escapes, UTF-8 byte encoding for byte strings
123    println!("--- rust ---");
124    println!("  for_rust_string:              {}", for_rust_string(input));
125    println!("  for_rust_char:                {}", for_rust_char(input));
126    println!(
127        "  for_rust_byte_string:         {}",
128        for_rust_byte_string(input)
129    );
130
131    // -----------------------------------------------------------------------
132    // practical: one realistic input per sink, correct encoder for each
133    // -----------------------------------------------------------------------
134
135    let user_name = r#"Bob <img src=x onerror="alert(1)">"#;
136    let user_query = "hello world & goodbye";
137    let user_text = r#"hi from </script><script>alert(1)</script>"#;
138    let user_css_text = r#"hello "css" \ test"#;
139
140    println!("--- practical usage ---");
141
142    // html text node — for_html_content is the right encoder
143    println!(r#"  <p>{}</p>"#, for_html_content(user_name));
144
145    // nested context: uri component inside an html attribute.
146    // encode from inside out: first percent-encode the query value,
147    // then html-attribute-encode the entire href.
148    let href = format!("/search?q={}", for_uri_component(user_query));
149    println!(r#"  <a href="{}">search</a>"#, for_html_attribute(&href),);
150
151    // actual css string context: a quoted content value in a stylesheet
152    println!(
153        r#"  <style>.msg::after {{ content: "{}"; }}</style>"#,
154        for_css_string(user_css_text),
155    );
156
157    // javascript string inside an event-handler attribute
158    println!(
159        r#"  <button onclick="greet('{}');">hi</button>"#,
160        for_javascript_attribute(user_text),
161    );
162}