contextual_encoder/
java.rs1use std::fmt;
18
19use crate::engine::{encode_loop, is_unicode_noncharacter};
20
21pub fn for_java(input: &str) -> String {
52 let mut out = String::with_capacity(input.len());
53 write_java(&mut out, input).expect("writing to string cannot fail");
54 out
55}
56
57pub fn write_java<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
61 encode_loop(out, input, needs_java_encoding, write_java_encoded)
62}
63
64fn needs_java_encoding(c: char) -> bool {
65 match c {
66 '\x00'..='\x1F' | '\x7F' | '"' | '\'' | '\\' | '\u{2028}' | '\u{2029}' => true,
67 c if (c as u32) >= 0x10000 => true,
68 c if is_unicode_noncharacter(c as u32) => true,
69 _ => false,
70 }
71}
72
73fn write_java_encoded<W: fmt::Write>(out: &mut W, c: char, next: Option<char>) -> fmt::Result {
74 match c {
75 '\x08' => out.write_str("\\b"),
76 '\t' => out.write_str("\\t"),
77 '\n' => out.write_str("\\n"),
78 '\x0C' => out.write_str("\\f"),
79 '\r' => out.write_str("\\r"),
80 '"' => out.write_str("\\\""),
81 '\'' => out.write_str("\\'"),
82 '\\' => out.write_str("\\\\"),
83 '\u{2028}' => out.write_str("\\u2028"),
84 '\u{2029}' => out.write_str("\\u2029"),
85 c if is_unicode_noncharacter(c as u32) => out.write_char(' '),
86 c if (c as u32) >= 0x10000 => {
88 let cp = c as u32 - 0x10000;
89 let high = 0xD800 + (cp >> 10);
90 let low = 0xDC00 + (cp & 0x3FF);
91 write!(out, "\\u{high:04x}\\u{low:04x}")
92 }
93 c => {
95 let val = c as u32;
96 let next_is_octal = next.is_some_and(|n| ('0'..='7').contains(&n));
97 if next_is_octal {
98 write!(out, "\\{val:03o}")
99 } else {
100 write!(out, "\\{val:o}")
101 }
102 }
103 }
104}
105
#[cfg(test)]
mod tests {
    use super::*;

    /// Feeds every `(input, expected)` pair through `for_java` and checks the
    /// encoded output.
    fn assert_encodes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(for_java(input), expected);
        }
    }

    /// Text without special characters comes back unchanged.
    #[test]
    fn passthrough() {
        assert_encodes(&[("hello world", "hello world"), ("", ""), ("café", "café")]);
    }

    /// Control characters with a short backslash escape use it.
    #[test]
    fn named_escapes() {
        assert_encodes(&[
            ("\x08", "\\b"),
            ("\t", "\\t"),
            ("\n", "\\n"),
            ("\x0C", "\\f"),
            ("\r", "\\r"),
        ]);
    }

    /// Quote characters and the backslash are backslash-escaped.
    #[test]
    fn quotes_and_backslash() {
        assert_encodes(&[
            (r#"a"b"#, r#"a\"b"#),
            ("a'b", r"a\'b"),
            (r"a\b", r"a\\b"),
        ]);
    }

    /// Followed by a non-octal character, the octal escape is as short as possible.
    #[test]
    fn octal_shortest_form() {
        assert_encodes(&[
            ("\x00a", "\\0a"),
            ("\x01a", "\\1a"),
            ("\x07a", "\\7a"),
            ("\x0Ba", "\\13a"),
            ("\x7Fa", "\\177a"),
        ]);
    }

    /// Followed by an octal digit, the escape is padded to three digits.
    #[test]
    fn octal_three_digit_before_octal_char() {
        assert_encodes(&[
            ("\x000", "\\0000"),
            ("\x007", "\\0007"),
            ("\x015", "\\0015"),
        ]);
    }

    /// With nothing following, the shortest octal form is used.
    #[test]
    fn octal_at_end_of_input() {
        assert_encodes(&[("\x00", "\\0"), ("\x07", "\\7"), ("\x7F", "\\177")]);
    }

    /// U+2028 and U+2029 are written as `\u` escapes.
    #[test]
    fn line_separators() {
        assert_encodes(&[("\u{2028}", "\\u2028"), ("\u{2029}", "\\u2029")]);
    }

    /// Code points above U+FFFF are written as a pair of surrogate escapes.
    #[test]
    fn supplementary_plane_surrogate_pairs() {
        assert_encodes(&[
            ("\u{1F600}", "\\ud83d\\ude00"),
            ("\u{10000}", "\\ud800\\udc00"),
            ("\u{10FFFD}", "\\udbff\\udffd"),
        ]);
    }

    /// Noncharacters are replaced by a single space.
    #[test]
    fn noncharacters_replaced_with_space() {
        assert_encodes(&[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    /// Several escape kinds in one input compose correctly.
    #[test]
    fn mixed_input() {
        assert_encodes(&[(
            "he said \"hello\"\nnew line",
            "he said \\\"hello\\\"\\nnew line",
        )]);
    }

    /// The streaming and allocating entry points produce the same text.
    #[test]
    fn writer_matches_string() {
        let input = "test\x00\"\\\u{1F600}";
        let from_string_api = for_java(input);

        let mut from_writer_api = String::new();
        write_java(&mut from_writer_api, input).unwrap();

        assert_eq!(from_string_api, from_writer_api);
    }
}