use std::fmt;
use crate::engine::{encode_loop, is_unicode_noncharacter};
/// Encodes `input` for embedding in a Java string or character literal and
/// returns the result as a freshly allocated `String`.
///
/// This is the owned-output convenience wrapper around [`write_java`]; the
/// buffer is pre-sized to the input length, which is exact whenever nothing
/// needs escaping.
///
/// # Panics
///
/// Panics if [`write_java`] reports an error. The code treats that as
/// impossible when the sink is a `String`.
pub fn for_java(input: &str) -> String {
    let mut encoded = String::with_capacity(input.len());
    let outcome = write_java(&mut encoded, input);
    outcome.expect("writing to string cannot fail");
    encoded
}
/// Writes the Java-literal encoding of `input` to `out`.
///
/// All of the work is delegated to `encode_loop`, which is handed
/// `needs_java_encoding` as the per-character predicate and
/// `write_java_encoded` as the routine that emits the escaped form.
/// NOTE(review): `encode_loop` lives in `crate::engine` and is not visible
/// here; the tests in this file expect characters that need no encoding to be
/// copied through unchanged — confirm against its implementation.
///
/// # Errors
///
/// Returns whatever `fmt::Error` `encode_loop` reports.
pub fn write_java<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
encode_loop(out, input, needs_java_encoding, write_java_encoded)
}
/// Reports whether `c` must be replaced by an escape (or substitute) when
/// written into a Java literal.
///
/// That is the case for:
/// - the control range U+0000..=U+001F and U+007F,
/// - the double quote, single quote and backslash,
/// - the line/paragraph separators U+2028 and U+2029,
/// - every code point at or above U+10000,
/// - anything `is_unicode_noncharacter` flags.
fn needs_java_encoding(c: char) -> bool {
    let code_point = u32::from(c);
    let always_escaped = matches!(
        c,
        '\x00'..='\x1F' | '\x7F' | '"' | '\'' | '\\' | '\u{2028}' | '\u{2029}'
    );
    // Same evaluation order as a guarded match: the noncharacter check only
    // runs when the cheaper tests have not already decided the answer.
    always_escaped || code_point >= 0x10000 || is_unicode_noncharacter(code_point)
}
/// Writes the encoded form of a single character `c` to `out`.
///
/// `next` is the character that follows `c` in the input, if any; it only
/// influences how octal escapes are padded.
///
/// Encoding rules, in priority order:
/// 1. Characters with a fixed escape (`\b`, `\t`, `\n`, `\f`, `\r`, the two
///    quotes, backslash, U+2028, U+2029) use that escape.
/// 2. Anything `is_unicode_noncharacter` flags is replaced by a single space.
/// 3. Code points at or above U+10000 become a pair of lowercase `\uXXXX`
///    escapes holding the UTF-16 high and low surrogates.
/// 4. Everything else is written as a backslash followed by the octal value.
///
/// # Errors
///
/// Propagates any error returned by the underlying writer.
fn write_java_encoded<W: fmt::Write>(out: &mut W, c: char, next: Option<char>) -> fmt::Result {
    // Rule 1: characters whose escape is a constant string.
    let fixed_escape = match c {
        '\x08' => Some("\\b"),
        '\t' => Some("\\t"),
        '\n' => Some("\\n"),
        '\x0C' => Some("\\f"),
        '\r' => Some("\\r"),
        '"' => Some("\\\""),
        '\'' => Some("\\'"),
        '\\' => Some("\\\\"),
        '\u{2028}' => Some("\\u2028"),
        '\u{2029}' => Some("\\u2029"),
        _ => None,
    };
    if let Some(escape) = fixed_escape {
        return out.write_str(escape);
    }

    let val = u32::from(c);

    // Rule 2: checked before the supplementary-plane rule, so noncharacters
    // above U+FFFF are also replaced rather than emitted as surrogates.
    if is_unicode_noncharacter(val) {
        return out.write_char(' ');
    }

    // Rule 3: split into UTF-16 surrogates.
    if val >= 0x10000 {
        let offset = val - 0x10000;
        let high = 0xD800 + (offset >> 10);
        let low = 0xDC00 + (offset & 0x3FF);
        return write!(out, "\\u{high:04x}\\u{low:04x}");
    }

    // Rule 4: octal. If the following character is itself an octal digit, a
    // short escape would absorb it when parsed, so pad to three digits.
    let followed_by_octal_digit = matches!(next, Some('0'..='7'));
    if followed_by_octal_digit {
        write!(out, "\\{val:03o}")
    } else {
        write!(out, "\\{val:o}")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `for_java` over each `(input, expected)` pair and asserts that the
    /// encoded output equals `expected`.
    fn assert_encodes(cases: &[(&str, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(for_java(input), expected);
        }
    }

    // Text with nothing to escape comes back unchanged.
    #[test]
    fn passthrough() {
        assert_encodes(&[("hello world", "hello world"), ("", ""), ("café", "café")]);
    }

    // Control characters that have a dedicated backslash escape.
    #[test]
    fn named_escapes() {
        assert_encodes(&[
            ("\x08", "\\b"),
            ("\t", "\\t"),
            ("\n", "\\n"),
            ("\x0C", "\\f"),
            ("\r", "\\r"),
        ]);
    }

    #[test]
    fn quotes_and_backslash() {
        assert_encodes(&[
            (r#"a"b"#, r#"a\"b"#),
            ("a'b", r"a\'b"),
            (r"a\b", r"a\\b"),
        ]);
    }

    // Followed by a non-octal character, the octal escape is as short as possible.
    #[test]
    fn octal_shortest_form() {
        assert_encodes(&[
            ("\x00a", "\\0a"),
            ("\x01a", "\\1a"),
            ("\x07a", "\\7a"),
            ("\x0Ba", "\\13a"),
            ("\x7Fa", "\\177a"),
        ]);
    }

    // Followed by an octal digit, the escape is padded to three digits.
    #[test]
    fn octal_three_digit_before_octal_char() {
        assert_encodes(&[
            ("\x000", "\\0000"),
            ("\x007", "\\0007"),
            ("\x015", "\\0015"),
        ]);
    }

    #[test]
    fn octal_at_end_of_input() {
        assert_encodes(&[("\x00", "\\0"), ("\x07", "\\7"), ("\x7F", "\\177")]);
    }

    #[test]
    fn line_separators() {
        assert_encodes(&[("\u{2028}", "\\u2028"), ("\u{2029}", "\\u2029")]);
    }

    // Code points above U+FFFF are written as two `\uXXXX` surrogate escapes.
    #[test]
    fn supplementary_plane_surrogate_pairs() {
        assert_encodes(&[
            ("\u{1F600}", "\\ud83d\\ude00"),
            ("\u{10000}", "\\ud800\\udc00"),
            ("\u{10FFFD}", "\\udbff\\udffd"),
        ]);
    }

    #[test]
    fn noncharacters_replaced_with_space() {
        assert_encodes(&[("\u{FDD0}", " "), ("\u{FFFE}", " ")]);
    }

    #[test]
    fn mixed_input() {
        assert_encodes(&[(
            "he said \"hello\"\nnew line",
            "he said \\\"hello\\\"\\nnew line",
        )]);
    }

    // The streaming entry point and the `String` convenience wrapper must agree.
    #[test]
    fn writer_matches_string() {
        let input = "test\x00\"\\\u{1F600}";
        let via_string = for_java(input);
        let mut via_writer = String::new();
        write_java(&mut via_writer, input).unwrap();
        assert_eq!(via_string, via_writer);
    }
}