use std::fmt;
use crate::engine::{
encode_loop, is_unicode_noncharacter, write_c0_named_escape, write_utf8_hex_bytes,
};
/// Encodes `input` for use inside a Python string literal and returns the
/// result as a new `String`.
///
/// Convenience wrapper around [`write_python_string`]; see that function for
/// the encoding itself.
///
/// # Panics
///
/// Panics only if [`write_python_string`] reports an error while writing into
/// the in-memory `String` buffer.
pub fn for_python_string(input: &str) -> String {
    // The input length is used as the initial capacity; escapes may grow it.
    let mut encoded = String::with_capacity(input.len());
    let outcome = write_python_string(&mut encoded, input);
    outcome.expect("writing to string cannot fail");
    encoded
}
/// Writes `input`, encoded for a Python string literal, into `out`.
///
/// The work is delegated to `encode_loop`, which is given
/// `needs_python_string_encoding` as the per-character predicate and
/// `write_python_text_encoded` as the escape writer. The look-ahead character
/// that `encode_loop` passes to the writer callback is not used here.
///
/// # Errors
///
/// Returns whatever `fmt::Result` `encode_loop` produces.
pub fn write_python_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    encode_loop(
        out,
        input,
        needs_python_string_encoding,
        |sink, ch, _lookahead| write_python_text_encoded(sink, ch),
    )
}
/// Reports whether `c` must be rewritten when emitted by the Python string
/// encoder.
///
/// True for ASCII control characters (`0x00..=0x1F` and `0x7F`), both quote
/// characters, the backslash, and any code point that
/// `is_unicode_noncharacter` flags.
fn needs_python_string_encoding(c: char) -> bool {
    match c {
        // C0 controls and DEL.
        '\x00'..='\x1F' | '\x7F' => true,
        // Literal delimiters and the escape introducer.
        '"' | '\'' | '\\' => true,
        // Everything else is only special when it is a noncharacter.
        _ => is_unicode_noncharacter(c as u32),
    }
}
/// Encodes `input` for use inside a Python bytes literal and returns the
/// result as a new `String`.
///
/// Convenience wrapper around [`write_python_bytes`]; see that function for
/// the encoding itself.
///
/// # Panics
///
/// Panics only if [`write_python_bytes`] reports an error while writing into
/// the in-memory `String` buffer.
pub fn for_python_bytes(input: &str) -> String {
    // The input length is used as the initial capacity; escapes may grow it.
    let mut encoded = String::with_capacity(input.len());
    let outcome = write_python_bytes(&mut encoded, input);
    outcome.expect("writing to string cannot fail");
    encoded
}
/// Writes `input`, encoded for a Python bytes literal, into `out`.
///
/// The work is delegated to `encode_loop`, which is given
/// `needs_python_bytes_encoding` as the per-character predicate and
/// `write_python_bytes_encoded` as the escape writer.
///
/// # Errors
///
/// Returns whatever `fmt::Result` `encode_loop` produces.
pub fn write_python_bytes<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    // Name the two callbacks so the call below reads as a sentence.
    let should_escape = needs_python_bytes_encoding;
    let emit_escape = write_python_bytes_encoded;
    encode_loop(out, input, should_escape, emit_escape)
}
/// Reports whether `c` must be rewritten when emitted by the Python bytes
/// encoder.
///
/// True for ASCII control characters (`0x00..=0x1F` and `0x7F`), both quote
/// characters, the backslash, and every non-ASCII character.
fn needs_python_bytes_encoding(c: char) -> bool {
    match c {
        // Literal delimiters and the escape introducer.
        '"' | '\'' | '\\' => true,
        // `is_ascii_control` covers exactly U+0000..=U+001F and U+007F.
        _ => c.is_ascii_control() || !c.is_ascii(),
    }
}
/// Writes the escaped form of a single character `c` for a Python bytes
/// literal into `out`.
///
/// `write_c0_named_escape` is consulted first; if it returns `Some`, that
/// result is returned unchanged (presumably it covers the short named escapes
/// such as `\n` — confirm against the helper). Otherwise:
///
/// * `"` is written as `\"` and `'` as `\'`;
/// * a non-ASCII character is handed to `write_utf8_hex_bytes`;
/// * any remaining ASCII character is written as a two-digit lowercase
///   `\xNN` escape.
///
/// `_next` (the look-ahead supplied by the caller) is ignored.
fn write_python_bytes_encoded<W: fmt::Write>(
    out: &mut W,
    c: char,
    _next: Option<char>,
) -> fmt::Result {
    if let Some(named) = write_c0_named_escape(out, c) {
        named
    } else if c == '"' {
        out.write_str("\\\"")
    } else if c == '\'' {
        out.write_str("\\'")
    } else if c.is_ascii() {
        // Single-byte value: a fixed-width hex escape is enough.
        write!(out, "\\x{:02x}", u32::from(c))
    } else {
        write_utf8_hex_bytes(out, c)
    }
}
/// Writes the escaped form of a single character `c` for a Python string
/// literal into `out`.
///
/// `write_c0_named_escape` is consulted first; if it returns `Some`, that
/// result is returned unchanged (presumably it covers the short named escapes
/// such as `\n` — confirm against the helper). Otherwise:
///
/// * `"` is written as `\"` and `'` as `\'`;
/// * a code point flagged by `is_unicode_noncharacter` is replaced by a
///   single space;
/// * anything else is written as a lowercase `\x` hex escape, zero-padded to
///   at least two digits.
fn write_python_text_encoded<W: fmt::Write>(out: &mut W, c: char) -> fmt::Result {
    match write_c0_named_escape(out, c) {
        Some(named) => named,
        None => {
            let code_point = u32::from(c);
            if c == '"' {
                out.write_str("\\\"")
            } else if c == '\'' {
                out.write_str("\\'")
            } else if is_unicode_noncharacter(code_point) {
                // Noncharacters are dropped in favour of a plain space.
                out.write_char(' ')
            } else {
                write!(out, "\\x{:02x}", code_point)
            }
        }
    }
}
/// Sanitizes `input` for use inside a Python raw string literal and returns
/// the result as a new `String`.
///
/// Convenience wrapper around [`write_python_raw_string`]; see that function
/// for the replacement rules.
///
/// # Panics
///
/// Panics only if [`write_python_raw_string`] reports an error while writing
/// into the in-memory `String` buffer.
pub fn for_python_raw_string(input: &str) -> String {
    // Each input character yields exactly one output character, but a
    // multi-byte noncharacter can shrink to a one-byte space.
    let mut sanitized = String::with_capacity(input.len());
    let outcome = write_python_raw_string(&mut sanitized, input);
    outcome.expect("writing to string cannot fail");
    sanitized
}
/// Writes `input`, sanitized for a Python raw string literal, into `out`.
///
/// Raw literals have no escape mechanism, so characters that cannot appear
/// are replaced by a single space instead of being escaped:
///
/// * every character for which `needs_python_raw_string_encoding` is true;
/// * the final backslash when `input` ends in an odd-length run of
///   backslashes, because a Python raw literal cannot end in an odd number of
///   backslashes.
///
/// All other characters, including interior backslashes, are copied through
/// unchanged.
///
/// # Errors
///
/// Returns the first error reported by `out`.
pub fn write_python_raw_string<W: fmt::Write>(out: &mut W, input: &str) -> fmt::Result {
    let trailing_bs = input.bytes().rev().take_while(|&b| b == b'\\').count();
    // An odd run leaves one unpaired backslash at the very end; everything at
    // or past `cutoff` is that single ASCII byte. No underflow: an odd count
    // implies `input` is non-empty.
    let cutoff = input.len() - trailing_bs % 2;
    for (i, c) in input.char_indices() {
        // Both conditions lead to the same replacement, so they share one arm.
        let replace = i >= cutoff || needs_python_raw_string_encoding(c);
        out.write_char(if replace { ' ' } else { c })?;
    }
    Ok(())
}
/// Reports whether `c` must be replaced when emitted by the Python raw-string
/// writer.
///
/// True for ASCII control characters (`0x00..=0x1F` and `0x7F`), both quote
/// characters, and any code point that `is_unicode_noncharacter` flags. The
/// backslash is deliberately absent from this set.
fn needs_python_raw_string_encoding(c: char) -> bool {
    match c {
        // C0 controls and DEL.
        '\x00'..='\x1F' | '\x7F' => true,
        // Either quote could terminate the literal.
        '"' | '\'' => true,
        _ => is_unicode_noncharacter(c as u32),
    }
}
#[cfg(test)]
mod tests {
// Unit tests for the three Python encoders: string, bytes, and raw string.
// Each group pins the exact output text for a class of input characters.
use super::*;
// ---- for_python_string / write_python_string ----
// Plain ASCII, the empty string, and non-ASCII text (combining mark, CJK,
// emoji) are returned unchanged.
#[test]
fn string_passthrough() {
assert_eq!(for_python_string("hello world"), "hello world");
assert_eq!(for_python_string(""), "");
assert_eq!(
for_python_string("cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}"),
"cafe\u{0301} \u{65E5}\u{672C}\u{8A9E}"
);
assert_eq!(for_python_string("\u{1F600}"), "\u{1F600}");
}
// A double quote gains a leading backslash.
#[test]
fn string_escapes_double_quote() {
assert_eq!(for_python_string(r#"a"b"#), r#"a\"b"#);
}
// A single quote gains a leading backslash.
#[test]
fn string_escapes_single_quote() {
assert_eq!(for_python_string("a'b"), r"a\'b");
}
// A backslash is doubled.
#[test]
fn string_escapes_backslash() {
assert_eq!(for_python_string(r"a\b"), r"a\\b");
}
// Controls that have a short named escape use it (\a \b \t \n \v \f \r).
#[test]
fn string_named_escapes() {
assert_eq!(for_python_string("\x07"), "\\a");
assert_eq!(for_python_string("\x08"), "\\b");
assert_eq!(for_python_string("\t"), "\\t");
assert_eq!(for_python_string("\n"), "\\n");
assert_eq!(for_python_string("\x0B"), "\\v");
assert_eq!(for_python_string("\x0C"), "\\f");
assert_eq!(for_python_string("\r"), "\\r");
}
// Remaining controls, including DEL, become two-digit lowercase \xNN.
#[test]
fn string_hex_escapes_for_controls() {
assert_eq!(for_python_string("\x00"), "\\x00");
assert_eq!(for_python_string("\x01"), "\\x01");
assert_eq!(for_python_string("\x06"), "\\x06");
assert_eq!(for_python_string("\x0E"), "\\x0e");
assert_eq!(for_python_string("\x1F"), "\\x1f");
assert_eq!(for_python_string("\x7F"), "\\x7f");
}
// Unicode noncharacters are replaced by a single space.
#[test]
fn string_nonchars_replaced() {
assert_eq!(for_python_string("\u{FDD0}"), " ");
assert_eq!(for_python_string("\u{FFFE}"), " ");
}
// The allocating wrapper and the writer API must produce the same text.
#[test]
fn string_writer_matches() {
let input = "test\x00\"'\\\n cafe\u{0301}";
let mut w = String::new();
write_python_string(&mut w, input).unwrap();
assert_eq!(for_python_string(input), w);
}
// ---- for_python_bytes / write_python_bytes ----
// Plain ASCII and the empty string are returned unchanged.
#[test]
fn bytes_passthrough() {
assert_eq!(for_python_bytes("hello world"), "hello world");
assert_eq!(for_python_bytes(""), "");
}
// A double quote gains a leading backslash.
#[test]
fn bytes_escapes_double_quote() {
assert_eq!(for_python_bytes(r#"a"b"#), r#"a\"b"#);
}
// A single quote gains a leading backslash.
#[test]
fn bytes_escapes_single_quote() {
assert_eq!(for_python_bytes("a'b"), r"a\'b");
}
// A backslash is doubled.
#[test]
fn bytes_escapes_backslash() {
assert_eq!(for_python_bytes(r"a\b"), r"a\\b");
}
// Controls that have a short named escape use it (\a \b \t \n \v \f \r).
#[test]
fn bytes_named_escapes() {
assert_eq!(for_python_bytes("\x07"), "\\a");
assert_eq!(for_python_bytes("\x08"), "\\b");
assert_eq!(for_python_bytes("\t"), "\\t");
assert_eq!(for_python_bytes("\n"), "\\n");
assert_eq!(for_python_bytes("\x0B"), "\\v");
assert_eq!(for_python_bytes("\x0C"), "\\f");
assert_eq!(for_python_bytes("\r"), "\\r");
}
// Remaining controls, including DEL, become two-digit lowercase \xNN.
#[test]
fn bytes_hex_for_controls() {
assert_eq!(for_python_bytes("\x00"), "\\x00");
assert_eq!(for_python_bytes("\x01"), "\\x01");
assert_eq!(for_python_bytes("\x7F"), "\\x7f");
}
// Non-ASCII characters are emitted as one \xNN escape per UTF-8 byte.
#[test]
fn bytes_non_ascii_as_utf8_bytes() {
assert_eq!(for_python_bytes("\u{0301}"), r"\xcc\x81");
assert_eq!(for_python_bytes("cafe\u{0301}"), r"cafe\xcc\x81");
assert_eq!(for_python_bytes("\u{65E5}"), r"\xe6\x97\xa5");
assert_eq!(for_python_bytes("\u{1F600}"), r"\xf0\x9f\x98\x80");
}
// Unlike the string encoder, noncharacters are kept (as UTF-8 byte escapes).
#[test]
fn bytes_nonchars_as_bytes() {
assert_eq!(for_python_bytes("\u{FDD0}"), r"\xef\xb7\x90");
}
// The allocating wrapper and the writer API must produce the same text.
#[test]
fn bytes_writer_matches() {
let input = "test\x00\"'\\cafe\u{0301}";
let mut w = String::new();
write_python_bytes(&mut w, input).unwrap();
assert_eq!(for_python_bytes(input), w);
}
// ---- for_python_raw_string / write_python_raw_string ----
// Plain ASCII and the empty string are returned unchanged.
#[test]
fn raw_passthrough() {
assert_eq!(for_python_raw_string("hello world"), "hello world");
assert_eq!(for_python_raw_string(""), "");
}
// Both quote characters are replaced by a space (raw strings cannot escape).
#[test]
fn raw_quotes_replaced() {
assert_eq!(for_python_raw_string(r#"a"b"#), "a b");
assert_eq!(for_python_raw_string("a'b"), "a b");
assert_eq!(for_python_raw_string(r#"a"b'c"#), "a b c");
}
// Control characters, including DEL, are replaced by a space.
#[test]
fn raw_controls_replaced() {
assert_eq!(for_python_raw_string("\x00"), " ");
assert_eq!(for_python_raw_string("\x01"), " ");
assert_eq!(for_python_raw_string("\t"), " ");
assert_eq!(for_python_raw_string("\n"), " ");
assert_eq!(for_python_raw_string("\x7F"), " ");
}
// Interior backslashes are copied through untouched.
#[test]
fn raw_backslash_in_middle() {
assert_eq!(for_python_raw_string(r"a\b"), r"a\b");
assert_eq!(for_python_raw_string(r"path\to\file"), r"path\to\file");
}
// An even-length trailing run of backslashes is kept as-is.
#[test]
fn raw_trailing_even_backslashes() {
assert_eq!(for_python_raw_string(r"ab\\"), r"ab\\");
assert_eq!(for_python_raw_string(r"ab\\\\"), r"ab\\\\");
}
// With an odd-length trailing run, only the last backslash becomes a space.
#[test]
fn raw_trailing_odd_backslash_replaced() {
assert_eq!(for_python_raw_string(r"trailing\"), "trailing ");
assert_eq!(for_python_raw_string(r"ab\\\"), "ab\\\\ ");
assert_eq!(for_python_raw_string(r"\"), " ");
}
// Unicode noncharacters are replaced by a single space.
#[test]
fn raw_nonchars_replaced() {
assert_eq!(for_python_raw_string("\u{FDD0}"), " ");
assert_eq!(for_python_raw_string("\u{FFFE}"), " ");
}
// Ordinary non-ASCII text is copied through unchanged.
#[test]
fn raw_non_ascii_passes_through() {
assert_eq!(for_python_raw_string("café"), "café");
assert_eq!(for_python_raw_string("日本語"), "日本語");
assert_eq!(for_python_raw_string("😀"), "😀");
}
// The allocating wrapper and the writer API must produce the same text.
#[test]
fn raw_writer_matches() {
let input = "test\x00\"'\\path\\to";
let mut w = String::new();
write_python_raw_string(&mut w, input).unwrap();
assert_eq!(for_python_raw_string(input), w);
}
}