use super::*;
/// Every ASCII value in `0..128` must report a length of one byte, encode to
/// exactly that byte, and decode back to the same code with one byte consumed.
#[test]
fn ascii_roundtrip_all_128() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    for byte in 0u8..128 {
        let code = u32::from(byte);
        assert_eq!(char_bytes(code), 1);

        // Encode into the shared scratch buffer and inspect the single byte.
        let written = char_string(code, &mut scratch);
        assert_eq!(written, 1);
        assert_eq!(scratch[0], byte);

        // Decode only the bytes that were just written.
        let (decoded, consumed) = string_char(&scratch[..written]);
        assert_eq!(decoded, code);
        assert_eq!(consumed, 1);
    }
}
/// U+00E9 must take two bytes, encode as `C3 A9`, and decode back to itself.
#[test]
fn two_byte_unicode_roundtrip() {
    let code: u32 = 0xE9;
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 2);

    let written = char_string(code, &mut encoded);
    assert_eq!(written, 2);
    // The length was asserted above, so comparing the written prefix checks
    // the same two bytes the encoder produced.
    assert_eq!(encoded[..written], [0xC3u8, 0xA9]);

    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 2);
}
/// U+2018 must take three bytes, encode as `E2 80 98`, and decode back to
/// itself.
#[test]
fn three_byte_unicode_roundtrip() {
    let code: u32 = 0x2018;
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 3);

    let written = char_string(code, &mut encoded);
    assert_eq!(written, 3);
    // Length is already pinned to 3, so this checks bytes 0, 1 and 2.
    assert_eq!(encoded[..written], [0xE2u8, 0x80, 0x98]);

    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 3);
}
/// U+1F344 must take four bytes, encode as `F0 9F 8D 84`, and decode back to
/// itself.
#[test]
fn four_byte_unicode_roundtrip() {
    let code: u32 = 0x1F344;
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 4);

    let written = char_string(code, &mut encoded);
    assert_eq!(written, 4);
    // Length is already pinned to 4, so this checks bytes 0 through 3.
    assert_eq!(encoded[..written], [0xF0u8, 0x9F, 0x8D, 0x84]);

    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 4);
}
/// Raw byte `0x80` must map to code `0x3FFF80`, be recognised as a byte8
/// character, map back to `0x80`, and round-trip through the two-byte
/// sequence `C0 80`.
#[test]
fn raw_byte_0x80_roundtrip() {
    let raw: u8 = 0x80;
    let code = byte8_to_char(raw);

    // Mapping between the raw byte and its character code.
    assert_eq!(code, 0x3FFF80);
    assert!(char_byte8_p(code));
    assert_eq!(char_to_byte8(code), raw);

    // Encoded form.
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 2);
    let written = char_string(code, &mut encoded);
    assert_eq!(written, 2);
    assert_eq!(encoded[..written], [0xC0u8, 0x80]);

    // Decoding returns the same code and consumes both bytes.
    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 2);
}
/// Raw byte `0xFF` must map to code `0x3FFFFF`, be recognised as a byte8
/// character, map back to `0xFF`, and round-trip through the two-byte
/// sequence `C1 BF`.
#[test]
fn raw_byte_0xff_roundtrip() {
    let raw: u8 = 0xFF;
    let code = byte8_to_char(raw);

    // Mapping between the raw byte and its character code.
    assert_eq!(code, 0x3FFFFF);
    assert!(char_byte8_p(code));
    assert_eq!(char_to_byte8(code), raw);

    // Encoded form.
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 2);
    let written = char_string(code, &mut encoded);
    assert_eq!(written, 2);
    assert_eq!(encoded[..written], [0xC1u8, 0xBF]);

    // Decoding returns the same code and consumes both bytes.
    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 2);
}
/// Sweeps every raw byte in `0x80..=0xFF`: each must convert to a byte8
/// character, convert back to the same byte, encode in two bytes, and decode
/// to the same code.
#[test]
fn raw_byte_all_roundtrip() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    for raw in 0x80u8..=0xFF {
        let code = byte8_to_char(raw);
        assert!(char_byte8_p(code));
        assert_eq!(char_to_byte8(code), raw);

        let written = char_string(code, &mut scratch);
        assert_eq!(written, 2);

        let (decoded, consumed) = string_char(&scratch[..written]);
        // Name the offending byte so a failure in the sweep is easy to locate.
        assert_eq!(decoded, code, "roundtrip failed for byte 0x{:02X}", raw);
        assert_eq!(consumed, 2);
    }
}
/// A buffer holding `'A'`, U+00E9 and raw byte `0x80` must count as three
/// characters while occupying five bytes.
#[test]
fn chars_in_multibyte_mixed() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    let mut data = Vec::new();

    // Encode each character in turn and append its bytes to the buffer.
    for code in [b'A' as u32, 0xE9, byte8_to_char(0x80)] {
        let written = char_string(code, &mut scratch);
        data.extend_from_slice(&scratch[..written]);
    }

    assert_eq!(chars_in_multibyte(&data), 3);
    assert_eq!(data.len(), 5);
}
/// An empty byte slice contains zero characters.
#[test]
fn chars_in_multibyte_empty() {
    let count = chars_in_multibyte(&[]);
    assert_eq!(count, 0);
}
/// The five ASCII bytes of `"hello"` count as five characters.
#[test]
fn chars_in_multibyte_ascii_only() {
    let count = chars_in_multibyte(b"hello");
    assert_eq!(count, 5);
}
/// Checks character-index to byte-offset conversion, and the reverse, on a
/// buffer holding `'A'`, U+2018, raw byte `0xFF` and `'B'`.
///
/// The seven-byte buffer has character boundaries at byte offsets
/// 0, 1, 4, 6 and 7.
#[test]
fn char_byte_pos_conversion() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    let mut data = Vec::new();

    // Build the buffer one encoded character at a time.
    for code in [b'A' as u32, 0x2018, byte8_to_char(0xFF), b'B' as u32] {
        let written = char_string(code, &mut scratch);
        data.extend_from_slice(&scratch[..written]);
    }
    assert_eq!(data.len(), 7);

    // (character index, expected byte offset)
    for (char_pos, byte_pos) in [(0, 0), (1, 1), (2, 4), (3, 6), (4, 7)] {
        assert_eq!(char_to_byte_pos(&data, char_pos), byte_pos);
    }

    // (byte offset, expected character index)
    for (byte_pos, char_pos) in [(0, 0), (1, 1), (4, 2), (6, 3), (7, 4)] {
        assert_eq!(byte_to_char_pos(&data, byte_pos), char_pos);
    }
}
/// Plain ASCII text converted with `utf8_to_emacs` is returned unchanged by
/// `try_as_utf8`.
#[test]
fn try_as_utf8_valid() {
    let text = "hello world";
    let converted = utf8_to_emacs(text);
    let viewed = try_as_utf8(&converted);
    assert_eq!(viewed, Some(text));
}
/// Text containing non-ASCII Unicode (curly quotes and a combining accent)
/// converted with `utf8_to_emacs` is returned unchanged by `try_as_utf8`.
#[test]
fn try_as_utf8_with_unicode() {
    let text = "\u{2018}cafe\u{0301}\u{2019}";
    let converted = utf8_to_emacs(text);
    let viewed = try_as_utf8(&converted);
    assert_eq!(viewed, Some(text));
}
/// The encoded form of raw byte `0x80` must be rejected by `try_as_utf8`.
#[test]
fn try_as_utf8_with_raw_bytes() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    let raw_char = byte8_to_char(0x80);
    let written = char_string(raw_char, &mut scratch);

    let encoded = &scratch[..written];
    assert!(try_as_utf8(encoded).is_none());
}
/// Plain ASCII text converted with `utf8_to_emacs` comes back from
/// `to_utf8_lossy` as the same text.
#[test]
fn to_utf8_lossy_clean() {
    let text = "hello";
    let converted = utf8_to_emacs(text);
    let rendered = to_utf8_lossy(&converted);
    assert_eq!(rendered, "hello");
}
/// A raw byte (`0x80`) between `'A'` and `'B'` is rendered as U+FFFD by
/// `to_utf8_lossy`, leaving the surrounding ASCII intact.
#[test]
fn to_utf8_lossy_with_raw_bytes() {
    let mut scratch = [0u8; MAX_MULTIBYTE_LENGTH];
    let mut data = Vec::new();

    // Encode 'A', the raw byte, then 'B', appending each to the buffer.
    for code in [b'A' as u32, byte8_to_char(0x80), b'B' as u32] {
        let written = char_string(code, &mut scratch);
        data.extend_from_slice(&scratch[..written]);
    }

    assert_eq!(to_utf8_lossy(&data), "A\u{FFFD}B");
}
/// `utf8_to_emacs` followed by `try_as_utf8` must return the original text
/// for empty, ASCII, two-, three- and four-byte UTF-8 inputs, and a mix of
/// them.
#[test]
fn utf8_to_emacs_roundtrip() {
    let cases = [
        "",
        "hello",
        "\u{E9}",
        "\u{2018}",
        "\u{1F344}",
        "mix\u{E9}d \u{2018}text\u{2019} with \u{1F344}",
    ];

    for &text in cases.iter() {
        let converted = utf8_to_emacs(text);
        let viewed = try_as_utf8(&converted);
        assert_eq!(viewed, Some(text), "roundtrip failed for {:?}", text);
    }
}
/// Code `0x20_0000` must take five bytes, start with lead byte `0xF8`, and
/// decode back to itself.
#[test]
fn five_byte_char_roundtrip() {
    let code: u32 = 0x20_0000;
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 5);

    let written = char_string(code, &mut encoded);
    assert_eq!(written, 5);
    // Only the lead byte is pinned here; the remaining bytes are covered by
    // the decode check below.
    assert_eq!(encoded[0], 0xF8);

    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 5);
}
/// `MAX_5_BYTE_CHAR` must take five bytes and decode back to itself.
#[test]
fn five_byte_max_roundtrip() {
    let code: u32 = MAX_5_BYTE_CHAR;
    let mut encoded = [0u8; MAX_MULTIBYTE_LENGTH];
    assert_eq!(char_bytes(code), 5);

    let written = char_string(code, &mut encoded);
    assert_eq!(written, 5);

    let (decoded, consumed) = string_char(&encoded[..written]);
    assert_eq!(decoded, code);
    assert_eq!(consumed, 5);
}
/// `char_byte8_p` is false at `MAX_5_BYTE_CHAR`, and true for the next code
/// above it and for `MAX_CHAR`.
#[test]
fn char_byte8_p_boundary() {
    let last_regular = MAX_5_BYTE_CHAR;
    let first_above = MAX_5_BYTE_CHAR + 1;

    assert!(!char_byte8_p(last_regular));
    assert!(char_byte8_p(first_above));
    assert!(char_byte8_p(MAX_CHAR));
}
/// For bytes below `0x80`, `byte8_to_char` returns the byte value unchanged.
#[test]
fn byte8_to_char_ascii_passthrough() {
    (0u8..0x80).for_each(|byte| {
        assert_eq!(byte8_to_char(byte), u32::from(byte));
    });
}
/// Asking for character index 5 in the two-byte input `"AB"` yields byte
/// offset 2, which is the length of the input.
#[test]
fn char_to_byte_pos_beyond_end() {
    let text = b"AB";
    let offset = char_to_byte_pos(text, 5);
    assert_eq!(offset, 2);
}
/// Byte offset 0 always corresponds to character index 0.
#[test]
fn byte_to_char_pos_at_zero() {
    let text = b"hello";
    let index = byte_to_char_pos(text, 0);
    assert_eq!(index, 0);
}