use crate::format::FormatError;
/// Two-byte prefix (`0xFF 0xFE`) that `decode_text` looks for at the start of
/// non-Jet3 text. When the prefix is present, the bytes that follow it are
/// expanded by `decompress_text` before being decoded as UTF-16LE.
const COMPRESSED_HEADER: [u8; 2] = [0xFF, 0xFE];
/// Decodes `bytes` by mapping every byte to the Unicode scalar value with the
/// same numeric value (bytes `0x00..=0xFF` become `U+0000..=U+00FF`).
///
/// This conversion cannot fail: every byte value has a corresponding `char`.
/// The returned string contains exactly one `char` per input byte.
pub fn decode_latin1(bytes: &[u8]) -> String {
    // At least one UTF-8 byte is needed per input byte; values >= 0x80 take
    // two, so the string may still grow past this initial reservation.
    let mut text = String::with_capacity(bytes.len());
    for &byte in bytes {
        text.push(char::from(byte));
    }
    text
}
/// Decodes `bytes` as little-endian UTF-16.
///
/// # Errors
///
/// Returns [`FormatError::InvalidEncoding`] when:
/// - `bytes` has an odd length, so it cannot be split into 16-bit units, or
/// - the resulting code units are not valid UTF-16 (for example an unpaired
///   surrogate).
pub fn decode_utf16le(bytes: &[u8]) -> Result<String, FormatError> {
    let pairs = bytes.chunks_exact(2);
    // A non-empty remainder means there is a dangling byte at the end.
    if !pairs.remainder().is_empty() {
        return Err(FormatError::InvalidEncoding);
    }

    let code_units = pairs.map(|pair| u16::from_le_bytes([pair[0], pair[1]]));
    char::decode_utf16(code_units)
        .collect::<Result<String, _>>()
        .map_err(|_| FormatError::InvalidEncoding)
}
/// Decodes a raw text field into a `String`.
///
/// - When `is_jet3` is `true`, `data` is decoded with [`decode_latin1`] and the
///   call always succeeds.
/// - Otherwise, empty input yields an empty string.
/// - Otherwise, if `data` begins with `COMPRESSED_HEADER`, the bytes after the
///   header are expanded with `decompress_text` and the result is decoded as
///   UTF-16LE.
/// - In every remaining case `data` itself is decoded as UTF-16LE.
///
/// # Errors
///
/// Propagates [`FormatError::InvalidEncoding`] from [`decode_utf16le`] on the
/// non-Jet3 paths.
pub fn decode_text(data: &[u8], is_jet3: bool) -> Result<String, FormatError> {
    if is_jet3 {
        return Ok(decode_latin1(data));
    }
    if data.is_empty() {
        return Ok(String::new());
    }

    match data.strip_prefix(&COMPRESSED_HEADER[..]) {
        // Header found: `payload` is everything after the two marker bytes.
        Some(payload) => {
            let expanded = decompress_text(payload);
            decode_utf16le(&expanded)
        }
        None => decode_utf16le(data),
    }
}
/// Expands a compressed text payload (the bytes following the two-byte header)
/// into UTF-16LE bytes.
///
/// The input is scanned starting in "compressed" mode:
/// - a `0x00` byte is never emitted; it flips between compressed and
///   uncompressed mode;
/// - in compressed mode each byte is emitted followed by `0x00`, i.e. widened
///   to one little-endian 16-bit unit;
/// - in uncompressed mode the byte and the one after it are copied verbatim as
///   a pair. The second byte of a pair is not inspected, so it may be `0x00`
///   without toggling the mode. A final byte with no partner is dropped.
fn decompress_text(data: &[u8]) -> Vec<u8> {
    let mut expanded = Vec::with_capacity(data.len() * 2);
    let mut in_compressed_run = true;
    let mut input = data.iter().copied();

    while let Some(byte) = input.next() {
        if byte == 0x00 {
            in_compressed_run = !in_compressed_run;
        } else if in_compressed_run {
            expanded.extend_from_slice(&[byte, 0x00]);
        } else if let Some(partner) = input.next() {
            expanded.extend_from_slice(&[byte, partner]);
        }
        // Otherwise: uncompressed mode with a lone trailing byte — the input
        // is exhausted, nothing is emitted, and the loop ends on the next poll.
    }

    expanded
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- decode_latin1 -----------------------------------------------------

    #[test]
    fn latin1_ascii() {
        let decoded = decode_latin1(b"Hello");
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn latin1_special_chars() {
        let decoded = decode_latin1(&[0xC0, 0xE9]);
        assert_eq!(decoded, "Àé");
    }

    #[test]
    fn latin1_empty() {
        let decoded = decode_latin1(b"");
        assert_eq!(decoded, "");
    }

    #[test]
    fn latin1_full_range() {
        // Every possible byte value must map to exactly one char.
        let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
        let decoded = decode_latin1(&every_byte);
        assert_eq!(decoded.chars().count(), 256);
    }

    // ---- decode_utf16le ----------------------------------------------------

    #[test]
    fn utf16le_ascii() {
        let decoded = decode_utf16le(&[0x48, 0x00, 0x69, 0x00]).unwrap();
        assert_eq!(decoded, "Hi");
    }

    #[test]
    fn utf16le_japanese() {
        let decoded = decode_utf16le(&[0xE5, 0x65, 0x2C, 0x67]).unwrap();
        assert_eq!(decoded, "日本");
    }

    #[test]
    fn utf16le_empty() {
        let decoded = decode_utf16le(&[]).unwrap();
        assert_eq!(decoded, "");
    }

    #[test]
    fn utf16le_odd_length_error() {
        // Three bytes cannot be split into whole 16-bit units.
        let outcome = decode_utf16le(&[0x48, 0x00, 0x69]);
        assert_eq!(outcome, Err(FormatError::InvalidEncoding));
    }

    #[test]
    fn utf16le_invalid_surrogate() {
        // 0xD800 is a lone high surrogate.
        let outcome = decode_utf16le(&[0x00, 0xD8]);
        assert_eq!(outcome, Err(FormatError::InvalidEncoding));
    }

    // ---- decompress_text ---------------------------------------------------

    #[test]
    fn decompress_ascii_only() {
        let expected = vec![0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00];
        assert_eq!(decompress_text(b"Hello"), expected);
    }

    #[test]
    fn decompress_empty() {
        assert_eq!(decompress_text(b""), Vec::<u8>::new());
    }

    #[test]
    fn decompress_mixed_segments() {
        // "AB" compressed, toggle, one raw pair, toggle, "C" compressed.
        let payload = [0x41, 0x42, 0x00, 0xE5, 0x65, 0x00, 0x43];
        let expected = vec![0x41, 0x00, 0x42, 0x00, 0xE5, 0x65, 0x43, 0x00];
        assert_eq!(decompress_text(&payload), expected);
    }

    #[test]
    fn decompress_uncompressed_odd_trailing_byte() {
        // After the toggle, the single remaining byte has no partner.
        let payload = [0x00, 0xAB];
        assert_eq!(decompress_text(&payload), Vec::<u8>::new());
    }

    // ---- decode_text -------------------------------------------------------

    #[test]
    fn decode_text_compressed_hello() {
        let field = [0xFF, 0xFE, 0x48, 0x65, 0x6C, 0x6C, 0x6F];
        let decoded = decode_text(&field, false).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn decode_text_raw_utf16le() {
        let field = [0x48, 0x00, 0x69, 0x00];
        let decoded = decode_text(&field, false).unwrap();
        assert_eq!(decoded, "Hi");
    }

    #[test]
    fn decode_text_mixed_ascii_japanese() {
        // header, 'A' compressed, toggle, raw pair, toggle, 'B' compressed
        let field = [0xFF, 0xFE, 0x41, 0x00, 0xE5, 0x65, 0x00, 0x42];
        let decoded = decode_text(&field, false).unwrap();
        assert_eq!(decoded, "A日B");
    }

    #[test]
    fn decode_text_empty() {
        let decoded = decode_text(&[], false).unwrap();
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_text_header_only() {
        let field = [0xFF, 0xFE];
        let decoded = decode_text(&field, false).unwrap();
        assert_eq!(decoded, "");
    }

    #[test]
    fn decode_text_jet3_latin1() {
        let field = [0x48, 0x65, 0x6C, 0x6C, 0x6F];
        let decoded = decode_text(&field, true).unwrap();
        assert_eq!(decoded, "Hello");
    }

    #[test]
    fn decode_text_jet3_special_chars() {
        let field = [0xC0, 0xE9];
        let decoded = decode_text(&field, true).unwrap();
        assert_eq!(decoded, "Àé");
    }
}