use super::errors::{DecodeError, EncodeError};
use crate::core::dictionary::Dictionary;
/// Turns a computed codepoint into a `char`, keeping the inputs around for diagnostics.
///
/// # Errors
///
/// Returns `EncodeError::InvalidCodepoint` carrying `codepoint`, the range `start`
/// and the source `byte` when `codepoint` is not a valid Unicode scalar value.
fn safe_char_from_codepoint(codepoint: u32, start: u32, byte: u8) -> Result<char, EncodeError> {
    match char::from_u32(codepoint) {
        Some(ch) => Ok(ch),
        None => Err(EncodeError::InvalidCodepoint {
            codepoint,
            start_codepoint: start,
            byte,
        }),
    }
}
/// Encodes `data` by mapping every byte to the character at `start_codepoint + byte`.
///
/// The start codepoint is read from `dictionary`; the output contains exactly one
/// character per input byte, in order.
///
/// # Errors
///
/// Returns `EncodeError::InvalidCodepoint` for the first byte whose mapped codepoint
/// is not a valid Unicode scalar value.
///
/// # Panics
///
/// Panics if `dictionary` has no start codepoint.
pub fn encode_byte_range(data: &[u8], dictionary: &Dictionary) -> Result<String, EncodeError> {
    let start = dictionary
        .start_codepoint()
        .expect("ByteRange mode requires start_codepoint");

    // A `char` takes at most four bytes in UTF-8, so this never under-allocates.
    let mut result = String::with_capacity(data.len() * 4);
    for &byte in data {
        // Saturate instead of overflowing: `u32::MAX` is not a valid scalar value, so a
        // start codepoint near the top of the `u32` range is reported as
        // `InvalidCodepoint` rather than panicking (debug) or wrapping around (release).
        let codepoint = start.saturating_add(u32::from(byte));
        result.push(safe_char_from_codepoint(codepoint, start, byte)?);
    }
    Ok(result)
}
/// Decodes a byte-range string by mapping every character back to
/// `codepoint - start_codepoint`.
///
/// The start codepoint is read from `dictionary`; each character must fall within the
/// 256 codepoints beginning at it. The output contains one byte per input character.
///
/// # Errors
///
/// Returns the error built by `DecodeError::invalid_character` for the first character
/// outside that range, passing the character, its zero-based character position,
/// the full input and a description of the accepted range.
///
/// # Panics
///
/// Panics if `dictionary` has no start codepoint.
pub fn decode_byte_range(encoded: &str, dictionary: &Dictionary) -> Result<Vec<u8>, DecodeError> {
    let start = dictionary
        .start_codepoint()
        .expect("ByteRange mode requires start_codepoint");

    // One output byte per input character.
    let mut result = Vec::with_capacity(encoded.chars().count());
    // Plain counter rather than `enumerate()` so its integer type is still inferred
    // from the position parameter of `DecodeError::invalid_character`.
    let mut char_position = 0;
    for c in encoded.chars() {
        // `checked_sub` rejects characters below the range and the pattern rejects
        // those above it, without ever computing `start + 256` (which could overflow).
        match (c as u32).checked_sub(start) {
            Some(offset @ 0..=0xFF) => result.push(offset as u8),
            _ => {
                // Built only on the error path; the success path never allocates it.
                let valid_chars = format!(
                    "U+{:04X} to U+{:04X}",
                    start,
                    start.saturating_add(255)
                );
                return Err(DecodeError::invalid_character(
                    c,
                    char_position,
                    encoded,
                    &valid_chars,
                ));
            }
        }
        char_position += 1;
    }
    Ok(result)
}
#[cfg(test)]
// NOTE(review): presumably required because the dictionary constructor used below is
// deprecated — confirm before removing.
#[allow(deprecated)]
mod tests {
    use super::*;
    use crate::core::config::EncodingMode;

    /// Builds the ByteRange dictionary shared by every test in this module.
    fn byte_range_dictionary() -> Dictionary {
        Dictionary::new_with_mode_and_range(
            Vec::new(),
            EncodingMode::ByteRange,
            None,
            Some(0x1F3F7),
        )
        .unwrap()
    }

    /// Encodes `input`, decodes the result, and checks that the original bytes come back.
    fn assert_round_trip(input: &[u8]) {
        let dictionary = byte_range_dictionary();
        let encoded = encode_byte_range(input, &dictionary).unwrap();
        let decoded = decode_byte_range(&encoded, &dictionary).unwrap();
        assert_eq!(input, &decoded[..]);
    }

    #[test]
    fn test_byte_range_encode_decode() {
        assert_round_trip(b"Hello, World!");
    }

    #[test]
    fn test_byte_range_all_bytes() {
        let every_byte: Vec<u8> = (0..=255).collect();
        assert_round_trip(&every_byte);
    }

    #[test]
    fn test_byte_range_empty() {
        assert_round_trip(b"");
    }
}