use rustc_hash::FxHashMap;
use std::sync::LazyLock;
/// Lookup table from every byte value (used as the index) to the `char` that represents it.
///
/// Bytes in `33..=126`, `161..=172` and `174..=255` are represented by the character
/// with the same code point. The remaining 68 bytes (`0..=32`, `127..=160` and `173`)
/// are assigned, in ascending byte order, the consecutive code points starting at
/// U+0100, so byte 32 (space) becomes U+0120 (`Ġ`).
///
/// This matches the layout of the `bytes_to_unicode` table used by GPT-2-style
/// byte-level BPE tokenizers.
static BYTE_TO_CHAR: LazyLock<[char; 256]> = LazyLock::new(|| {
    let mut mapping = ['\0'; 256];
    // Code point handed to the next byte that does not represent itself.
    let mut next_char = 256u32;
    for b in 0u8..=255 {
        // A range pattern replaces the former `Vec::contains` scan per byte.
        let is_direct = matches!(b, 33..=126 | 161..=172 | 174..=255);
        mapping[usize::from(b)] = if is_direct {
            char::from(b)
        } else {
            // Only 68 bytes take this branch, so the code points stay within
            // U+0100..=U+0143, far away from the surrogate range.
            let ch = char::from_u32(next_char)
                .expect("U+0100..=U+0143 are valid Unicode scalar values");
            next_char += 1;
            ch
        };
    }
    mapping
});
/// Reverse of [`BYTE_TO_CHAR`]: maps each stand-in character back to its byte.
///
/// Built lazily on first access by walking all 256 byte values and inserting
/// the pair `(BYTE_TO_CHAR[byte], byte)`.
static CHAR_TO_BYTE: LazyLock<FxHashMap<char, u8>> = LazyLock::new(|| {
    (0..=u8::MAX)
        .map(|byte| (BYTE_TO_CHAR[usize::from(byte)], byte))
        .collect()
});
/// Encodes arbitrary bytes as a string by replacing each byte with its entry
/// in [`BYTE_TO_CHAR`].
///
/// The result contains exactly one `char` per input byte, in the same order.
/// An empty slice yields an empty string.
#[inline]
pub fn byte_level_encode(bytes: &[u8]) -> String {
    let table = &*BYTE_TO_CHAR;
    // Every output char occupies at least one UTF-8 byte, so this is a lower bound.
    let mut encoded = String::with_capacity(bytes.len());
    for &byte in bytes {
        encoded.push(table[usize::from(byte)]);
    }
    encoded
}
/// Decodes a byte-level encoded string back into the bytes it stands for.
///
/// Each `char` of `text` is looked up in [`CHAR_TO_BYTE`]. Returns `None` as
/// soon as a character without an entry is encountered; otherwise returns one
/// byte per character, in order. An empty string decodes to an empty vector.
#[inline]
pub fn byte_level_decode(text: &str) -> Option<Vec<u8>> {
    let mut decoded = Vec::new();
    for ch in text.chars() {
        // `?` aborts the whole decode on the first unmapped character.
        let byte = *CHAR_TO_BYTE.get(&ch)?;
        decoded.push(byte);
    }
    Some(decoded)
}
/// Decodes a byte-level encoded string supplied as raw UTF-8 bytes.
///
/// Returns `None` when `encoded_bytes` is not valid UTF-8, or when
/// [`byte_level_decode`] rejects the resulting text.
#[inline]
pub fn byte_level_decode_bytes(encoded_bytes: &[u8]) -> Option<Vec<u8>> {
    match std::str::from_utf8(encoded_bytes) {
        Ok(text) => byte_level_decode(text),
        Err(_) => None,
    }
}
/// Reports whether `ch` is one of the characters that stand in for a byte,
/// i.e. whether it is a key of [`CHAR_TO_BYTE`].
#[inline]
pub fn is_byte_level_char(ch: char) -> bool {
    let reverse_table = &*CHAR_TO_BYTE;
    reverse_table.contains_key(&ch)
}
/// Returns the character that represents `byte` in [`BYTE_TO_CHAR`].
///
/// The table has an entry for every possible `u8`, so the lookup cannot go
/// out of bounds.
#[inline]
pub fn get_byte_level_char(byte: u8) -> char {
    let index = usize::from(byte);
    BYTE_TO_CHAR[index]
}
/// Returns the byte that `ch` stands for, or `None` when `ch` has no entry in
/// [`CHAR_TO_BYTE`].
#[inline]
pub fn get_byte_level_byte(ch: char) -> Option<u8> {
    let byte = *CHAR_TO_BYTE.get(&ch)?;
    Some(byte)
}
#[cfg(test)]
mod tests {
    // Unit tests for the byte <-> char table and the encode/decode helpers.
    use super::*;
    use std::collections::HashSet;

    /// Every byte must map to a distinct character, otherwise decoding would be ambiguous.
    #[test]
    fn test_byte_level_mapping_is_bijective() {
        let mut seen: HashSet<char> = HashSet::new();
        for b in 0u8..=255 {
            let ch = BYTE_TO_CHAR[b as usize];
            // `insert` returns `false` when the character was already present.
            assert!(seen.insert(ch), "Duplicate mapping for byte {}", b);
        }
        assert_eq!(seen.len(), 256);
    }

    /// Encoding then decoding any single byte gives that byte back.
    #[test]
    fn test_byte_level_roundtrip() {
        for b in 0u8..=255 {
            let encoded = byte_level_encode(&[b]);
            let decoded = byte_level_decode(&encoded).unwrap();
            assert_eq!(decoded, vec![b], "Roundtrip failed for byte {}", b);
        }
    }

    /// Byte 32 (space) is not mapped directly; it is spelled U+0120.
    #[test]
    fn test_space_mapping() {
        let space_char = BYTE_TO_CHAR[32];
        assert_eq!(space_char, 'Ġ');
        assert_eq!(space_char as u32, 0x0120);
    }

    /// Printable ASCII bytes are represented by the identical character.
    #[test]
    fn test_printable_ascii_preserved() {
        for b in 33u8..=126 {
            let ch = BYTE_TO_CHAR[b as usize];
            // Compare full code points: `ch as u8` truncates, so a wrong mapping
            // such as U+0141 for b'A' would have slipped through.
            assert_eq!(ch as u32, u32::from(b), "ASCII {} should map to itself", b);
        }
    }

    #[test]
    fn test_encode_hello() {
        let encoded = byte_level_encode(b"Hello");
        assert_eq!(encoded, "Hello");
    }

    #[test]
    fn test_encode_with_space() {
        let encoded = byte_level_encode(b" hello");
        assert_eq!(encoded, "Ġhello");
    }

    /// Multi-byte UTF-8 input is encoded byte by byte, not character by character.
    #[test]
    fn test_encode_chinese() {
        let text = "你好";
        let encoded = byte_level_encode(text.as_bytes());
        assert_eq!(encoded, "ä½łå¥½");
    }

    #[test]
    fn test_decode_hello() {
        let decoded = byte_level_decode("Hello").unwrap();
        assert_eq!(decoded, b"Hello");
    }

    #[test]
    fn test_decode_with_space() {
        let decoded = byte_level_decode("Ġhello").unwrap();
        assert_eq!(decoded, b" hello");
    }

    #[test]
    fn test_decode_chinese() {
        let decoded = byte_level_decode("ä½łå¥½").unwrap();
        assert_eq!(String::from_utf8(decoded).unwrap(), "你好");
    }

    /// Input that cannot have come from the encoder is rejected with `None`.
    #[test]
    fn test_decode_rejects_unmapped_input() {
        // A literal space (U+0020) is not in the table; byte 32 is spelled 'Ġ'.
        assert_eq!(byte_level_decode("a b"), None);
        // Invalid UTF-8 is rejected before any table lookup happens.
        assert_eq!(byte_level_decode_bytes(&[0xFF]), None);
        assert_eq!(
            byte_level_decode_bytes("Ġhello".as_bytes()),
            Some(b" hello".to_vec())
        );
    }

    #[test]
    fn test_full_roundtrip_string() {
        let original = "Hello, 世界! 🌍";
        let encoded = byte_level_encode(original.as_bytes());
        let decoded_bytes = byte_level_decode(&encoded).unwrap();
        let decoded = String::from_utf8(decoded_bytes).unwrap();
        assert_eq!(decoded, original);
    }
}