use crate::{AsmErr, AsmResult, StrRef};
use crate::impls::ToStringRef;
/// Converts Modified UTF-8 (MUTF-8) bytes into standard UTF-8 bytes.
///
/// The conversion rewrites the two places where the encodings differ:
///
/// * the two-byte sequence `C0 80` becomes a single `0x00` byte;
/// * a surrogate pair encoded as two three-byte sequences
///   (`ED A0..AF xx` followed by `ED B0..BF yy`) becomes one four-byte
///   UTF-8 sequence.
///
/// Every other one-, two- and three-byte sequence is copied verbatim.
/// Continuation bytes are not validated here, and an unpaired surrogate is
/// copied through as its three raw bytes, so the output is only guaranteed to
/// be well-formed UTF-8 when the input is well-formed MUTF-8.
///
/// # Errors
///
/// Returns `AsmErr::ReadUTF8` when a lead byte is not a valid MUTF-8 lead
/// byte (`0x00`, `0x80..=0xBF`, `0xF0..=0xFF`) or when the input ends in the
/// middle of a multi-byte sequence.
pub(crate) fn mutf8_to_utf8(mutf8: &[u8]) -> AsmResult<Vec<u8>> {
    let len = mutf8.len();
    // The output never exceeds the input: NUL shrinks 2 -> 1, pairs shrink 6 -> 4.
    let mut utf8 = Vec::with_capacity(len);
    let mut current_offset = 0;
    while current_offset < len {
        let rest = &mutf8[current_offset..];
        let byte1 = rest[0];
        // Classify the lead byte first so that invalid leads are reported
        // without touching any following bytes.
        let width = match byte1 {
            0x01..=0x7F => 1,
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            _ => {
                return AsmErr::ReadUTF8(format!("unknown MUTF-8 first byte: 0x{:X}", byte1)).e();
            }
        };
        if rest.len() < width {
            return AsmErr::ReadUTF8(format!(
                "truncated MUTF-8 sequence starting with byte: 0x{:X}",
                byte1
            ))
            .e();
        }

        // Only the exact pair C0 80 encodes NUL; e.g. C2 80 is U+0080.
        if width == 2 && byte1 == 0xC0 && rest[1] == 0x80 {
            utf8.push(0x00);
            current_offset += 2;
            continue;
        }

        // A high surrogate only forms a supplementary character when it is
        // immediately followed by a low surrogate.
        let is_surrogate_pair = byte1 == 0xED
            && rest.len() >= 6
            && (0xA0..=0xAF).contains(&rest[1])
            && rest[3] == 0xED
            && (0xB0..=0xBF).contains(&rest[4]);
        if is_surrogate_pair {
            // 0x10000 + (high 10 bits << 10) + low 10 bits, with the high
            // ten bits split 4/6 and the low ten bits split 4/6.
            let code = 0x1_0000
                + ((u32::from(rest[1]) & 0x0F) << 16)
                + ((u32::from(rest[2]) & 0x3F) << 10)
                + ((u32::from(rest[4]) & 0x0F) << 6)
                + (u32::from(rest[5]) & 0x3F);
            // `code` is at most 0x10FFFF, so `code >> 18` fits in three bits.
            utf8.push(0xF0 | (code >> 18) as u8);
            utf8.push(0x80 | ((code >> 12) & 0x3F) as u8);
            utf8.push(0x80 | ((code >> 6) & 0x3F) as u8);
            utf8.push(0x80 | (code & 0x3F) as u8);
            current_offset += 6;
            continue;
        }

        // Identical in both encodings: copy the sequence unchanged.
        utf8.extend_from_slice(&rest[..width]);
        current_offset += width;
    }
    Ok(utf8)
}
/// Decodes MUTF-8 bytes into a shared string reference.
///
/// The bytes are first converted with `mutf8_to_utf8`, then validated as
/// UTF-8 and wrapped via `to_ref`.
///
/// # Errors
///
/// Propagates any error from `mutf8_to_utf8`, and returns
/// `AsmErr::ReadUTF8` carrying the validation message when the converted
/// bytes are not valid UTF-8.
pub(crate) fn mutf8_to_string(mutf8: &[u8]) -> AsmResult<StrRef> {
    let decoded = mutf8_to_utf8(mutf8)?;
    String::from_utf8(decoded)
        .map(|text| text.to_ref())
        .map_err(|err| AsmErr::ReadUTF8(err.to_string()))
}
/// Converts standard UTF-8 bytes into Modified UTF-8 (MUTF-8) bytes.
///
/// The conversion rewrites the two places where the encodings differ:
///
/// * a `0x00` byte becomes the two-byte sequence `C0 80`;
/// * a four-byte sequence (a code point in `U+10000..=U+10FFFF`) becomes a
///   surrogate pair, each half written as a three-byte sequence
///   (`ED A0..AF xx` followed by `ED B0..BF yy`).
///
/// Every other one-, two- and three-byte sequence is copied verbatim.
/// Continuation bytes are not validated here.
///
/// # Errors
///
/// Returns `AsmErr::ReadUTF8` when a lead byte is not a valid UTF-8 lead byte
/// (`0x80..=0xBF`, `0xF5..=0xFF`), when the input ends in the middle of a
/// multi-byte sequence, or when a four-byte sequence decodes to a value
/// outside `U+10000..=U+10FFFF`.
pub(crate) fn utf8_to_mutf8(utf8: &[u8]) -> AsmResult<Vec<u8>> {
    let len = utf8.len();
    // A lower bound only: NUL and supplementary characters grow when encoded.
    let mut mutf8 = Vec::with_capacity(len);
    let mut current_offset = 0;
    while current_offset < len {
        let rest = &utf8[current_offset..];
        let byte1 = rest[0];
        // Classify the lead byte first so that invalid leads are reported
        // without touching any following bytes.
        let width = match byte1 {
            0x00..=0x7F => 1,
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF4 => 4,
            _ => {
                return AsmErr::ReadUTF8(format!("unknown UTF-8 first byte: 0x{:X}", byte1)).e();
            }
        };
        if rest.len() < width {
            return AsmErr::ReadUTF8(format!(
                "truncated UTF-8 sequence starting with byte: 0x{:X}",
                byte1
            ))
            .e();
        }

        if byte1 == 0x00 {
            // MUTF-8 never contains an embedded zero byte.
            mutf8.extend_from_slice(&[0xC0, 0x80]);
        } else if width == 4 {
            let code = ((u32::from(byte1) & 0x07) << 18)
                | ((u32::from(rest[1]) & 0x3F) << 12)
                | ((u32::from(rest[2]) & 0x3F) << 6)
                | (u32::from(rest[3]) & 0x3F);
            // Overlong forms would underflow the subtraction below and values
            // past U+10FFFF do not fit in a surrogate pair.
            if code < 0x1_0000 || code > 0x10_FFFF {
                return AsmErr::ReadUTF8(format!(
                    "invalid UTF-8 supplementary code point: 0x{:X}",
                    code
                ))
                .e();
            }
            // 20-bit value: the top ten bits form the high surrogate and the
            // bottom ten bits the low surrogate, each split 4/6 across two bytes.
            let scalar = code - 0x1_0000;
            mutf8.extend_from_slice(&[
                0xED,
                0xA0 | ((scalar >> 16) & 0x0F) as u8,
                0x80 | ((scalar >> 10) & 0x3F) as u8,
                0xED,
                0xB0 | ((scalar >> 6) & 0x0F) as u8,
                0x80 | (scalar & 0x3F) as u8,
            ]);
        } else {
            // Identical in both encodings: copy the sequence unchanged.
            mutf8.extend_from_slice(&rest[..width]);
        }
        current_offset += width;
    }
    Ok(mutf8)
}