use std::borrow::{
Borrow,
Cow,
ToOwned,
};
use std::ops::Deref;
/// Converts UTF-8 bytes into "Modified UTF-8" (MUTF-8).
///
/// Two kinds of sequences are rewritten:
///
/// * a NUL byte (`0x00`) becomes the two-byte sequence `0xC0 0x80`;
/// * a four-byte sequence becomes two three-byte sequences (a surrogate
///   pair, each half starting with `0xED`).
///
/// One-, two- and three-byte sequences are passed through unchanged.
///
/// # Returns
///
/// `Cow::Borrowed(input)` when nothing had to be rewritten (including empty
/// input), otherwise `Cow::Owned` holding the converted bytes.
///
/// # Malformed input
///
/// The input is not validated. Once copying has started, trailing bytes
/// missing from a truncated sequence are read as `0`, and bytes that do not
/// look like a lead byte are skipped. This function never panics.
pub fn utf8_to_mutf8(input: &[u8]) -> Cow<[u8]> {
    let len = input.len();

    // `None` while the output is still byte-identical to `input`; becomes
    // `Some` (seeded with the prefix seen so far) at the first rewrite.
    let mut copied: Option<Vec<u8>> = None;
    let start_copy = |mark: usize| -> Vec<u8> {
        let mut out = Vec::with_capacity(len);
        out.extend_from_slice(&input[..mark]);
        out
    };
    // Lenient read: a byte past the end of the input is treated as 0.
    let byte_at = |idx: usize| -> u8 { input.get(idx).copied().unwrap_or(0) };

    let mut i = 0;
    while i < len {
        let mark = i;
        let byte1 = input[i];
        i += 1;

        if byte1 & 0x80 == 0 {
            // One-byte encoding.
            if byte1 == 0 {
                copied
                    .get_or_insert_with(|| start_copy(mark))
                    .extend_from_slice(&[0xC0, 0x80]);
            } else if let Some(out) = copied.as_mut() {
                out.push(byte1);
            }
        } else if byte1 & 0xE0 == 0xC0 {
            // Two-byte encoding. While still borrowing, the trailing byte is
            // left for the next iteration, where it matches no branch.
            if let Some(out) = copied.as_mut() {
                let byte2 = byte_at(i);
                i += 1;
                out.extend_from_slice(&[byte1, byte2]);
            }
        } else if byte1 & 0xF0 == 0xE0 {
            // Three-byte encoding; same borrowing shortcut as above.
            if let Some(out) = copied.as_mut() {
                let byte2 = byte_at(i);
                let byte3 = byte_at(i + 1);
                i += 2;
                out.extend_from_slice(&[byte1, byte2, byte3]);
            }
        } else if byte1 & 0xF8 == 0xF0 {
            // Four-byte encoding: re-encode as a surrogate pair.
            let byte2 = byte_at(i);
            let byte3 = byte_at(i + 1);
            let byte4 = byte_at(i + 2);
            i += 3;

            // Bits in: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            let bits: u32 = (u32::from(byte1 & 0x07) << 18)
                | (u32::from(byte2 & 0x3F) << 12)
                | (u32::from(byte3 & 0x3F) << 6)
                | u32::from(byte4 & 0x3F);

            // `wrapping_sub` keeps malformed input (where `bits >> 16` is 0)
            // from panicking in builds with overflow checks enabled; the
            // result is masked to four bits either way.
            let high = (bits >> 16).wrapping_sub(1) & 0x0F;

            // Every value below is at most 0xBF, so the `as u8` casts are lossless.
            copied
                .get_or_insert_with(|| start_copy(mark))
                .extend_from_slice(&[
                    0xED,
                    (0xA0 + high) as u8,
                    (0x80 + ((bits >> 10) & 0x3F)) as u8,
                    0xED,
                    (0xB0 + ((bits >> 6) & 0x0F)) as u8,
                    byte4,
                ]);
        }
    }

    match copied {
        Some(out) => Cow::Owned(out),
        None => Cow::Borrowed(input),
    }
}
/// Converts "Modified UTF-8" (MUTF-8) bytes into UTF-8.
///
/// Two kinds of sequences are rewritten:
///
/// * the two-byte sequence `0xC0 0x80` becomes a single NUL byte (`0x00`);
/// * a six-byte surrogate pair (`0xED 0xAx ..` followed by `0xED 0xBx ..`)
///   becomes a single four-byte sequence.
///
/// All other one-, two- and three-byte sequences are passed through
/// unchanged.
///
/// # Returns
///
/// `Cow::Borrowed(input)` when nothing had to be rewritten (including empty
/// input), otherwise `Cow::Owned` holding the converted bytes.
///
/// # Malformed input
///
/// The input is not validated. Trailing bytes missing from a truncated
/// sequence are read as `0`, and once copying has started, bytes that match
/// none of the one-, two- or three-byte lead patterns are skipped.
pub fn mutf8_to_utf8(input: &[u8]) -> Cow<[u8]> {
    let len = input.len();

    // `None` while the output is still byte-identical to `input`; becomes
    // `Some` (seeded with the prefix seen so far) at the first rewrite.
    let mut copied: Option<Vec<u8>> = None;
    let start_copy = |mark: usize| -> Vec<u8> {
        let mut out = Vec::with_capacity(len);
        out.extend_from_slice(&input[..mark]);
        out
    };
    // Lenient read: a byte past the end of the input is treated as 0.
    let byte_at = |idx: usize| -> u8 { input.get(idx).copied().unwrap_or(0) };

    let mut i = 0;
    while i < len {
        let mark = i;
        let byte1 = input[i];
        i += 1;

        if byte1 & 0x80 == 0 {
            // One-byte encoding.
            if let Some(out) = copied.as_mut() {
                out.push(byte1);
            }
        } else if byte1 & 0xE0 == 0xC0 {
            // Two-byte encoding.
            let byte2 = byte_at(i);
            i += 1;
            if byte1 == 0xC0 && byte2 == 0x80 {
                // Encoded NUL.
                copied.get_or_insert_with(|| start_copy(mark)).push(0);
            } else if let Some(out) = copied.as_mut() {
                out.extend_from_slice(&[byte1, byte2]);
            }
        } else if byte1 & 0xF0 == 0xE0 {
            // Three-byte encoding, possibly the first half of a surrogate pair.
            let byte2 = byte_at(i);
            let byte3 = byte_at(i + 1);
            i += 2;

            // `i + 2 < len` guarantees the three look-ahead indices are in bounds.
            if i + 2 < len && byte1 == 0xED && byte2 & 0xF0 == 0xA0 {
                let (byte4, byte5, byte6) = (input[i], input[i + 1], input[i + 2]);
                if byte4 == 0xED && byte5 & 0xF0 == 0xB0 {
                    // Consume all three bytes of the second half so the
                    // last one is never scanned (or emitted) a second time.
                    i += 3;

                    // Bits in: 11101101 1010xxxx 10xxxxxx
                    //          11101101 1011xxxx 10xxxxxx
                    let bits: u32 = ((u32::from(byte2 & 0x0F) + 1) << 16)
                        + (u32::from(byte3 & 0x3F) << 10)
                        + (u32::from(byte5 & 0x0F) << 6)
                        + u32::from(byte6 & 0x3F);

                    // Every value below is at most 0xF7, so the `as u8` casts are lossless.
                    copied
                        .get_or_insert_with(|| start_copy(mark))
                        .extend_from_slice(&[
                            (0xF0 + ((bits >> 18) & 0x07)) as u8,
                            (0x80 + ((bits >> 12) & 0x3F)) as u8,
                            (0x80 + ((bits >> 6) & 0x3F)) as u8,
                            (0x80 + (bits & 0x3F)) as u8,
                        ]);
                    continue;
                }
            }

            if let Some(out) = copied.as_mut() {
                out.extend_from_slice(&[byte1, byte2, byte3]);
            }
        }
    }

    match copied {
        Some(out) => Cow::Owned(out),
        None => Cow::Borrowed(input),
    }
}