use crate::error::{Result, Error, Mode, Expected, Position};
use std::borrow::Cow;
pub fn utf8_to_mutf8(input: &[u8]) -> Result<Cow<[u8]>> {
let len = input.len();
if len == 0 {
return Ok(Cow::Borrowed(input));
}
const MODE_BORROW: u8 = 0;
const MODE_COPY: u8 = 1;
let mut mode = MODE_BORROW;
let mut data = vec![];
let mut i = 0;
while i < len {
let mark = i;
let byte1 = unsafe { *input.get_unchecked(i) };
i += 1;
if byte1 & 0x80 == 0 {
if byte1 == 0 {
if mode == MODE_BORROW {
mode = MODE_COPY;
let run = &input[0..mark];
data.extend(run);
}
data.push(0xC0);
data.push(0x80);
} else if mode == MODE_COPY {
data.push(byte1);
}
} else if byte1 & 0xE0 == 0xC0 {
if mode == MODE_COPY {
data.push(byte1);
let byte2 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::TwoByte, Position::Two))?;
i += 1;
data.push(byte2);
}
} else if byte1 & 0xF0 == 0xE0 {
if mode == MODE_COPY {
data.push(byte1);
let byte2 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::ThreeByte, Position::Two))?;
i += 1;
data.push(byte2);
let byte3 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::ThreeByte, Position::Three))?;
i += 1;
data.push(byte3);
}
} else if byte1 & 0xF8 == 0xF0 {
if mode == MODE_BORROW {
mode = MODE_COPY;
let run = &input[0..mark];
data.extend(run);
}
let byte2 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::FourByte, Position::Two))?;
i += 1;
let byte3 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::FourByte, Position::Three))?;
i += 1;
let byte4 = *input.get(i).ok_or(Error::EndOfInput(Mode::Encoding, Expected::FourByte, Position::Four))?;
i += 1;
let mut bits: u32 = ((byte1 as u32) & 0x07) << 18;
bits += ((byte2 as u32) & 0x3F) << 12;
bits += ((byte3 as u32) & 0x3F) << 6;
bits += (byte4 as u32) & 0x3F;
data.push(0xED);
data.push((0xA0 + (((bits >> 16) - 1) & 0x0F)) as u8);
data.push((0x80 + ((bits >> 10) & 0x3F)) as u8);
data.push(0xED);
data.push((0xB0 + ((bits >> 6) & 0x0F)) as u8);
data.push(byte4);
}
}
let cow = if mode == MODE_BORROW {
Cow::Borrowed(input)
} else {
Cow::Owned(data)
};
Ok(cow)
}
pub fn mutf8_to_utf8(input: &[u8]) -> Result<Cow<[u8]>> {
let len = input.len();
if len == 0 {
return Ok(Cow::Borrowed(input));
}
const MODE_BORROW: u8 = 0;
const MODE_COPY: u8 = 1;
let mut mode = MODE_BORROW;
let mut data = vec![];
let mut i = 0;
while i < len {
let mark = i;
let byte1 = unsafe { *input.get_unchecked(i) };
i += 1;
if byte1 & 0x80 == 0 {
if mode == MODE_BORROW {
continue;
}
data.push(byte1);
} else if byte1 & 0xE0 == 0xC0 {
let byte2 = *input.get(i).ok_or(Error::EndOfInput(Mode::Decoding, Expected::TwoByte, Position::Two))?;
i += 1;
if byte1 != 0xC0 || byte2 != 0x80 {
if mode == MODE_BORROW {
continue;
}
data.push(byte1);
data.push(byte2);
} else {
if mode == MODE_BORROW {
mode = MODE_COPY;
let run = &input[0..mark];
data.extend(run);
}
data.push(0);
}
} else if byte1 & 0xF0 == 0xE0 {
let byte2 = *input.get(i).ok_or(Error::EndOfInput(Mode::Decoding, Expected::ThreeByte, Position::Two))?;
i += 1;
let byte3 = *input.get(i).ok_or(Error::EndOfInput(Mode::Decoding, Expected::ThreeByte, Position::Three))?;
i += 1;
if i + 2 < len && byte1 == 0xED && byte2 & 0xF0 == 0xA0 {
let byte4 = *input.get(i).ok_or(Error::EndOfInput(Mode::Decoding, Expected::SixByte, Position::Four))?;
let byte5 = *input.get(i + 1).ok_or(Error::EndOfInput(Mode::Decoding, Expected::SixByte, Position::Five))?;
let byte6 = *input.get(i + 2).ok_or(Error::EndOfInput(Mode::Decoding, Expected::SixByte, Position::Six))?;
if byte4 == 0xED && byte5 & 0xF0 == 0xB0 {
i += 2;
let mut bits: u32 = (((byte2 as u32) & 0x0F) + 1) << 16;
bits += ((byte3 as u32) & 0x3F) << 10;
bits += ((byte5 as u32) & 0x0F) << 6;
bits += (byte6 as u32) & 0x3F;
if mode == MODE_BORROW {
mode = MODE_COPY;
let run = &input[0..mark];
data.extend(run);
}
data.push((0xF0 + ((bits >> 18) & 0x07)) as u8);
data.push((0x80 + ((bits >> 12) & 0x3F)) as u8);
data.push((0x80 + ((bits >> 6) & 0x3F)) as u8);
data.push((0x80 + (bits & 0x3F)) as u8);
continue;
}
}
if mode == MODE_BORROW {
continue;
}
data.push(byte1);
data.push(byte2);
data.push(byte3);
}
}
let cow = if mode == MODE_BORROW {
Cow::Borrowed(input)
} else {
Cow::Owned(data)
};
Ok(cow)
}