use bumpalo::Bump;
use bumpalo::collections::String as BumpString;
/// Reasons the UTF-16LE helpers in this file can fail to decode a byte buffer.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Utf16LeDecodeError {
/// The byte buffer has an odd length, so it cannot be split into 2-byte
/// UTF-16 code units.
OddLength,
/// The code units do not form valid UTF-16 (for example an unpaired
/// surrogate), or a requested code unit lies outside the available data.
InvalidData,
}
/// A borrowed view over UTF-16LE encoded bytes, paired with a declared length
/// counted in 2-byte code units.
///
/// Nothing is validated on construction; decoding (and its error reporting)
/// happens in [`Utf16LeSlice::to_string`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Utf16LeSlice<'a> {
    /// Raw little-endian bytes backing the view.
    bytes: &'a [u8],
    /// Declared number of 2-byte units; may exceed what `bytes` actually holds.
    num_chars: usize,
}

impl<'a> Utf16LeSlice<'a> {
    /// Wraps `bytes` together with the declared unit count `num_chars`.
    pub fn new(bytes: &'a [u8], num_chars: usize) -> Self {
        Self { bytes, num_chars }
    }

    /// Returns a view over no bytes with a unit count of zero.
    pub fn empty() -> Self {
        Self::new(&[], 0)
    }

    /// Returns the leading `2 * num_chars` bytes of the buffer, or the whole
    /// buffer when it is shorter than that.
    pub fn as_bytes(&self) -> &'a [u8] {
        // Saturate so an absurdly large `num_chars` cannot overflow the byte count.
        let wanted = self.num_chars.saturating_mul(2);
        // `get` yields `None` exactly when `wanted` exceeds the buffer length.
        self.bytes.get(..wanted).unwrap_or(self.bytes)
    }

    /// Returns the declared number of 2-byte units, exactly as given to `new`.
    pub fn num_chars(&self) -> usize {
        self.num_chars
    }

    /// Returns `true` when the declared unit count is zero or the buffer holds
    /// no bytes.
    pub fn is_empty(&self) -> bool {
        // The clamped view is empty precisely when either of those is true.
        self.as_bytes().is_empty()
    }

    /// Decodes the bytes returned by [`Utf16LeSlice::as_bytes`] into an owned
    /// `String`.
    ///
    /// # Errors
    ///
    /// Returns [`Utf16LeDecodeError::OddLength`] when that byte view has an odd
    /// length and [`Utf16LeDecodeError::InvalidData`] when it is not valid
    /// UTF-16.
    pub fn to_string(&self) -> Result<String, Utf16LeDecodeError> {
        let view = self.as_bytes();
        decode_utf16le_bytes(view)
    }
}
/// Decodes an entire UTF-16LE byte buffer into an owned `String`.
///
/// Embedded NUL code units are kept as ordinary characters.
///
/// # Errors
///
/// * [`Utf16LeDecodeError::OddLength`] when `bytes` cannot be split evenly into
///   2-byte code units.
/// * [`Utf16LeDecodeError::InvalidData`] when the code units are not valid
///   UTF-16 (e.g. an unpaired surrogate).
pub(crate) fn decode_utf16le_bytes(bytes: &[u8]) -> Result<String, Utf16LeDecodeError> {
    if !bytes.len().is_multiple_of(2) {
        return Err(Utf16LeDecodeError::OddLength);
    }

    // Assemble little-endian code units lazily and let the standard decoder
    // pair up surrogates; the first malformed unit aborts the collection.
    let code_units = bytes
        .chunks_exact(2)
        .map(|pair| u16::from_le_bytes([pair[0], pair[1]]));

    char::decode_utf16(code_units)
        .collect::<Result<String, _>>()
        .map_err(|_| Utf16LeDecodeError::InvalidData)
}
/// Decodes at most `num_chars` UTF-16LE code units from `bytes` into a string
/// allocated in `bump`, stopping early at the first NUL character.
///
/// The scan is limited to `min(num_chars, bytes.len() / 2)` code units. A
/// surrogate pair counts as two units, so a pair that straddles that limit is
/// reported as invalid rather than silently truncated.
///
/// An empty `bytes` slice or a `num_chars` of zero yields `""` before the
/// length parity is examined, so that combination never reports `OddLength`.
///
/// # Errors
///
/// * [`Utf16LeDecodeError::OddLength`] when `bytes` is non-empty, `num_chars`
///   is non-zero and `bytes.len()` is odd.
/// * [`Utf16LeDecodeError::InvalidData`] when a code unit inside the scanned
///   range is not valid UTF-16.
// NOTE(review): the `dead_code` allowance presumably covers build
// configurations in which no caller is compiled in — confirm.
#[allow(dead_code)]
pub(crate) fn decode_utf16le_bytes_to_bump_str<'a>(
    bytes: &[u8],
    num_chars: usize,
    bump: &'a Bump,
) -> Result<&'a str, Utf16LeDecodeError> {
    if bytes.is_empty() || num_chars == 0 {
        return Ok("");
    }
    if !bytes.len().is_multiple_of(2) {
        return Err(Utf16LeDecodeError::OddLength);
    }

    // `bytes` is non-empty with an even length (so it holds at least one unit)
    // and `num_chars` is at least 1, hence `limit >= 1`; no separate zero
    // check is needed here.
    let limit = num_chars.min(bytes.len() / 2);

    // One byte per unit is exact for ASCII text; the string grows on demand
    // for anything wider.
    let mut out = BumpString::with_capacity_in(limit, bump);
    let mut unit_index = 0usize;
    while unit_index < limit {
        let (ch, consumed) = decode_utf16le_at_unit(bytes, unit_index, limit)?;
        if ch == '\u{0}' {
            // NUL terminates the string; later units are never inspected.
            break;
        }
        out.push(ch);
        unit_index += consumed;
    }
    Ok(out.into_bump_str())
}
/// Decodes a UTF-16LE byte buffer up to (not including) its first NUL code
/// unit, or the whole buffer when no NUL is present.
///
/// # Errors
///
/// * [`Utf16LeDecodeError::OddLength`] when `bytes` has an odd length.
/// * [`Utf16LeDecodeError::InvalidData`] when the units before the terminator
///   are not valid UTF-16.
#[cfg(feature = "wevt_templates")]
pub(crate) fn decode_utf16le_bytes_z(bytes: &[u8]) -> Result<String, Utf16LeDecodeError> {
    if bytes.len().is_multiple_of(2) {
        // Materialise the code units so the NUL search can run on `u16`s.
        let code_units: Vec<u16> = bytes
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .collect();
        decode_utf16_units_z(&code_units)
    } else {
        Err(Utf16LeDecodeError::OddLength)
    }
}
/// Decodes UTF-16 code units up to (not including) the first zero unit, or all
/// of `units` when none is zero.
///
/// Units after the terminator are ignored entirely, so they cannot cause an
/// error.
///
/// # Errors
///
/// Returns [`Utf16LeDecodeError::InvalidData`] when the units before the
/// terminator are not valid UTF-16.
#[cfg(feature = "wevt_templates")]
pub(crate) fn decode_utf16_units_z(units: &[u16]) -> Result<String, Utf16LeDecodeError> {
    let text = match units.iter().position(|&unit| unit == 0) {
        Some(nul_at) => &units[..nul_at],
        None => units,
    };
    match String::from_utf16(text) {
        Ok(decoded) => Ok(decoded),
        Err(_) => Err(Utf16LeDecodeError::InvalidData),
    }
}
/// Computes the length, in UTF-16 code units, of the text that remains once
/// trailing whitespace is dropped.
///
/// At most `min(num_chars, bytes.len() / 2)` units are scanned, and the scan
/// ends at the first NUL character. The result is the unit offset just past
/// the last non-whitespace character seen, or `0` when there is none. Leading
/// whitespace is not removed: only a length is returned.
///
/// # Errors
///
/// * [`Utf16LeDecodeError::OddLength`] when `bytes` has an odd length.
/// * [`Utf16LeDecodeError::InvalidData`] when a scanned code unit is not valid
///   UTF-16 (including a surrogate pair cut off by the scan limit).
pub(crate) fn trim_utf16le_whitespace(
    bytes: &[u8],
    num_chars: usize,
) -> Result<usize, Utf16LeDecodeError> {
    if !bytes.len().is_multiple_of(2) {
        return Err(Utf16LeDecodeError::OddLength);
    }

    let limit = (bytes.len() / 2).min(num_chars);
    let mut cursor = 0usize;
    // Offset just past the most recent non-whitespace character; stays 0 until
    // one is seen, which also covers the "nothing to scan" case.
    let mut kept_units = 0usize;

    while cursor < limit {
        match decode_utf16le_at_unit(bytes, cursor, limit)? {
            // NUL terminates the text.
            ('\u{0}', _) => break,
            (ch, width) => {
                cursor += width;
                if !ch.is_whitespace() {
                    kept_units = cursor;
                }
            }
        }
    }

    Ok(kept_units)
}
/// Decodes the character that starts at code unit `unit_index` of the
/// UTF-16LE buffer `bytes`.
///
/// `max_units` is an exclusive upper bound on the unit indices that may be
/// read. On success the character is returned together with the number of
/// code units it occupies, as reported by `decode_utf16le_unit_value`.
///
/// # Errors
///
/// Returns [`Utf16LeDecodeError::InvalidData`] when `unit_index` is not below
/// `max_units`, when the two bytes of the unit are not both present in
/// `bytes` (including an index so large that its byte offset does not fit in
/// `usize`), or when the unit does not begin a valid character.
fn decode_utf16le_at_unit(
    bytes: &[u8],
    unit_index: usize,
    max_units: usize,
) -> Result<(char, usize), Utf16LeDecodeError> {
    if unit_index >= max_units {
        return Err(Utf16LeDecodeError::InvalidData);
    }

    // Checked multiply: a huge index must be rejected, not wrap around to a
    // small byte offset (release) or panic (debug).
    let start = unit_index
        .checked_mul(2)
        .ok_or(Utf16LeDecodeError::InvalidData)?;

    // Both bytes of the unit have to exist; the slice pattern checks that in
    // one step without computing `start + 1`.
    let cu = match bytes.get(start..) {
        Some([lo, hi, ..]) => u16::from_le_bytes([*lo, *hi]),
        _ => return Err(Utf16LeDecodeError::InvalidData),
    };

    decode_utf16le_unit_value(bytes, cu, unit_index, max_units)
}
/// Turns the code unit `cu`, located at `unit_index` in the UTF-16LE buffer
/// `bytes`, into a character, reading the following unit when `cu` is a high
/// surrogate.
///
/// `max_units` is an exclusive upper bound on the unit indices that may be
/// read. Returns the character and the number of code units it occupies:
/// `1` for a non-surrogate unit, `2` for a surrogate pair.
///
/// # Errors
///
/// Returns [`Utf16LeDecodeError::InvalidData`] when
/// * `cu` is a low surrogate (`0xDC00..=0xDFFF`) with no preceding high
///   surrogate,
/// * `cu` is a high surrogate (`0xD800..=0xDBFF`) and the next unit is at or
///   beyond `max_units`, is not fully present in `bytes`, or is not a low
///   surrogate.
fn decode_utf16le_unit_value(
    bytes: &[u8],
    cu: u16,
    unit_index: usize,
    max_units: usize,
) -> Result<(char, usize), Utf16LeDecodeError> {
    const INVALID: Utf16LeDecodeError = Utf16LeDecodeError::InvalidData;

    match cu {
        0xD800..=0xDBFF => {
            // Checked arithmetic: an index near `usize::MAX` must surface as
            // invalid data instead of overflowing.
            let next_index = unit_index
                .checked_add(1)
                .filter(|&next| next < max_units)
                .ok_or(INVALID)?;
            let start = next_index.checked_mul(2).ok_or(INVALID)?;

            // Both bytes of the trailing unit have to be present.
            let cu2 = match bytes.get(start..) {
                Some([lo, hi, ..]) => u16::from_le_bytes([*lo, *hi]),
                _ => return Err(INVALID),
            };

            // The standard decoder accepts the pair only when `cu2` is a low
            // surrogate; anything else is reported as an error on the first
            // item.
            match char::decode_utf16([cu, cu2]).next() {
                Some(Ok(ch)) => Ok((ch, 2)),
                _ => Err(INVALID),
            }
        }
        // A low surrogate may only appear as the second half of a pair.
        0xDC00..=0xDFFF => Err(INVALID),
        _ => char::from_u32(u32::from(cu))
            .map(|ch| (ch, 1))
            .ok_or(INVALID),
    }
}