use crate::error::ErrorKind::Io;
use crate::error::{Error, Result};
use cesu8::from_java_cesu8;
use num_traits::Num;
use std::array::TryFromSliceError;
use std::io;
use std::io::ErrorKind::{InvalidData, InvalidInput};
use std::mem::size_of;
use std::result::Result as StdResult;
/// Reads a big-endian integer of type `T` from `slice`, starting at `*start_from`.
///
/// Exactly `size_of::<T>()` bytes are consumed, most significant byte first.
/// On success the cursor `start_from` is advanced past the bytes that were
/// read; on failure it is left untouched.
///
/// # Errors
///
/// Returns an `InvalidInput` I/O error when `slice` does not hold
/// `size_of::<T>()` bytes at `*start_from`. This includes the case where
/// `*start_from + size_of::<T>()` does not fit in a `usize`, which is reported
/// as an error rather than overflowing.
pub fn get_int<T>(slice: &[u8], start_from: &mut usize) -> Result<T>
where
    T: Num + From<u8> + std::ops::Shl<Output = T> + std::ops::BitOr<Output = T> + Copy,
{
    let size = size_of::<T>();
    let begin = *start_from;
    // Captures only shared references, so the closure is `Copy` and can be
    // handed to both fallible steps below.
    let out_of_range = || {
        Error::new_io(
            InvalidInput,
            format!(
                "overflow : attempt to read from {} whereas len is {}",
                begin,
                slice.len()
            )
            .as_str(),
        )
    };
    // `checked_add` keeps a cursor close to `usize::MAX` from overflowing
    // while the end of the range is computed.
    let sub_slice = begin
        .checked_add(size)
        .and_then(|end| slice.get(begin..end))
        .ok_or_else(out_of_range)?;
    // `reduce` seeds the accumulator with the first byte, so a one-byte `T`
    // is never shifted by its full width. An empty window (zero-sized `T`)
    // becomes an error instead of an indexing panic.
    let value = sub_slice
        .iter()
        .map(|&byte| T::from(byte))
        .reduce(|acc, byte| (acc << T::from(8u8)) | byte)
        .ok_or_else(out_of_range)?;
    *start_from += size;
    Ok(value)
}
/// Returns the `size` bytes of `slice` that start at `*start_from` and moves
/// the cursor past them.
///
/// The bounds check is delegated to [`read_byte_block`]; when it fails the
/// error is propagated and `start_from` is not modified.
pub fn get_bytes<'a>(slice: &'a [u8], start_from: &mut usize, size: usize) -> Result<&'a [u8]> {
    let block = read_byte_block(slice, *start_from, size)?;
    // Only advance once the read is known to be in bounds.
    *start_from += size;
    Ok(block)
}
/// Borrows the `size` bytes of `slice` beginning at index `start_from`.
///
/// Unlike [`get_bytes`], this does not track a cursor; the caller supplies
/// the absolute offset.
///
/// # Errors
///
/// Returns an `InvalidInput` I/O error when the requested range is not fully
/// contained in `slice`. A `start_from + size` that would overflow `usize` is
/// treated as out of bounds as well, instead of panicking or wrapping around.
pub fn read_byte_block(slice: &[u8], start_from: usize, size: usize) -> Result<&[u8]> {
    start_from
        .checked_add(size)
        .and_then(|end| slice.get(start_from..end))
        .ok_or_else(|| {
            Error::new_io(
                InvalidInput,
                format!(
                    "Index out of bounds: {} of {}",
                    // Saturate so that building the message cannot overflow either.
                    start_from.saturating_add(size),
                    slice.len()
                )
                .as_str(),
            )
        })
}
/// Reads a 16-bit value with [`get_int`] and converts it into the flag set `T`.
///
/// The conversion uses `from_bits_truncate`, so the raw value is never
/// rejected because of its bit pattern. Any error from [`get_int`] is
/// returned unchanged.
pub fn get_bitfield<T>(slice: &[u8], start_from: &mut usize) -> Result<T>
where
    T: bitflags::Flags<Bits = u16>,
{
    get_int::<u16>(slice, start_from).map(T::from_bits_truncate)
}
/// Construction of a value from a big-endian byte slice whose length is only
/// known at run time.
pub trait FromBeBytes: Sized {
    /// Builds `Self` from `bytes`.
    ///
    /// # Errors
    ///
    /// Returns [`TryFromSliceError`] when `bytes` cannot be converted into the
    /// fixed-size array the implementation requires.
    fn from_be_bytes(bytes: &[u8]) -> StdResult<Self, TryFromSliceError>;
}

impl FromBeBytes for f32 {
    /// Converts `bytes` into a 4-byte array and feeds it to the inherent
    /// `f32::from_be_bytes`.
    fn from_be_bytes(bytes: &[u8]) -> StdResult<Self, TryFromSliceError> {
        bytes.try_into().map(f32::from_be_bytes)
    }
}

impl FromBeBytes for f64 {
    /// Converts `bytes` into an 8-byte array and feeds it to the inherent
    /// `f64::from_be_bytes`.
    fn from_be_bytes(bytes: &[u8]) -> StdResult<Self, TryFromSliceError> {
        bytes.try_into().map(f64::from_be_bytes)
    }
}
pub fn get_float<T>(data: &[u8], mut start_from: &mut usize) -> Result<T>
where
T: FromBeBytes + Sized,
{
let size = size_of::<T>();
let bytes = get_bytes(&data, &mut start_from, size)?;
T::from_be_bytes(bytes).map_err(|e| Error::new(Io(io::Error::new(InvalidData, e))))
}
/// Reads a length-prefixed string from `data` at `*start_from`.
///
/// The prefix is a big-endian `u16` read with [`get_int`]; that many bytes
/// are then taken with [`get_bytes`]. The payload is decoded with
/// `cesu8::from_java_cesu8`. If that decoder rejects the bytes, the result of
/// [`from_java_cesu8_lossy`] is returned instead, so a decoding problem never
/// surfaces as an error.
///
/// # Errors
///
/// Propagates the error from [`get_int`] or [`get_bytes`] when the prefix or
/// the payload extends past the end of `data`.
pub fn get_string(data: &&[u8], start_from: &mut usize) -> Result<String> {
    let input: &[u8] = *data;
    let length: u16 = get_int(input, start_from)?;
    let mutf8_bytes = get_bytes(input, start_from, usize::from(length))?;
    let text = match from_java_cesu8(mutf8_bytes) {
        Ok(strict) => strict.into_owned(),
        Err(_) => from_java_cesu8_lossy(mutf8_bytes),
    };
    Ok(text)
}
/// Decodes Java-style CESU-8 bytes into a `String` without ever failing.
///
/// The input is consumed front to back, one unit at a time:
///
/// * a six-byte surrogate pair (`ED A0..AF xx ED B0..BF xx`) becomes one
///   supplementary-plane character;
/// * a complete three-byte or two-byte sequence becomes the code point it
///   encodes (so `C0 80` yields U+0000), or `'?'` when that code point is not
///   a valid `char`, e.g. a lone surrogate;
/// * a three-byte lead followed by just one continuation byte is consumed as
///   a pair and replaced by a single `'?'`;
/// * a byte in `00..=7F` is copied through as ASCII;
/// * any other byte is replaced by `'?'` on its own.
fn from_java_cesu8_lossy(bytes: &[u8]) -> String {
    const REPLACEMENT: char = '?';

    // Low six payload bits carried by a continuation byte.
    fn payload(byte: u8) -> u32 {
        u32::from(byte & 0x3F)
    }

    // Turns a decoded code point into a `char`, substituting the replacement
    // when the value is not a Unicode scalar value.
    fn to_char(code: u32) -> char {
        char::from_u32(code).unwrap_or(REPLACEMENT)
    }

    let mut decoded = String::with_capacity(bytes.len());
    let mut rest = bytes;
    loop {
        // Arm order matters: the longest, most specific shapes come first.
        let (ch, consumed) = match *rest {
            [] => break,
            [0xED, hi_mid @ 0xA0..=0xAF, hi_end @ 0x80..=0xBF, 0xED, lo_mid @ 0xB0..=0xBF, lo_end @ 0x80..=0xBF, ..] =>
            {
                // The lead byte 0xED always contributes the nibble 0xD.
                let high = 0xD000 | (payload(hi_mid) << 6) | payload(hi_end);
                let low = 0xD000 | (payload(lo_mid) << 6) | payload(lo_end);
                let code = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00);
                (to_char(code), 6)
            }
            [lead @ 0xE0..=0xEF, mid @ 0x80..=0xBF, end @ 0x80..=0xBF, ..] => {
                let code = (u32::from(lead & 0x0F) << 12) | (payload(mid) << 6) | payload(end);
                (to_char(code), 3)
            }
            // Three-byte lead with only one continuation byte after it.
            [0xE0..=0xEF, 0x80..=0xBF, ..] => (REPLACEMENT, 2),
            [lead @ 0xC0..=0xDF, end @ 0x80..=0xBF, ..] => {
                let code = (u32::from(lead & 0x1F) << 6) | payload(end);
                (to_char(code), 2)
            }
            [ascii @ 0x00..=0x7F, ..] => (char::from(ascii), 1),
            [_, ..] => (REPLACEMENT, 1),
        };
        decoded.push(ch);
        rest = &rest[consumed..];
    }
    decoded
}
#[cfg(test)]
mod tests {
    use super::*;
    use bitflags::bitflags;

    bitflags! {
        // Minimal flag set: only the two lowest bits are defined.
        #[derive(Debug, PartialEq)]
        pub struct Flags: u16 {
            const FLAG1 = 0x01;
            const FLAG2 = 0x02;
        }
    }

    /// Decodes `raw` as a `Flags` bitfield and checks that the result is
    /// exactly `FLAG1 | FLAG2`.
    fn assert_both_flags(raw: [u8; 2]) {
        let mut cursor: usize = 0;
        let result = get_bitfield::<Flags>(&raw, &mut cursor);
        if let Ok(flags) = result {
            assert_eq!(flags, Flags::FLAG1 | Flags::FLAG2);
        } else {
            panic!("Expected Ok, got {:?}", result);
        }
    }

    /// Checks that the lossy decoder turns `bytes` into `expected`.
    fn assert_lossy(bytes: &[u8], expected: &str) {
        assert_eq!(from_java_cesu8_lossy(bytes), expected);
    }

    #[test]
    fn test_ignore_unmatched_bit() {
        // Bit 2 is set in the input but has no flag defined for it.
        assert_both_flags([0b0000_0000, 0b0000_0111]);
    }

    #[test]
    fn test_all_matched_bits() {
        assert_both_flags([0b0000_0000, 0b0000_0011]);
    }

    #[test]
    fn test_ascii() {
        assert_lossy(b"Hello, World!", "Hello, World!");
    }

    #[test]
    fn test_java_null_byte() {
        // Two-byte encoding of U+0000, then a raw zero byte.
        assert_lossy(&[0xC0, 0x80], "\0");
        assert_lossy(&[0x00], "\0");
    }

    #[test]
    fn test_bmp_multibyte() {
        assert_lossy(&[0xD1, 0x97], "ї");
        assert_lossy(&[0xE2, 0x82, 0xAC], "€");
    }

    #[test]
    fn test_cesu8_emojis() {
        // Each emoji is encoded as a six-byte surrogate pair.
        assert_lossy(&[0xED, 0xA0, 0xBE, 0xED, 0xB6, 0x80], "🦀");
        assert_lossy(&[0xED, 0xA0, 0xBE, 0xED, 0xBA, 0x80], "🪀");
    }

    #[test]
    fn test_unpaired_surrogates_lossy() {
        // A high surrogate alone, a low surrogate alone, then a high
        // surrogate followed by an ASCII byte.
        assert_lossy(&[0xED, 0xA0, 0xBE], "?");
        assert_lossy(&[0xED, 0xBA, 0x80], "?");
        assert_lossy(&[0xED, 0xA0, 0xBE, 0x41], "?A");
    }

    #[test]
    fn test_truncated_sequences() {
        assert_lossy(&[0xC2], "?");
        assert_lossy(&[0xE2, 0x82], "?");
        assert_lossy(&[0xED, 0xA0, 0xBE, 0xED], "??");
    }

    #[test]
    fn test_invalid_bytes() {
        assert_lossy(&[0xFF, 0xFE], "??");
        assert_lossy(&[0xC2, 0x31], "?1");
    }

    #[test]
    fn test_empty_input() {
        assert_lossy(&[], "");
    }

    #[test]
    fn test_complex_mix() {
        // 'A', two-byte NUL, a two-byte BMP char, a surrogate pair, one invalid byte.
        let input: [u8; 12] = [
            0x41, 0xC0, 0x80, 0xD1, 0x97, 0xED, 0xA0, 0xBE, 0xED, 0xB6, 0x80, 0xFF,
        ];
        assert_lossy(&input, "A\0ї🦀?");
    }
}