abxml 0.8.2

Parser for resources.arsc contained on APKs and binary XML decompresser.
Documentation
use std::{
    cell::RefCell,
    collections::{
        hash_map::Entry::{Occupied, Vacant},
        HashMap,
    },
    io::Cursor,
    rc::Rc,
};

use byteorder::{LittleEndian, ReadBytesExt};
use encoding::codec::{utf_16, utf_8};
use failure::{ensure, format_err, Error};

use crate::model::{
    owned::{Encoding as EncodingType, StringTableBuf},
    StringTable,
};

#[derive(Debug)]
pub struct StringTableWrapper<'a> {
    raw_data: &'a [u8],
}

impl<'a> StringTableWrapper<'a> {
    pub fn new(raw_data: &'a [u8]) -> Self {
        Self { raw_data }
    }

    pub fn get_flags(&self) -> u32 {
        let mut cursor = Cursor::new(self.raw_data);
        cursor.set_position(16);

        cursor.read_u32::<LittleEndian>().unwrap_or(0)
    }

    pub fn to_buffer(&self) -> Result<StringTableBuf, Error> {
        let mut owned = StringTableBuf::default();

        if !self.is_utf8() {
            owned.set_encoding(EncodingType::Utf16);
        }

        for i in 0..self.get_strings_len() {
            let string = &*self.get_string(i)?;
            owned.add_string(string.clone());
        }

        Ok(owned)
    }

    fn get_string_position(&self, idx: u32) -> Result<u64, Error> {
        let mut cursor = Cursor::new(self.raw_data);
        cursor.set_position(20);
        let str_offset = cursor.read_u32::<LittleEndian>()?;

        cursor.set_position(28);

        let mut position = str_offset;
        let mut max_offset = 0;

        for _ in 0..=idx {
            let current_offset = cursor.read_u32::<LittleEndian>()?;
            position = str_offset.wrapping_add(current_offset);

            if current_offset > max_offset {
                max_offset = current_offset
            }
        }

        Ok(u64::from(position))
    }

    fn parse_string(&self, offset: u32) -> Result<String, Error> {
        let mut cursor = Cursor::new(self.raw_data);
        cursor.set_position(u64::from(offset));

        if self.is_utf8() {
            let mut ini_offset = offset;
            let v = u32::from(cursor.read_u8()?);
            if v == 0x80 {
                ini_offset += 2;
                cursor.read_u8()?;
            } else {
                ini_offset += 1;
            }

            let v = u32::from(cursor.read_u8()?);
            if v == 0x80 {
                ini_offset += 2;
                cursor.read_u8()?;
            } else {
                ini_offset += 1;
            }

            let mut length = 0;

            loop {
                let v = cursor.read_u8()?;

                if v == 0 {
                    break;
                } else {
                    length += 1;
                }
            }

            let a = ini_offset;
            let b = ini_offset + length;

            ensure!(
                a <= self.raw_data.len() as u32 && b <= self.raw_data.len() as u32 && a <= b,
                "sub-slice out of raw_data range"
            );

            let subslice: &[u8] = &self.raw_data[a as usize..b as usize];

            let mut decoder = utf_8::UTF8Decoder::new();
            let mut o = String::new();
            decoder.raw_feed(subslice, &mut o);
            let decode_error = decoder.raw_finish(&mut o);

            if decode_error.is_none() {
                Ok(o)
            } else {
                Err(format_err!("error decoding UTF8 string"))
            }
        } else {
            let size1 = u32::from(cursor.read_u8()?);
            let size2 = u32::from(cursor.read_u8()?);

            let val = ((size2 & 0xFF) << 8) | size1 & 0xFF;

            let a = offset + 2;
            let b = a + val * 2;

            ensure!(
                a <= self.raw_data.len() as u32 && b <= self.raw_data.len() as u32 && a <= b,
                "sub-slice out of raw_data range"
            );

            let subslice: &[u8] = &self.raw_data[a as usize..b as usize];

            let mut decoder = utf_16::UTF16Decoder::<utf_16::Little>::new();
            let mut o = String::new();
            decoder.raw_feed(subslice, &mut o);
            let decode_error = decoder.raw_finish(&mut o);

            match decode_error {
                None => Ok(o),
                Some(_) => Err(format_err!("error decoding UTF16 string")),
            }
        }
    }

    fn is_utf8(&self) -> bool {
        (self.get_flags() & 0x00000100) == 0x00000100
    }
}

impl<'a> StringTable for StringTableWrapper<'a> {
    fn get_strings_len(&self) -> u32 {
        let mut cursor = Cursor::new(self.raw_data);
        cursor.set_position(8);

        cursor.read_u32::<LittleEndian>().unwrap_or(0)
    }

    fn get_styles_len(&self) -> u32 {
        let mut cursor = Cursor::new(self.raw_data);
        cursor.set_position(12);

        cursor.read_u32::<LittleEndian>().unwrap_or(0)
    }

    fn get_string(&self, idx: u32) -> Result<Rc<String>, Error> {
        ensure!(idx <= self.get_strings_len(), "index out of bounds");

        let string = self
            .get_string_position(idx)
            .and_then(|position| self.parse_string(position as u32))?;

        Ok(Rc::new(string))
    }
}

#[derive(Debug)]
pub struct StringTableCache<S: StringTable> {
    inner: S,
    cache: RefCell<HashMap<u32, Rc<String>>>,
}

impl<S: StringTable> StringTableCache<S> {
    pub fn new(inner: S) -> Self {
        Self {
            inner,
            cache: RefCell::new(HashMap::new()),
        }
    }
}

impl<S: StringTable> StringTable for StringTableCache<S> {
    fn get_strings_len(&self) -> u32 {
        self.inner.get_strings_len()
    }

    fn get_styles_len(&self) -> u32 {
        self.inner.get_styles_len()
    }

    fn get_string(&self, idx: u32) -> Result<Rc<String>, Error> {
        let mut cache = self.cache.borrow_mut();
        let entry = cache.entry(idx);

        match entry {
            Vacant(entry) => {
                let string_ref = self.inner.get_string(idx)?;
                entry.insert(string_ref.clone());

                Ok(string_ref)
            }
            Occupied(entry) => Ok(entry.get().clone()),
        }
    }
}