use std::rc::Rc;
use nom::bytes::complete::take;
use nom::combinator::iterator;
use nom::combinator::{cond, map, map_res, verify};
use nom::error::ErrorKind;
use nom::number::complete::{be_u16, be_u32, le_u16, le_u32, u8};
use nom::{Err, IResult, Parser};
use protobuf::{EnumOrUnknown, MessageField};
use crate::modules::protos;
use crate::modules::utils::leb128::uleb128;
/// Error type used by the parsers in this file: nom's default error type,
/// specialised to byte-slice input.
type Error<'a> = nom::error::Error<&'a [u8]>;
/// Parsed contents of a DEX file, as produced by [`Dex::parse`].
#[derive(Default)]
pub struct Dex {
/// Fixed-size header read from the start of the input.
header: DexHeader,
/// Strings decoded from the string table. Entries whose data could not be
/// located or decoded are omitted.
strings: Vec<Rc<String>>,
/// Strings referenced by the type table. Entries whose string index is out
/// of range are omitted.
types: Vec<Rc<String>>,
/// Method prototypes, with their string/type references resolved.
protos: Vec<Rc<ProtoItem>>,
/// Field identifiers, with their string/type references resolved.
fields: Vec<FieldItem>,
/// Method identifiers, with their string/type/prototype references resolved.
methods: Vec<MethodItem>,
/// Class definitions, with their string/type references resolved.
class_defs: Vec<ClassItem>,
/// Map list found at `header.map_off`; `None` when it could not be read.
map_list: Option<MapList>,
}
impl Dex {
/// One of the two values accepted for the header's `endian_tag` field.
const ENDIAN_CONSTANT: u32 = 0x12345678;
/// The other accepted `endian_tag` value: `ENDIAN_CONSTANT` with its bytes
/// reversed.
const REVERSE_ENDIAN_CONSTANT: u32 = 0x78563412;
/// Sentinel index meaning "no value"; class definitions use it for an
/// absent superclass or source file.
const NO_INDEX: u32 = 0xffffffff;
/// Parses a DEX file from `data`.
///
/// The header is read first; the string, type, prototype, field, method and
/// class-definition tables are then read back to back, each one starting
/// where the previous one stopped. The map list is located through
/// `header.map_off` and is optional.
///
/// # Errors
///
/// Returns the nom error raised by the header parser or by any of the table
/// parsers. A map list that cannot be read is not an error; it is simply
/// left as `None`.
pub fn parse<'a>(data: &'a [u8]) -> Result<Self, Err<Error<'a>>> {
    // `rest` always holds the input that follows the most recently parsed
    // section.
    let (rest, header) = Self::parse_dex_header(data)?;
    let (rest, strings) = Self::parse_strings(rest, data, &header)?;
    let (rest, types) = Self::parse_types(rest, &header, &strings)?;
    let (rest, protos) =
        Self::parse_proto_ids(rest, data, &header, &strings, &types)?;
    let (rest, fields) = Self::parse_fields(rest, &header, &strings, &types)?;
    let (rest, methods) =
        Self::parse_methods(rest, &header, &strings, &types, &protos)?;
    // Nothing is parsed sequentially after the class definitions, so the
    // leftover input is discarded.
    let (_, class_defs) =
        Self::parse_class_defs(rest, &header, &strings, &types)?;

    let map_list = Self::parse_map_items(data, &header);

    Ok(Self {
        header,
        strings,
        types,
        protos,
        fields,
        methods,
        class_defs,
        map_list,
    })
}
fn parse_dex_header(data: &[u8]) -> IResult<&[u8], DexHeader> {
let (mut remainder, (magic, _, version, _)) = (
verify(be_u32, |magic| *magic == 0x6465780A),
verify(u8, |b| *b == 0x30),
map_res(be_u16, DexVersion::try_from),
verify(u8, |b| *b == 0x00),
)
.parse(data)?;
let mut header = DexHeader { magic, version, ..DexHeader::default() };
let file_size = data.len() as u32;
(
remainder,
(
header.checksum,
header.signature,
header.file_size,
header.header_size,
header.endian_tag,
header.link_size,
header.link_off,
header.map_off,
header.string_ids_size,
header.string_ids_off,
header.type_ids_size,
header.type_ids_off,
),
) = (
le_u32, map(take(20_u8), |v: &[u8]| {
v.iter().map(|b| format!("{b:02x}")).collect()
}), verify(le_u32, |&size| size <= file_size), verify(le_u32, |&size| size == 0x70), verify(le_u32, |&tag| {
tag == Self::ENDIAN_CONSTANT
|| tag == Self::REVERSE_ENDIAN_CONSTANT
}), le_u32, verify(le_u32, |&offset| offset <= file_size), verify(le_u32, |&offset| offset <= file_size), le_u32, verify(le_u32, |&offset| offset <= file_size), verify(le_u32, |&size| size <= u16::MAX.into()), verify(le_u32, |&offset| offset <= file_size), )
.parse(remainder)?;
(
remainder,
(
header.proto_ids_size,
header.proto_ids_off,
header.field_ids_size,
header.field_ids_off,
header.method_ids_size,
header.method_ids_off,
header.class_defs_size,
header.class_defs_off,
header.data_size,
header.data_off,
header.container_size,
header.header_offset,
),
) = (
verify(le_u32, |&size| size <= u16::MAX.into()), verify(le_u32, |&offset| offset <= file_size), le_u32, verify(le_u32, |&offset| offset <= file_size), le_u32, verify(le_u32, |&offset| offset <= file_size), le_u32, verify(le_u32, |&offset| offset <= file_size), le_u32, verify(le_u32, |&offset| offset <= file_size), cond(header.version >= DexVersion::DEX41, le_u32), cond(header.version >= DexVersion::DEX41, le_u32), )
.parse(remainder)?;
Ok((remainder, header))
}
/// Reads the string-id table from `remainder` and decodes each string.
///
/// Every table entry is a little-endian `u32` offset into `data` at which
/// the string data lives. At most `header.string_ids_size` entries are read.
/// When `header.string_ids_off` is zero nothing is consumed and an empty
/// list is returned.
///
/// NOTE(review): entries that cannot be decoded are skipped rather than
/// replaced by a placeholder, so positions in the returned vector can differ
/// from the on-disk string indexes used by the other tables.
fn parse_strings<'a>(
    remainder: &'a [u8],
    data: &'a [u8],
    header: &DexHeader,
) -> IResult<&'a [u8], Vec<Rc<String>>> {
    if header.string_ids_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut offsets = iterator(remainder, le_u32);
    let mut strings = Vec::new();
    for string_data_off in
        offsets.by_ref().take(header.string_ids_size as usize)
    {
        if let Some(decoded) =
            Self::parse_string_from_offset(data, string_data_off)
        {
            strings.push(Rc::new(decoded));
        }
    }
    let (rest, _) = offsets.finish()?;

    Ok((rest, strings))
}
fn parse_string_from_offset(
data: &[u8],
string_data_offset: u32,
) -> Option<String> {
data.get(string_data_offset as usize..).and_then(|data| {
let (data, utf16_size) = uleb128(data).ok()?;
let (_, bytes) =
take::<usize, &[u8], Error>(utf16_size as usize)(data).ok()?;
match simd_cesu8::mutf8::decode(bytes) {
Ok(v) => Some(v.to_string()),
Err(_) => None,
}
})
}
/// Reads the type-id table from `remainder`.
///
/// Every entry is a little-endian `u32` index into `string_items`; the
/// referenced string is shared (not copied) into the result. At most
/// `header.type_ids_size` entries are read, and entries whose index is out
/// of range are skipped. When `header.type_ids_off` is zero nothing is
/// consumed and an empty list is returned.
fn parse_types<'a>(
    remainder: &'a [u8],
    header: &DexHeader,
    string_items: &[Rc<String>],
) -> IResult<&'a [u8], Vec<Rc<String>>> {
    if header.type_ids_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut descriptor_indexes = iterator(remainder, le_u32);
    let types: Vec<Rc<String>> = descriptor_indexes
        .by_ref()
        .take(header.type_ids_size as usize)
        .filter_map(|descriptor_idx| {
            string_items.get(descriptor_idx as usize)
        })
        .cloned()
        .collect();
    let (rest, _) = descriptor_indexes.finish()?;

    Ok((rest, types))
}
/// Reads the prototype-id table from `remainder`.
///
/// Every entry consists of three little-endian `u32` values: an index into
/// `string_items` (the shorty), an index into `type_items` (the return
/// type) and an offset into `data` of the parameter type list, where zero
/// means "no parameters". At most `header.proto_ids_size` entries are read.
///
/// Entries whose shorty or return type index is out of range are skipped.
/// A parameter list that cannot be read yields an empty parameter list
/// rather than dropping the entry. When `header.proto_ids_off` is zero
/// nothing is consumed and an empty list is returned.
fn parse_proto_ids<'a>(
    remainder: &'a [u8],
    data: &'a [u8],
    header: &DexHeader,
    string_items: &[Rc<String>],
    type_items: &[Rc<String>],
) -> IResult<&'a [u8], Vec<Rc<ProtoItem>>> {
    if header.proto_ids_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut records = iterator(remainder, (le_u32, le_u32, le_u32));
    let mut proto_items = Vec::new();
    for (shorty_idx, return_type_idx, parameters_off) in
        records.by_ref().take(header.proto_ids_size as usize)
    {
        let resolved = (
            string_items.get(shorty_idx as usize),
            type_items.get(return_type_idx as usize),
        );
        if let (Some(shorty), Some(return_type)) = resolved {
            let parameters = match parameters_off {
                0 => Vec::new(),
                list_off => {
                    Self::parse_type_list(data, type_items, list_off)
                        .unwrap_or_default()
                }
            };
            proto_items.push(Rc::new(ProtoItem {
                shorty: Rc::clone(shorty),
                return_type: Rc::clone(return_type),
                parameters_count: parameters.len() as u32,
                parameters,
            }));
        }
    }
    let (rest, _) = records.finish()?;

    Ok((rest, proto_items))
}
fn parse_type_list(
data: &[u8],
type_items: &[Rc<String>],
offset: u32,
) -> Option<Vec<Rc<String>>> {
let remainder = data.get(offset as usize..)?;
let (rem, size) = le_u32::<&[u8], Error>(remainder).ok()?;
let mut it = iterator(rem, le_u32::<&[u8], Error>);
let items = it
.by_ref()
.take(size as usize)
.filter_map(|idx| type_items.get(idx as usize).cloned())
.collect();
let _ = it.finish();
Some(items)
}
/// Reads the field-id table from `remainder`.
///
/// Every entry consists of a little-endian `u16` index into `type_items`
/// (the defining class), a `u16` index into `type_items` (the field type)
/// and a `u32` index into `string_items` (the field name). At most
/// `header.field_ids_size` entries are read; entries with any index out of
/// range are skipped. When `header.field_ids_off` is zero nothing is
/// consumed and an empty list is returned.
fn parse_fields<'a>(
    remainder: &'a [u8],
    header: &DexHeader,
    string_items: &[Rc<String>],
    type_items: &[Rc<String>],
) -> IResult<&'a [u8], Vec<FieldItem>> {
    if header.field_ids_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut records = iterator(remainder, (le_u16, le_u16, le_u32));
    let fields: Vec<FieldItem> = records
        .by_ref()
        .take(header.field_ids_size as usize)
        .filter_map(|(class_idx, type_idx, name_idx)| {
            // Each `?` abandons the entry as soon as one lookup fails.
            Some(FieldItem {
                class: Rc::clone(type_items.get(class_idx as usize)?),
                type_: Rc::clone(type_items.get(type_idx as usize)?),
                name: Rc::clone(string_items.get(name_idx as usize)?),
            })
        })
        .collect();
    let (rest, _) = records.finish()?;

    Ok((rest, fields))
}
/// Reads the method-id table from `remainder`.
///
/// Every entry consists of a little-endian `u16` index into `type_items`
/// (the defining class), a `u16` index into `proto_items` (the prototype)
/// and a `u32` index into `string_items` (the method name). At most
/// `header.method_ids_size` entries are read; entries with any index out of
/// range are skipped. When `header.method_ids_off` is zero nothing is
/// consumed and an empty list is returned.
fn parse_methods<'a>(
    remainder: &'a [u8],
    header: &DexHeader,
    string_items: &[Rc<String>],
    type_items: &[Rc<String>],
    proto_items: &[Rc<ProtoItem>],
) -> IResult<&'a [u8], Vec<MethodItem>> {
    if header.method_ids_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut records = iterator(remainder, (le_u16, le_u16, le_u32));
    let mut methods = Vec::new();
    for (class_idx, proto_idx, name_idx) in
        records.by_ref().take(header.method_ids_size as usize)
    {
        let resolved = (
            type_items.get(class_idx as usize),
            proto_items.get(proto_idx as usize),
            string_items.get(name_idx as usize),
        );
        if let (Some(class), Some(proto), Some(name)) = resolved {
            methods.push(MethodItem {
                class: Rc::clone(class),
                proto: Rc::clone(proto),
                name: Rc::clone(name),
            });
        }
    }
    let (rest, _) = records.finish()?;

    Ok((rest, methods))
}
/// Reads the class-definition table from `remainder`.
///
/// A class definition is eight little-endian `u32` values: `class_idx`,
/// `access_flags`, `superclass_idx`, `interfaces_off`, `source_file_idx`,
/// `annotations_off`, `class_data_off` and `static_values_off`. All eight
/// must be consumed for each entry, even though only four are used here;
/// otherwise every entry after the first would be read from the wrong
/// position.
///
/// At most `header.class_defs_size` entries are read. Entries whose class
/// index is out of range are skipped. The superclass and source file are
/// `None` when their index is [`Self::NO_INDEX`] or out of range. When
/// `header.class_defs_off` is zero nothing is consumed and an empty list is
/// returned.
fn parse_class_defs<'a>(
    remainder: &'a [u8],
    header: &DexHeader,
    string_items: &[Rc<String>],
    type_items: &[Rc<String>],
) -> IResult<&'a [u8], Vec<ClassItem>> {
    if header.class_defs_off == 0 {
        return Ok((remainder, Vec::new()));
    }

    let mut it = iterator(
        remainder,
        (le_u32, le_u32, le_u32, le_u32, le_u32, le_u32, le_u32, le_u32),
    );
    let class_entries = it
        .by_ref()
        .take(header.class_defs_size as usize)
        .filter_map(
            |(
                class_idx,
                access_flags,
                superclass_idx,
                _interfaces_off,
                source_file_idx,
                _annotations_off,
                _class_data_off,
                _static_values_off,
            )| {
                let class = type_items.get(class_idx as usize)?.clone();
                let superclass = if superclass_idx != Self::NO_INDEX {
                    type_items.get(superclass_idx as usize).cloned()
                } else {
                    None
                };
                let source_file = if source_file_idx != Self::NO_INDEX {
                    string_items.get(source_file_idx as usize).cloned()
                } else {
                    None
                };
                Some(ClassItem {
                    class,
                    access_flags,
                    superclass,
                    source_file,
                })
            },
        )
        .collect();
    let (rem, _) = it.finish()?;

    Ok((rem, class_entries))
}
fn parse_map_items(data: &[u8], header: &DexHeader) -> Option<MapList> {
data.get(header.map_off as usize..).and_then(|offset| {
let (items_offset, size) = le_u32::<&[u8], Error>(offset).ok()?;
let mut it = iterator(items_offset, Self::parse_map_item);
let items = it.by_ref().take(size as usize).collect();
let _ = it.finish();
Some(MapList { size, items })
})
}
/// Parses one map item: two little-endian `u16` values (`item_type`,
/// `unused`) followed by two little-endian `u32` values (`size`, `offset`).
///
/// Returns the input that follows the item together with the parsed item.
#[inline]
fn parse_map_item(input: &[u8]) -> IResult<&[u8], MapItem> {
    (le_u16, le_u16, le_u32, le_u32).parse(input).map(
        |(rest, (item_type, unused, size, offset))| {
            (rest, MapItem { item_type, unused, size, offset })
        },
    )
}
}
/// DEX format versions accepted by the header parser.
///
/// Variants are declared in ascending version order, so the derived
/// `PartialOrd` supports "version X or later" comparisons (the header parser
/// relies on this for `DEX41`).
#[derive(Default, Debug, Clone, PartialEq, PartialOrd)]
enum DexVersion {
#[default]
DEX35,
DEX36,
DEX37,
DEX38,
DEX39,
DEX40,
DEX41,
}
impl TryFrom<u16> for DexVersion {
type Error = Error<'static>;
fn try_from(value: u16) -> Result<Self, Self::Error> {
match value {
0x3335 => Ok(DexVersion::DEX35),
0x3336 => Ok(DexVersion::DEX36),
0x3337 => Ok(DexVersion::DEX37),
0x3338 => Ok(DexVersion::DEX38),
0x3339 => Ok(DexVersion::DEX39),
0x3430 => Ok(DexVersion::DEX40),
0x3431 => Ok(DexVersion::DEX41),
_ => Err(Error::new(&[], ErrorKind::Verify)),
}
}
}
impl From<DexVersion> for u32 {
fn from(value: DexVersion) -> Self {
match value {
DexVersion::DEX35 => 35,
DexVersion::DEX36 => 36,
DexVersion::DEX37 => 37,
DexVersion::DEX38 => 38,
DexVersion::DEX39 => 39,
DexVersion::DEX40 => 40,
DexVersion::DEX41 => 41,
}
}
}
/// Header fields of a DEX file, in the order in which the header parser
/// reads them.
///
/// The parser checks `file_size` and every `*_off` field against the length
/// of the input, so none of them exceeds it.
#[derive(Default, Debug, Clone)]
struct DexHeader {
/// First four bytes of the file read as a big-endian value; the parser only
/// accepts `0x6465780A` (`dex\n`).
magic: u32,
/// Format version decoded from the version string that follows the magic.
version: DexVersion,
checksum: u32,
/// The 20 signature bytes rendered as a lowercase hexadecimal string.
signature: String,
file_size: u32,
header_size: u32,
/// Either `Dex::ENDIAN_CONSTANT` or `Dex::REVERSE_ENDIAN_CONSTANT`.
endian_tag: u32,
link_size: u32,
link_off: u32,
/// Offset at which the map list is looked up.
map_off: u32,
string_ids_size: u32,
string_ids_off: u32,
/// The parser rejects values above `u16::MAX`.
type_ids_size: u32,
type_ids_off: u32,
/// The parser rejects values above `u16::MAX`.
proto_ids_size: u32,
proto_ids_off: u32,
field_ids_size: u32,
field_ids_off: u32,
method_ids_size: u32,
method_ids_off: u32,
class_defs_size: u32,
class_defs_off: u32,
data_size: u32,
data_off: u32,
/// Only read when `version` is `DEX41` or later; `None` otherwise.
container_size: Option<u32>,
/// Only read when `version` is `DEX41` or later; `None` otherwise.
header_offset: Option<u32>,
}
/// A method prototype with its string and type references resolved.
#[derive(Debug)]
pub struct ProtoItem {
/// String referenced by the entry's shorty index.
shorty: Rc<String>,
/// Type referenced by the entry's return type index.
return_type: Rc<String>,
/// Number of elements in `parameters`.
parameters_count: u32,
/// Parameter types; empty when the entry has no parameter list or the list
/// could not be read.
parameters: Vec<Rc<String>>,
}
/// A field identifier with its string and type references resolved.
#[derive(Debug)]
pub struct FieldItem {
/// Type referenced by the entry's class index.
class: Rc<String>,
/// Type referenced by the entry's type index.
type_: Rc<String>,
/// String referenced by the entry's name index.
name: Rc<String>,
}
/// A method identifier with its string, type and prototype references
/// resolved.
#[derive(Debug)]
pub struct MethodItem {
/// Type referenced by the entry's class index.
class: Rc<String>,
/// Prototype referenced by the entry's prototype index.
proto: Rc<ProtoItem>,
/// String referenced by the entry's name index.
name: Rc<String>,
}
/// A class definition with its string and type references resolved.
#[derive(Debug)]
pub struct ClassItem {
/// Type referenced by the entry's class index.
class: Rc<String>,
/// Raw access flags value as stored in the entry.
access_flags: u32,
/// Superclass type; `None` when the entry's index is `Dex::NO_INDEX` or out
/// of range.
superclass: Option<Rc<String>>,
/// Source file name; `None` when the entry's index is `Dex::NO_INDEX` or
/// out of range.
source_file: Option<Rc<String>>,
}
/// The map list of a DEX file.
#[derive(Default)]
pub struct MapList {
/// Element count declared in the file; `items` can be shorter when the
/// list ends early.
size: u32,
/// Items that were successfully read.
items: Vec<MapItem>,
}
/// One element of the map list, holding the raw values read from the file.
#[derive(Default)]
pub struct MapItem {
/// Raw item type code; it is converted to the protobuf enum on export.
item_type: u16,
unused: u16,
size: u32,
offset: u32,
}
impl From<Dex> for protos::dex::Dex {
fn from(dex: Dex) -> Self {
let mut result = protos::dex::Dex::new();
result.set_is_dex(true);
result.header = MessageField::some(dex.header.clone().into());
result
.strings
.extend(dex.strings.into_iter().map(|x| x.as_ref().clone()));
result.types.extend(dex.types.into_iter().map(|x| x.as_ref().clone()));
result.protos.extend(
dex.protos
.iter()
.map(|x| protos::dex::ProtoItem::from(x.as_ref())),
);
result
.fields
.extend(dex.fields.iter().map(protos::dex::FieldItem::from));
result
.methods
.extend(dex.methods.iter().map(protos::dex::MethodItem::from));
result
.class_defs
.extend(dex.class_defs.iter().map(protos::dex::ClassItem::from));
if let Some(map_list) = dex.map_list {
result.map_list = MessageField::some(map_list.into());
}
result
}
}
impl From<DexHeader> for protos::dex::DexHeader {
fn from(header: DexHeader) -> Self {
let mut result = protos::dex::DexHeader::new();
result.set_magic(header.magic);
result.set_version(header.version.into());
result.set_checksum(header.checksum);
result.set_signature(header.signature);
result.set_file_size(header.file_size);
result.set_header_size(header.header_size);
result.set_endian_tag(header.endian_tag);
result.set_link_size(header.link_size);
result.set_link_off(header.link_off);
result.container_size = header.container_size;
result.header_offset = header.header_offset;
result
}
}
/// Converts a parsed [`ProtoItem`] into its protobuf representation.
impl From<&ProtoItem> for protos::dex::ProtoItem {
    fn from(value: &ProtoItem) -> Self {
        let mut out = protos::dex::ProtoItem::new();
        out.shorty = Some(value.shorty.as_str().to_owned());
        out.return_type = Some(value.return_type.as_str().to_owned());
        out.set_parameters_count(value.parameters_count);
        out.parameters.extend(
            value.parameters.iter().map(|param| param.as_str().to_owned()),
        );
        out
    }
}
impl From<&FieldItem> for protos::dex::FieldItem {
fn from(value: &FieldItem) -> Self {
let mut result = protos::dex::FieldItem::new();
result.class = Some(value.class.to_string());
result.type_ = Some(value.type_.to_string());
result.name = Some(value.name.to_string());
result
}
}
impl From<&MethodItem> for protos::dex::MethodItem {
fn from(value: &MethodItem) -> Self {
let mut result = protos::dex::MethodItem::new();
result.class = Some(value.class.to_string());
result.proto = MessageField::some(value.proto.as_ref().into());
result.name = Some(value.name.to_string());
result
}
}
/// Converts a parsed [`ClassItem`] into its protobuf representation.
impl From<&ClassItem> for protos::dex::ClassItem {
    fn from(value: &ClassItem) -> Self {
        let mut out = protos::dex::ClassItem::new();
        out.class = Some(value.class.as_str().to_owned());
        out.set_access_flags(value.access_flags);
        // A fresh message has these fields unset, so assigning `None` for an
        // absent value leaves them exactly as they were.
        out.superclass = value.superclass.as_deref().cloned();
        out.source_file = value.source_file.as_deref().cloned();
        out
    }
}
impl From<MapList> for protos::dex::MapList {
fn from(value: MapList) -> Self {
let mut result = protos::dex::MapList::new();
result.set_size(value.size);
result.items =
value.items.iter().map(protos::dex::MapItem::from).collect();
result
}
}
impl From<&MapItem> for protos::dex::MapItem {
fn from(item: &MapItem) -> Self {
let mut result = protos::dex::MapItem::new();
result.type_ = Some(EnumOrUnknown::from_i32(item.item_type.into()));
result.set_unused(item.unused.into());
result.set_size(item.size);
result.set_offset(item.offset);
result
}
}