use nom::{
bytes::complete::{tag, take},
combinator::map,
multi::count,
number::complete::{
be_f32, be_f64, be_i16, be_i32, be_i64, be_i8, be_u16, be_u32, be_u64, be_u8,
},
IResult,
};
use crate::constants::*;
use crate::handle::HandleTable;
use crate::types::*;
/// Returns a copy of `data` with one `TC_ENDBLOCKDATA` byte inserted directly
/// after the first occurrence of the byte sequence `00 7e 00 09`.
///
/// When the sequence does not occur, the result is an unmodified copy of
/// `data` (same length), which callers can use to detect that nothing changed.
pub fn preprocess_jdk8u20(data: &[u8]) -> Vec<u8> {
    const MARKER: [u8; 4] = [0x00, 0x7e, 0x00, 0x09];

    match data
        .windows(MARKER.len())
        .position(|window| window == &MARKER[..])
    {
        None => data.to_vec(),
        Some(start) => {
            // Split just past the marker and splice the extra byte in between.
            let (head, tail) = data.split_at(start + MARKER.len());
            let mut patched = Vec::with_capacity(data.len() + 1);
            patched.extend_from_slice(head);
            patched.push(TC_ENDBLOCKDATA);
            patched.extend_from_slice(tail);
            patched
        }
    }
}
/// Parses a complete serialization stream from `input`.
///
/// The input is first parsed as-is. If that fails, it is patched with
/// [`preprocess_jdk8u20`] and parsed again; when the patched copy parses, its
/// stream is returned together with an empty remainder positioned at the end
/// of the original `input`. If patching changed nothing, or the second attempt
/// fails as well, the error from the first attempt is returned.
pub fn parse_serialization_stream(input: &[u8]) -> IResult<&[u8], SerializationStream> {
    let first_attempt = parse_serialization_stream_inner(input);
    if first_attempt.is_ok() {
        return first_attempt;
    }

    let patched = preprocess_jdk8u20(input);
    // An unchanged length means the marker was not found, so a retry is pointless.
    if patched.len() != input.len() {
        if let Ok((_, stream)) = parse_serialization_stream_inner(&patched) {
            return Ok((&input[input.len()..], stream));
        }
    }
    first_attempt
}
/// Parses the stream header (the `AC ED` magic followed by a big-endian
/// `u16` version) and then content elements until the input is exhausted.
///
/// A fresh [`HandleTable`] is created for the stream. Any failure while
/// parsing a content element aborts the whole parse with that error.
fn parse_serialization_stream_inner(input: &[u8]) -> IResult<&[u8], SerializationStream> {
    let (after_magic, _) = tag(&[0xAC, 0xED])(input)?;
    let (mut rest, version) = be_u16(after_magic)?;

    let mut handle_table = HandleTable::new();
    let mut contents = Vec::new();
    while !rest.is_empty() {
        let (next, element) = parse_content(rest, &mut handle_table)?;
        contents.push(element);
        rest = next;
    }

    Ok((rest, SerializationStream { version, contents }))
}
/// Parses one top-level content element.
///
/// The leading type code is inspected without being consumed: block-data
/// codes are handed to `parse_block_data`, everything else to `parse_object`.
/// Empty input yields the `Eof` error produced by `peek_tc`.
fn parse_content<'a>(
    input: &'a [u8],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], ContentElement> {
    if matches!(peek_tc(input)?, TC_BLOCKDATA | TC_BLOCKDATALONG) {
        let (rest, block) = parse_block_data(input)?;
        return Ok((rest, ContentElement::BlockData(block)));
    }

    let (rest, object) = parse_object(input, handles)?;
    Ok((rest, ContentElement::Object(object)))
}
fn peek_tc(input: &[u8]) -> Result<u8, nom::Err<nom::error::Error<&[u8]>>> {
if input.is_empty() {
return Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Eof,
)));
}
Ok(input[0])
}
fn parse_object<'a>(input: &'a [u8], handles: &mut HandleTable) -> IResult<&'a [u8], StreamObject> {
let (input, tc) = be_u8(input)?;
match tc {
TC_NULL => Ok((input, StreamObject::NullReference)),
TC_REFERENCE => {
let (input, handle) = be_u32(input)?;
Ok((input, StreamObject::PrevObject { handle }))
}
TC_CLASSDESC => {
let (input, desc) = parse_normal_class_desc(input, handles)?;
Ok((
input,
StreamObject::NewClassDesc(ClassDesc::Normal(Box::new(desc))),
))
}
TC_PROXYCLASSDESC => {
let (input, desc) = parse_proxy_class_desc(input, handles)?;
Ok((input, StreamObject::NewClassDesc(ClassDesc::Proxy(desc))))
}
TC_OBJECT => {
let (input, obj) = parse_new_object(input, handles)?;
Ok((input, StreamObject::NewObject(obj)))
}
TC_STRING => {
let (input, s) = parse_tc_string(input, handles, false)?;
Ok((input, StreamObject::NewString(s)))
}
TC_LONGSTRING => {
let (input, s) = parse_tc_string(input, handles, true)?;
Ok((input, StreamObject::NewString(s)))
}
TC_ARRAY => {
let (input, arr) = parse_new_array(input, handles)?;
Ok((input, StreamObject::NewArray(arr)))
}
TC_CLASS => {
let (input, cls) = parse_new_class(input, handles)?;
Ok((input, StreamObject::NewClass(cls)))
}
TC_ENUM => {
let (input, en) = parse_new_enum(input, handles)?;
Ok((input, StreamObject::NewEnum(en)))
}
TC_RESET => {
handles.reset();
Ok((input, StreamObject::Reset))
}
TC_BLOCKDATA | TC_BLOCKDATALONG => {
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
)))
}
TC_EXCEPTION => {
Ok((input, StreamObject::Exception))
}
TC_ENDBLOCKDATA => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}
fn parse_class_desc_ref<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], ClassDescRef> {
let (input, tc) = be_u8(input)?;
match tc {
TC_NULL => Ok((input, ClassDescRef::Null)),
TC_REFERENCE => {
let (input, handle) = be_u32(input)?;
Ok((input, ClassDescRef::Reference { handle }))
}
TC_CLASSDESC => {
let (input, desc) = parse_normal_class_desc(input, handles)?;
Ok((
input,
ClassDescRef::Inline(Box::new(ClassDesc::Normal(Box::new(desc)))),
))
}
TC_PROXYCLASSDESC => {
let (input, desc) = parse_proxy_class_desc(input, handles)?;
Ok((
input,
ClassDescRef::Inline(Box::new(ClassDesc::Proxy(desc))),
))
}
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}
/// Parses the body of a normal class descriptor (the type code has already
/// been consumed by the caller).
///
/// Reads, in order: class name, serial version UID, flags, field
/// descriptors, class annotation and the super-class descriptor slot. The
/// handle is reserved before any of these are parsed and the table entry is
/// filled in with a clone of the finished descriptor afterwards.
fn parse_normal_class_desc<'a>(
    input: &'a [u8],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], NormalClassDesc> {
    // Reserve the handle first so nested items get later handles.
    let handle = handles.assign_handle();

    let (rest, class_name) = parse_utf(input)?;
    let (rest, serial_version_uid) = be_i64(rest)?;
    let (rest, flags) = be_u8(rest)?;
    let (rest, fields) = parse_fields_desc(rest, handles)?;
    let (rest, class_annotation) = parse_class_annotation(rest, handles)?;
    let (rest, parent) = parse_class_desc_ref(rest, handles)?;

    let parsed = NormalClassDesc {
        class_name,
        serial_version_uid,
        handle,
        flags,
        fields,
        class_annotation,
        super_class_desc: Box::new(parent),
    };

    let table_entry = StreamObject::NewClassDesc(ClassDesc::Normal(Box::new(parsed.clone())));
    handles.update(handle, table_entry);

    Ok((rest, parsed))
}
/// Parses the body of a proxy class descriptor (the type code has already
/// been consumed by the caller).
///
/// Reads a big-endian `u32` interface count, that many interface names, the
/// class annotation and the super-class descriptor slot. The handle is
/// reserved up front and its table entry is filled in once parsing succeeds.
fn parse_proxy_class_desc<'a>(
    input: &'a [u8],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], ProxyClassDesc> {
    let handle = handles.assign_handle();

    let (rest, declared_interfaces) = be_u32(input)?;
    let (rest, interface_names) = count(parse_utf, declared_interfaces as usize)(rest)?;
    let (rest, class_annotation) = parse_class_annotation(rest, handles)?;
    let (rest, parent) = parse_class_desc_ref(rest, handles)?;

    let parsed = ProxyClassDesc {
        handle,
        interface_names,
        class_annotation,
        super_class_desc: Box::new(parent),
    };

    let table_entry = StreamObject::NewClassDesc(ClassDesc::Proxy(parsed.clone()));
    handles.update(handle, table_entry);

    Ok((rest, parsed))
}
/// Parses a big-endian `u16` field count followed by that many field
/// descriptors, stopping at the first descriptor that fails to parse.
fn parse_fields_desc<'a>(
    input: &'a [u8],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], Vec<FieldDesc>> {
    let (mut rest, declared) = be_u16(input)?;

    let mut fields = Vec::with_capacity(usize::from(declared));
    for _ in 0..declared {
        let (next, descriptor) = parse_field_desc(rest, handles)?;
        fields.push(descriptor);
        rest = next;
    }

    Ok((rest, fields))
}
fn parse_field_desc<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], FieldDesc> {
let (input, type_code) = be_u8(input)?;
let (input, field_name) = parse_utf(input)?;
match type_code {
b'B' | b'C' | b'D' | b'F' | b'I' | b'J' | b'S' | b'Z' => Ok((
input,
FieldDesc::Primitive(PrimitiveFieldDesc {
type_code,
field_name,
}),
)),
b'L' | b'[' => {
let (input, class_name) = parse_string_object(input, handles)?;
Ok((
input,
FieldDesc::Object(ObjectFieldDesc {
type_code,
field_name,
class_name,
}),
))
}
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}
fn parse_string_object<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], String> {
let (input, tc) = be_u8(input)?;
match tc {
TC_STRING => {
let (input, value) = parse_utf(input)?;
let _handle = handles.assign(StreamObject::NewString(StreamString {
value: value.clone(),
handle: 0,
is_long: false,
}));
Ok((input, value))
}
TC_LONGSTRING => {
let (input, value) = parse_long_utf(input)?;
let _handle = handles.assign(StreamObject::NewString(StreamString {
value: value.clone(),
handle: 0,
is_long: true,
}));
Ok((input, value))
}
TC_REFERENCE => {
let (input, handle) = be_u32(input)?;
let value = handles
.get(handle)
.and_then(|obj| {
if let StreamObject::NewString(s) = obj {
Some(s.value.clone())
} else {
None
}
})
.unwrap_or_else(|| format!("<ref:{:#08x}>", handle));
Ok((input, value))
}
TC_NULL => Ok((input, String::new())),
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}
fn parse_class_annotation<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], Vec<AnnotationElement>> {
let mut remaining = input;
let mut elements = Vec::new();
loop {
let tc = peek_tc(remaining)?;
if tc == TC_ENDBLOCKDATA {
let (rem, _) = be_u8(remaining)?; return Ok((rem, elements));
}
if tc == TC_BLOCKDATA || tc == TC_BLOCKDATALONG {
let (rem, bd) = parse_block_data(remaining)?;
elements.push(AnnotationElement::BlockData(bd));
remaining = rem;
} else {
let (rem, obj) = parse_object(remaining, handles)?;
elements.push(AnnotationElement::Object(obj));
remaining = rem;
}
}
}
/// Parses the body of a new object: its class-descriptor slot followed by the
/// per-class data for every class in the descriptor's hierarchy.
///
/// The object's handle is reserved after the class descriptor is read and
/// before the class data, and the table entry is filled in with a clone of
/// the finished object.
fn parse_new_object<'a>(
    input: &'a [u8],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], NewObject> {
    let (rest, class_desc) = parse_class_desc_ref(input, handles)?;
    let handle = handles.assign_handle();
    let (rest, class_data) = parse_class_data_hierarchy(rest, &class_desc, handles)?;

    let object = NewObject {
        class_desc,
        handle,
        class_data,
    };
    handles.update(handle, StreamObject::NewObject(object.clone()));

    Ok((rest, object))
}
/// Parses one `ClassData` entry for each class in the hierarchy of
/// `class_desc_ref`, in the order returned by `collect_class_hierarchy`.
fn parse_class_data_hierarchy<'a>(
    input: &'a [u8],
    class_desc_ref: &ClassDescRef,
    handles: &mut HandleTable,
) -> IResult<&'a [u8], Vec<ClassData>> {
    let chain = collect_class_hierarchy(class_desc_ref, handles);

    let mut rest = input;
    let mut per_class = Vec::new();
    for class in chain.iter() {
        let (next, data) = parse_class_data(rest, class, handles)?;
        per_class.push(data);
        rest = next;
    }

    Ok((rest, per_class))
}
/// Collects the chain of normal class descriptors reachable from
/// `class_desc_ref` through `super_class_desc` links, ordered from the
/// top-most ancestor down to the starting class.
///
/// The walk stops at a null slot, at a proxy descriptor, at a reference whose
/// handle does not resolve to a class descriptor, or at a reference handle
/// that was already followed during this walk. The last case guards against
/// crafted streams in which a descriptor's super-class slot refers back to
/// the descriptor itself (directly or through a longer cycle); without it the
/// loop would never terminate and would clone descriptors until memory runs
/// out.
fn collect_class_hierarchy(
    class_desc_ref: &ClassDescRef,
    handles: &HandleTable,
) -> Vec<NormalClassDesc> {
    let mut hierarchy = Vec::new();
    // Inline descriptors are owned trees and cannot form a cycle; only
    // handle references can, so those are the ones tracked here.
    let mut followed_handles = std::collections::HashSet::new();
    let mut current = Some(class_desc_ref);

    while let Some(desc_ref) = current {
        let class_desc = match desc_ref {
            ClassDescRef::Null => break,
            ClassDescRef::Reference { handle } => {
                if !followed_handles.insert(*handle) {
                    // Already visited: the super-class chain is cyclic.
                    break;
                }
                match handles.get(*handle) {
                    Some(StreamObject::NewClassDesc(class_desc)) => class_desc,
                    _ => break,
                }
            }
            ClassDescRef::Inline(class_desc) => class_desc.as_ref(),
        };

        match class_desc {
            ClassDesc::Normal(desc) => {
                hierarchy.push((**desc).clone());
                current = Some(&*desc.super_class_desc);
            }
            ClassDesc::Proxy(_) => break,
        }
    }

    // Collected child-first; callers expect ancestor-first.
    hierarchy.reverse();
    hierarchy
}
/// Parses the instance data written for one class of an object.
///
/// * Externalizable classes: an object annotation is read, regardless of the
///   block-data flag (both cases are handled the same way).
/// * Serializable classes without a write method: the declared field values.
/// * Serializable classes with a write method: field values followed by an
///   object annotation are tried first; if either step fails, the handle
///   table is rolled back to its snapshot and the data is re-read from the
///   original position as an annotation only.
/// * Anything else: no bytes are consumed and an empty field set is returned.
fn parse_class_data<'a>(
    input: &'a [u8],
    desc: &NormalClassDesc,
    handles: &mut HandleTable,
) -> IResult<&'a [u8], ClassData> {
    if desc.is_externalizable() {
        let (rest, annotation) = parse_object_annotation(input, handles)?;
        return Ok((rest, ClassData::ExternalBlockData(annotation)));
    }

    if !desc.is_serializable() {
        let empty = FieldValueSet { values: vec![] };
        return Ok((input, ClassData::NoWriteMethod(empty)));
    }

    if !desc.has_write_method() {
        let (rest, values) = parse_field_values(input, &desc.fields, handles)?;
        return Ok((rest, ClassData::NoWriteMethod(FieldValueSet { values })));
    }

    // Optimistic path: default fields first, then the custom annotation.
    let snap = handles.snapshot();
    if let Ok((after_fields, values)) = parse_field_values(input, &desc.fields, handles) {
        if let Ok((rest, annotation)) = parse_object_annotation(after_fields, handles) {
            let field_set = FieldValueSet { values };
            return Ok((
                rest,
                ClassData::WriteMethodWithFields(field_set, annotation),
            ));
        }
    }

    // Fallback: discard handles assigned by the failed attempt and retry
    // from the original position, reading an annotation only.
    handles.rollback(snap);
    let (rest, annotation) = parse_object_annotation(input, handles)?;
    Ok((rest, ClassData::WriteMethod(annotation)))
}
/// Parses one value per entry of `fields`, in declaration order, stopping at
/// the first value that fails to parse.
fn parse_field_values<'a>(
    input: &'a [u8],
    fields: &[FieldDesc],
    handles: &mut HandleTable,
) -> IResult<&'a [u8], Vec<FieldValue>> {
    let mut rest = input;
    let mut values = Vec::with_capacity(fields.len());

    for descriptor in fields.iter() {
        let (next, value) = parse_field_value(rest, descriptor, handles)?;
        values.push(value);
        rest = next;
    }

    Ok((rest, values))
}
fn parse_field_value<'a>(
input: &'a [u8],
field: &FieldDesc,
handles: &mut HandleTable,
) -> IResult<&'a [u8], FieldValue> {
match field {
FieldDesc::Primitive(pf) => match pf.type_code {
b'B' => map(be_i8, FieldValue::Byte)(input),
b'C' => map(be_u16, FieldValue::Char)(input),
b'D' => map(be_f64, FieldValue::Double)(input),
b'F' => map(be_f32, FieldValue::Float)(input),
b'I' => map(be_i32, FieldValue::Int)(input),
b'J' => map(be_i64, FieldValue::Long)(input),
b'S' => map(be_i16, FieldValue::Short)(input),
b'Z' => map(be_u8, |v| FieldValue::Boolean(v != 0))(input),
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
},
FieldDesc::Object(_) => {
let (input, obj) = parse_object(input, handles)?;
match obj {
StreamObject::NullReference => Ok((input, FieldValue::Object(None))),
obj => Ok((input, FieldValue::Object(Some(Box::new(obj))))),
}
}
}
}
fn parse_object_annotation<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], ObjectAnnotation> {
let mut remaining = input;
let mut contents = Vec::new();
loop {
let tc = peek_tc(remaining)?;
if tc == TC_ENDBLOCKDATA {
let (rem, _) = be_u8(remaining)?; return Ok((rem, ObjectAnnotation { contents }));
}
if tc == TC_BLOCKDATA || tc == TC_BLOCKDATALONG {
let (rem, bd) = parse_block_data(remaining)?;
contents.push(AnnotationElement::BlockData(bd));
remaining = rem;
} else {
let (rem, obj) = parse_object(remaining, handles)?;
contents.push(AnnotationElement::Object(obj));
remaining = rem;
}
}
}
/// Parses the body of a class object: a class-descriptor slot, after which a
/// handle is assigned and registered for the new `NewClass` value.
fn parse_new_class<'a>(input: &'a [u8], handles: &mut HandleTable) -> IResult<&'a [u8], NewClass> {
    let (rest, class_desc) = parse_class_desc_ref(input, handles)?;
    let handle = handles.assign_handle();

    let class = NewClass { class_desc, handle };
    handles.update(handle, StreamObject::NewClass(class.clone()));

    Ok((rest, class))
}
/// Parses the body of an array: class-descriptor slot, big-endian `u32`
/// element count, then the element values.
///
/// The array's handle is reserved after the count is read and before the
/// elements, and the table entry is filled in with a clone of the finished
/// array.
fn parse_new_array<'a>(input: &'a [u8], handles: &mut HandleTable) -> IResult<&'a [u8], NewArray> {
    let (rest, class_desc) = parse_class_desc_ref(input, handles)?;
    let (rest, size) = be_u32(rest)?;
    let handle = handles.assign_handle();
    let (rest, values) = parse_array_values(rest, &class_desc, size as usize, handles)?;

    let array = NewArray {
        class_desc,
        handle,
        size,
        values,
    };
    handles.update(handle, StreamObject::NewArray(array.clone()));

    Ok((rest, array))
}
/// Parses `size` array elements, choosing the element encoding from the
/// array's class name.
///
/// The class name is taken from an inline normal descriptor, or looked up in
/// the handle table for a reference. Proxy descriptors, null slots and
/// unresolved references are treated as an empty name, which
/// `get_array_component_type` maps to object elements. Object elements equal
/// to a null reference are stored as `None`.
///
/// `size` comes straight from the stream, so the object-element buffer is
/// preallocated for at most `input.len()` entries: every element occupies at
/// least one byte, and a larger count cannot parse successfully anyway. This
/// keeps a tiny crafted stream from requesting a multi-gigabyte allocation.
fn parse_array_values<'a>(
    input: &'a [u8],
    class_desc: &ClassDescRef,
    size: usize,
    handles: &mut HandleTable,
) -> IResult<&'a [u8], ArrayValues> {
    let class_name = match class_desc {
        ClassDescRef::Inline(cd) => match cd.as_ref() {
            ClassDesc::Normal(desc) => desc.class_name.as_str(),
            ClassDesc::Proxy(_) => "",
        },
        ClassDescRef::Reference { handle } => match handles.get(*handle) {
            Some(StreamObject::NewClassDesc(ClassDesc::Normal(desc))) => desc.class_name.as_str(),
            _ => "",
        },
        ClassDescRef::Null => "",
    };
    match get_array_component_type(class_name) {
        ArrayComponentType::Byte => {
            let (input, vals) = count(be_i8, size)(input)?;
            Ok((input, ArrayValues::Byte(vals)))
        }
        ArrayComponentType::Char => {
            let (input, vals) = count(be_u16, size)(input)?;
            Ok((input, ArrayValues::Char(vals)))
        }
        ArrayComponentType::Double => {
            let (input, vals) = count(be_f64, size)(input)?;
            Ok((input, ArrayValues::Double(vals)))
        }
        ArrayComponentType::Float => {
            let (input, vals) = count(be_f32, size)(input)?;
            Ok((input, ArrayValues::Float(vals)))
        }
        ArrayComponentType::Int => {
            let (input, vals) = count(be_i32, size)(input)?;
            Ok((input, ArrayValues::Int(vals)))
        }
        ArrayComponentType::Long => {
            let (input, vals) = count(be_i64, size)(input)?;
            Ok((input, ArrayValues::Long(vals)))
        }
        ArrayComponentType::Short => {
            let (input, vals) = count(be_i16, size)(input)?;
            Ok((input, ArrayValues::Short(vals)))
        }
        ArrayComponentType::Boolean => {
            let (input, vals) = count(be_u8, size)(input)?;
            Ok((input, ArrayValues::Boolean(vals)))
        }
        ArrayComponentType::Object => {
            let mut remaining = input;
            // Cap the preallocation by the bytes actually available; the
            // vector still grows normally if that estimate is too low.
            let mut vals = Vec::with_capacity(size.min(input.len()));
            for _ in 0..size {
                let (rem, obj) = parse_object(remaining, handles)?;
                match obj {
                    StreamObject::NullReference => vals.push(None),
                    obj => vals.push(Some(obj)),
                }
                remaining = rem;
            }
            Ok((remaining, ArrayValues::Object(vals)))
        }
    }
}
/// How the elements of an array are encoded, as derived from the array's
/// class name by [`get_array_component_type`].
enum ArrayComponentType {
    Byte,
    Char,
    Double,
    Float,
    Int,
    Long,
    Short,
    Boolean,
    Object,
}

/// Classifies an array class name by its element type.
///
/// Only names with exactly one leading `[` followed by a primitive type
/// letter (`B`, `C`, `D`, `F`, `I`, `J`, `S`, `Z`) map to a primitive
/// variant. Names without a leading `[`, names with more than one `[`
/// (nested arrays), and every other component letter map to
/// `ArrayComponentType::Object`.
fn get_array_component_type(class_name: &str) -> ArrayComponentType {
    // Keep only names of the form "[" + component, where the component does
    // not itself begin another array dimension.
    let component = match class_name.strip_prefix('[') {
        Some(after_bracket) if !after_bracket.starts_with('[') => after_bracket,
        _ => return ArrayComponentType::Object,
    };

    match component.chars().next() {
        Some('B') => ArrayComponentType::Byte,
        Some('C') => ArrayComponentType::Char,
        Some('D') => ArrayComponentType::Double,
        Some('F') => ArrayComponentType::Float,
        Some('I') => ArrayComponentType::Int,
        Some('J') => ArrayComponentType::Long,
        Some('S') => ArrayComponentType::Short,
        Some('Z') => ArrayComponentType::Boolean,
        _ => ArrayComponentType::Object,
    }
}
/// Parses the body of an enum constant: class-descriptor slot followed by
/// the constant's name.
///
/// The enum's handle is reserved after the class descriptor and before the
/// name is read, and the table entry is filled in with a clone of the
/// finished value.
fn parse_new_enum<'a>(input: &'a [u8], handles: &mut HandleTable) -> IResult<&'a [u8], NewEnum> {
    let (rest, class_desc) = parse_class_desc_ref(input, handles)?;
    let handle = handles.assign_handle();
    let (rest, constant_name) = parse_enum_constant_name(rest, handles)?;

    let constant = NewEnum {
        class_desc,
        handle,
        constant_name,
    };
    handles.update(handle, StreamObject::NewEnum(constant.clone()));

    Ok((rest, constant))
}
fn parse_enum_constant_name<'a>(
input: &'a [u8],
handles: &mut HandleTable,
) -> IResult<&'a [u8], StreamString> {
let (input, tc) = be_u8(input)?;
match tc {
TC_STRING => parse_tc_string(input, handles, false),
TC_LONGSTRING => parse_tc_string(input, handles, true),
TC_REFERENCE => {
let (input, handle) = be_u32(input)?;
Ok((
input,
StreamString {
value: format!("<ref:{:#08x}>", handle),
handle: 0, is_long: false,
},
))
}
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}
fn parse_tc_string<'a>(
input: &'a [u8],
handles: &mut HandleTable,
is_long: bool,
) -> IResult<&'a [u8], StreamString> {
let (input, value) = if is_long {
parse_long_utf(input)?
} else {
parse_utf(input)?
};
let stream_string = StreamString {
value: value.clone(),
handle: 0,
is_long,
};
let handle = handles.assign(StreamObject::NewString(stream_string.clone()));
let result = StreamString {
value,
handle,
is_long,
};
Ok((input, result))
}
/// Parses a string with a big-endian `u16` byte-length prefix.
///
/// The payload is decoded with `cesu8::from_java_cesu8`; if that fails, it
/// is decoded as lossy UTF-8 instead, so decoding itself never fails.
fn parse_utf(input: &[u8]) -> IResult<&[u8], String> {
    let (rest, byte_len) = be_u16(input)?;
    let (rest, raw) = take(usize::from(byte_len))(rest)?;

    let decoded = match cesu8::from_java_cesu8(raw) {
        Ok(text) => text.into_owned(),
        Err(_) => String::from_utf8_lossy(raw).into_owned(),
    };

    Ok((rest, decoded))
}
fn parse_long_utf(input: &[u8]) -> IResult<&[u8], String> {
let (input, length) = be_u64(input)?;
let (input, bytes) = take(length as usize)(input)?;
let value = cesu8::from_java_cesu8(bytes)
.map(|s| s.into_owned())
.unwrap_or_else(|_| String::from_utf8_lossy(bytes).into_owned());
Ok((input, value))
}
/// Reads a one-byte type code and parses the block data it introduces by
/// delegating to `parse_block_data_from_tc`.
fn parse_block_data(input: &[u8]) -> IResult<&[u8], BlockData> {
    be_u8(input).and_then(|(rest, type_code)| parse_block_data_from_tc(rest, type_code))
}
fn parse_block_data_from_tc(input: &[u8], tc: u8) -> IResult<&[u8], BlockData> {
match tc {
TC_BLOCKDATA => {
let (input, size) = be_u8(input)?;
let (input, data) = take(size as usize)(input)?;
Ok((
input,
BlockData::Short {
data: data.to_vec(),
},
))
}
TC_BLOCKDATALONG => {
let (input, size) = be_u32(input)?;
let (input, data) = take(size as usize)(input)?;
Ok((
input,
BlockData::Long {
data: data.to_vec(),
},
))
}
_ => Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Tag,
))),
}
}