use std::io::Read;
use std::path::Path;
use byteorder::{BigEndian, ByteOrder};
use scrump_core::{
apply_hits_in_place, Chunk, ChunkOrigin, Format, Handler, Hit, Result, ScrumpError,
};
const MIN_MAGIC_PREFIX: &[u8] = b"JAVA PROFILE";
#[derive(Clone, Debug)]
struct RecordRange {
tag: u8,
body_offset: u64,
body_len: u64,
id_size: u32,
}
pub struct Hprof {
bytes: Vec<u8>,
records: Vec<RecordRange>,
}
impl Hprof {
pub fn open_path(path: &Path) -> Result<Self> {
let mut f = std::fs::File::open(path)?;
let mut bytes = Vec::new();
f.read_to_end(&mut bytes)?;
Self::from_bytes(bytes)
}
pub fn from_bytes(bytes: Vec<u8>) -> Result<Self> {
if !bytes.starts_with(MIN_MAGIC_PREFIX) {
return Err(ScrumpError::InvalidFile(
"HPROF: missing 'JAVA PROFILE' magic prefix".into(),
));
}
let nul = bytes
.iter()
.position(|&b| b == 0)
.ok_or_else(|| ScrumpError::InvalidFile("HPROF: missing NUL in header".into()))?;
let header_after = nul + 1;
if bytes.len() < header_after + 12 {
return Err(ScrumpError::InvalidFile(
"HPROF: truncated header (need id_size + timestamp)".into(),
));
}
let id_size = BigEndian::read_u32(&bytes[header_after..header_after + 4]);
if !(1..=16).contains(&id_size) {
return Err(ScrumpError::InvalidFile(format!(
"HPROF: implausible id_size {id_size}"
)));
}
let mut cursor = (header_after + 12) as u64;
let mut records = Vec::new();
while (cursor as usize) + 9 <= bytes.len() {
let off = cursor as usize;
let tag = bytes[off];
let length = BigEndian::read_u32(&bytes[off + 5..off + 9]) as u64;
let body_offset = cursor + 9;
let body_end = body_offset + length;
if (body_end as usize) > bytes.len() {
return Err(ScrumpError::InvalidFile(format!(
"HPROF: record at {off:#x} (tag {tag:#x}, length {length}) extends past EOF ({} bytes)",
bytes.len()
)));
}
records.push(RecordRange {
tag,
body_offset,
body_len: length,
id_size,
});
cursor = body_end;
}
Ok(Self { bytes, records })
}
}
fn tag_label(tag: u8) -> &'static str {
match tag {
0x01 => "HPROF_UTF8",
0x02 => "HPROF_LOAD_CLASS",
0x03 => "HPROF_UNLOAD_CLASS",
0x04 => "HPROF_FRAME",
0x05 => "HPROF_TRACE",
0x06 => "HPROF_ALLOC_SITES",
0x07 => "HPROF_HEAP_SUMMARY",
0x0A => "HPROF_START_THREAD",
0x0B => "HPROF_END_THREAD",
0x0C => "HPROF_HEAP_DUMP",
0x0D => "HPROF_CPU_SAMPLES",
0x0E => "HPROF_CONTROL_SETTINGS",
0x1C => "HPROF_HEAP_DUMP_SEGMENT",
0x2C => "HPROF_HEAP_DUMP_END",
_ => "HPROF_UNKNOWN",
}
}
impl Format for Hprof {
fn name(&self) -> &'static str {
"hprof"
}
fn chunks<'a>(&'a self) -> Box<dyn Iterator<Item = Chunk<'a>> + 'a> {
let mut out: Vec<Chunk<'a>> = Vec::new();
for r in &self.records {
if r.body_len == 0 {
continue;
}
let from = r.body_offset as usize;
let to = from + r.body_len as usize;
if to > self.bytes.len() {
continue;
}
if r.tag == 0x01 && r.body_len > r.id_size as u64 {
let s_from = from + r.id_size as usize;
out.push(Chunk {
bytes: &self.bytes[s_from..to],
offset: s_from as u64,
origin: ChunkOrigin::StringTable("hprof.utf8".into()),
});
}
out.push(Chunk {
bytes: &self.bytes[from..to],
offset: r.body_offset,
origin: ChunkOrigin::Section(format!("hprof.{}", tag_label(r.tag))),
});
}
Box::new(out.into_iter())
}
fn apply(&mut self, hits: &[Hit]) -> Result<()> {
apply_hits_in_place(&mut self.bytes, hits)
}
fn to_bytes(&self) -> Result<Vec<u8>> {
Ok(self.bytes.clone())
}
}
fn detect(head: &[u8], _path: &Path) -> bool {
head.starts_with(MIN_MAGIC_PREFIX)
}
fn open_path(path: &Path) -> Result<Box<dyn Format>> {
Ok(Box::new(Hprof::open_path(path)?))
}
fn open_bytes(bytes: Vec<u8>, _hint: Option<&Path>) -> Result<Box<dyn Format>> {
Ok(Box::new(Hprof::from_bytes(bytes)?))
}
pub fn handler() -> Handler {
Handler {
name: "hprof",
detect,
open_path,
open_bytes,
}
}
#[cfg(test)]
mod tests {
use super::*;
use scrump_core::Replacement;
fn synth_hprof(planted: &str) -> Vec<u8> {
let mut f = Vec::new();
f.extend_from_slice(b"JAVA PROFILE 1.0.2\0");
f.extend_from_slice(&(8u32).to_be_bytes()); f.extend_from_slice(&(0u32).to_be_bytes()); f.extend_from_slice(&(0u32).to_be_bytes()); let body_len = 8 + planted.len();
f.push(0x01);
f.extend_from_slice(&(0u32).to_be_bytes()); f.extend_from_slice(&(body_len as u32).to_be_bytes());
f.extend_from_slice(&(42u64).to_be_bytes()); f.extend_from_slice(planted.as_bytes());
f.push(0x2C);
f.extend_from_slice(&(0u32).to_be_bytes());
f.extend_from_slice(&(0u32).to_be_bytes());
f
}
#[test]
fn detect_recognises_magic() {
assert!(detect(b"JAVA PROFILE 1.0.2\0xxx", Path::new("/x/a.hprof")));
assert!(!detect(b"random", Path::new("/x/a")));
}
#[test]
fn parses_synthetic_hprof() {
let token = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
let f = synth_hprof(token);
let pre_len = f.len();
let mut h = Hprof::from_bytes(f).unwrap();
let chunks: Vec<_> = h.chunks().collect();
let saw_string = chunks
.iter()
.any(|c| matches!(&c.origin, ChunkOrigin::StringTable(s) if s == "hprof.utf8"));
assert!(saw_string);
let pos = h
.bytes
.windows(token.len())
.position(|w| w == token.as_bytes())
.unwrap() as u64;
h.apply(&[Hit {
offset: pos,
len: token.len(),
rule_id: "x".into(),
verified: None,
replacement: Replacement::ZeroFill,
origin: ChunkOrigin::StringTable("hprof.utf8".into()),
}])
.unwrap();
let out = h.to_bytes().unwrap();
assert_eq!(out.len(), pre_len);
assert!(!out.windows(token.len()).any(|w| w == token.as_bytes()));
assert!(out.starts_with(MIN_MAGIC_PREFIX));
}
}