scrump_format_hprof/
lib.rs1use std::io::Read;
33use std::path::Path;
34
35use byteorder::{BigEndian, ByteOrder};
36use scrump_core::{
37 apply_hits_in_place, Chunk, ChunkOrigin, Format, Handler, Hit, Result, ScrumpError,
38};
39
/// Leading bytes every accepted HPROF input must start with.
///
/// Only the fixed `JAVA PROFILE` text is matched here; the rest of the header
/// string, up to its NUL terminator, is not compared against anything.
const MIN_MAGIC_PREFIX: &[u8] = b"JAVA PROFILE";
41
/// Location of one top-level HPROF record inside the loaded file.
#[derive(Debug, Clone)]
struct RecordRange {
    /// Record type byte (the first byte of the record header).
    tag: u8,
    /// Absolute offset of the record body, i.e. just past the 9-byte record header.
    body_offset: u64,
    /// Body length in bytes, as declared by the record header.
    body_len: u64,
    /// Identifier size read from the file header; identical for every record of a file.
    id_size: u32,
}
52
53pub struct Hprof {
54 bytes: Vec<u8>,
55 records: Vec<RecordRange>,
56}
57
58impl Hprof {
59 pub fn open_path(path: &Path) -> Result<Self> {
60 let mut f = std::fs::File::open(path)?;
61 let mut bytes = Vec::new();
62 f.read_to_end(&mut bytes)?;
63 Self::from_bytes(bytes)
64 }
65
66 pub fn from_bytes(bytes: Vec<u8>) -> Result<Self> {
67 if !bytes.starts_with(MIN_MAGIC_PREFIX) {
68 return Err(ScrumpError::InvalidFile(
69 "HPROF: missing 'JAVA PROFILE' magic prefix".into(),
70 ));
71 }
72 let nul = bytes
74 .iter()
75 .position(|&b| b == 0)
76 .ok_or_else(|| ScrumpError::InvalidFile("HPROF: missing NUL in header".into()))?;
77 let header_after = nul + 1;
79 if bytes.len() < header_after + 12 {
80 return Err(ScrumpError::InvalidFile(
81 "HPROF: truncated header (need id_size + timestamp)".into(),
82 ));
83 }
84 let id_size = BigEndian::read_u32(&bytes[header_after..header_after + 4]);
85 if !(1..=16).contains(&id_size) {
86 return Err(ScrumpError::InvalidFile(format!(
87 "HPROF: implausible id_size {id_size}"
88 )));
89 }
90 let mut cursor = (header_after + 12) as u64;
91
92 let mut records = Vec::new();
93 while (cursor as usize) + 9 <= bytes.len() {
94 let off = cursor as usize;
95 let tag = bytes[off];
96 let length = BigEndian::read_u32(&bytes[off + 5..off + 9]) as u64;
97 let body_offset = cursor + 9;
98 let body_end = body_offset + length;
99 if (body_end as usize) > bytes.len() {
100 return Err(ScrumpError::InvalidFile(format!(
101 "HPROF: record at {off:#x} (tag {tag:#x}, length {length}) extends past EOF ({} bytes)",
102 bytes.len()
103 )));
104 }
105 records.push(RecordRange {
106 tag,
107 body_offset,
108 body_len: length,
109 id_size,
110 });
111 cursor = body_end;
112 }
113
114 Ok(Self { bytes, records })
115 }
116}
117
/// Maps a record tag byte to its symbolic name.
///
/// Tags that are not listed in the table yield `"HPROF_UNKNOWN"`.
fn tag_label(tag: u8) -> &'static str {
    const LABELS: &[(u8, &str)] = &[
        (0x01, "HPROF_UTF8"),
        (0x02, "HPROF_LOAD_CLASS"),
        (0x03, "HPROF_UNLOAD_CLASS"),
        (0x04, "HPROF_FRAME"),
        (0x05, "HPROF_TRACE"),
        (0x06, "HPROF_ALLOC_SITES"),
        (0x07, "HPROF_HEAP_SUMMARY"),
        (0x0A, "HPROF_START_THREAD"),
        (0x0B, "HPROF_END_THREAD"),
        (0x0C, "HPROF_HEAP_DUMP"),
        (0x0D, "HPROF_CPU_SAMPLES"),
        (0x0E, "HPROF_CONTROL_SETTINGS"),
        (0x1C, "HPROF_HEAP_DUMP_SEGMENT"),
        (0x2C, "HPROF_HEAP_DUMP_END"),
    ];

    LABELS
        .iter()
        .find(|&&(known, _)| known == tag)
        .map_or("HPROF_UNKNOWN", |&(_, label)| label)
}
137
138impl Format for Hprof {
139 fn name(&self) -> &'static str {
140 "hprof"
141 }
142
143 fn chunks<'a>(&'a self) -> Box<dyn Iterator<Item = Chunk<'a>> + 'a> {
144 let mut out: Vec<Chunk<'a>> = Vec::new();
145 for r in &self.records {
146 if r.body_len == 0 {
147 continue;
148 }
149 let from = r.body_offset as usize;
150 let to = from + r.body_len as usize;
151 if to > self.bytes.len() {
152 continue;
153 }
154 if r.tag == 0x01 && r.body_len > r.id_size as u64 {
158 let s_from = from + r.id_size as usize;
159 out.push(Chunk {
160 bytes: &self.bytes[s_from..to],
161 offset: s_from as u64,
162 origin: ChunkOrigin::StringTable("hprof.utf8".into()),
163 });
164 }
165 out.push(Chunk {
166 bytes: &self.bytes[from..to],
167 offset: r.body_offset,
168 origin: ChunkOrigin::Section(format!("hprof.{}", tag_label(r.tag))),
169 });
170 }
171 Box::new(out.into_iter())
172 }
173
174 fn apply(&mut self, hits: &[Hit]) -> Result<()> {
175 apply_hits_in_place(&mut self.bytes, hits)
176 }
177
178 fn to_bytes(&self) -> Result<Vec<u8>> {
179 Ok(self.bytes.clone())
180 }
181}
182
183fn detect(head: &[u8], _path: &Path) -> bool {
186 head.starts_with(MIN_MAGIC_PREFIX)
187}
188
189fn open_path(path: &Path) -> Result<Box<dyn Format>> {
190 Ok(Box::new(Hprof::open_path(path)?))
191}
192
193fn open_bytes(bytes: Vec<u8>, _hint: Option<&Path>) -> Result<Box<dyn Format>> {
194 Ok(Box::new(Hprof::from_bytes(bytes)?))
195}
196
197pub fn handler() -> Handler {
198 Handler {
199 name: "hprof",
200 detect,
201 open_path,
202 open_bytes,
203 }
204}
205
#[cfg(test)]
mod tests {
    use super::*;
    use scrump_core::Replacement;

    /// Builds a minimal dump: a header declaring 8-byte identifiers, one `0x01`
    /// record whose body is an 8-byte identifier followed by `planted`, and an
    /// empty `0x2C` record.
    fn synth_hprof(planted: &str) -> Vec<u8> {
        let zero = 0u32.to_be_bytes();
        let mut f = Vec::new();

        // File header: magic + NUL, identifier size, 8 zero bytes.
        f.extend_from_slice(b"JAVA PROFILE 1.0.2\0");
        f.extend_from_slice(&8u32.to_be_bytes());
        f.extend_from_slice(&zero);
        f.extend_from_slice(&zero);

        // 0x01 record: tag, 4 zero bytes, body length, then the body itself.
        let body_len = 8 + planted.len();
        f.push(0x01);
        f.extend_from_slice(&zero);
        f.extend_from_slice(&(body_len as u32).to_be_bytes());
        f.extend_from_slice(&42u64.to_be_bytes());
        f.extend_from_slice(planted.as_bytes());

        // 0x2C record with an empty body.
        f.push(0x2C);
        f.extend_from_slice(&zero);
        f.extend_from_slice(&zero);
        f
    }

    #[test]
    fn detect_recognises_magic() {
        let with_magic = detect(b"JAVA PROFILE 1.0.2\0xxx", Path::new("/x/a.hprof"));
        let without_magic = detect(b"random", Path::new("/x/a"));
        assert!(with_magic);
        assert!(!without_magic);
    }

    #[test]
    fn parses_synthetic_hprof() {
        let token = "ghp_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let f = synth_hprof(token);
        let pre_len = f.len();
        let mut h = Hprof::from_bytes(f).unwrap();

        // The planted token must be exposed through a string-table chunk.
        let saw_string = h
            .chunks()
            .any(|c| matches!(&c.origin, ChunkOrigin::StringTable(s) if s == "hprof.utf8"));
        assert!(saw_string);

        let pos = h
            .bytes
            .windows(token.len())
            .position(|w| w == token.as_bytes())
            .unwrap() as u64;
        let hit = Hit {
            offset: pos,
            len: token.len(),
            rule_id: "x".into(),
            verified: None,
            replacement: Replacement::ZeroFill,
            origin: ChunkOrigin::StringTable("hprof.utf8".into()),
        };
        h.apply(&[hit]).unwrap();

        // Redaction keeps the length and the magic but removes the token.
        let out = h.to_bytes().unwrap();
        assert_eq!(out.len(), pre_len);
        assert!(!out.windows(token.len()).any(|w| w == token.as_bytes()));
        assert!(out.starts_with(MIN_MAGIC_PREFIX));
    }
}