1use crate::export::binary::error::BinaryExportError;
7use crate::export::binary::format::{FileHeader, ALLOCATION_RECORD_TYPE, HEADER_SIZE};
8use crate::export::binary::index::{
9 BinaryIndex, BloomFilterParams, CompactAllocationIndex, QuickFilterData, RecordMetadata,
10 StringTableIndex,
11};
12use crate::export::binary::serializable::primitives;
13use std::collections::hash_map::DefaultHasher;
14use std::fs::File;
15use std::hash::{Hash, Hasher};
16use std::io::{BufReader, Read, Seek, SeekFrom};
17use std::path::Path;
18
/// Builds a [`BinaryIndex`] for an exported binary file by scanning its
/// header, string table, and allocation records.
///
/// Configure via the `with_*` builder methods, then call
/// [`BinaryIndexBuilder::build_index`].
pub struct BinaryIndexBuilder {
    /// Minimum record count (from the file header) at which quick-filter
    /// data (batch ranges + bloom filters) is generated during the scan.
    quick_filter_threshold: u32,

    /// Number of records summarized per quick-filter batch.
    quick_filter_batch_size: usize,

    /// Parameters forwarded to the bloom filters built for thread ids and
    /// type names.
    bloom_filter_params: BloomFilterParams,
}
30
impl BinaryIndexBuilder {
    /// Creates a builder with defaults: quick-filter data is only built for
    /// files with at least 1000 records, summarized in batches of 1000,
    /// using the default bloom filter parameters.
    pub fn new() -> Self {
        Self {
            quick_filter_threshold: 1000,
            quick_filter_batch_size: 1000,
            bloom_filter_params: BloomFilterParams::default(),
        }
    }

    /// Sets the minimum record count at which quick-filter data is built.
    pub fn with_quick_filter_threshold(mut self, threshold: u32) -> Self {
        self.quick_filter_threshold = threshold;
        self
    }

    /// Sets how many records are summarized per quick-filter batch.
    pub fn with_quick_filter_batch_size(mut self, batch_size: usize) -> Self {
        self.quick_filter_batch_size = batch_size;
        self
    }

    /// Overrides the bloom filter parameters used for quick-filter data.
    pub fn with_bloom_filter_params(mut self, params: BloomFilterParams) -> Self {
        self.bloom_filter_params = params;
        self
    }

    /// Builds a [`BinaryIndex`] for the file at `binary_path` by validating
    /// the header, locating the string table, and scanning every allocation
    /// record to capture its byte offset and size.
    ///
    /// # Errors
    ///
    /// Returns [`BinaryExportError`] if the file cannot be opened, the magic
    /// or version is invalid, or the record stream is corrupted.
    pub fn build_index<P: AsRef<Path>>(
        &self,
        binary_path: P,
    ) -> Result<BinaryIndex, BinaryExportError> {
        let path = binary_path.as_ref();
        let file = File::open(path)?;
        let file_size = file.metadata()?.len();
        // Hashing uses a separate file handle so the BufReader below still
        // starts at offset 0.
        let file_hash = self.compute_file_hash(path)?;

        let mut reader = BufReader::new(file);

        // These reads must happen in file order: header, then string table,
        // then the allocation records that follow it.
        let header = self.read_header(&mut reader)?;

        let string_table_info = self.read_string_table_info(&mut reader)?;

        let allocation_index = self.scan_allocation_records(&mut reader, &header)?;

        let mut index = BinaryIndex::new(path.to_path_buf(), file_hash, file_size, header);

        index.string_table = string_table_info;
        index.allocations = allocation_index;

        Ok(index)
    }

    /// Computes a cheap fingerprint of the file: it hashes the length, the
    /// modification time (seconds since the Unix epoch, when available), the
    /// first up-to-1 KiB, and — for files larger than 2 KiB — the final 1 KiB.
    ///
    /// This is a change-detection heuristic, not an integrity check: the
    /// middle of the file is never read, so distinct files can collide.
    /// NOTE(review): `DefaultHasher`'s output is not guaranteed stable across
    /// Rust releases — avoid persisting this hash across toolchain upgrades.
    fn compute_file_hash<P: AsRef<Path>>(&self, path: P) -> Result<u64, BinaryExportError> {
        let mut file = File::open(path)?;
        let mut hasher = DefaultHasher::new();

        let metadata = file.metadata()?;
        metadata.len().hash(&mut hasher);
        if let Ok(modified) = metadata.modified() {
            if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) {
                duration.as_secs().hash(&mut hasher);
            }
        }

        // First chunk (up to 1024 bytes).
        let mut buffer = [0u8; 1024];
        let bytes_read = file.read(&mut buffer)?;
        buffer[..bytes_read].hash(&mut hasher);

        // Tail chunk, only when it cannot overlap the first chunk.
        // NOTE(review): files of 1025..=2048 bytes never get their tail
        // hashed — presumably an accepted trade-off; confirm if full tail
        // coverage matters.
        if metadata.len() > 2048 {
            file.seek(SeekFrom::End(-1024))?;
            let bytes_read = file.read(&mut buffer)?;
            buffer[..bytes_read].hash(&mut hasher);
        }

        Ok(hasher.finish())
    }

    /// Reads and validates the fixed-size file header at the reader's current
    /// position (expected to be offset 0).
    fn read_header(&self, reader: &mut BufReader<File>) -> Result<FileHeader, BinaryExportError> {
        let mut header_bytes = [0u8; HEADER_SIZE];
        reader.read_exact(&mut header_bytes)?;

        let header = FileHeader::from_bytes(&header_bytes);

        if !header.is_valid_magic() {
            return Err(BinaryExportError::InvalidMagic {
                expected: "MEMSCOPE".to_string(),
                actual: String::from_utf8_lossy(&header.magic).to_string(),
            });
        }

        if !header.is_compatible_version() {
            return Err(BinaryExportError::UnsupportedVersion(header.version));
        }

        Ok(header)
    }

    /// Records where the string table lives without materializing its
    /// strings, leaving the reader positioned just past the table.
    ///
    /// Layout as read here: a 4-byte marker (`b"STBL"` or `b"NONE"`), a u32
    /// table size, then — for `STBL` — a 1-byte compression flag and a u32
    /// string count, after which the remaining table bytes are skipped.
    fn read_string_table_info(
        &self,
        reader: &mut BufReader<File>,
    ) -> Result<StringTableIndex, BinaryExportError> {
        let table_start_offset = reader.stream_position()?;

        let mut marker = [0u8; 4];
        reader.read_exact(&mut marker)?;

        let table_size = primitives::read_u32(reader)?;

        if &marker == b"STBL" && table_size > 0 {
            let uses_compression = primitives::read_u8(reader)? != 0;

            let string_count = primitives::read_u32(reader)?;

            // Skip the remainder of the table. NOTE(review): only 5 bytes
            // (flag + count) were consumed after the size field, yet 9 are
            // subtracted — this assumes `table_size` also counts the 4-byte
            // size field itself; confirm against the writer's format.
            reader.seek(SeekFrom::Current(table_size as i64 - 9))?;
            Ok(StringTableIndex {
                offset: table_start_offset,
                // +8 presumably accounts for the marker and size fields that
                // precede the table body — TODO confirm against the writer.
                size: table_size as u64 + 8,
                string_count,
                uses_compression,
            })
        } else if &marker == b"NONE" {
            // No string table in this file; use the default (empty) index.
            Ok(StringTableIndex::default())
        } else {
            Err(BinaryExportError::CorruptedData(
                "Invalid string table marker".to_string(),
            ))
        }
    }

    /// Scans all allocation records following the string table, recording
    /// each record's offset and size in a [`CompactAllocationIndex`]. When
    /// the header promises at least `quick_filter_threshold` records,
    /// quick-filter summaries are built alongside.
    fn scan_allocation_records(
        &self,
        reader: &mut BufReader<File>,
        header: &FileHeader,
    ) -> Result<CompactAllocationIndex, BinaryExportError> {
        let records_start_offset = reader.stream_position()?;
        let mut allocation_index = CompactAllocationIndex::new(records_start_offset);
        allocation_index.reserve(header.total_count as usize);

        let mut record_metadata = Vec::new();
        // Quick-filter data is only worth the cost for large files.
        let mut quick_filter_builder = if header.total_count >= self.quick_filter_threshold {
            Some(QuickFilterBuilder::new(
                self.quick_filter_batch_size,
                self.bloom_filter_params.clone(),
            ))
        } else {
            None
        };

        let file_size = reader
            .get_ref()
            .metadata()
            .map_err(|e| {
                BinaryExportError::CorruptedData(format!("Failed to get file metadata: {e:?}"))
            })?
            .len();

        for i in 0..header.total_count {
            let record_start_offset = reader.stream_position()?;

            // Starting a record at or past EOF means the header promised
            // more records than the file contains — hard error.
            if record_start_offset >= file_size {
                return Err(BinaryExportError::CorruptedData(format!(
                    "Reached end of file while reading record {} of {} at offset {}",
                    i + 1,
                    header.total_count,
                    record_start_offset
                )));
            }

            let metadata = match self.read_record_metadata(reader) {
                Ok(metadata) => metadata,
                Err(e) => {
                    tracing::warn!(
                        "Failed to read record {} at offset {}: {e}",
                        i + 1,
                        record_start_offset,
                    );

                    // NOTE(review): after a partial read the stream is no
                    // longer record-aligned, so later iterations may
                    // misparse; consider resyncing to a known boundary
                    // before continuing.
                    continue;
                }
            };

            let record_end_offset = reader.stream_position()?;

            // Record sizes are stored as u16; anything larger is treated as
            // corruption and skipped (with a warning).
            let record_size_u64 = record_end_offset - record_start_offset;
            if record_size_u64 > u16::MAX as u64 {
                tracing::warn!(
                    "Record {} has unusually large size: {} bytes, skipping",
                    i + 1,
                    record_size_u64
                );
                continue;
            }

            let record_size = record_size_u64 as u16;

            if let Err(e) = allocation_index.add_record(record_start_offset, record_size) {
                tracing::warn!("Failed to add record {} to index: {}", i + 1, e);
                continue;
            }

            if let Some(ref mut builder) = quick_filter_builder {
                builder.add_record(i as usize, &metadata);
            }

            record_metadata.push(metadata);
        }

        if let Some(builder) = quick_filter_builder {
            allocation_index.quick_filter_data = Some(builder.build());
        }

        tracing::info!(
            "Successfully indexed {} allocation records",
            record_metadata.len()
        );

        Ok(allocation_index)
    }

    /// Parses the fixed prefix of one allocation record — a type byte, a
    /// little-endian u32 body length, then ptr/size/timestamp as u64s — and
    /// skips the record's variable-length remainder so the reader ends up at
    /// the start of the next record.
    fn read_record_metadata(
        &self,
        reader: &mut BufReader<File>,
    ) -> Result<RecordMetadata, BinaryExportError> {
        // Captured only for error messages; falls back to 0 if unavailable.
        let current_position = reader.stream_position().unwrap_or(0);

        let mut type_byte = [0u8; 1];
        reader.read_exact(&mut type_byte).map_err(|e| {
            BinaryExportError::CorruptedData(format!(
                "Failed to read record type at position {current_position}: {e:?}",
            ))
        })?;

        if type_byte[0] != ALLOCATION_RECORD_TYPE {
            return Err(BinaryExportError::CorruptedData(format!(
                "Invalid record type: {} at position {current_position}",
                type_byte[0]
            )));
        }

        let mut length_bytes = [0u8; 4];
        reader.read_exact(&mut length_bytes).map_err(|e| {
            BinaryExportError::CorruptedData(format!(
                "Failed to read record length at position {}: {e:?}",
                current_position + 1,
            ))
        })?;
        let record_length = u32::from_le_bytes(length_bytes);

        // Sanity bounds: zero-length or >1 MiB records are treated as
        // corruption.
        if record_length == 0 || record_length > 1024 * 1024 {
            return Err(BinaryExportError::CorruptedData(format!(
                "Invalid record length: {record_length} at position {current_position}",
            )));
        }

        let ptr = primitives::read_u64(reader).map_err(|e| {
            BinaryExportError::CorruptedData(format!(
                "Failed to read ptr at position {}: {e:?}",
                current_position + 5,
            ))
        })? as usize;

        let size = primitives::read_u64(reader).map_err(|e| {
            BinaryExportError::CorruptedData(format!(
                "Failed to read size at position {}: {e:?}",
                current_position + 13,
            ))
        })? as usize;

        let timestamp = primitives::read_u64(reader).map_err(|e| {
            BinaryExportError::CorruptedData(format!(
                "Failed to read timestamp at position {}: {e:?}",
                current_position + 21,
            ))
        })?;

        // ptr + size + timestamp = 24 body bytes consumed so far; the skip
        // below implies `record_length` counts only the body, not the
        // type/length prefix.
        let bytes_read_from_data = 24u32;

        if record_length < bytes_read_from_data {
            return Err(BinaryExportError::CorruptedData(format!(
                "Record length {record_length} is smaller than minimum required {bytes_read_from_data} at position {current_position}",
            )));
        }

        let remaining_bytes = record_length - bytes_read_from_data;

        // Skip the variable-length tail without parsing it.
        if remaining_bytes > 0 {
            reader
                .seek(SeekFrom::Current(remaining_bytes as i64))
                .map_err(|e| {
                    BinaryExportError::CorruptedData(format!(
                        "Failed to skip {remaining_bytes} remaining bytes at position {}: {e:?}",
                        current_position + 29,
                    ))
                })?;
        }

        Ok(RecordMetadata::new(ptr, size, timestamp))
    }
}
371
372impl Default for BinaryIndexBuilder {
373 fn default() -> Self {
374 Self::new()
375 }
376}
377
/// Incrementally accumulates quick-filter summaries while allocation records
/// are scanned: per-batch (min, max) ranges for ptr/size/timestamp, plus the
/// thread ids and type names fed into bloom filters at build time.
struct QuickFilterBuilder {
    // Number of records per summarized batch.
    batch_size: usize,
    // Parameters for the bloom filters produced by `build`.
    bloom_filter_params: BloomFilterParams,
    // Records of the batch currently being accumulated (flushed when full).
    current_batch: Vec<RecordMetadata>,
    // One (min, max) ptr pair per completed batch.
    ptr_ranges: Vec<(usize, usize)>,
    // One (min, max) size pair per completed batch.
    size_ranges: Vec<(usize, usize)>,
    // One (min, max) timestamp pair per completed batch.
    timestamp_ranges: Vec<(u64, u64)>,
    // All thread ids seen, for the thread bloom filter.
    thread_ids: Vec<String>,
    // All type names seen, for the type bloom filter.
    type_names: Vec<String>,
}
389
390impl QuickFilterBuilder {
391 fn new(batch_size: usize, bloom_filter_params: BloomFilterParams) -> Self {
392 Self {
393 batch_size,
394 bloom_filter_params,
395 current_batch: Vec::with_capacity(batch_size),
396 ptr_ranges: Vec::new(),
397 size_ranges: Vec::new(),
398 timestamp_ranges: Vec::new(),
399 thread_ids: Vec::new(),
400 type_names: Vec::new(),
401 }
402 }
403
404 fn add_record(&mut self, _record_index: usize, metadata: &RecordMetadata) {
405 self.current_batch.push(metadata.clone());
406
407 if let Some(ref thread_id) = metadata.thread_id {
409 self.thread_ids.push(thread_id.clone());
410 }
411 if let Some(ref type_name) = metadata.type_name {
412 self.type_names.push(type_name.clone());
413 }
414
415 if self.current_batch.len() >= self.batch_size {
417 self.process_batch();
418 }
419 }
420
421 fn process_batch(&mut self) {
422 if self.current_batch.is_empty() {
423 return;
424 }
425
426 let ptr_min = self.current_batch.iter().map(|r| r.ptr).min().unwrap_or(0);
428 let ptr_max = self.current_batch.iter().map(|r| r.ptr).max().unwrap_or(0);
429 self.ptr_ranges.push((ptr_min, ptr_max));
430
431 let size_min = self.current_batch.iter().map(|r| r.size).min().unwrap_or(0);
432 let size_max = self.current_batch.iter().map(|r| r.size).max().unwrap_or(0);
433 self.size_ranges.push((size_min, size_max));
434
435 let timestamp_min = self
436 .current_batch
437 .iter()
438 .map(|r| r.timestamp)
439 .min()
440 .unwrap_or(0);
441 let timestamp_max = self
442 .current_batch
443 .iter()
444 .map(|r| r.timestamp)
445 .max()
446 .unwrap_or(0);
447 self.timestamp_ranges.push((timestamp_min, timestamp_max));
448
449 self.current_batch.clear();
450 }
451
452 fn build(mut self) -> QuickFilterData {
453 self.process_batch();
455
456 let thread_bloom_filter = self.build_simple_bloom_filter(&self.thread_ids);
458 let type_bloom_filter = self.build_simple_bloom_filter(&self.type_names);
459
460 QuickFilterData {
461 batch_size: self.batch_size,
462 ptr_ranges: self.ptr_ranges,
463 size_ranges: self.size_ranges,
464 timestamp_ranges: self.timestamp_ranges,
465 thread_bloom_filter,
466 type_bloom_filter,
467 bloom_filter_params: self.bloom_filter_params,
468 }
469 }
470
471 fn build_simple_bloom_filter(&self, strings: &[String]) -> Vec<u8> {
473 let filter_size = (self.bloom_filter_params.filter_size_bits / 8) as usize;
476 let mut filter = vec![0u8; filter_size];
477
478 for string in strings {
479 let mut hasher = DefaultHasher::new();
480 string.hash(&mut hasher);
481 let hash = hasher.finish();
482
483 for i in 0..self.bloom_filter_params.hash_functions {
485 let bit_index = ((hash.wrapping_add(i as u64)) % (filter_size * 8) as u64) as usize;
486 let byte_index = bit_index / 8;
487 let bit_offset = bit_index % 8;
488
489 if byte_index < filter.len() {
490 filter[byte_index] |= 1 << bit_offset;
491 }
492 }
493 }
494
495 filter
496 }
497}
498
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::types::AllocationInfo;
    use crate::export::binary::writer::BinaryWriter;
    use tempfile::NamedTempFile;

    /// Builds a minimal `AllocationInfo` fixture; only the fields the index
    /// builder reads are populated with meaningful values.
    fn create_test_allocation() -> AllocationInfo {
        AllocationInfo {
            ptr: 0x1000,
            size: 1024,
            var_name: Some("test_var".to_string()),
            type_name: Some("i32".to_string()),
            scope_name: None,
            timestamp_alloc: 1234567890,
            timestamp_dealloc: None,
            thread_id: "main".to_string(),
            borrow_count: 0,
            stack_trace: None,
            is_leaked: false,
            lifetime_ms: None,
            borrow_info: None,
            clone_info: None,
            ownership_history_available: false,
            smart_pointer_info: None,
            memory_layout: None,
            generic_info: None,
            dynamic_type_info: None,
            runtime_state: None,
            stack_allocation: None,
            temporary_object: None,
            fragmentation_analysis: None,
            generic_instantiation: None,
            type_relationships: None,
            type_usage: None,
            function_call_tracking: None,
            lifecycle_tracking: None,
            access_tracking: None,
            drop_chain_analysis: None,
        }
    }

    #[test]
    fn test_index_builder_creation() {
        let builder = BinaryIndexBuilder::new();
        assert_eq!(builder.quick_filter_threshold, 1000);
        assert_eq!(builder.quick_filter_batch_size, 1000);
    }

    #[test]
    fn test_index_builder_configuration() {
        let builder = BinaryIndexBuilder::new()
            .with_quick_filter_threshold(500)
            .with_quick_filter_batch_size(200);

        assert_eq!(builder.quick_filter_threshold, 500);
        assert_eq!(builder.quick_filter_batch_size, 200);
    }

    #[test]
    fn test_build_index_from_binary_file() {
        let temp_file = NamedTempFile::new().expect("Failed to create temp file");
        let test_allocations = vec![create_test_allocation(), {
            let mut alloc = create_test_allocation();
            alloc.ptr = 0x2000;
            alloc.size = 2048;
            alloc.timestamp_alloc = 1234567891;
            alloc
        }];

        // Write a two-record binary file; the scope drops the writer so the
        // file is fully flushed before indexing.
        {
            let mut writer =
                BinaryWriter::new(temp_file.path()).expect("Failed to create binary writer");
            writer
                .write_header(test_allocations.len() as u32)
                .expect("Failed to write header");
            for alloc in &test_allocations {
                writer
                    .write_allocation(alloc)
                    .expect("Failed to write allocation");
            }
            writer.finish().expect("Failed to finish writing");
        }

        let builder = BinaryIndexBuilder::new();
        let index = builder
            .build_index(temp_file.path())
            .expect("Failed to build index");

        assert_eq!(index.record_count(), 2);
        assert_eq!(index.header.total_count, 2);
        assert!(index.file_size > 0);
        assert!(index.file_hash != 0);

        // Both records are addressable; out-of-range lookups return None.
        assert!(index.get_record_offset(0).is_some());
        assert!(index.get_record_offset(1).is_some());
        assert!(index.get_record_offset(2).is_none());

        assert!(index.get_record_size(0).is_some());
        assert!(index.get_record_size(1).is_some());
        assert!(index.get_record_size(2).is_none());
    }

    #[test]
    fn test_file_hash_computation() {
        let temp_file = NamedTempFile::new().expect("Failed to create temp file");

        std::fs::write(temp_file.path(), b"test data for hashing")
            .expect("Failed to write test data");

        let builder = BinaryIndexBuilder::new();
        let hash1 = builder
            .compute_file_hash(temp_file.path())
            .expect("Failed to compute file hash");
        let hash2 = builder
            .compute_file_hash(temp_file.path())
            .expect("Failed to compute file hash");

        // Hashing the same unchanged file twice must be deterministic.
        assert_eq!(hash1, hash2);

        // Changing the content (same length class, different bytes) must
        // change the fingerprint.
        std::fs::write(temp_file.path(), b"different test data")
            .expect("Failed to write modified test data");
        let hash3 = builder
            .compute_file_hash(temp_file.path())
            .expect("Failed to compute file hash");
        assert_ne!(hash1, hash3);
    }

    #[test]
    fn test_quick_filter_builder() {
        // batch_size = 2, so both records land in a single flushed batch.
        let mut builder = QuickFilterBuilder::new(2, BloomFilterParams::default());

        let metadata1 = RecordMetadata::new(0x1000, 1024, 1000)
            .with_thread_id("main".to_string())
            .with_type_name("Vec<u8>".to_string());

        let metadata2 = RecordMetadata::new(0x2000, 2048, 2000)
            .with_thread_id("worker".to_string())
            .with_type_name("String".to_string());

        builder.add_record(0, &metadata1);
        builder.add_record(1, &metadata2);

        let quick_filter = builder.build();

        assert_eq!(quick_filter.batch_size, 2);
        assert_eq!(quick_filter.ptr_ranges.len(), 1);
        assert_eq!(quick_filter.size_ranges.len(), 1);
        assert_eq!(quick_filter.timestamp_ranges.len(), 1);

        assert_eq!(quick_filter.ptr_ranges[0], (0x1000, 0x2000));
        assert_eq!(quick_filter.size_ranges[0], (1024, 2048));
        assert_eq!(quick_filter.timestamp_ranges[0], (1000, 2000));

        assert!(!quick_filter.thread_bloom_filter.is_empty());
        assert!(!quick_filter.type_bloom_filter.is_empty());
    }
}