//! Index structures for `.memscope` binary export files, enabling direct
//! seeks to individual allocation records without re-parsing the file.

use crate::export::binary::error::BinaryExportError;
use crate::export::binary::format::FileHeader;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
use std::time::SystemTime;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryIndex {
    /// Version of the index format that produced this index.
    pub version: u32,

    /// Hash of the indexed file, used to detect stale indexes.
    pub file_hash: u64,

    /// Path to the indexed file.
    pub file_path: PathBuf,

    /// Parsed file header of the indexed file.
    pub header: FileHeader,

    /// Location of the string table within the file.
    pub string_table: StringTableIndex,

    /// Compact offset/size index over allocation records.
    pub allocations: CompactAllocationIndex,

    /// Optional index over advanced metrics sections.
    pub advanced_metrics: Option<AdvancedMetricsIndex>,

    /// When this index was created.
    pub created_at: SystemTime,

    /// Size of the indexed file in bytes.
    pub file_size: u64,
}

impl BinaryIndex {
    /// Current version of the index format.
    pub const INDEX_VERSION: u32 = 1;

    /// Creates an empty index for the given file.
    pub fn new(file_path: PathBuf, file_hash: u64, file_size: u64, header: FileHeader) -> Self {
        Self {
            version: Self::INDEX_VERSION,
            file_hash,
            file_path,
            header,
            string_table: StringTableIndex::default(),
            allocations: CompactAllocationIndex::default(),
            advanced_metrics: None,
            created_at: SystemTime::now(),
            file_size,
        }
    }

    /// Returns `true` if this index still matches the given file path and hash.
    pub fn is_valid_for_file(&self, file_path: &std::path::Path, file_hash: u64) -> bool {
        self.file_path == file_path && self.file_hash == file_hash
    }

    /// Number of allocation records covered by this index.
    pub fn record_count(&self) -> u32 {
        self.allocations.count
    }

    /// Absolute byte offset of the record at `record_index`, or `None` if
    /// the index is out of range.
    pub fn get_record_offset(&self, record_index: usize) -> Option<u64> {
        if record_index >= self.allocations.count as usize {
            return None;
        }

        let relative_offset = self.allocations.relative_offsets.get(record_index)?;
        Some(self.allocations.records_start_offset + *relative_offset as u64)
    }

    /// Size in bytes of the record at `record_index`, or `None` if out of range.
    pub fn get_record_size(&self, record_index: usize) -> Option<u16> {
        self.allocations.record_sizes.get(record_index).copied()
    }

    /// Returns `true` if quick-filter metadata is available for batch skipping.
    pub fn has_quick_filter_data(&self) -> bool {
        self.allocations.quick_filter_data.is_some()
    }
}

/// Location and layout of the string table inside the binary file.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct StringTableIndex {
    /// Byte offset of the string table within the file.
    pub offset: u64,

    /// Size of the string table in bytes.
    pub size: u64,

    /// Number of strings in the table.
    pub string_count: u32,

    /// Whether the string table is stored compressed.
    pub uses_compression: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct CompactAllocationIndex {
    /// Number of indexed allocation records.
    pub count: u32,

    /// Absolute byte offset where the record section begins.
    pub records_start_offset: u64,

    /// Per-record offsets relative to `records_start_offset`.
    pub relative_offsets: Vec<u32>,

    /// Per-record sizes in bytes.
    pub record_sizes: Vec<u16>,

    /// Optional pre-filter metadata for batch skipping.
    pub quick_filter_data: Option<QuickFilterData>,
}

impl CompactAllocationIndex {
    /// Creates an empty index whose records begin at `records_start_offset`.
    pub fn new(records_start_offset: u64) -> Self {
        Self {
            count: 0,
            records_start_offset,
            relative_offsets: Vec::new(),
            record_sizes: Vec::new(),
            quick_filter_data: None,
        }
    }

    /// Registers a record at `absolute_offset` with the given size.
    ///
    /// Fails if the offset lies before `records_start_offset` or is too far
    /// past it to fit in a `u32` relative offset.
    pub fn add_record(&mut self, absolute_offset: u64, size: u16) -> Result<(), BinaryExportError> {
        if absolute_offset < self.records_start_offset {
            return Err(BinaryExportError::CorruptedData(
                "Record offset is before records start".to_string(),
            ));
        }

        let relative_offset = absolute_offset - self.records_start_offset;
        if relative_offset > u32::MAX as u64 {
            return Err(BinaryExportError::CorruptedData(
                "Record offset too large for relative addressing".to_string(),
            ));
        }

        self.relative_offsets.push(relative_offset as u32);
        self.record_sizes.push(size);
        self.count += 1;

        Ok(())
    }

    /// Reserves capacity for `capacity` additional records in both vectors.
    pub fn reserve(&mut self, capacity: usize) {
        self.relative_offsets.reserve(capacity);
        self.record_sizes.reserve(capacity);
    }

    /// Approximate memory used by this index, in bytes.
    pub fn memory_usage(&self) -> usize {
        std::mem::size_of::<Self>()
            + self.relative_offsets.capacity() * std::mem::size_of::<u32>()
            + self.record_sizes.capacity() * std::mem::size_of::<u16>()
            + self
                .quick_filter_data
                .as_ref()
                .map_or(0, |qfd| qfd.memory_usage())
    }
}

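/// Pre-filter metadata kept per batch of `batch_size` records: (min, max)
/// ranges for pointers, sizes, and timestamps, plus Bloom filters over
/// thread IDs and type names. All checks are conservative: `true` means the
/// batch *might* contain a match, `false` means it definitely does not.
///
/// Sketch of range-based batch skipping (values hypothetical):
///
/// ```ignore
/// let mut qfd = QuickFilterData::new(1000);
/// qfd.ptr_ranges.push((0x1000, 0x2000)); // batch 0's pointer range
/// if !qfd.ptr_might_be_in_batch(0, 0x2500) {
///     // batch 0 cannot contain pointer 0x2500; skip its 1000 records
/// }
/// ```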
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QuickFilterData {
    /// Number of records covered by each batch.
    pub batch_size: usize,

    /// Per-batch (min, max) pointer value ranges.
    pub ptr_ranges: Vec<(usize, usize)>,

    /// Per-batch (min, max) allocation size ranges.
    pub size_ranges: Vec<(usize, usize)>,

    /// Per-batch (min, max) timestamp ranges.
    pub timestamp_ranges: Vec<(u64, u64)>,

    /// Bloom filter over thread IDs.
    pub thread_bloom_filter: Vec<u8>,

    /// Bloom filter over type names.
    pub type_bloom_filter: Vec<u8>,

    /// Parameters used to build the Bloom filters.
    pub bloom_filter_params: BloomFilterParams,
}

impl QuickFilterData {
    /// Creates empty filter data for batches of `batch_size` records.
    pub fn new(batch_size: usize) -> Self {
        Self {
            batch_size,
            ptr_ranges: Vec::new(),
            size_ranges: Vec::new(),
            timestamp_ranges: Vec::new(),
            thread_bloom_filter: Vec::new(),
            type_bloom_filter: Vec::new(),
            bloom_filter_params: BloomFilterParams::default(),
        }
    }

    /// Approximate memory used by this filter data, in bytes.
    pub fn memory_usage(&self) -> usize {
        std::mem::size_of::<Self>()
            + self.ptr_ranges.capacity() * std::mem::size_of::<(usize, usize)>()
            + self.size_ranges.capacity() * std::mem::size_of::<(usize, usize)>()
            + self.timestamp_ranges.capacity() * std::mem::size_of::<(u64, u64)>()
            + self.thread_bloom_filter.capacity()
            + self.type_bloom_filter.capacity()
    }

    /// Returns `true` if `ptr` falls within the pointer range recorded for
    /// `batch_index`; `false` means the batch definitely does not contain it.
    pub fn ptr_might_be_in_batch(&self, batch_index: usize, ptr: usize) -> bool {
        if let Some(&(min, max)) = self.ptr_ranges.get(batch_index) {
            ptr >= min && ptr <= max
        } else {
            false
        }
    }

    /// Returns `true` if `size` falls within the size range recorded for `batch_index`.
    pub fn size_might_be_in_batch(&self, batch_index: usize, size: usize) -> bool {
        if let Some(&(min, max)) = self.size_ranges.get(batch_index) {
            size >= min && size <= max
        } else {
            false
        }
    }

    /// Returns `true` if `timestamp` falls within the timestamp range recorded for `batch_index`.
    pub fn timestamp_might_be_in_batch(&self, batch_index: usize, timestamp: u64) -> bool {
        if let Some(&(min, max)) = self.timestamp_ranges.get(batch_index) {
            timestamp >= min && timestamp <= max
        } else {
            false
        }
    }
}

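/// Sizing parameters for the thread and type Bloom filters.
///
/// For reference, the standard estimate of the false positive rate for a
/// Bloom filter with `m` bits, `n` elements, and `k` hash functions is
/// `(1 - e^(-k*n/m))^k`; `false_positive_rate` stores the configured
/// target, which need not equal what this formula yields:
///
/// ```ignore
/// let p = BloomFilterParams::default();
/// let (m, n, k) = (
///     p.filter_size_bits as f64,
///     p.expected_elements as f64,
///     p.hash_functions as f64,
/// );
/// let estimated_fpr = (1.0 - (-k * n / m).exp()).powf(k);
/// // ≈ 0.029 for the defaults (m = 8192, n = 1000, k = 3)
/// ```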
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BloomFilterParams {
    /// Number of hash functions applied per element.
    pub hash_functions: u32,

    /// Filter size in bits.
    pub filter_size_bits: u32,

    /// Number of elements the filter is sized for.
    pub expected_elements: u32,

    /// Target false positive rate.
    pub false_positive_rate: f64,
}

impl Default for BloomFilterParams {
    fn default() -> Self {
        Self {
            hash_functions: 3,
            filter_size_bits: 8192, // 1 KiB filter
            expected_elements: 1000,
            false_positive_rate: 0.01, // 1% target
        }
    }
}

/// Index over the advanced metrics section of the file, if present.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AdvancedMetricsIndex {
    /// Absolute byte offset of the metrics section.
    pub offset: u64,

    /// Total size of the metrics section in bytes.
    pub size: u32,

    /// Bitmap indicating which metric kinds are present.
    pub metrics_bitmap: u32,

    /// Per-metric section locations, keyed by metric name.
    pub metric_sections: HashMap<String, MetricSectionIndex>,
}

/// Location of one metric section inside the advanced metrics block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MetricSectionIndex {
    /// Offset relative to the start of the advanced metrics section.
    pub relative_offset: u32,

    /// Size of this section in bytes.
    pub size: u32,

    /// Number of entries in this section.
    pub entry_count: u32,
}

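/// Per-record metadata (pointer, size, timestamp, optional thread ID and
/// type name); the fields mirror the dimensions indexed by
/// `QuickFilterData`.
///
/// Built with a small builder-style API:
///
/// ```ignore
/// let meta = RecordMetadata::new(0x1000, 1024, 1_234_567_890)
///     .with_thread_id("main".to_string())
///     .with_type_name("Vec<u8>".to_string());
/// ```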
#[derive(Debug, Clone)]
pub struct RecordMetadata {
    /// Pointer value of the allocation.
    pub ptr: usize,

    /// Size of the allocation in bytes.
    pub size: usize,

    /// Timestamp of the allocation event.
    pub timestamp: u64,

    /// Thread that performed the allocation, if known.
    pub thread_id: Option<String>,

    /// Type name of the allocated value, if known.
    pub type_name: Option<String>,
}

impl RecordMetadata {
    /// Creates metadata with the required fields; thread and type are unset.
    pub fn new(ptr: usize, size: usize, timestamp: u64) -> Self {
        Self {
            ptr,
            size,
            timestamp,
            thread_id: None,
            type_name: None,
        }
    }

    /// Sets the thread ID (builder style).
    pub fn with_thread_id(mut self, thread_id: String) -> Self {
        self.thread_id = Some(thread_id);
        self
    }

    /// Sets the type name (builder style).
    pub fn with_type_name(mut self, type_name: String) -> Self {
        self.type_name = Some(type_name);
        self
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn test_binary_index_creation() {
        let file_path = PathBuf::from("test.memscope");
        let file_hash = 0x123456789ABCDEF0;
        let file_size = 1024;
        let header = FileHeader::new_legacy(100);

        let index = BinaryIndex::new(file_path.clone(), file_hash, file_size, header.clone());

        assert_eq!(index.version, BinaryIndex::INDEX_VERSION);
        assert_eq!(index.file_hash, file_hash);
        assert_eq!(index.file_path, file_path);
        assert_eq!(index.header, header);
        assert_eq!(index.file_size, file_size);
        assert_eq!(index.record_count(), 0);
    }

    #[test]
    fn test_index_validation() {
        let file_path = PathBuf::from("test.memscope");
        let file_hash = 0x123456789ABCDEF0;
        let header = FileHeader::new_legacy(100);

        let index = BinaryIndex::new(file_path.clone(), file_hash, 1024, header);

        // Matching path and hash: valid.
        assert!(index.is_valid_for_file(&file_path, file_hash));

        // Different hash: stale index.
        assert!(!index.is_valid_for_file(&file_path, 0xDEADBEEF));

        // Different path: wrong file.
        assert!(!index.is_valid_for_file(Path::new("other.memscope"), file_hash));
    }

    #[test]
    fn test_compact_allocation_index() {
        let mut index = CompactAllocationIndex::new(1000);

        assert!(index.add_record(1000, 100).is_ok());
        assert!(index.add_record(1200, 200).is_ok());
        assert!(index.add_record(1500, 150).is_ok());

        assert_eq!(index.count, 3);
        assert_eq!(index.relative_offsets, vec![0, 200, 500]);
        assert_eq!(index.record_sizes, vec![100, 200, 150]);

        // Offsets before records_start_offset are rejected.
        assert!(index.add_record(500, 100).is_err());
    }

    #[test]
    fn test_record_offset_calculation() {
        let file_path = PathBuf::from("test.memscope");
        let header = FileHeader::new_legacy(3);
        let mut index = BinaryIndex::new(file_path, 0x123, 1024, header);

        index.allocations.records_start_offset = 1000;
        index
            .allocations
            .add_record(1000, 100)
            .expect("Test operation failed");
        index
            .allocations
            .add_record(1200, 200)
            .expect("Test operation failed");
        index
            .allocations
            .add_record(1500, 150)
            .expect("Test operation failed");

        assert_eq!(index.get_record_offset(0), Some(1000));
        assert_eq!(index.get_record_offset(1), Some(1200));
        assert_eq!(index.get_record_offset(2), Some(1500));
        assert_eq!(index.get_record_offset(3), None); // out of range

        assert_eq!(index.get_record_size(0), Some(100));
        assert_eq!(index.get_record_size(1), Some(200));
        assert_eq!(index.get_record_size(2), Some(150));
        assert_eq!(index.get_record_size(3), None); // out of range
    }

    #[test]
    fn test_quick_filter_data() {
        let mut qfd = QuickFilterData::new(1000);

        // Two batches with distinct pointer, size, and timestamp ranges.
        qfd.ptr_ranges.push((0x1000, 0x2000));
        qfd.ptr_ranges.push((0x3000, 0x4000));
        qfd.size_ranges.push((100, 500));
        qfd.size_ranges.push((600, 1000));
        qfd.timestamp_ranges.push((1000, 2000));
        qfd.timestamp_ranges.push((3000, 4000));

        assert!(qfd.ptr_might_be_in_batch(0, 0x1500));
        assert!(!qfd.ptr_might_be_in_batch(0, 0x2500));
        assert!(qfd.ptr_might_be_in_batch(1, 0x3500));

        assert!(qfd.size_might_be_in_batch(0, 300));
        assert!(!qfd.size_might_be_in_batch(0, 550));
        assert!(qfd.size_might_be_in_batch(1, 800));

        assert!(qfd.timestamp_might_be_in_batch(0, 1500));
        assert!(!qfd.timestamp_might_be_in_batch(0, 2500));
        assert!(qfd.timestamp_might_be_in_batch(1, 3500));

        // A batch index with no recorded range never matches.
        assert!(!qfd.ptr_might_be_in_batch(2, 0x1500));
    }

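    // Sketch-level check of QuickFilterData::memory_usage: populated range
    // vectors and bloom filter bytes should be reflected in the total
    // (the sizes used here are arbitrary).
    #[test]
    fn test_quick_filter_memory_usage() {
        let empty = QuickFilterData::new(1000).memory_usage();

        let mut qfd = QuickFilterData::new(1000);
        qfd.ptr_ranges.push((0x1000, 0x2000));
        qfd.thread_bloom_filter = vec![0u8; 1024];
        qfd.type_bloom_filter = vec![0u8; 1024];

        // At least the empty baseline plus the two 1 KiB bloom filters.
        assert!(qfd.memory_usage() >= empty + 2048);
    }
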
    #[test]
    fn test_record_metadata() {
        let metadata = RecordMetadata::new(0x1000, 1024, 1234567890)
            .with_thread_id("main".to_string())
            .with_type_name("Vec<u8>".to_string());

        assert_eq!(metadata.ptr, 0x1000);
        assert_eq!(metadata.size, 1024);
        assert_eq!(metadata.timestamp, 1234567890);
        assert_eq!(metadata.thread_id, Some("main".to_string()));
        assert_eq!(metadata.type_name, Some("Vec<u8>".to_string()));
    }

    #[test]
    fn test_memory_usage_calculation() {
        let mut index = CompactAllocationIndex::new(1000);

        // Index 100 records of 100 bytes each.
        for i in 0..100 {
            index
                .add_record(1000 + i * 100, 100)
                .expect("Test operation failed");
        }

        let memory_usage = index.memory_usage();

        // At least the struct itself plus 100 u32 offsets and 100 u16 sizes.
        let expected_min = std::mem::size_of::<CompactAllocationIndex>()
            + 100 * std::mem::size_of::<u32>()
            + 100 * std::mem::size_of::<u16>();

        assert!(memory_usage >= expected_min);
    }
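
    // Sketch-level check: reserve() should pre-size both parallel vectors,
    // and has_quick_filter_data() should track the Option on the index.
    #[test]
    fn test_reserve_and_quick_filter_flag() {
        let header = FileHeader::new_legacy(10);
        let mut index = BinaryIndex::new(PathBuf::from("test.memscope"), 0x123, 1024, header);

        index.allocations.reserve(10);
        assert!(index.allocations.relative_offsets.capacity() >= 10);
        assert!(index.allocations.record_sizes.capacity() >= 10);

        assert!(!index.has_quick_filter_data());
        index.allocations.quick_filter_data = Some(QuickFilterData::new(100));
        assert!(index.has_quick_filter_data());
    }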
}