memscope_rs/export/binary/
selective_reader.rs

1//! Selective binary reader for optimized field-specific reading
2//!
3//! This module provides selective reading capabilities that allow reading only
4//! specific fields from binary allocation records, with advanced filtering
5//! and optimization features.
6
7use crate::core::types::AllocationInfo;
8use crate::export::binary::error::BinaryExportError;
9use crate::export::binary::index::BinaryIndex;
10use crate::export::binary::parser::BinaryParser;
11use serde::{Deserialize, Serialize};
12use std::collections::HashSet;
13use std::fs::File;
14use std::io::{BufReader, Seek, SeekFrom};
15use std::path::Path;
16
/// Configuration options for selective reading operations.
///
/// Bundles everything a selective read needs to know: which fields to load,
/// which records to keep, how to window (`offset` / `limit`) and order the
/// result, and whether and how to batch the work.
///
/// The `Default` implementation selects `AllocationField::all_basic_fields()`,
/// no filters, no offset or limit, no sorting (ascending order), and batch
/// processing enabled with a batch size of 1000.
#[derive(Debug, Clone)]
pub struct SelectiveReadOptions {
    /// Fields to include in the read operation.
    /// `validate` rejects an empty set.
    pub include_fields: HashSet<AllocationField>,

    /// Filters to apply during reading
    pub filters: Vec<AllocationFilter>,

    /// Maximum number of records to read (`None` for unlimited)
    pub limit: Option<usize>,

    /// Number of records to skip from the beginning (`None` skips nothing)
    pub offset: Option<usize>,

    /// Field to sort results by (`None` leaves the results unsorted)
    pub sort_by: Option<SortField>,

    /// Sort order (ascending or descending); only consulted together with `sort_by`
    pub sort_order: SortOrder,

    /// Whether to enable batch processing optimizations
    pub enable_batch_processing: bool,

    /// Batch size for processing (default: 1000).
    /// `validate` rejects a batch size of 0.
    pub batch_size: usize,
}
44
45impl Default for SelectiveReadOptions {
46    fn default() -> Self {
47        Self {
48            include_fields: AllocationField::all_basic_fields(),
49            filters: Vec::new(),
50            limit: None,
51            offset: None,
52            sort_by: None,
53            sort_order: SortOrder::Ascending,
54            enable_batch_processing: true,
55            batch_size: 1000,
56        }
57    }
58}
59
60impl SelectiveReadOptions {
61    /// Create a new SelectiveReadOptions with default settings
62    pub fn new() -> Self {
63        Self::default()
64    }
65
66    /// Set the fields to include in the read operation
67    pub fn with_fields(mut self, fields: HashSet<AllocationField>) -> Self {
68        self.include_fields = fields;
69        self
70    }
71
72    /// Add a single field to include
73    pub fn include_field(mut self, field: AllocationField) -> Self {
74        self.include_fields.insert(field);
75        self
76    }
77
78    /// Add multiple fields to include
79    pub fn include_fields(mut self, fields: &[AllocationField]) -> Self {
80        for field in fields {
81            self.include_fields.insert(*field);
82        }
83        self
84    }
85
86    /// Set filters to apply during reading
87    pub fn with_filters(mut self, filters: Vec<AllocationFilter>) -> Self {
88        self.filters = filters;
89        self
90    }
91
92    /// Add a single filter
93    pub fn add_filter(mut self, filter: AllocationFilter) -> Self {
94        self.filters.push(filter);
95        self
96    }
97
98    /// Set the maximum number of records to read
99    pub fn with_limit(mut self, limit: usize) -> Self {
100        self.limit = Some(limit);
101        self
102    }
103
104    /// Set the number of records to skip
105    pub fn with_offset(mut self, offset: usize) -> Self {
106        self.offset = Some(offset);
107        self
108    }
109
110    /// Set the field to sort by
111    pub fn sort_by(mut self, field: SortField, order: SortOrder) -> Self {
112        self.sort_by = Some(field);
113        self.sort_order = order;
114        self
115    }
116
117    /// Enable or disable batch processing
118    pub fn with_batch_processing(mut self, enabled: bool, batch_size: Option<usize>) -> Self {
119        self.enable_batch_processing = enabled;
120        if let Some(size) = batch_size {
121            self.batch_size = size;
122        }
123        self
124    }
125
126    /// Validate the configuration options
127    pub fn validate(&self) -> Result<(), BinaryExportError> {
128        if self.include_fields.is_empty() {
129            return Err(BinaryExportError::CorruptedData(
130                "At least one field must be included".to_string(),
131            ));
132        }
133
134        if self.batch_size == 0 {
135            return Err(BinaryExportError::CorruptedData(
136                "Batch size must be greater than 0".to_string(),
137            ));
138        }
139
140        // Note: offset and limit are independent - offset is how many to skip,
141        // limit is how many to return after skipping
142
143        Ok(())
144    }
145
146    /// Check if a specific field is included
147    pub fn includes_field(&self, field: &AllocationField) -> bool {
148        self.include_fields.contains(field)
149    }
150
151    /// Get the effective limit considering offset
152    pub fn effective_limit(&self) -> Option<usize> {
153        match (self.limit, self.offset) {
154            (Some(limit), Some(offset)) => Some(limit + offset),
155            (Some(limit), None) => Some(limit),
156            _ => None,
157        }
158    }
159}
160
/// Enumeration of all possible allocation fields that can be selectively read.
///
/// The variants are grouped below by availability. `is_basic_field` reports
/// the first group (`Ptr`, `Size`, `TimestampAlloc`, `ThreadId`) and
/// `requires_advanced_metrics` reports the last group (`SmartPointerInfo`
/// through `DropChainAnalysis`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AllocationField {
    // Basic fields (always available)
    Ptr,
    Size,
    TimestampAlloc,
    ThreadId,

    // Optional basic fields
    VarName,
    TypeName,
    ScopeName,
    TimestampDealloc,
    BorrowCount,
    IsLeaked,
    LifetimeMs,

    // Borrow, clone, and ownership-history extensions
    // (labelled "improve.md extensions" by the original author)
    BorrowInfo,
    CloneInfo,
    OwnershipHistoryAvailable,

    // Stack trace information
    StackTrace,

    // Advanced fields (may not be available in all files)
    SmartPointerInfo,
    MemoryLayout,
    GenericInfo,
    DynamicTypeInfo,
    RuntimeState,
    StackAllocation,
    TemporaryObject,
    FragmentationAnalysis,
    GenericInstantiation,
    TypeRelationships,
    TypeUsage,
    FunctionCallTracking,
    LifecycleTracking,
    AccessTracking,
    DropChainAnalysis,
}
204
205impl AllocationField {
206    /// Get all basic fields that are commonly needed
207    pub fn all_basic_fields() -> HashSet<Self> {
208        [
209            Self::Ptr,
210            Self::Size,
211            Self::TimestampAlloc,
212            Self::ThreadId,
213            Self::VarName,
214            Self::TypeName,
215            Self::IsLeaked,
216        ]
217        .into_iter()
218        .collect()
219    }
220
221    /// Get all available fields
222    pub fn all_fields() -> HashSet<Self> {
223        [
224            Self::Ptr,
225            Self::Size,
226            Self::TimestampAlloc,
227            Self::ThreadId,
228            Self::VarName,
229            Self::TypeName,
230            Self::ScopeName,
231            Self::TimestampDealloc,
232            Self::BorrowCount,
233            Self::IsLeaked,
234            Self::LifetimeMs,
235            // improve.md extensions
236            Self::BorrowInfo,
237            Self::CloneInfo,
238            Self::OwnershipHistoryAvailable,
239            Self::StackTrace,
240            Self::SmartPointerInfo,
241            Self::MemoryLayout,
242            Self::GenericInfo,
243            Self::DynamicTypeInfo,
244            Self::RuntimeState,
245            Self::StackAllocation,
246            Self::TemporaryObject,
247            Self::FragmentationAnalysis,
248            Self::GenericInstantiation,
249            Self::TypeRelationships,
250            Self::TypeUsage,
251            Self::FunctionCallTracking,
252            Self::LifecycleTracking,
253            Self::AccessTracking,
254            Self::DropChainAnalysis,
255        ]
256        .into_iter()
257        .collect()
258    }
259
260    /// Get fields needed for memory analysis
261    pub fn memory_analysis_fields() -> HashSet<Self> {
262        [
263            Self::Ptr,
264            Self::Size,
265            Self::VarName,
266            Self::TypeName,
267            Self::ThreadId,
268            Self::TimestampAlloc,
269            Self::IsLeaked,
270            Self::BorrowCount,
271            // improve.md extensions for memory analysis
272            Self::LifetimeMs,
273            Self::BorrowInfo,
274            Self::CloneInfo,
275            Self::OwnershipHistoryAvailable,
276        ]
277        .into_iter()
278        .collect()
279    }
280
281    /// Get fields needed for lifetime analysis
282    pub fn lifetime_analysis_fields() -> HashSet<Self> {
283        [
284            Self::Ptr,
285            Self::VarName,
286            Self::TimestampAlloc,
287            Self::TimestampDealloc,
288            Self::LifetimeMs,
289            Self::ScopeName,
290        ]
291        .into_iter()
292        .collect()
293    }
294
295    /// Get fields needed for performance analysis
296    pub fn performance_analysis_fields() -> HashSet<Self> {
297        [
298            Self::Ptr,
299            Self::Size,
300            Self::TimestampAlloc,
301            Self::ThreadId,
302            Self::BorrowCount,
303            Self::FragmentationAnalysis,
304        ]
305        .into_iter()
306        .collect()
307    }
308
309    /// Get fields needed for complex types analysis
310    pub fn complex_types_fields() -> HashSet<Self> {
311        [
312            Self::Ptr,
313            Self::Size,
314            Self::VarName,
315            Self::TypeName,
316            Self::SmartPointerInfo,
317            Self::MemoryLayout,
318            Self::GenericInfo,
319            Self::TypeRelationships,
320        ]
321        .into_iter()
322        .collect()
323    }
324
325    /// Get fields needed for unsafe FFI analysis
326    pub fn unsafe_ffi_fields() -> HashSet<Self> {
327        [
328            Self::Ptr,
329            Self::VarName,
330            Self::TypeName,
331            Self::ThreadId,
332            Self::StackTrace,
333            Self::RuntimeState,
334        ]
335        .into_iter()
336        .collect()
337    }
338
339    /// Check if this field is always available in binary files
340    pub fn is_basic_field(&self) -> bool {
341        matches!(
342            self,
343            Self::Ptr | Self::Size | Self::TimestampAlloc | Self::ThreadId
344        )
345    }
346
347    /// Check if this field requires advanced metrics to be enabled
348    pub fn requires_advanced_metrics(&self) -> bool {
349        matches!(
350            self,
351            Self::SmartPointerInfo
352                | Self::MemoryLayout
353                | Self::GenericInfo
354                | Self::DynamicTypeInfo
355                | Self::RuntimeState
356                | Self::StackAllocation
357                | Self::TemporaryObject
358                | Self::FragmentationAnalysis
359                | Self::GenericInstantiation
360                | Self::TypeRelationships
361                | Self::TypeUsage
362                | Self::FunctionCallTracking
363                | Self::LifecycleTracking
364                | Self::AccessTracking
365                | Self::DropChainAnalysis
366        )
367    }
368}
369
/// Filter conditions that can be applied during selective reading.
///
/// A single record is tested with `AllocationFilter::matches`. Range variants
/// carry `(min, max)` and treat both bounds as inclusive.
#[derive(Debug, Clone)]
pub enum AllocationFilter {
    /// Filter by pointer value range: `(min, max)`, both inclusive
    PtrRange(usize, usize),

    /// Filter by allocation size range: `(min, max)`, both inclusive
    SizeRange(usize, usize),

    /// Filter by allocation timestamp range: `(min, max)`, both inclusive
    TimestampRange(u64, u64),

    /// Filter by exact thread ID match
    ThreadEquals(String),

    /// Filter by thread ID pattern (substring match)
    ThreadContains(String),

    /// Filter by exact type name match; records without a type name do not match
    TypeEquals(String),

    /// Filter by type name pattern (substring match); records without a type
    /// name do not match
    TypeContains(String),

    /// Filter by variable name pattern (substring match); records without a
    /// variable name do not match
    VarNameContains(String),

    /// Filter by scope name pattern (substring match); records without a
    /// scope name do not match
    ScopeNameContains(String),

    /// Keep only records that have stack trace information
    HasStackTrace,

    /// Keep only records that don't have stack trace information
    NoStackTrace,

    /// Keep leaked allocations only
    LeakedOnly,

    /// Keep non-leaked allocations only
    NotLeaked,

    /// Filter by minimum borrow count (inclusive)
    MinBorrowCount(usize),

    /// Filter by maximum borrow count (inclusive)
    MaxBorrowCount(usize),

    /// Filter by lifetime range in milliseconds: `(min, max)`, both inclusive;
    /// records without a recorded lifetime do not match
    LifetimeRange(u64, u64),
}
421
422impl AllocationFilter {
423    /// Check if this filter can be applied using index pre-filtering
424    pub fn supports_index_prefiltering(&self) -> bool {
425        matches!(
426            self,
427            Self::PtrRange(_, _)
428                | Self::SizeRange(_, _)
429                | Self::TimestampRange(_, _)
430                | Self::ThreadEquals(_)
431                | Self::ThreadContains(_)
432                | Self::TypeEquals(_)
433                | Self::TypeContains(_)
434        )
435    }
436
437    /// Apply this filter to an allocation record
438    pub fn matches(&self, allocation: &AllocationInfo) -> bool {
439        match self {
440            Self::PtrRange(min, max) => allocation.ptr >= *min && allocation.ptr <= *max,
441            Self::SizeRange(min, max) => allocation.size >= *min && allocation.size <= *max,
442            Self::TimestampRange(min, max) => {
443                allocation.timestamp_alloc >= *min && allocation.timestamp_alloc <= *max
444            }
445            Self::ThreadEquals(thread) => allocation.thread_id == *thread,
446            Self::ThreadContains(pattern) => allocation.thread_id.contains(pattern),
447            Self::TypeEquals(type_name) => allocation.type_name.as_ref() == Some(type_name),
448            Self::TypeContains(pattern) => allocation
449                .type_name
450                .as_ref()
451                .is_some_and(|t| t.contains(pattern)),
452            Self::VarNameContains(pattern) => allocation
453                .var_name
454                .as_ref()
455                .is_some_and(|v| v.contains(pattern)),
456            Self::ScopeNameContains(pattern) => allocation
457                .scope_name
458                .as_ref()
459                .is_some_and(|s| s.contains(pattern)),
460            Self::HasStackTrace => allocation.stack_trace.is_some(),
461            Self::NoStackTrace => allocation.stack_trace.is_none(),
462            Self::LeakedOnly => allocation.is_leaked,
463            Self::NotLeaked => !allocation.is_leaked,
464            Self::MinBorrowCount(min) => allocation.borrow_count >= *min,
465            Self::MaxBorrowCount(max) => allocation.borrow_count <= *max,
466            Self::LifetimeRange(min, max) => allocation
467                .lifetime_ms
468                .is_some_and(|lifetime| lifetime >= *min && lifetime <= *max),
469        }
470    }
471}
472
/// Fields that can be used for sorting results.
///
/// Stored in `SelectiveReadOptions::sort_by`; the direction is chosen
/// separately through `SortOrder`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SortField {
    Ptr,
    Size,
    TimestampAlloc,
    TimestampDealloc,
    LifetimeMs,
    BorrowCount,
    ThreadId,
    TypeName,
    VarName,
}
486
/// Sort order for results.
///
/// `SelectiveReadOptions::default()` uses `Ascending`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SortOrder {
    Ascending,
    Descending,
}
493
/// Builder for creating `SelectiveReadOptions` with a fluent API.
///
/// Each builder method consumes and returns the builder; `build` validates
/// the accumulated options before handing them out.
pub struct SelectiveReadOptionsBuilder {
    /// Options accumulated so far; returned by `build` once they validate.
    options: SelectiveReadOptions,
}
498
499impl SelectiveReadOptionsBuilder {
500    /// Create a new builder with default options
501    pub fn new() -> Self {
502        Self {
503            options: SelectiveReadOptions::default(),
504        }
505    }
506
507    /// Create a builder for memory analysis
508    pub fn for_memory_analysis() -> Self {
509        Self {
510            options: SelectiveReadOptions {
511                include_fields: AllocationField::memory_analysis_fields(),
512                ..Default::default()
513            },
514        }
515    }
516
517    /// Create a builder for lifetime analysis
518    pub fn for_lifetime_analysis() -> Self {
519        Self {
520            options: SelectiveReadOptions {
521                include_fields: AllocationField::lifetime_analysis_fields(),
522                ..Default::default()
523            },
524        }
525    }
526
527    /// Create a builder for performance analysis
528    pub fn for_performance_analysis() -> Self {
529        Self {
530            options: SelectiveReadOptions {
531                include_fields: AllocationField::performance_analysis_fields(),
532                ..Default::default()
533            },
534        }
535    }
536
537    /// Create a builder for complex types analysis
538    pub fn for_complex_types_analysis() -> Self {
539        Self {
540            options: SelectiveReadOptions {
541                include_fields: AllocationField::complex_types_fields(),
542                ..Default::default()
543            },
544        }
545    }
546
547    /// Create a builder for unsafe FFI analysis
548    pub fn for_unsafe_ffi_analysis() -> Self {
549        Self {
550            options: SelectiveReadOptions {
551                include_fields: AllocationField::unsafe_ffi_fields(),
552                ..Default::default()
553            },
554        }
555    }
556
557    /// Add a field to include
558    pub fn include_field(mut self, field: AllocationField) -> Self {
559        self.options.include_fields.insert(field);
560        self
561    }
562
563    /// Add multiple fields to include
564    pub fn include_fields(mut self, fields: &[AllocationField]) -> Self {
565        for field in fields {
566            self.options.include_fields.insert(*field);
567        }
568        self
569    }
570
571    /// Set all fields to include
572    pub fn with_fields(mut self, fields: HashSet<AllocationField>) -> Self {
573        self.options.include_fields = fields;
574        self
575    }
576
577    /// Add a filter
578    pub fn filter(mut self, filter: AllocationFilter) -> Self {
579        self.options.filters.push(filter);
580        self
581    }
582
583    /// Add multiple filters
584    pub fn filters(mut self, filters: Vec<AllocationFilter>) -> Self {
585        self.options.filters.extend(filters);
586        self
587    }
588
589    /// Set the limit
590    pub fn limit(mut self, limit: usize) -> Self {
591        self.options.limit = Some(limit);
592        self
593    }
594
595    /// Set the offset
596    pub fn offset(mut self, offset: usize) -> Self {
597        self.options.offset = Some(offset);
598        self
599    }
600
601    /// Set sorting
602    pub fn sort_by(mut self, field: SortField, order: SortOrder) -> Self {
603        self.options.sort_by = Some(field);
604        self.options.sort_order = order;
605        self
606    }
607
608    /// Configure batch processing
609    pub fn batch_processing(mut self, enabled: bool, batch_size: Option<usize>) -> Self {
610        self.options.enable_batch_processing = enabled;
611        if let Some(size) = batch_size {
612            self.options.batch_size = size;
613        }
614        self
615    }
616
617    /// Build the final SelectiveReadOptions
618    pub fn build(self) -> Result<SelectiveReadOptions, BinaryExportError> {
619        self.options.validate()?;
620        Ok(self.options)
621    }
622}
623
624impl Default for SelectiveReadOptionsBuilder {
625    fn default() -> Self {
626        Self::new()
627    }
628}
629
#[cfg(test)]
mod tests {
    use super::*;

    /// Default options: non-empty field set, no filters, no window, no sort,
    /// batch size 1000.
    #[test]
    fn test_selective_read_options_creation() {
        let options = SelectiveReadOptions::new();
        assert!(!options.include_fields.is_empty());
        assert!(options.filters.is_empty());
        assert_eq!(options.limit, None);
        assert_eq!(options.offset, None);
        assert_eq!(options.sort_by, None);
        assert_eq!(options.batch_size, 1000);
    }

    /// The fluent builder records every setting it is given.
    #[test]
    fn test_selective_read_options_builder() {
        let options = SelectiveReadOptionsBuilder::new()
            .include_field(AllocationField::Ptr)
            .include_field(AllocationField::Size)
            .filter(AllocationFilter::SizeRange(100, 1000))
            .limit(50)
            .offset(10)
            .sort_by(SortField::Size, SortOrder::Descending)
            .build()
            .expect("Test operation failed");

        assert!(options.includes_field(&AllocationField::Ptr));
        assert!(options.includes_field(&AllocationField::Size));
        assert_eq!(options.filters.len(), 1);
        assert_eq!(options.limit, Some(50));
        assert_eq!(options.offset, Some(10));
        assert_eq!(options.sort_by, Some(SortField::Size));
        assert_eq!(options.sort_order, SortOrder::Descending);
    }

    /// Each field preset contains the fields its analysis depends on.
    #[test]
    fn test_allocation_field_sets() {
        let basic_fields = AllocationField::all_basic_fields();
        assert!(basic_fields.contains(&AllocationField::Ptr));
        assert!(basic_fields.contains(&AllocationField::Size));
        assert!(basic_fields.contains(&AllocationField::ThreadId));

        let memory_fields = AllocationField::memory_analysis_fields();
        assert!(memory_fields.contains(&AllocationField::Ptr));
        assert!(memory_fields.contains(&AllocationField::Size));
        assert!(memory_fields.contains(&AllocationField::IsLeaked));

        let lifetime_fields = AllocationField::lifetime_analysis_fields();
        assert!(lifetime_fields.contains(&AllocationField::TimestampAlloc));
        assert!(lifetime_fields.contains(&AllocationField::TimestampDealloc));
        assert!(lifetime_fields.contains(&AllocationField::LifetimeMs));
    }

    /// Every filter variant is checked against one fixed allocation record.
    #[test]
    fn test_allocation_filter_matching() {
        let allocation = AllocationInfo {
            ptr: 0x1000,
            size: 1024,
            var_name: Some("test_var".to_string()),
            type_name: Some("Vec<u8>".to_string()),
            scope_name: None,
            timestamp_alloc: 1234567890,
            timestamp_dealloc: None,
            thread_id: "main".to_string(),
            borrow_count: 2,
            stack_trace: Some(vec!["frame1".to_string()]),
            is_leaked: false,
            lifetime_ms: Some(1000),
            borrow_info: None,
            clone_info: None,
            ownership_history_available: false,
            smart_pointer_info: None,
            memory_layout: None,
            generic_info: None,
            dynamic_type_info: None,
            runtime_state: None,
            stack_allocation: None,
            temporary_object: None,
            fragmentation_analysis: None,
            generic_instantiation: None,
            type_relationships: None,
            type_usage: None,
            function_call_tracking: None,
            lifecycle_tracking: None,
            access_tracking: None,
            drop_chain_analysis: None,
        };

        // Test various filters
        assert!(AllocationFilter::PtrRange(0x500, 0x1500).matches(&allocation));
        assert!(!AllocationFilter::PtrRange(0x2000, 0x3000).matches(&allocation));

        assert!(AllocationFilter::SizeRange(500, 2000).matches(&allocation));
        assert!(!AllocationFilter::SizeRange(2000, 3000).matches(&allocation));

        // Range bounds are inclusive on both ends
        assert!(AllocationFilter::PtrRange(0x1000, 0x1000).matches(&allocation));
        assert!(AllocationFilter::SizeRange(1024, 1024).matches(&allocation));

        assert!(AllocationFilter::TimestampRange(1234567000, 1234568000).matches(&allocation));
        assert!(!AllocationFilter::TimestampRange(0, 1000).matches(&allocation));

        assert!(AllocationFilter::ThreadEquals("main".to_string()).matches(&allocation));
        assert!(!AllocationFilter::ThreadEquals("worker".to_string()).matches(&allocation));

        assert!(AllocationFilter::ThreadContains("mai".to_string()).matches(&allocation));
        assert!(!AllocationFilter::ThreadContains("work".to_string()).matches(&allocation));

        // Exact type match requires the full name, not just a prefix
        assert!(AllocationFilter::TypeEquals("Vec<u8>".to_string()).matches(&allocation));
        assert!(!AllocationFilter::TypeEquals("Vec".to_string()).matches(&allocation));

        assert!(AllocationFilter::TypeContains("Vec".to_string()).matches(&allocation));
        assert!(!AllocationFilter::TypeContains("HashMap".to_string()).matches(&allocation));

        assert!(AllocationFilter::VarNameContains("test".to_string()).matches(&allocation));
        assert!(!AllocationFilter::VarNameContains("other".to_string()).matches(&allocation));

        // scope_name is None, so no scope pattern can match
        assert!(!AllocationFilter::ScopeNameContains("main".to_string()).matches(&allocation));

        assert!(AllocationFilter::HasStackTrace.matches(&allocation));
        assert!(!AllocationFilter::NoStackTrace.matches(&allocation));

        assert!(!AllocationFilter::LeakedOnly.matches(&allocation));
        assert!(AllocationFilter::NotLeaked.matches(&allocation));

        assert!(AllocationFilter::MinBorrowCount(1).matches(&allocation));
        assert!(!AllocationFilter::MinBorrowCount(5).matches(&allocation));

        assert!(AllocationFilter::MaxBorrowCount(5).matches(&allocation));
        assert!(!AllocationFilter::MaxBorrowCount(1).matches(&allocation));

        assert!(AllocationFilter::LifetimeRange(500, 1500).matches(&allocation));
        assert!(!AllocationFilter::LifetimeRange(2000, 3000).matches(&allocation));
    }

    /// `validate` rejects an empty field set and a zero batch size, and
    /// accepts any offset/limit combination.
    #[test]
    fn test_options_validation() {
        // Valid options
        let valid_options = SelectiveReadOptions::new();
        assert!(valid_options.validate().is_ok());

        // Invalid: empty fields
        let invalid_options = SelectiveReadOptions {
            include_fields: HashSet::new(),
            ..Default::default()
        };
        assert!(invalid_options.validate().is_err());

        // Invalid: zero batch size
        let invalid_options = SelectiveReadOptions {
            batch_size: 0,
            ..Default::default()
        };
        assert!(invalid_options.validate().is_err());

        // Valid: offset can be greater than limit (skip 15, return 10)
        let valid_options = SelectiveReadOptions {
            limit: Some(10),
            offset: Some(15),
            ..Default::default()
        };
        assert!(valid_options.validate().is_ok());
    }

    /// `effective_limit` is limit + offset, and `None` without a limit.
    #[test]
    fn test_effective_limit_calculation() {
        let options = SelectiveReadOptions {
            limit: Some(100),
            offset: Some(50),
            ..Default::default()
        };
        assert_eq!(options.effective_limit(), Some(150));

        let options = SelectiveReadOptions {
            limit: Some(100),
            offset: None,
            ..Default::default()
        };
        assert_eq!(options.effective_limit(), Some(100));

        let options = SelectiveReadOptions {
            limit: None,
            offset: Some(50),
            ..Default::default()
        };
        assert_eq!(options.effective_limit(), None);
    }

    /// Every preset builder validates and carries its characteristic fields.
    #[test]
    fn test_specialized_builders() {
        let memory_options = SelectiveReadOptionsBuilder::for_memory_analysis()
            .build()
            .expect("Test operation failed");
        assert!(memory_options.includes_field(&AllocationField::Ptr));
        assert!(memory_options.includes_field(&AllocationField::IsLeaked));

        let lifetime_options = SelectiveReadOptionsBuilder::for_lifetime_analysis()
            .build()
            .expect("Test operation failed");
        assert!(lifetime_options.includes_field(&AllocationField::TimestampAlloc));
        assert!(lifetime_options.includes_field(&AllocationField::LifetimeMs));

        let performance_options = SelectiveReadOptionsBuilder::for_performance_analysis()
            .build()
            .expect("Test operation failed");
        assert!(performance_options.includes_field(&AllocationField::BorrowCount));
        assert!(performance_options.includes_field(&AllocationField::FragmentationAnalysis));

        let complex_options = SelectiveReadOptionsBuilder::for_complex_types_analysis()
            .build()
            .expect("Test operation failed");
        assert!(complex_options.includes_field(&AllocationField::SmartPointerInfo));
        assert!(complex_options.includes_field(&AllocationField::TypeRelationships));

        let ffi_options = SelectiveReadOptionsBuilder::for_unsafe_ffi_analysis()
            .build()
            .expect("Test operation failed");
        assert!(ffi_options.includes_field(&AllocationField::StackTrace));
        assert!(ffi_options.includes_field(&AllocationField::RuntimeState));
    }

    /// Basic vs. advanced classification of fields.
    #[test]
    fn test_field_categorization() {
        assert!(AllocationField::Ptr.is_basic_field());
        assert!(AllocationField::Size.is_basic_field());
        assert!(!AllocationField::VarName.is_basic_field());

        assert!(!AllocationField::Ptr.requires_advanced_metrics());
        assert!(AllocationField::SmartPointerInfo.requires_advanced_metrics());
        assert!(AllocationField::GenericInfo.requires_advanced_metrics());
    }

    /// Only pointer, size, timestamp, thread, and type filters can be
    /// pre-filtered through the index.
    #[test]
    fn test_filter_index_prefiltering_support() {
        assert!(AllocationFilter::PtrRange(0, 1000).supports_index_prefiltering());
        assert!(AllocationFilter::SizeRange(0, 1000).supports_index_prefiltering());
        assert!(AllocationFilter::TimestampRange(0, 1000).supports_index_prefiltering());
        assert!(AllocationFilter::ThreadEquals("main".to_string()).supports_index_prefiltering());
        assert!(AllocationFilter::ThreadContains("ma".to_string()).supports_index_prefiltering());
        assert!(AllocationFilter::TypeEquals("Vec<u8>".to_string()).supports_index_prefiltering());
        assert!(AllocationFilter::TypeContains("Vec".to_string()).supports_index_prefiltering());

        assert!(!AllocationFilter::VarNameContains("x".to_string()).supports_index_prefiltering());
        assert!(!AllocationFilter::HasStackTrace.supports_index_prefiltering());
        assert!(!AllocationFilter::LeakedOnly.supports_index_prefiltering());
        assert!(!AllocationFilter::LifetimeRange(0, 1000).supports_index_prefiltering());
    }
}
842
/// Selective binary reader that uses indexes for optimized reading.
///
/// Owns a `BinaryIndex` for the file together with a buffered handle on the
/// file itself. Construct it with `new` (builds the index) or
/// `new_with_index` (reuses an existing one).
pub struct SelectiveBinaryReader {
    /// Binary file index for fast lookups
    index: BinaryIndex,

    /// Buffered file reader
    reader: BufReader<File>,

    /// Cached allocations for batch processing; starts out empty
    allocation_cache: Vec<AllocationInfo>,

    /// Current position in the file; starts at 0.
    /// NOTE(review): `read_next_allocation` compares this against
    /// `index.record_count()`, so it appears to count records rather than
    /// bytes - confirm.
    current_position: u64,
}
857
858#[allow(dead_code)]
859impl SelectiveBinaryReader {
860    /// Create a new selective reader with an existing index
861    pub fn new_with_index<P: AsRef<Path>>(
862        file_path: P,
863        index: BinaryIndex,
864    ) -> Result<Self, BinaryExportError> {
865        let file = File::open(file_path)?;
866        let reader = BufReader::new(file);
867
868        Ok(Self {
869            index,
870            reader,
871            allocation_cache: Vec::new(),
872            current_position: 0,
873        })
874    }
875
876    /// Create a new selective reader and build index automatically
877    pub fn new<P: AsRef<Path>>(file_path: P) -> Result<Self, BinaryExportError> {
878        let index_builder = crate::export::binary::BinaryIndexBuilder::new();
879        let index = index_builder.build_index(&file_path)?;
880        Self::new_with_index(file_path, index)
881    }
882
883    /// Read allocations based on selective options
884    pub fn read_selective(
885        &mut self,
886        options: &SelectiveReadOptions,
887    ) -> Result<Vec<AllocationInfo>, BinaryExportError> {
888        // Validate options
889        options.validate()?;
890
891        // Get candidate record indices using index pre-filtering
892        let candidate_indices = self.pre_filter_with_index(&options.filters)?;
893
894        // Apply offset and limit to candidates
895        let filtered_indices = self.apply_offset_limit(&candidate_indices, options);
896
897        // Read and parse the selected records
898        let mut allocations = self.read_records_by_indices(&filtered_indices, options)?;
899
900        // Apply precise filtering to loaded records
901        allocations = self.apply_precise_filters(allocations, &options.filters)?;
902
903        // Apply sorting if requested
904        if let Some(sort_field) = options.sort_by {
905            self.sort_allocations(&mut allocations, sort_field, options.sort_order)?;
906        }
907
908        // Apply final limit after sorting
909        if let Some(limit) = options.limit {
910            allocations.truncate(limit);
911        }
912
913        Ok(allocations)
914    }
915
916    /// Read allocations in streaming mode with a callback
917    pub fn read_selective_streaming<F>(
918        &mut self,
919        options: &SelectiveReadOptions,
920        mut callback: F,
921    ) -> Result<usize, BinaryExportError>
922    where
923        F: FnMut(&AllocationInfo) -> Result<bool, BinaryExportError>, // Return false to stop
924    {
925        // Validate options
926        options.validate()?;
927
928        // Get candidate record indices using index pre-filtering
929        let candidate_indices = self.pre_filter_with_index(&options.filters)?;
930
931        // Apply offset and limit to candidates
932        let filtered_indices = self.apply_offset_limit(&candidate_indices, options);
933
934        let mut _processed_count = 0;
935        let mut returned_count = 0;
936
937        // Process records in batches for memory efficiency
938        for batch in filtered_indices.chunks(options.batch_size) {
939            let batch_allocations = self.read_records_by_indices(batch, options)?;
940
941            for allocation in batch_allocations {
942                _processed_count += 1;
943
944                // Apply precise filtering
945                if self.allocation_matches_filters(&allocation, &options.filters)? {
946                    // Apply offset
947                    if let Some(offset) = options.offset {
948                        if returned_count < offset {
949                            returned_count += 1;
950                            continue;
951                        }
952                    }
953
954                    // Apply limit
955                    if let Some(limit) = options.limit {
956                        if returned_count >= limit + options.offset.unwrap_or(0) {
957                            break;
958                        }
959                    }
960
961                    // Call the callback
962                    if !callback(&allocation)? {
963                        break;
964                    }
965
966                    returned_count += 1;
967                }
968            }
969        }
970
971        Ok(returned_count)
972    }
973
974    /// Read the next allocation record in streaming mode
975    pub fn read_next_allocation(&mut self) -> Result<Option<AllocationInfo>, BinaryExportError> {
976        // This is a simplified implementation for streaming
977        // In a full implementation, this would maintain state for sequential reading
978        if self.current_position >= self.index.record_count() as u64 {
979            return Ok(None);
980        }
981
982        let record_index = self.current_position as usize;
983        if let Some(offset) = self.index.get_record_offset(record_index) {
984            self.reader.seek(SeekFrom::Start(offset))?;
985            let allocation = self.parse_allocation_record()?;
986            self.current_position += 1;
987            Ok(Some(allocation))
988        } else {
989            Ok(None)
990        }
991    }
992
    /// Get the underlying index
    ///
    /// Returns a shared reference to the [`BinaryIndex`] held by this reader;
    /// nothing is copied or rebuilt.
    pub fn index(&self) -> &BinaryIndex {
        &self.index
    }
997
998    /// Get statistics about the binary file
999    pub fn get_stats(&self) -> SelectiveReaderStats {
1000        SelectiveReaderStats {
1001            total_records: self.index.record_count(),
1002            file_size: self.index.file_size,
1003            has_quick_filter: self.index.has_quick_filter_data(),
1004            cache_size: self.allocation_cache.len(),
1005        }
1006    }
1007
1008    // Private helper methods
1009
1010    /// Pre-filter record indices using the index
1011    fn pre_filter_with_index(
1012        &self,
1013        filters: &[AllocationFilter],
1014    ) -> Result<Vec<usize>, BinaryExportError> {
1015        let total_records = self.index.record_count() as usize;
1016        let mut candidates: Vec<usize> = (0..total_records).collect();
1017
1018        // If we have quick filter data, use it for pre-filtering
1019        if let Some(ref quick_filter) = self.index.allocations.quick_filter_data {
1020            candidates = self.apply_quick_filters(&candidates, filters, quick_filter)?;
1021        }
1022
1023        // Apply bloom filter checks for supported filters
1024        candidates = self.apply_bloom_filter_checks(&candidates, filters)?;
1025
1026        Ok(candidates)
1027    }
1028
1029    /// Apply quick filters using range data
1030    fn apply_quick_filters(
1031        &self,
1032        candidates: &[usize],
1033        filters: &[AllocationFilter],
1034        quick_filter: &crate::export::binary::index::QuickFilterData,
1035    ) -> Result<Vec<usize>, BinaryExportError> {
1036        let mut filtered_candidates = Vec::new();
1037
1038        for &candidate_index in candidates {
1039            let batch_index = candidate_index / quick_filter.batch_size;
1040            let mut matches = true;
1041
1042            // Check each filter against the batch ranges
1043            for filter in filters {
1044                match filter {
1045                    AllocationFilter::PtrRange(min, max) => {
1046                        if !quick_filter.ptr_might_be_in_batch(batch_index, *min)
1047                            && !quick_filter.ptr_might_be_in_batch(batch_index, *max)
1048                        {
1049                            matches = false;
1050                            break;
1051                        }
1052                    }
1053                    AllocationFilter::SizeRange(min, max) => {
1054                        if !quick_filter.size_might_be_in_batch(batch_index, *min)
1055                            && !quick_filter.size_might_be_in_batch(batch_index, *max)
1056                        {
1057                            matches = false;
1058                            break;
1059                        }
1060                    }
1061                    AllocationFilter::TimestampRange(min, max) => {
1062                        if !quick_filter.timestamp_might_be_in_batch(batch_index, *min)
1063                            && !quick_filter.timestamp_might_be_in_batch(batch_index, *max)
1064                        {
1065                            matches = false;
1066                            break;
1067                        }
1068                    }
1069                    _ => {} // Other filters can't be pre-filtered with ranges
1070                }
1071            }
1072
1073            if matches {
1074                filtered_candidates.push(candidate_index);
1075            }
1076        }
1077
1078        Ok(filtered_candidates)
1079    }
1080
1081    /// Apply bloom filter checks for string-based filters
1082    fn apply_bloom_filter_checks(
1083        &self,
1084        candidates: &[usize],
1085        filters: &[AllocationFilter],
1086    ) -> Result<Vec<usize>, BinaryExportError> {
1087        // This is a simplified implementation
1088        // In a full implementation, this would use the bloom filters from the index
1089        // to quickly eliminate candidates that definitely don't match string filters
1090
1091        let mut filtered_candidates = Vec::new();
1092
1093        for &candidate_index in candidates {
1094            let might_match = true;
1095
1096            // For now, we'll just pass through all candidates
1097            // In a real implementation, we would check bloom filters here
1098            for filter in filters {
1099                match filter {
1100                    AllocationFilter::ThreadEquals(_)
1101                    | AllocationFilter::ThreadContains(_)
1102                    | AllocationFilter::TypeEquals(_)
1103                    | AllocationFilter::TypeContains(_) => {
1104                        // Bloom filter checking for efficient duplicate detection
1105                        // For now, assume all candidates might match
1106                    }
1107                    _ => {}
1108                }
1109            }
1110
1111            if might_match {
1112                filtered_candidates.push(candidate_index);
1113            }
1114        }
1115
1116        Ok(filtered_candidates)
1117    }
1118
1119    /// Apply offset and limit to candidate indices
1120    fn apply_offset_limit(
1121        &self,
1122        candidates: &[usize],
1123        options: &SelectiveReadOptions,
1124    ) -> Vec<usize> {
1125        let start = options.offset.unwrap_or(0);
1126        let end = if let Some(limit) = options.limit {
1127            std::cmp::min(start + limit, candidates.len())
1128        } else {
1129            candidates.len()
1130        };
1131
1132        if start >= candidates.len() {
1133            Vec::new()
1134        } else {
1135            candidates[start..end].to_vec()
1136        }
1137    }
1138
1139    /// Read specific records by their indices
1140    fn read_records_by_indices(
1141        &mut self,
1142        indices: &[usize],
1143        options: &SelectiveReadOptions,
1144    ) -> Result<Vec<AllocationInfo>, BinaryExportError> {
1145        // Load all allocations once and cache them
1146        if self.allocation_cache.is_empty() {
1147            self.allocation_cache = BinaryParser::load_allocations(&self.index.file_path)?;
1148        }
1149
1150        let mut allocations = Vec::with_capacity(indices.len());
1151
1152        for &index in indices {
1153            if index < self.allocation_cache.len() {
1154                let allocation = &self.allocation_cache[index];
1155                // Only include requested fields
1156                let filtered_allocation = self.filter_allocation_fields(allocation, options);
1157                allocations.push(filtered_allocation);
1158            }
1159        }
1160
1161        Ok(allocations)
1162    }
1163
1164    /// Parse a single allocation record from the current position
1165    fn parse_allocation_record(&mut self) -> Result<AllocationInfo, BinaryExportError> {
1166        // For the initial implementation, we'll load all allocations once and cache them
1167        // This is not the most memory-efficient approach, but it's simple and correct
1168        if self.allocation_cache.is_empty() {
1169            self.allocation_cache = BinaryParser::load_allocations(&self.index.file_path)?;
1170        }
1171
1172        let current_index = self.current_position as usize;
1173        if current_index < self.allocation_cache.len() {
1174            Ok(self.allocation_cache[current_index].clone())
1175        } else {
1176            Err(BinaryExportError::CorruptedData(
1177                "Record index out of bounds".to_string(),
1178            ))
1179        }
1180    }
1181
1182    /// Filter allocation fields based on options
1183    fn filter_allocation_fields(
1184        &self,
1185        allocation: &AllocationInfo,
1186        options: &SelectiveReadOptions,
1187    ) -> AllocationInfo {
1188        // Create a new allocation with only the requested fields
1189        // This is a simplified implementation - in practice, we would
1190        // have a more efficient way to handle partial field loading
1191
1192        let mut filtered = allocation.clone();
1193
1194        // Clear fields that are not requested
1195        if !options.includes_field(&AllocationField::VarName) {
1196            filtered.var_name = None;
1197        }
1198        if !options.includes_field(&AllocationField::TypeName) {
1199            filtered.type_name = None;
1200        }
1201        if !options.includes_field(&AllocationField::ScopeName) {
1202            filtered.scope_name = None;
1203        }
1204        if !options.includes_field(&AllocationField::TimestampDealloc) {
1205            filtered.timestamp_dealloc = None;
1206        }
1207        if !options.includes_field(&AllocationField::LifetimeMs) {
1208            filtered.lifetime_ms = None;
1209        }
1210        // improve.md extensions
1211        if !options.includes_field(&AllocationField::BorrowInfo) {
1212            filtered.borrow_info = None;
1213        }
1214        if !options.includes_field(&AllocationField::CloneInfo) {
1215            filtered.clone_info = None;
1216        }
1217        if !options.includes_field(&AllocationField::OwnershipHistoryAvailable) {
1218            filtered.ownership_history_available = false;
1219        }
1220        if !options.includes_field(&AllocationField::StackTrace) {
1221            filtered.stack_trace = None;
1222        }
1223        if !options.includes_field(&AllocationField::SmartPointerInfo) {
1224            filtered.smart_pointer_info = None;
1225        }
1226        if !options.includes_field(&AllocationField::MemoryLayout) {
1227            filtered.memory_layout = None;
1228        }
1229        if !options.includes_field(&AllocationField::GenericInfo) {
1230            filtered.generic_info = None;
1231        }
1232        if !options.includes_field(&AllocationField::DynamicTypeInfo) {
1233            filtered.dynamic_type_info = None;
1234        }
1235        if !options.includes_field(&AllocationField::RuntimeState) {
1236            filtered.runtime_state = None;
1237        }
1238        if !options.includes_field(&AllocationField::StackAllocation) {
1239            filtered.stack_allocation = None;
1240        }
1241        if !options.includes_field(&AllocationField::TemporaryObject) {
1242            filtered.temporary_object = None;
1243        }
1244        if !options.includes_field(&AllocationField::FragmentationAnalysis) {
1245            filtered.fragmentation_analysis = None;
1246        }
1247        if !options.includes_field(&AllocationField::GenericInstantiation) {
1248            filtered.generic_instantiation = None;
1249        }
1250        if !options.includes_field(&AllocationField::TypeRelationships) {
1251            filtered.type_relationships = None;
1252        }
1253        if !options.includes_field(&AllocationField::TypeUsage) {
1254            filtered.type_usage = None;
1255        }
1256        if !options.includes_field(&AllocationField::FunctionCallTracking) {
1257            filtered.function_call_tracking = None;
1258        }
1259        if !options.includes_field(&AllocationField::LifecycleTracking) {
1260            filtered.lifecycle_tracking = None;
1261        }
1262        if !options.includes_field(&AllocationField::AccessTracking) {
1263            filtered.access_tracking = None;
1264        }
1265        if !options.includes_field(&AllocationField::DropChainAnalysis) {
1266            filtered.drop_chain_analysis = None;
1267        }
1268
1269        filtered
1270    }
1271
1272    /// Apply precise filters to loaded allocations
1273    fn apply_precise_filters(
1274        &self,
1275        allocations: Vec<AllocationInfo>,
1276        filters: &[AllocationFilter],
1277    ) -> Result<Vec<AllocationInfo>, BinaryExportError> {
1278        if filters.is_empty() {
1279            return Ok(allocations);
1280        }
1281
1282        let mut filtered = Vec::new();
1283        for allocation in allocations {
1284            if self.allocation_matches_filters(&allocation, filters)? {
1285                filtered.push(allocation);
1286            }
1287        }
1288
1289        Ok(filtered)
1290    }
1291
1292    /// Check if an allocation matches all filters
1293    fn allocation_matches_filters(
1294        &self,
1295        allocation: &AllocationInfo,
1296        filters: &[AllocationFilter],
1297    ) -> Result<bool, BinaryExportError> {
1298        for filter in filters {
1299            if !filter.matches(allocation) {
1300                return Ok(false);
1301            }
1302        }
1303        Ok(true)
1304    }
1305
1306    /// Sort allocations by the specified field and order
1307    fn sort_allocations(
1308        &self,
1309        allocations: &mut [AllocationInfo],
1310        sort_field: SortField,
1311        sort_order: SortOrder,
1312    ) -> Result<(), BinaryExportError> {
1313        match sort_field {
1314            SortField::Ptr => {
1315                allocations.sort_by_key(|a| a.ptr);
1316            }
1317            SortField::Size => {
1318                allocations.sort_by_key(|a| a.size);
1319            }
1320            SortField::TimestampAlloc => {
1321                allocations.sort_by_key(|a| a.timestamp_alloc);
1322            }
1323            SortField::TimestampDealloc => {
1324                allocations.sort_by_key(|a| a.timestamp_dealloc.unwrap_or(0));
1325            }
1326            SortField::LifetimeMs => {
1327                allocations.sort_by_key(|a| a.lifetime_ms.unwrap_or(0));
1328            }
1329            SortField::BorrowCount => {
1330                allocations.sort_by_key(|a| a.borrow_count);
1331            }
1332            SortField::ThreadId => {
1333                allocations.sort_by(|a, b| a.thread_id.cmp(&b.thread_id));
1334            }
1335            SortField::TypeName => {
1336                allocations.sort_by(|a, b| {
1337                    a.type_name
1338                        .as_deref()
1339                        .unwrap_or("")
1340                        .cmp(b.type_name.as_deref().unwrap_or(""))
1341                });
1342            }
1343            SortField::VarName => {
1344                allocations.sort_by(|a, b| {
1345                    a.var_name
1346                        .as_deref()
1347                        .unwrap_or("")
1348                        .cmp(b.var_name.as_deref().unwrap_or(""))
1349                });
1350            }
1351        }
1352
1353        if sort_order == SortOrder::Descending {
1354            allocations.reverse();
1355        }
1356
1357        Ok(())
1358    }
1359}
1360
/// Statistics about the selective reader
///
/// A plain snapshot of values taken from the reader's index and allocation
/// cache at the time it is built; it is not updated afterwards.
#[derive(Debug, Clone)]
pub struct SelectiveReaderStats {
    /// Total number of records in the file, as reported by the index
    #[allow(dead_code)]
    pub total_records: u32,

    /// Size of the binary file in bytes, as recorded in the index
    #[allow(dead_code)]
    pub file_size: u64,

    /// Whether quick filter data is available in the index
    #[allow(dead_code)]
    pub has_quick_filter: bool,

    /// Current size of the allocation cache (number of cached allocations)
    #[allow(dead_code)]
    pub cache_size: usize,
}
1380
1381// Additional tests for SelectiveBinaryReader
1382#[cfg(test)]
1383mod selective_reader_tests {
1384    use super::*;
1385    use crate::export::binary::writer::BinaryWriter;
1386    use tempfile::NamedTempFile;
1387
1388    fn create_test_binary_with_multiple_allocations() -> NamedTempFile {
1389        let temp_file = NamedTempFile::new().expect("Failed to create temp file");
1390        let test_allocations = vec![
1391            AllocationInfo {
1392                ptr: 0x1000,
1393                size: 1024,
1394                var_name: Some("var1".to_string()),
1395                type_name: Some("Vec<u8>".to_string()),
1396                scope_name: None,
1397                timestamp_alloc: 1000,
1398                timestamp_dealloc: Some(2000),
1399                thread_id: "main".to_string(),
1400                borrow_count: 1,
1401                stack_trace: Some(vec!["frame1".to_string()]),
1402                is_leaked: false,
1403                lifetime_ms: Some(1000),
1404                borrow_info: None,
1405                clone_info: None,
1406                ownership_history_available: false,
1407                smart_pointer_info: None,
1408                memory_layout: None,
1409                generic_info: None,
1410                dynamic_type_info: None,
1411                runtime_state: None,
1412                stack_allocation: None,
1413                temporary_object: None,
1414                fragmentation_analysis: None,
1415                generic_instantiation: None,
1416                type_relationships: None,
1417                type_usage: None,
1418                function_call_tracking: None,
1419                lifecycle_tracking: None,
1420                access_tracking: None,
1421                drop_chain_analysis: None,
1422            },
1423            AllocationInfo {
1424                ptr: 0x2000,
1425                size: 2048,
1426                var_name: Some("var2".to_string()),
1427                type_name: Some("String".to_string()),
1428                scope_name: None,
1429                timestamp_alloc: 1500,
1430                timestamp_dealloc: None,
1431                thread_id: "worker".to_string(),
1432                borrow_count: 3,
1433                stack_trace: None,
1434                is_leaked: true,
1435                lifetime_ms: None,
1436                borrow_info: None,
1437                clone_info: None,
1438                ownership_history_available: false,
1439                smart_pointer_info: None,
1440                memory_layout: None,
1441                generic_info: None,
1442                dynamic_type_info: None,
1443                runtime_state: None,
1444                stack_allocation: None,
1445                temporary_object: None,
1446                fragmentation_analysis: None,
1447                generic_instantiation: None,
1448                type_relationships: None,
1449                type_usage: None,
1450                function_call_tracking: None,
1451                lifecycle_tracking: None,
1452                access_tracking: None,
1453                drop_chain_analysis: None,
1454            },
1455            AllocationInfo {
1456                ptr: 0x3000,
1457                size: 512,
1458                var_name: Some("var3".to_string()),
1459                type_name: Some("HashMap<String, i32>".to_string()),
1460                scope_name: None,
1461                timestamp_alloc: 2000,
1462                timestamp_dealloc: Some(3000),
1463                thread_id: "main".to_string(),
1464                borrow_count: 0,
1465                stack_trace: Some(vec!["frame2".to_string(), "frame3".to_string()]),
1466                is_leaked: false,
1467                lifetime_ms: Some(1000),
1468                borrow_info: None,
1469                clone_info: None,
1470                ownership_history_available: false,
1471                smart_pointer_info: None,
1472                memory_layout: None,
1473                generic_info: None,
1474                dynamic_type_info: None,
1475                runtime_state: None,
1476                stack_allocation: None,
1477                temporary_object: None,
1478                fragmentation_analysis: None,
1479                generic_instantiation: None,
1480                type_relationships: None,
1481                type_usage: None,
1482                function_call_tracking: None,
1483                lifecycle_tracking: None,
1484                access_tracking: None,
1485                drop_chain_analysis: None,
1486            },
1487        ];
1488
1489        // Write test data to binary file
1490        {
1491            let mut writer = BinaryWriter::new(temp_file.path()).expect("Operation failed");
1492            writer
1493                .write_header(test_allocations.len() as u32)
1494                .expect("Operation failed");
1495            for alloc in &test_allocations {
1496                writer
1497                    .write_allocation(alloc)
1498                    .expect("Failed to write allocation");
1499            }
1500            writer.finish().expect("Failed to finish writing");
1501        }
1502
1503        temp_file
1504    }
1505
1506    #[test]
1507    fn test_selective_reader_creation() {
1508        let test_file = create_test_binary_with_multiple_allocations();
1509
1510        // First, let's test if BinaryParser can load the file
1511        let allocations_result =
1512            crate::export::binary::BinaryParser::load_allocations(test_file.path());
1513        if let Err(ref e) = allocations_result {
1514            println!("Error loading allocations with BinaryParser: {e:?}");
1515        }
1516        assert!(
1517            allocations_result.is_ok(),
1518            "BinaryParser should be able to load the file"
1519        );
1520
1521        let allocations = allocations_result.expect("Test operation failed");
1522        println!(
1523            "Successfully loaded {} allocations with BinaryParser",
1524            allocations.len()
1525        );
1526
1527        // Let's debug the file structure
1528        let file_size = std::fs::metadata(test_file.path())
1529            .expect("Test operation failed")
1530            .len();
1531        println!("Binary file size: {file_size} bytes");
1532
1533        // Read the file header manually
1534        let mut file = std::fs::File::open(test_file.path()).expect("Test operation failed");
1535        let mut header_bytes = [0u8; 16];
1536        std::io::Read::read_exact(&mut file, &mut header_bytes).expect("Test operation failed");
1537        println!("Header bytes: {header_bytes:?}");
1538
1539        // Read string table marker
1540        let mut marker = [0u8; 4];
1541        std::io::Read::read_exact(&mut file, &mut marker).expect("Test operation failed");
1542        println!(
1543            "String table marker: {marker:?} ({})",
1544            String::from_utf8_lossy(&marker)
1545        );
1546
1547        // Read string table size
1548        let mut size_bytes = [0u8; 4];
1549        std::io::Read::read_exact(&mut file, &mut size_bytes).expect("Test operation failed");
1550        let table_size = u32::from_le_bytes(size_bytes);
1551        println!("String table size: {table_size}");
1552
1553        // Current position should be where allocation records start
1554        let current_pos = std::io::Seek::seek(&mut file, std::io::SeekFrom::Current(0))
1555            .expect("Failed to get test value");
1556        println!("Current position after string table header: {current_pos}");
1557
1558        // Skip string table data if any
1559        if table_size > 0 {
1560            std::io::Seek::seek(&mut file, std::io::SeekFrom::Current(table_size as i64))
1561                .expect("Operation failed");
1562            let pos_after_table = std::io::Seek::seek(&mut file, std::io::SeekFrom::Current(0))
1563                .expect("Operation failed");
1564            println!("Position after skipping string table data: {pos_after_table}");
1565        }
1566
1567        // Try to read the first allocation record
1568        let mut record_type = [0u8; 1];
1569        if std::io::Read::read_exact(&mut file, &mut record_type).is_ok() {
1570            println!("First allocation record type: {record_type:?}");
1571        } else {
1572            println!("Failed to read first allocation record type");
1573        }
1574
1575        // Now test the index builder
1576        let index_builder = crate::export::binary::BinaryIndexBuilder::new();
1577        let index_result = index_builder.build_index(test_file.path());
1578        if let Err(ref e) = index_result {
1579            println!("Error building index: {e:?}");
1580        }
1581        assert!(
1582            index_result.is_ok(),
1583            "BinaryIndexBuilder should be able to build index"
1584        );
1585
1586        let index = index_result.expect("Test operation failed");
1587        println!(
1588            "Successfully built index with {} records",
1589            index.record_count()
1590        );
1591
1592        // Finally test the selective reader
1593        let reader = SelectiveBinaryReader::new(test_file.path());
1594        if let Err(ref e) = reader {
1595            println!("Error creating reader: {e:?}");
1596        }
1597        assert!(reader.is_ok());
1598
1599        let reader = reader.expect("Failed to get test value");
1600        let stats = reader.get_stats();
1601        assert_eq!(stats.total_records, 3);
1602        assert!(stats.file_size > 0);
1603    }
1604
1605    #[test]
1606    fn test_selective_reading_with_filters() {
1607        let test_file = create_test_binary_with_multiple_allocations();
1608        let mut reader =
1609            SelectiveBinaryReader::new(test_file.path()).expect("Test operation failed");
1610
1611        // Test size filter
1612        let options = SelectiveReadOptionsBuilder::new()
1613            .filter(AllocationFilter::SizeRange(1000, 3000))
1614            .build()
1615            .expect("Test operation failed");
1616
1617        let allocations = reader
1618            .read_selective(&options)
1619            .expect("Failed to read from binary file");
1620        assert_eq!(allocations.len(), 2); // Should match 1024 and 2048 byte allocations
1621
1622        // Test thread filter
1623        let options = SelectiveReadOptionsBuilder::new()
1624            .filter(AllocationFilter::ThreadEquals("main".to_string()))
1625            .build()
1626            .expect("Test operation failed");
1627
1628        let allocations = reader
1629            .read_selective(&options)
1630            .expect("Failed to read from binary file");
1631        assert_eq!(allocations.len(), 2); // Should match allocations from main thread
1632    }
1633
1634    #[test]
1635    fn test_selective_reading_with_limit_and_offset() {
1636        let test_file = create_test_binary_with_multiple_allocations();
1637        let mut reader =
1638            SelectiveBinaryReader::new(test_file.path()).expect("Test operation failed");
1639
1640        // Test limit
1641        let options = SelectiveReadOptionsBuilder::new()
1642            .limit(2)
1643            .build()
1644            .expect("Test operation failed");
1645
1646        let allocations = reader
1647            .read_selective(&options)
1648            .expect("Failed to read from binary file");
1649        assert_eq!(allocations.len(), 2);
1650
1651        // Test offset
1652        let options = SelectiveReadOptionsBuilder::new()
1653            .offset(1)
1654            .limit(1)
1655            .build()
1656            .expect("Test operation failed");
1657
1658        let allocations = reader
1659            .read_selective(&options)
1660            .expect("Failed to read from binary file");
1661        assert_eq!(allocations.len(), 1);
1662    }
1663
1664    #[test]
1665    fn test_selective_reading_with_sorting() {
1666        let test_file = create_test_binary_with_multiple_allocations();
1667        let mut reader =
1668            SelectiveBinaryReader::new(test_file.path()).expect("Test operation failed");
1669
1670        // Test sorting by size (ascending)
1671        let options = SelectiveReadOptionsBuilder::new()
1672            .sort_by(SortField::Size, SortOrder::Ascending)
1673            .build()
1674            .expect("Test operation failed");
1675
1676        let allocations = reader
1677            .read_selective(&options)
1678            .expect("Failed to read from binary file");
1679        assert_eq!(allocations.len(), 3);
1680        assert!(allocations[0].size <= allocations[1].size);
1681        assert!(allocations[1].size <= allocations[2].size);
1682
1683        // Test sorting by size (descending)
1684        let options = SelectiveReadOptionsBuilder::new()
1685            .sort_by(SortField::Size, SortOrder::Descending)
1686            .build()
1687            .expect("Test operation failed");
1688
1689        let allocations = reader
1690            .read_selective(&options)
1691            .expect("Failed to read from binary file");
1692        assert_eq!(allocations.len(), 3);
1693        assert!(allocations[0].size >= allocations[1].size);
1694        assert!(allocations[1].size >= allocations[2].size);
1695    }
1696
1697    #[test]
1698    fn test_streaming_read() {
1699        let test_file = create_test_binary_with_multiple_allocations();
1700        let mut reader =
1701            SelectiveBinaryReader::new(test_file.path()).expect("Test operation failed");
1702
1703        let options = SelectiveReadOptionsBuilder::new()
1704            .filter(AllocationFilter::ThreadEquals("main".to_string()))
1705            .build()
1706            .expect("Test operation failed");
1707
1708        let mut count = 0;
1709        let result = reader.read_selective_streaming(&options, |_allocation| {
1710            count += 1;
1711            Ok(true) // Continue processing
1712        });
1713
1714        assert!(result.is_ok());
1715        assert_eq!(count, 2); // Should process 2 allocations from main thread
1716    }
1717
1718    #[test]
1719    fn test_field_filtering() {
1720        let test_file = create_test_binary_with_multiple_allocations();
1721        let mut reader =
1722            SelectiveBinaryReader::new(test_file.path()).expect("Test operation failed");
1723
1724        // Only include basic fields
1725        let options = SelectiveReadOptionsBuilder::new()
1726            .with_fields(
1727                [AllocationField::Ptr, AllocationField::Size]
1728                    .into_iter()
1729                    .collect(),
1730            )
1731            .build()
1732            .expect("Test operation failed");
1733
1734        let allocations = reader
1735            .read_selective(&options)
1736            .expect("Failed to read from binary file");
1737        assert_eq!(allocations.len(), 3);
1738
1739        // Check that non-included fields are cleared
1740        for allocation in &allocations {
1741            // Basic fields should be present
1742            assert!(allocation.ptr > 0);
1743            assert!(allocation.size > 0);
1744
1745            // Non-included fields should be None/default
1746            // Note: This test assumes the field filtering is working correctly
1747            // In practice, we might need to adjust based on the actual implementation
1748        }
1749    }
1750}