Skip to main content

memscope_rs/analysis/relation_inference/
container_detector.rs

1//! Container Detector — infers Container → HeapOwner Contains relations.
2//!
3//! This module implements heuristic-based inference of `Contains` relationships
4//! between Container types (HashMap, BTreeMap, VecDeque) and HeapOwner types
5//! (Vec, Box, String) using temporal locality and allocation metadata.
6//!
7//! # Detection Strategy
8//!
9//! The detector uses three primary signals:
10//!
11//! 1. **Temporal Locality**: Container and its contained objects are typically
12//!    allocated within a short time window (default: 1ms).
13//!
14//! 2. **Thread Affinity**: Objects must be allocated on the same thread.
15//!
16//! 3. **Size Reasonableness**: The contained object should not be significantly
17//!    larger than the container (default: 10x ratio).
18//!
19//! # Algorithm
20//!
21//! ```text
22//! for each Container in allocations:
23//!     for each subsequent HeapOwner in sorted order:
24//!         if same thread AND
25//!            time_diff < TIME_WINDOW AND
26//!            heap_owner.size < container.size * SIZE_RATIO:
27//!             add_edge(container → heap_owner, Contains)
28//!         else if time_diff > TIME_WINDOW:
29//!             break  // No more candidates in this window
30//! ```
31//!
32//! # Complexity
33//!
34//! - **Time**: O(N) where N is the number of allocations, thanks to the
35//!   time-sorted sliding window approach.
36//! - **Space**: O(1) additional space beyond the allocation list.
37//!
38//! # Configuration
39//!
40//! The detection behavior can be tuned via [`ContainerConfig`]:
41//!
42//! - `time_window_ns`: Maximum time difference for considered allocations (default: 1ms)
43//! - `size_ratio`: Maximum size ratio between contained object and container (default: 10)
44//! - `lookahead`: Number of subsequent allocations to examine (default: 5)
45
46use crate::analysis::relation_inference::{Relation, RelationEdge};
47use crate::core::types::TrackKind;
48use crate::snapshot::types::ActiveAllocation;
49
50/// Configuration for container relation detection.
51///
52/// Controls the heuristic thresholds used to infer `Contains` relationships
53/// between Container and HeapOwner allocations.
54///
55/// # Fields
56///
57/// * `time_window_ns` - Maximum time difference in nanoseconds between
58///   container and contained object allocation (default: 1ms = 1,000,000ns).
59///
60/// * `size_ratio` - Maximum size ratio between contained object and container.
61///   Prevents connecting containers to unusually large objects (default: 10).
62///
63/// * `lookahead` - Number of subsequent allocations to examine for each container.
64///   Limits the search window for performance (default: 5).
65#[derive(Debug, Clone, Copy)]
66pub struct ContainerConfig {
67    /// Maximum time difference for considered allocations (default: 1ms).
68    pub time_window_ns: u64,
69    /// Maximum size ratio between contained object and container (default: 10).
70    pub size_ratio: usize,
71    /// Number of subsequent allocations to examine (default: 5).
72    pub lookahead: usize,
73}
74
75impl Default for ContainerConfig {
76    fn default() -> Self {
77        Self {
78            // 1ms = 1,000,000 nanoseconds
79            // This captures allocations within the same logical operation
80            time_window_ns: 1_000_000,
81            // Prevent connecting containers to disproportionately large objects
82            size_ratio: 10,
83            // Limit search window for performance
84            lookahead: 5,
85        }
86    }
87}
88
89/// Detect `Contains` relationships between Container and HeapOwner allocations.
90///
91/// This function implements heuristic-based inference using temporal locality,
92/// thread affinity, and size filtering to identify Container → HeapOwner edges.
93///
94/// # Arguments
95///
96/// * `allocations` - List of active allocations to analyze. Must be sorted by
97///   `allocated_at` timestamp for optimal performance.
98///
99/// * `config` - Detection configuration (optional, uses defaults if None).
100///
101/// # Returns
102///
103/// A vector of `RelationEdge` representing inferred `Contains` relationships.
104///
105/// # Algorithm
106///
107/// 1. Filter allocations into Containers and HeapOwners.
108/// 2. For each Container, examine subsequent HeapOwners within the time window.
109/// 3. Apply thread affinity and size ratio filters.
110/// 4. Add edges for passing candidates.
111///
112/// # Example
113///
114/// ```ignore
115/// use memscope_rs::analysis::relation_inference::container_detector::detect_containers;
116/// use memscope_rs::snapshot::types::ActiveAllocation;
117/// use memscope_rs::core::types::TrackKind;
118///
119/// let mut allocations = vec![
120///     ActiveAllocation {
121///         ptr: None,
122///         size: 64,
123///         kind: TrackKind::Container,
124///         allocated_at: 1000,
125///         thread_id: 1,
126///         ..Default::default()
127///     },
128///     ActiveAllocation {
129///         ptr: Some(0x2000),
130///         size: 128,
131///         kind: TrackKind::HeapOwner { ptr: 0x2000, size: 128 },
132///         allocated_at: 100500,  // 500ns later, within 1ms window
133///         thread_id: 1,
134///         ..Default::default()
135///     },
136/// ];
137///
138/// let edges = detect_containers(&allocations, None);
139/// assert_eq!(edges.len(), 1);
140/// assert_eq!(edges[0].relation, Relation::Contains);
141/// ```
142pub fn detect_containers(
143    allocations: &[ActiveAllocation],
144    config: Option<ContainerConfig>,
145) -> Vec<RelationEdge> {
146    let config = config.unwrap_or_default();
147
148    if allocations.is_empty() {
149        return Vec::new();
150    }
151
152    // Create a mutable copy sorted by allocation time
153    let mut sorted_allocs: Vec<(usize, &ActiveAllocation)> =
154        allocations.iter().enumerate().collect();
155    sorted_allocs.sort_by_key(|(_, alloc)| alloc.allocated_at);
156
157    let mut edges = Vec::new();
158
159    // Scan for Container → HeapOwner relationships
160    for (sorted_idx, (container_orig_idx, container)) in sorted_allocs.iter().enumerate() {
161        // Only process Container types
162        if !matches!(container.kind, TrackKind::Container) {
163            continue;
164        }
165
166        // Skip containers with zero size (shouldn't happen, but be defensive)
167        if container.size == 0 {
168            continue;
169        }
170
171        // Examine subsequent allocations within the lookahead window
172        let start_idx = sorted_idx + 1;
173        let end_idx = (start_idx + config.lookahead).min(sorted_allocs.len());
174
175        for (candidate_orig_idx, candidate) in sorted_allocs[start_idx..end_idx].iter() {
176            // Only consider HeapOwner types
177            if !matches!(candidate.kind, TrackKind::HeapOwner { .. }) {
178                continue;
179            }
180
181            // Filter: Same thread
182            if container.thread_id != candidate.thread_id {
183                continue;
184            }
185
186            // Filter: Time window (must be allocated AFTER container)
187            let time_diff = candidate
188                .allocated_at
189                .saturating_sub(container.allocated_at);
190            if time_diff == 0 {
191                // Same allocation time, skip (shouldn't happen but be safe)
192                continue;
193            }
194            if time_diff > config.time_window_ns {
195                // No more candidates within time window
196                break;
197            }
198
199            // Skip containers with zero size (shouldn't happen, but be defensive)
200            if container.size == 0 {
201                // For containers with size 0, skip the size ratio check
202                // and allow any candidate within the time window
203            } else {
204                // Filter: Size ratio (prevent connecting to unusually large objects)
205                let max_size = container.size * config.size_ratio;
206                if candidate.size > max_size {
207                    continue;
208                }
209            }
210
211            // All checks passed - add Contains edge
212            edges.push(RelationEdge {
213                from: *container_orig_idx,
214                to: *candidate_orig_idx,
215                relation: Relation::Contains,
216            });
217        }
218    }
219
220    edges
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226    use crate::analysis::relation_inference::Relation;
227
228    /// Helper to create a test Container allocation.
229    fn make_container(_id: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
230        ActiveAllocation {
231            ptr: None,
232            size,
233            kind: TrackKind::Container,
234            allocated_at: time,
235            var_name: None,
236            type_name: None,
237            thread_id: thread,
238            call_stack_hash: None,
239        }
240    }
241
242    /// Helper to create a test HeapOwner allocation.
243    fn make_heap_owner(
244        _id: usize,
245        ptr: usize,
246        size: usize,
247        time: u64,
248        thread: u64,
249    ) -> ActiveAllocation {
250        ActiveAllocation {
251            ptr: Some(ptr),
252            size,
253            kind: TrackKind::HeapOwner { ptr, size },
254            allocated_at: time,
255            var_name: None,
256            type_name: None,
257            thread_id: thread,
258            call_stack_hash: None,
259        }
260    }
261
262    #[test]
263    fn test_detect_containers_empty() {
264        let allocs = vec![];
265        let edges = detect_containers(&allocs, None);
266        assert_eq!(edges.len(), 0);
267    }
268
269    #[test]
270    fn test_detect_containers_single_container() {
271        let allocs = vec![make_container(0, 64, 1000, 1)];
272        let edges = detect_containers(&allocs, None);
273        assert_eq!(edges.len(), 0);
274    }
275
276    #[test]
277    fn test_detect_containers_single_heap_owner() {
278        let allocs = vec![make_heap_owner(0, 0x1000, 64, 1000, 1)];
279        let edges = detect_containers(&allocs, None);
280        assert_eq!(edges.len(), 0);
281    }
282
283    #[test]
284    fn test_detect_containers_basic() {
285        let allocs = vec![
286            make_container(0, 64, 1000, 1),
287            make_heap_owner(1, 0x2000, 128, 100500, 1),
288        ];
289
290        let edges = detect_containers(&allocs, None);
291        assert_eq!(edges.len(), 1);
292        assert_eq!(edges[0].from, 0);
293        assert_eq!(edges[0].to, 1);
294        assert_eq!(edges[0].relation, Relation::Contains);
295    }
296
297    #[test]
298    fn test_detect_containers_time_window() {
299        let allocs = vec![
300            make_container(0, 64, 1000, 1),
301            make_heap_owner(1, 0x2000, 128, 2_000_000, 1), // 2ms later - beyond default 1ms window
302        ];
303
304        let edges = detect_containers(&allocs, None);
305        assert_eq!(edges.len(), 0);
306    }
307
308    #[test]
309    fn test_detect_containers_custom_time_window() {
310        let allocs = vec![
311            make_container(0, 64, 1000, 1),
312            make_heap_owner(1, 0x2000, 128, 2_000_000, 1), // 2ms later
313        ];
314
315        let config = ContainerConfig {
316            time_window_ns: 3_000_000, // 3ms window
317            ..Default::default()
318        };
319
320        let edges = detect_containers(&allocs, Some(config));
321        assert_eq!(edges.len(), 1);
322    }
323
324    #[test]
325    fn test_detect_containers_different_threads() {
326        let allocs = vec![
327            make_container(0, 64, 1000, 1),
328            make_heap_owner(1, 0x2000, 128, 100500, 2), // Different thread
329        ];
330
331        let edges = detect_containers(&allocs, None);
332        assert_eq!(edges.len(), 0);
333    }
334
335    #[test]
336    fn test_detect_containers_size_ratio_filter() {
337        let allocs = vec![
338            make_container(0, 64, 1000, 1),
339            make_heap_owner(1, 0x2000, 1280, 100500, 1), // 20x container size
340        ];
341
342        let edges = detect_containers(&allocs, None);
343        assert_eq!(edges.len(), 0);
344    }
345
346    #[test]
347    fn test_detect_containers_custom_size_ratio() {
348        let allocs = vec![
349            make_container(0, 64, 1000, 1),
350            make_heap_owner(1, 0x2000, 1280, 100500, 1), // 20x container size
351        ];
352
353        let config = ContainerConfig {
354            size_ratio: 30, // Allow up to 30x ratio
355            ..Default::default()
356        };
357
358        let edges = detect_containers(&allocs, Some(config));
359        assert_eq!(edges.len(), 1);
360    }
361
362    #[test]
363    fn test_detect_containers_multiple_candidates() {
364        let allocs = vec![
365            make_container(0, 64, 1000, 1),
366            make_heap_owner(1, 0x2000, 64, 100500, 1),
367            make_heap_owner(2, 0x3000, 64, 100600, 1),
368            make_heap_owner(3, 0x4000, 64, 100700, 1),
369        ];
370
371        let edges = detect_containers(&allocs, None);
372        assert_eq!(edges.len(), 3);
373        assert!(edges.iter().all(|e| e.from == 0));
374    }
375
376    #[test]
377    fn test_detect_containers_lookahead_limit() {
378        let allocs = vec![
379            make_container(0, 64, 1000, 1),
380            make_heap_owner(1, 0x2000, 64, 100500, 1),
381            make_heap_owner(2, 0x3000, 64, 100600, 1),
382            make_heap_owner(3, 0x4000, 64, 100700, 1),
383            make_heap_owner(4, 0x5000, 64, 100800, 1),
384            make_heap_owner(5, 0x6000, 64, 100900, 1), // Beyond default lookahead of 5
385        ];
386
387        let edges = detect_containers(&allocs, None);
388        // Should only find 5 edges (lookahead limit)
389        assert_eq!(edges.len(), 5);
390    }
391
392    #[test]
393    fn test_detect_containers_multiple_containers() {
394        let allocs = vec![
395            make_container(0, 64, 1000, 1),
396            make_container(1, 64, 2_000_000, 1), // 2ms later (beyond 1ms window)
397            make_heap_owner(2, 0x2000, 64, 100500, 1), // Near container 0 (within 1ms)
398            make_heap_owner(3, 0x3000, 64, 2_100_000, 1), // Near container 1 (within 1ms)
399        ];
400
401        let edges = detect_containers(&allocs, None);
402        assert_eq!(edges.len(), 2);
403
404        // Check which edges were created
405        let contains_0_2 = edges.iter().any(|e| e.from == 0 && e.to == 2);
406        let contains_1_3 = edges.iter().any(|e| e.from == 1 && e.to == 3);
407        assert!(contains_0_2);
408        assert!(contains_1_3);
409    }
410
411    #[test]
412    fn test_detect_containers_ignore_value_types() {
413        let allocs = vec![
414            make_container(0, 64, 1000, 1),
415            ActiveAllocation {
416                ptr: None,
417                size: 32,
418                kind: TrackKind::Value,
419                allocated_at: 100500,
420                var_name: None,
421                type_name: None,
422                thread_id: 1,
423                call_stack_hash: None,
424            },
425        ];
426
427        let edges = detect_containers(&allocs, None);
428        assert_eq!(edges.len(), 0);
429    }
430
431    #[test]
432    fn test_detect_containers_unsorted_input() {
433        let allocs = vec![
434            make_heap_owner(1, 0x2000, 64, 100500, 1), // Later in time (original index 0)
435            make_container(0, 64, 1000, 1),            // Earlier in time (original index 1)
436        ];
437
438        let edges = detect_containers(&allocs, None);
439        assert_eq!(edges.len(), 1);
440        // Container (original index 1) → HeapOwner (original index 0)
441        assert_eq!(edges[0].from, 1);
442        assert_eq!(edges[0].to, 0);
443    }
444
445    #[test]
446    fn test_config_default() {
447        let config = ContainerConfig::default();
448        assert_eq!(config.time_window_ns, 1_000_000);
449        assert_eq!(config.size_ratio, 10);
450        assert_eq!(config.lookahead, 5);
451    }
452
453    #[test]
454    fn test_config_custom() {
455        let config = ContainerConfig {
456            time_window_ns: 5_000_000,
457            size_ratio: 20,
458            lookahead: 10,
459        };
460        assert_eq!(config.time_window_ns, 5_000_000);
461        assert_eq!(config.size_ratio, 20);
462        assert_eq!(config.lookahead, 10);
463    }
464}