Skip to main content

memscope_rs/analysis/relation_inference/
container_detector.rs

1//! Container Detector — infers Container → HeapOwner Contains relations.
2//!
3//! This module implements heuristic-based inference of `Contains` relationships
4//! between Container types (HashMap, BTreeMap, VecDeque) and HeapOwner types
5//! (Vec, Box, String) using temporal locality and allocation metadata.
6//!
7//! # Detection Strategy
8//!
9//! The detector uses three primary signals:
10//!
11//! 1. **Temporal Locality**: Container and its contained objects are typically
12//!    allocated within a short time window (default: 1ms).
13//!
14//! 2. **Thread Affinity**: Objects must be allocated on the same thread.
15//!
16//! 3. **Size Reasonableness**: The contained object should not be significantly
17//!    larger than the container (default: 10x ratio).
18//!
19//! # Algorithm
20//!
21//! ```text
22//! for each Container in allocations:
23//!     for each subsequent HeapOwner in sorted order:
24//!         if same thread AND
25//!            time_diff < TIME_WINDOW AND
26//!            heap_owner.size < container.size * SIZE_RATIO:
27//!             add_edge(container → heap_owner, Contains)
28//!         else if time_diff > TIME_WINDOW:
29//!             break  // No more candidates in this window
30//! ```
31//!
32//! # Complexity
33//!
34//! - **Time**: O(N) where N is the number of allocations, thanks to the
35//!   time-sorted sliding window approach.
36//! - **Space**: O(1) additional space beyond the allocation list.
37//!
38//! # Configuration
39//!
40//! The detection behavior can be tuned via [`ContainerConfig`]:
41//!
42//! - `time_window_ns`: Maximum time difference for considered allocations (default: 1ms)
43//! - `size_ratio`: Maximum size ratio between contained object and container (default: 10)
44//! - `lookahead`: Number of subsequent allocations to examine (default: 5)
45
46use crate::analysis::relation_inference::{Relation, RelationEdge};
47use crate::core::types::TrackKind;
48use crate::snapshot::types::ActiveAllocation;
49
50/// Configuration for container relation detection.
51///
52/// Controls the heuristic thresholds used to infer `Contains` relationships
53/// between Container and HeapOwner allocations.
54///
55/// # Fields
56///
57/// * `time_window_ns` - Maximum time difference in nanoseconds between
58///   container and contained object allocation (default: 1ms = 1,000,000ns).
59///
60/// * `size_ratio` - Maximum size ratio between contained object and container.
61///   Prevents connecting containers to unusually large objects (default: 10).
62///
63/// * `lookahead` - Number of subsequent allocations to examine for each container.
64///   Limits the search window for performance (default: 5).
65#[derive(Debug, Clone, Copy)]
66pub struct ContainerConfig {
67    /// Maximum time difference for considered allocations (default: 1ms).
68    pub time_window_ns: u64,
69    /// Maximum size ratio between contained object and container (default: 10).
70    pub size_ratio: usize,
71    /// Number of subsequent allocations to examine (default: 5).
72    pub lookahead: usize,
73}
74
75impl Default for ContainerConfig {
76    fn default() -> Self {
77        Self {
78            // 1ms = 1,000,000 nanoseconds
79            // This captures allocations within the same logical operation
80            time_window_ns: 1_000_000,
81            // Prevent connecting containers to disproportionately large objects
82            size_ratio: 10,
83            // Limit search window for performance
84            lookahead: 5,
85        }
86    }
87}
88
89/// Detect `Contains` relationships between Container and HeapOwner allocations.
90///
91/// This function implements heuristic-based inference using temporal locality,
92/// thread affinity, and size filtering to identify Container → HeapOwner edges.
93///
94/// # Arguments
95///
96/// * `allocations` - List of active allocations to analyze. Must be sorted by
97///   `allocated_at` timestamp for optimal performance.
98///
99/// * `config` - Detection configuration (optional, uses defaults if None).
100///
101/// # Returns
102///
103/// A vector of `RelationEdge` representing inferred `Contains` relationships.
104///
105/// # Algorithm
106///
107/// 1. Filter allocations into Containers and HeapOwners.
108/// 2. For each Container, examine subsequent HeapOwners within the time window.
109/// 3. Apply thread affinity and size ratio filters.
110/// 4. Add edges for passing candidates.
111///
112/// # Example
113///
114/// ```ignore
115/// use memscope_rs::analysis::relation_inference::container_detector::detect_containers;
116/// use memscope_rs::snapshot::types::ActiveAllocation;
117/// use memscope_rs::core::types::TrackKind;
118///
119/// let mut allocations = vec![
120///     ActiveAllocation {
121///         ptr: None,
122///         size: 64,
123///         kind: TrackKind::Container,
124///         allocated_at: 1000,
125///         thread_id: 1,
126///         ..Default::default()
127///     },
128///     ActiveAllocation {
129///         ptr: Some(0x2000),
130///         size: 128,
131///         kind: TrackKind::HeapOwner { ptr: 0x2000, size: 128 },
132///         allocated_at: 100500,  // 500ns later, within 1ms window
133///         thread_id: 1,
134///         ..Default::default()
135///     },
136/// ];
137///
138/// let edges = detect_containers(&allocations, None);
139/// assert_eq!(edges.len(), 1);
140/// assert_eq!(edges[0].relation, Relation::Contains);
141/// ```
142pub fn detect_containers(
143    allocations: &[ActiveAllocation],
144    config: Option<ContainerConfig>,
145) -> Vec<RelationEdge> {
146    let config = config.unwrap_or_default();
147
148    if allocations.is_empty() {
149        return Vec::new();
150    }
151
152    // Create a mutable copy sorted by allocation time
153    let mut sorted_allocs: Vec<(usize, &ActiveAllocation)> =
154        allocations.iter().enumerate().collect();
155    sorted_allocs.sort_by_key(|(_, alloc)| alloc.allocated_at);
156
157    let mut edges = Vec::new();
158
159    // Scan for Container → HeapOwner relationships
160    for (sorted_idx, (container_orig_idx, container)) in sorted_allocs.iter().enumerate() {
161        // Only process Container types
162        if !matches!(container.kind, TrackKind::Container) {
163            continue;
164        }
165
166        // Skip containers with zero size (shouldn't happen, but be defensive)
167        if container.size == 0 {
168            continue;
169        }
170
171        // Examine subsequent allocations within the lookahead window
172        let start_idx = sorted_idx + 1;
173        let end_idx = (start_idx + config.lookahead).min(sorted_allocs.len());
174
175        for (candidate_orig_idx, candidate) in sorted_allocs[start_idx..end_idx].iter() {
176            // Only consider HeapOwner types
177            if !matches!(candidate.kind, TrackKind::HeapOwner { .. }) {
178                continue;
179            }
180
181            // Filter: Same thread
182            if container.thread_id != candidate.thread_id {
183                continue;
184            }
185
186            // Filter: Time window (must be allocated AFTER container)
187            let time_diff = candidate
188                .allocated_at
189                .saturating_sub(container.allocated_at);
190            if time_diff == 0 {
191                // Same allocation time, skip (shouldn't happen but be safe)
192                continue;
193            }
194            if time_diff > config.time_window_ns {
195                // No more candidates within time window
196                break;
197            }
198
199            // Skip containers with zero size (shouldn't happen, but be defensive)
200            if container.size == 0 {
201                // For containers with size 0, skip the size ratio check
202                // and allow any candidate within the time window
203            } else {
204                // Filter: Size ratio (prevent connecting to unusually large objects)
205                let max_size = container.size * config.size_ratio;
206                if candidate.size > max_size {
207                    continue;
208                }
209            }
210
211            // All checks passed - add Contains edge
212            edges.push(RelationEdge {
213                from: *container_orig_idx,
214                to: *candidate_orig_idx,
215                relation: Relation::Contains,
216            });
217        }
218    }
219
220    edges
221}
222
223#[cfg(test)]
224mod tests {
225    use super::*;
226    use crate::analysis::relation_inference::Relation;
227
228    /// Helper to create a test Container allocation.
229    fn make_container(_id: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
230        ActiveAllocation {
231            ptr: None,
232            size,
233            kind: TrackKind::Container,
234            allocated_at: time,
235            var_name: None,
236            type_name: None,
237            thread_id: thread,
238            call_stack_hash: None,
239            module_path: None,
240            stack_ptr: None,
241        }
242    }
243
244    /// Helper to create a test HeapOwner allocation.
245    fn make_heap_owner(ptr: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
246        ActiveAllocation {
247            ptr: Some(ptr),
248            size,
249            kind: TrackKind::HeapOwner { ptr, size },
250            allocated_at: time,
251            var_name: None,
252            type_name: None,
253            thread_id: thread,
254            call_stack_hash: None,
255            module_path: None,
256            stack_ptr: None,
257        }
258    }
259
260    #[test]
261    fn test_detect_containers_empty() {
262        let allocs = vec![];
263        let edges = detect_containers(&allocs, None);
264        assert_eq!(edges.len(), 0);
265    }
266
267    #[test]
268    fn test_detect_containers_single_container() {
269        let allocs = vec![make_container(0, 64, 1000, 1)];
270        let edges = detect_containers(&allocs, None);
271        assert_eq!(edges.len(), 0);
272    }
273
274    #[test]
275    fn test_detect_containers_single_heap_owner() {
276        let allocs = vec![make_heap_owner(0x1000, 64, 1000, 1)];
277        let edges = detect_containers(&allocs, None);
278        assert_eq!(edges.len(), 0);
279    }
280
281    #[test]
282    fn test_detect_containers_basic() {
283        let allocs = vec![
284            make_container(0, 64, 1000, 1),
285            make_heap_owner(0x2000, 128, 100500, 1),
286        ];
287
288        let edges = detect_containers(&allocs, None);
289        assert_eq!(edges.len(), 1);
290        assert_eq!(edges[0].from, 0);
291        assert_eq!(edges[0].to, 1);
292        assert_eq!(edges[0].relation, Relation::Contains);
293    }
294
295    #[test]
296    fn test_detect_containers_time_window() {
297        let allocs = vec![
298            make_container(0, 64, 1000, 1),
299            make_heap_owner(0x2000, 128, 2_000_000, 1), // 2ms later - beyond default 1ms window
300        ];
301
302        let edges = detect_containers(&allocs, None);
303        assert_eq!(edges.len(), 0);
304    }
305
306    #[test]
307    fn test_detect_containers_custom_time_window() {
308        let allocs = vec![
309            make_container(0, 64, 1000, 1),
310            make_heap_owner(0x2000, 128, 2_000_000, 1), // 2ms later
311        ];
312
313        let config = ContainerConfig {
314            time_window_ns: 3_000_000, // 3ms window
315            ..Default::default()
316        };
317
318        let edges = detect_containers(&allocs, Some(config));
319        assert_eq!(edges.len(), 1);
320    }
321
322    #[test]
323    fn test_detect_containers_different_threads() {
324        let allocs = vec![
325            make_container(0, 64, 1000, 1),
326            make_heap_owner(0x2000, 128, 100500, 2), // Different thread
327        ];
328
329        let edges = detect_containers(&allocs, None);
330        assert_eq!(edges.len(), 0);
331    }
332
333    #[test]
334    fn test_detect_containers_size_ratio_filter() {
335        let allocs = vec![
336            make_container(0, 64, 1000, 1),
337            make_heap_owner(0x2000, 1280, 100500, 1), // 20x container size
338        ];
339
340        let edges = detect_containers(&allocs, None);
341        assert_eq!(edges.len(), 0);
342    }
343
344    #[test]
345    fn test_detect_containers_custom_size_ratio() {
346        let allocs = vec![
347            make_container(0, 64, 1000, 1),
348            make_heap_owner(0x2000, 1280, 100500, 1), // 20x container size
349        ];
350
351        let config = ContainerConfig {
352            size_ratio: 30, // Allow up to 30x ratio
353            ..Default::default()
354        };
355
356        let edges = detect_containers(&allocs, Some(config));
357        assert_eq!(edges.len(), 1);
358    }
359
360    #[test]
361    fn test_detect_containers_multiple_candidates() {
362        let allocs = vec![
363            make_container(0, 64, 1000, 1),
364            make_heap_owner(0x2000, 64, 100500, 1),
365            make_heap_owner(0x3000, 64, 100600, 1),
366            make_heap_owner(0x4000, 64, 100700, 1),
367        ];
368
369        let edges = detect_containers(&allocs, None);
370        assert_eq!(edges.len(), 3);
371        assert!(edges.iter().all(|e| e.from == 0));
372    }
373
374    #[test]
375    fn test_detect_containers_lookahead_limit() {
376        let allocs = vec![
377            make_container(0, 64, 1000, 1),
378            make_heap_owner(0x2000, 64, 100500, 1),
379            make_heap_owner(0x3000, 64, 100600, 1),
380            make_heap_owner(0x4000, 64, 100700, 1),
381            make_heap_owner(0x5000, 64, 100800, 1),
382            make_heap_owner(0x6000, 64, 100900, 1), // Beyond default lookahead of 5
383        ];
384
385        let edges = detect_containers(&allocs, None);
386        // Should only find 5 edges (lookahead limit)
387        assert_eq!(edges.len(), 5);
388    }
389
390    #[test]
391    fn test_detect_containers_multiple_containers() {
392        let allocs = vec![
393            make_container(0, 64, 1000, 1),
394            make_container(1, 64, 2_000_000, 1), // 2ms later (beyond 1ms window)
395            make_heap_owner(0x2000, 64, 100500, 1), // Near container 0 (within 1ms)
396            make_heap_owner(0x3000, 64, 2_100_000, 1), // Near container 1 (within 1ms)
397        ];
398
399        let edges = detect_containers(&allocs, None);
400        assert_eq!(edges.len(), 2);
401
402        // Check which edges were created
403        let contains_0_2 = edges.iter().any(|e| e.from == 0 && e.to == 2);
404        let contains_1_3 = edges.iter().any(|e| e.from == 1 && e.to == 3);
405        assert!(contains_0_2);
406        assert!(contains_1_3);
407    }
408
409    #[test]
410    fn test_detect_containers_ignore_value_types() {
411        let allocs = vec![
412            make_container(0, 64, 1000, 1),
413            ActiveAllocation {
414                ptr: None,
415                size: 32,
416                kind: TrackKind::Value,
417                allocated_at: 100500,
418                var_name: None,
419                type_name: None,
420                thread_id: 1,
421                call_stack_hash: None,
422                module_path: None,
423                stack_ptr: None,
424            },
425        ];
426
427        let edges = detect_containers(&allocs, None);
428        assert_eq!(edges.len(), 0);
429    }
430
431    #[test]
432    fn test_detect_containers_unsorted_input() {
433        let allocs = vec![
434            make_heap_owner(0x2000, 64, 100500, 1), // Later in time (original index 0)
435            make_container(0, 64, 1000, 1),         // Earlier in time (original index 1)
436        ];
437
438        let edges = detect_containers(&allocs, None);
439        assert_eq!(edges.len(), 1);
440        // Container (original index 1) → HeapOwner (original index 0)
441        assert_eq!(edges[0].from, 1);
442        assert_eq!(edges[0].to, 0);
443    }
444
445    #[test]
446    fn test_config_default() {
447        let config = ContainerConfig::default();
448        assert_eq!(config.time_window_ns, 1_000_000);
449        assert_eq!(config.size_ratio, 10);
450        assert_eq!(config.lookahead, 5);
451    }
452
453    #[test]
454    fn test_config_custom() {
455        let config = ContainerConfig {
456            time_window_ns: 5_000_000,
457            size_ratio: 20,
458            lookahead: 10,
459        };
460        assert_eq!(config.time_window_ns, 5_000_000);
461        assert_eq!(config.size_ratio, 20);
462        assert_eq!(config.lookahead, 10);
463    }
464}