memscope_rs/analysis/relation_inference/container_detector.rs
1//! Container Detector — infers Container → HeapOwner Contains relations.
2//!
3//! This module implements heuristic-based inference of `Contains` relationships
4//! between Container types (HashMap, BTreeMap, VecDeque) and HeapOwner types
5//! (Vec, Box, String) using temporal locality and allocation metadata.
6//!
7//! # Detection Strategy
8//!
9//! The detector uses three primary signals:
10//!
11//! 1. **Temporal Locality**: Container and its contained objects are typically
12//! allocated within a short time window (default: 1ms).
13//!
14//! 2. **Thread Affinity**: Objects must be allocated on the same thread.
15//!
16//! 3. **Size Reasonableness**: The contained object should not be significantly
17//! larger than the container (default: 10x ratio).
18//!
19//! # Algorithm
20//!
21//! ```text
22//! for each Container in allocations:
23//! for each subsequent HeapOwner in sorted order:
24//! if same thread AND
25//! time_diff < TIME_WINDOW AND
26//! heap_owner.size < container.size * SIZE_RATIO:
27//! add_edge(container → heap_owner, Contains)
28//! else if time_diff > TIME_WINDOW:
29//! break // No more candidates in this window
30//! ```
31//!
32//! # Complexity
33//!
34//! - **Time**: O(N) where N is the number of allocations, thanks to the
35//! time-sorted sliding window approach.
36//! - **Space**: O(1) additional space beyond the allocation list.
37//!
38//! # Configuration
39//!
40//! The detection behavior can be tuned via [`ContainerConfig`]:
41//!
42//! - `time_window_ns`: Maximum time difference for considered allocations (default: 1ms)
43//! - `size_ratio`: Maximum size ratio between contained object and container (default: 10)
44//! - `lookahead`: Number of subsequent allocations to examine (default: 5)
45
46use crate::analysis::relation_inference::{Relation, RelationEdge};
47use crate::core::types::TrackKind;
48use crate::snapshot::types::ActiveAllocation;
49
50/// Configuration for container relation detection.
51///
52/// Controls the heuristic thresholds used to infer `Contains` relationships
53/// between Container and HeapOwner allocations.
54///
55/// # Fields
56///
57/// * `time_window_ns` - Maximum time difference in nanoseconds between
58/// container and contained object allocation (default: 1ms = 1,000,000ns).
59///
60/// * `size_ratio` - Maximum size ratio between contained object and container.
61/// Prevents connecting containers to unusually large objects (default: 10).
62///
63/// * `lookahead` - Number of subsequent allocations to examine for each container.
64/// Limits the search window for performance (default: 5).
65#[derive(Debug, Clone, Copy)]
66pub struct ContainerConfig {
67 /// Maximum time difference for considered allocations (default: 1ms).
68 pub time_window_ns: u64,
69 /// Maximum size ratio between contained object and container (default: 10).
70 pub size_ratio: usize,
71 /// Number of subsequent allocations to examine (default: 5).
72 pub lookahead: usize,
73}
74
75impl Default for ContainerConfig {
76 fn default() -> Self {
77 Self {
78 // 1ms = 1,000,000 nanoseconds
79 // This captures allocations within the same logical operation
80 time_window_ns: 1_000_000,
81 // Prevent connecting containers to disproportionately large objects
82 size_ratio: 10,
83 // Limit search window for performance
84 lookahead: 5,
85 }
86 }
87}
88
89/// Detect `Contains` relationships between Container and HeapOwner allocations.
90///
91/// This function implements heuristic-based inference using temporal locality,
92/// thread affinity, and size filtering to identify Container → HeapOwner edges.
93///
94/// # Arguments
95///
96/// * `allocations` - List of active allocations to analyze. Must be sorted by
97/// `allocated_at` timestamp for optimal performance.
98///
99/// * `config` - Detection configuration (optional, uses defaults if None).
100///
101/// # Returns
102///
103/// A vector of `RelationEdge` representing inferred `Contains` relationships.
104///
105/// # Algorithm
106///
107/// 1. Filter allocations into Containers and HeapOwners.
108/// 2. For each Container, examine subsequent HeapOwners within the time window.
109/// 3. Apply thread affinity and size ratio filters.
110/// 4. Add edges for passing candidates.
111///
112/// # Example
113///
114/// ```ignore
115/// use memscope_rs::analysis::relation_inference::container_detector::detect_containers;
116/// use memscope_rs::snapshot::types::ActiveAllocation;
117/// use memscope_rs::core::types::TrackKind;
118///
119/// let mut allocations = vec![
120/// ActiveAllocation {
121/// ptr: None,
122/// size: 64,
123/// kind: TrackKind::Container,
124/// allocated_at: 1000,
125/// thread_id: 1,
126/// ..Default::default()
127/// },
128/// ActiveAllocation {
129/// ptr: Some(0x2000),
130/// size: 128,
131/// kind: TrackKind::HeapOwner { ptr: 0x2000, size: 128 },
132/// allocated_at: 100500, // 500ns later, within 1ms window
133/// thread_id: 1,
134/// ..Default::default()
135/// },
136/// ];
137///
138/// let edges = detect_containers(&allocations, None);
139/// assert_eq!(edges.len(), 1);
140/// assert_eq!(edges[0].relation, Relation::Contains);
141/// ```
142pub fn detect_containers(
143 allocations: &[ActiveAllocation],
144 config: Option<ContainerConfig>,
145) -> Vec<RelationEdge> {
146 let config = config.unwrap_or_default();
147
148 if allocations.is_empty() {
149 return Vec::new();
150 }
151
152 // Create a mutable copy sorted by allocation time
153 let mut sorted_allocs: Vec<(usize, &ActiveAllocation)> =
154 allocations.iter().enumerate().collect();
155 sorted_allocs.sort_by_key(|(_, alloc)| alloc.allocated_at);
156
157 let mut edges = Vec::new();
158
159 // Scan for Container → HeapOwner relationships
160 for (sorted_idx, (container_orig_idx, container)) in sorted_allocs.iter().enumerate() {
161 // Only process Container types
162 if !matches!(container.kind, TrackKind::Container) {
163 continue;
164 }
165
166 // Skip containers with zero size (shouldn't happen, but be defensive)
167 if container.size == 0 {
168 continue;
169 }
170
171 // Examine subsequent allocations within the lookahead window
172 let start_idx = sorted_idx + 1;
173 let end_idx = (start_idx + config.lookahead).min(sorted_allocs.len());
174
175 for (candidate_orig_idx, candidate) in sorted_allocs[start_idx..end_idx].iter() {
176 // Only consider HeapOwner types
177 if !matches!(candidate.kind, TrackKind::HeapOwner { .. }) {
178 continue;
179 }
180
181 // Filter: Same thread
182 if container.thread_id != candidate.thread_id {
183 continue;
184 }
185
186 // Filter: Time window (must be allocated AFTER container)
187 let time_diff = candidate
188 .allocated_at
189 .saturating_sub(container.allocated_at);
190 if time_diff == 0 {
191 // Same allocation time, skip (shouldn't happen but be safe)
192 continue;
193 }
194 if time_diff > config.time_window_ns {
195 // No more candidates within time window
196 break;
197 }
198
199 // Skip containers with zero size (shouldn't happen, but be defensive)
200 if container.size == 0 {
201 // For containers with size 0, skip the size ratio check
202 // and allow any candidate within the time window
203 } else {
204 // Filter: Size ratio (prevent connecting to unusually large objects)
205 let max_size = container.size * config.size_ratio;
206 if candidate.size > max_size {
207 continue;
208 }
209 }
210
211 // All checks passed - add Contains edge
212 edges.push(RelationEdge {
213 from: *container_orig_idx,
214 to: *candidate_orig_idx,
215 relation: Relation::Contains,
216 });
217 }
218 }
219
220 edges
221}
222
223#[cfg(test)]
224mod tests {
225 use super::*;
226 use crate::analysis::relation_inference::Relation;
227
228 /// Helper to create a test Container allocation.
229 fn make_container(_id: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
230 ActiveAllocation {
231 ptr: None,
232 size,
233 kind: TrackKind::Container,
234 allocated_at: time,
235 var_name: None,
236 type_name: None,
237 thread_id: thread,
238 call_stack_hash: None,
239 module_path: None,
240 stack_ptr: None,
241 }
242 }
243
244 /// Helper to create a test HeapOwner allocation.
245 fn make_heap_owner(ptr: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
246 ActiveAllocation {
247 ptr: Some(ptr),
248 size,
249 kind: TrackKind::HeapOwner { ptr, size },
250 allocated_at: time,
251 var_name: None,
252 type_name: None,
253 thread_id: thread,
254 call_stack_hash: None,
255 module_path: None,
256 stack_ptr: None,
257 }
258 }
259
260 #[test]
261 fn test_detect_containers_empty() {
262 let allocs = vec![];
263 let edges = detect_containers(&allocs, None);
264 assert_eq!(edges.len(), 0);
265 }
266
267 #[test]
268 fn test_detect_containers_single_container() {
269 let allocs = vec![make_container(0, 64, 1000, 1)];
270 let edges = detect_containers(&allocs, None);
271 assert_eq!(edges.len(), 0);
272 }
273
274 #[test]
275 fn test_detect_containers_single_heap_owner() {
276 let allocs = vec![make_heap_owner(0x1000, 64, 1000, 1)];
277 let edges = detect_containers(&allocs, None);
278 assert_eq!(edges.len(), 0);
279 }
280
281 #[test]
282 fn test_detect_containers_basic() {
283 let allocs = vec![
284 make_container(0, 64, 1000, 1),
285 make_heap_owner(0x2000, 128, 100500, 1),
286 ];
287
288 let edges = detect_containers(&allocs, None);
289 assert_eq!(edges.len(), 1);
290 assert_eq!(edges[0].from, 0);
291 assert_eq!(edges[0].to, 1);
292 assert_eq!(edges[0].relation, Relation::Contains);
293 }
294
295 #[test]
296 fn test_detect_containers_time_window() {
297 let allocs = vec![
298 make_container(0, 64, 1000, 1),
299 make_heap_owner(0x2000, 128, 2_000_000, 1), // 2ms later - beyond default 1ms window
300 ];
301
302 let edges = detect_containers(&allocs, None);
303 assert_eq!(edges.len(), 0);
304 }
305
306 #[test]
307 fn test_detect_containers_custom_time_window() {
308 let allocs = vec![
309 make_container(0, 64, 1000, 1),
310 make_heap_owner(0x2000, 128, 2_000_000, 1), // 2ms later
311 ];
312
313 let config = ContainerConfig {
314 time_window_ns: 3_000_000, // 3ms window
315 ..Default::default()
316 };
317
318 let edges = detect_containers(&allocs, Some(config));
319 assert_eq!(edges.len(), 1);
320 }
321
322 #[test]
323 fn test_detect_containers_different_threads() {
324 let allocs = vec![
325 make_container(0, 64, 1000, 1),
326 make_heap_owner(0x2000, 128, 100500, 2), // Different thread
327 ];
328
329 let edges = detect_containers(&allocs, None);
330 assert_eq!(edges.len(), 0);
331 }
332
333 #[test]
334 fn test_detect_containers_size_ratio_filter() {
335 let allocs = vec![
336 make_container(0, 64, 1000, 1),
337 make_heap_owner(0x2000, 1280, 100500, 1), // 20x container size
338 ];
339
340 let edges = detect_containers(&allocs, None);
341 assert_eq!(edges.len(), 0);
342 }
343
344 #[test]
345 fn test_detect_containers_custom_size_ratio() {
346 let allocs = vec![
347 make_container(0, 64, 1000, 1),
348 make_heap_owner(0x2000, 1280, 100500, 1), // 20x container size
349 ];
350
351 let config = ContainerConfig {
352 size_ratio: 30, // Allow up to 30x ratio
353 ..Default::default()
354 };
355
356 let edges = detect_containers(&allocs, Some(config));
357 assert_eq!(edges.len(), 1);
358 }
359
360 #[test]
361 fn test_detect_containers_multiple_candidates() {
362 let allocs = vec![
363 make_container(0, 64, 1000, 1),
364 make_heap_owner(0x2000, 64, 100500, 1),
365 make_heap_owner(0x3000, 64, 100600, 1),
366 make_heap_owner(0x4000, 64, 100700, 1),
367 ];
368
369 let edges = detect_containers(&allocs, None);
370 assert_eq!(edges.len(), 3);
371 assert!(edges.iter().all(|e| e.from == 0));
372 }
373
374 #[test]
375 fn test_detect_containers_lookahead_limit() {
376 let allocs = vec![
377 make_container(0, 64, 1000, 1),
378 make_heap_owner(0x2000, 64, 100500, 1),
379 make_heap_owner(0x3000, 64, 100600, 1),
380 make_heap_owner(0x4000, 64, 100700, 1),
381 make_heap_owner(0x5000, 64, 100800, 1),
382 make_heap_owner(0x6000, 64, 100900, 1), // Beyond default lookahead of 5
383 ];
384
385 let edges = detect_containers(&allocs, None);
386 // Should only find 5 edges (lookahead limit)
387 assert_eq!(edges.len(), 5);
388 }
389
390 #[test]
391 fn test_detect_containers_multiple_containers() {
392 let allocs = vec![
393 make_container(0, 64, 1000, 1),
394 make_container(1, 64, 2_000_000, 1), // 2ms later (beyond 1ms window)
395 make_heap_owner(0x2000, 64, 100500, 1), // Near container 0 (within 1ms)
396 make_heap_owner(0x3000, 64, 2_100_000, 1), // Near container 1 (within 1ms)
397 ];
398
399 let edges = detect_containers(&allocs, None);
400 assert_eq!(edges.len(), 2);
401
402 // Check which edges were created
403 let contains_0_2 = edges.iter().any(|e| e.from == 0 && e.to == 2);
404 let contains_1_3 = edges.iter().any(|e| e.from == 1 && e.to == 3);
405 assert!(contains_0_2);
406 assert!(contains_1_3);
407 }
408
409 #[test]
410 fn test_detect_containers_ignore_value_types() {
411 let allocs = vec![
412 make_container(0, 64, 1000, 1),
413 ActiveAllocation {
414 ptr: None,
415 size: 32,
416 kind: TrackKind::Value,
417 allocated_at: 100500,
418 var_name: None,
419 type_name: None,
420 thread_id: 1,
421 call_stack_hash: None,
422 module_path: None,
423 stack_ptr: None,
424 },
425 ];
426
427 let edges = detect_containers(&allocs, None);
428 assert_eq!(edges.len(), 0);
429 }
430
431 #[test]
432 fn test_detect_containers_unsorted_input() {
433 let allocs = vec![
434 make_heap_owner(0x2000, 64, 100500, 1), // Later in time (original index 0)
435 make_container(0, 64, 1000, 1), // Earlier in time (original index 1)
436 ];
437
438 let edges = detect_containers(&allocs, None);
439 assert_eq!(edges.len(), 1);
440 // Container (original index 1) → HeapOwner (original index 0)
441 assert_eq!(edges[0].from, 1);
442 assert_eq!(edges[0].to, 0);
443 }
444
445 #[test]
446 fn test_config_default() {
447 let config = ContainerConfig::default();
448 assert_eq!(config.time_window_ns, 1_000_000);
449 assert_eq!(config.size_ratio, 10);
450 assert_eq!(config.lookahead, 5);
451 }
452
453 #[test]
454 fn test_config_custom() {
455 let config = ContainerConfig {
456 time_window_ns: 5_000_000,
457 size_ratio: 20,
458 lookahead: 10,
459 };
460 assert_eq!(config.time_window_ns, 5_000_000);
461 assert_eq!(config.size_ratio, 20);
462 assert_eq!(config.lookahead, 10);
463 }
464}