memscope_rs/analysis/relation_inference/container_detector.rs
1//! Container Detector — infers Container → HeapOwner Contains relations.
2//!
3//! This module implements heuristic-based inference of `Contains` relationships
4//! between Container types (HashMap, BTreeMap, VecDeque) and HeapOwner types
5//! (Vec, Box, String) using temporal locality and allocation metadata.
6//!
7//! # Detection Strategy
8//!
9//! The detector uses three primary signals:
10//!
11//! 1. **Temporal Locality**: Container and its contained objects are typically
12//! allocated within a short time window (default: 1ms).
13//!
14//! 2. **Thread Affinity**: Objects must be allocated on the same thread.
15//!
16//! 3. **Size Reasonableness**: The contained object should not be significantly
17//! larger than the container (default: 10x ratio).
18//!
19//! # Algorithm
20//!
21//! ```text
22//! for each Container in allocations:
23//! for each subsequent HeapOwner in sorted order:
24//! if same thread AND
25//! time_diff < TIME_WINDOW AND
26//! heap_owner.size < container.size * SIZE_RATIO:
27//! add_edge(container → heap_owner, Contains)
28//! else if time_diff > TIME_WINDOW:
29//! break // No more candidates in this window
30//! ```
31//!
32//! # Complexity
33//!
34//! - **Time**: O(N) where N is the number of allocations, thanks to the
35//! time-sorted sliding window approach.
36//! - **Space**: O(1) additional space beyond the allocation list.
37//!
38//! # Configuration
39//!
40//! The detection behavior can be tuned via [`ContainerConfig`]:
41//!
42//! - `time_window_ns`: Maximum time difference for considered allocations (default: 1ms)
43//! - `size_ratio`: Maximum size ratio between contained object and container (default: 10)
44//! - `lookahead`: Number of subsequent allocations to examine (default: 5)
45
46use crate::analysis::relation_inference::{Relation, RelationEdge};
47use crate::core::types::TrackKind;
48use crate::snapshot::types::ActiveAllocation;
49
50/// Configuration for container relation detection.
51///
52/// Controls the heuristic thresholds used to infer `Contains` relationships
53/// between Container and HeapOwner allocations.
54///
55/// # Fields
56///
57/// * `time_window_ns` - Maximum time difference in nanoseconds between
58/// container and contained object allocation (default: 1ms = 1,000,000ns).
59///
60/// * `size_ratio` - Maximum size ratio between contained object and container.
61/// Prevents connecting containers to unusually large objects (default: 10).
62///
63/// * `lookahead` - Number of subsequent allocations to examine for each container.
64/// Limits the search window for performance (default: 5).
65#[derive(Debug, Clone, Copy)]
66pub struct ContainerConfig {
67 /// Maximum time difference for considered allocations (default: 1ms).
68 pub time_window_ns: u64,
69 /// Maximum size ratio between contained object and container (default: 10).
70 pub size_ratio: usize,
71 /// Number of subsequent allocations to examine (default: 5).
72 pub lookahead: usize,
73}
74
75impl Default for ContainerConfig {
76 fn default() -> Self {
77 Self {
78 // 1ms = 1,000,000 nanoseconds
79 // This captures allocations within the same logical operation
80 time_window_ns: 1_000_000,
81 // Prevent connecting containers to disproportionately large objects
82 size_ratio: 10,
83 // Limit search window for performance
84 lookahead: 5,
85 }
86 }
87}
88
89/// Detect `Contains` relationships between Container and HeapOwner allocations.
90///
91/// This function implements heuristic-based inference using temporal locality,
92/// thread affinity, and size filtering to identify Container → HeapOwner edges.
93///
94/// # Arguments
95///
96/// * `allocations` - List of active allocations to analyze. Must be sorted by
97/// `allocated_at` timestamp for optimal performance.
98///
99/// * `config` - Detection configuration (optional, uses defaults if None).
100///
101/// # Returns
102///
103/// A vector of `RelationEdge` representing inferred `Contains` relationships.
104///
105/// # Algorithm
106///
107/// 1. Filter allocations into Containers and HeapOwners.
108/// 2. For each Container, examine subsequent HeapOwners within the time window.
109/// 3. Apply thread affinity and size ratio filters.
110/// 4. Add edges for passing candidates.
111///
112/// # Example
113///
114/// ```ignore
115/// use memscope_rs::analysis::relation_inference::container_detector::detect_containers;
116/// use memscope_rs::snapshot::types::ActiveAllocation;
117/// use memscope_rs::core::types::TrackKind;
118///
119/// let mut allocations = vec![
120/// ActiveAllocation {
121/// ptr: None,
122/// size: 64,
123/// kind: TrackKind::Container,
124/// allocated_at: 1000,
125/// thread_id: 1,
126/// ..Default::default()
127/// },
128/// ActiveAllocation {
129/// ptr: Some(0x2000),
130/// size: 128,
131/// kind: TrackKind::HeapOwner { ptr: 0x2000, size: 128 },
132/// allocated_at: 100500, // 500ns later, within 1ms window
133/// thread_id: 1,
134/// ..Default::default()
135/// },
136/// ];
137///
138/// let edges = detect_containers(&allocations, None);
139/// assert_eq!(edges.len(), 1);
140/// assert_eq!(edges[0].relation, Relation::Contains);
141/// ```
142pub fn detect_containers(
143 allocations: &[ActiveAllocation],
144 config: Option<ContainerConfig>,
145) -> Vec<RelationEdge> {
146 let config = config.unwrap_or_default();
147
148 if allocations.is_empty() {
149 return Vec::new();
150 }
151
152 // Create a mutable copy sorted by allocation time
153 let mut sorted_allocs: Vec<(usize, &ActiveAllocation)> =
154 allocations.iter().enumerate().collect();
155 sorted_allocs.sort_by_key(|(_, alloc)| alloc.allocated_at);
156
157 let mut edges = Vec::new();
158
159 // Scan for Container → HeapOwner relationships
160 for (sorted_idx, (container_orig_idx, container)) in sorted_allocs.iter().enumerate() {
161 // Only process Container types
162 if !matches!(container.kind, TrackKind::Container) {
163 continue;
164 }
165
166 // Skip containers with zero size (shouldn't happen, but be defensive)
167 if container.size == 0 {
168 continue;
169 }
170
171 // Examine subsequent allocations within the lookahead window
172 let start_idx = sorted_idx + 1;
173 let end_idx = (start_idx + config.lookahead).min(sorted_allocs.len());
174
175 for (candidate_orig_idx, candidate) in sorted_allocs[start_idx..end_idx].iter() {
176 // Only consider HeapOwner types
177 if !matches!(candidate.kind, TrackKind::HeapOwner { .. }) {
178 continue;
179 }
180
181 // Filter: Same thread
182 if container.thread_id != candidate.thread_id {
183 continue;
184 }
185
186 // Filter: Time window (must be allocated AFTER container)
187 let time_diff = candidate
188 .allocated_at
189 .saturating_sub(container.allocated_at);
190 if time_diff == 0 {
191 // Same allocation time, skip (shouldn't happen but be safe)
192 continue;
193 }
194 if time_diff > config.time_window_ns {
195 // No more candidates within time window
196 break;
197 }
198
199 // Skip containers with zero size (shouldn't happen, but be defensive)
200 if container.size == 0 {
201 // For containers with size 0, skip the size ratio check
202 // and allow any candidate within the time window
203 } else {
204 // Filter: Size ratio (prevent connecting to unusually large objects)
205 let max_size = container.size * config.size_ratio;
206 if candidate.size > max_size {
207 continue;
208 }
209 }
210
211 // All checks passed - add Contains edge
212 edges.push(RelationEdge {
213 from: *container_orig_idx,
214 to: *candidate_orig_idx,
215 relation: Relation::Contains,
216 });
217 }
218 }
219
220 edges
221}
222
223#[cfg(test)]
224mod tests {
225 use super::*;
226 use crate::analysis::relation_inference::Relation;
227
228 /// Helper to create a test Container allocation.
229 fn make_container(_id: usize, size: usize, time: u64, thread: u64) -> ActiveAllocation {
230 ActiveAllocation {
231 ptr: None,
232 size,
233 kind: TrackKind::Container,
234 allocated_at: time,
235 var_name: None,
236 type_name: None,
237 thread_id: thread,
238 call_stack_hash: None,
239 }
240 }
241
242 /// Helper to create a test HeapOwner allocation.
243 fn make_heap_owner(
244 _id: usize,
245 ptr: usize,
246 size: usize,
247 time: u64,
248 thread: u64,
249 ) -> ActiveAllocation {
250 ActiveAllocation {
251 ptr: Some(ptr),
252 size,
253 kind: TrackKind::HeapOwner { ptr, size },
254 allocated_at: time,
255 var_name: None,
256 type_name: None,
257 thread_id: thread,
258 call_stack_hash: None,
259 }
260 }
261
262 #[test]
263 fn test_detect_containers_empty() {
264 let allocs = vec![];
265 let edges = detect_containers(&allocs, None);
266 assert_eq!(edges.len(), 0);
267 }
268
269 #[test]
270 fn test_detect_containers_single_container() {
271 let allocs = vec![make_container(0, 64, 1000, 1)];
272 let edges = detect_containers(&allocs, None);
273 assert_eq!(edges.len(), 0);
274 }
275
276 #[test]
277 fn test_detect_containers_single_heap_owner() {
278 let allocs = vec![make_heap_owner(0, 0x1000, 64, 1000, 1)];
279 let edges = detect_containers(&allocs, None);
280 assert_eq!(edges.len(), 0);
281 }
282
283 #[test]
284 fn test_detect_containers_basic() {
285 let allocs = vec![
286 make_container(0, 64, 1000, 1),
287 make_heap_owner(1, 0x2000, 128, 100500, 1),
288 ];
289
290 let edges = detect_containers(&allocs, None);
291 assert_eq!(edges.len(), 1);
292 assert_eq!(edges[0].from, 0);
293 assert_eq!(edges[0].to, 1);
294 assert_eq!(edges[0].relation, Relation::Contains);
295 }
296
297 #[test]
298 fn test_detect_containers_time_window() {
299 let allocs = vec![
300 make_container(0, 64, 1000, 1),
301 make_heap_owner(1, 0x2000, 128, 2_000_000, 1), // 2ms later - beyond default 1ms window
302 ];
303
304 let edges = detect_containers(&allocs, None);
305 assert_eq!(edges.len(), 0);
306 }
307
308 #[test]
309 fn test_detect_containers_custom_time_window() {
310 let allocs = vec![
311 make_container(0, 64, 1000, 1),
312 make_heap_owner(1, 0x2000, 128, 2_000_000, 1), // 2ms later
313 ];
314
315 let config = ContainerConfig {
316 time_window_ns: 3_000_000, // 3ms window
317 ..Default::default()
318 };
319
320 let edges = detect_containers(&allocs, Some(config));
321 assert_eq!(edges.len(), 1);
322 }
323
324 #[test]
325 fn test_detect_containers_different_threads() {
326 let allocs = vec![
327 make_container(0, 64, 1000, 1),
328 make_heap_owner(1, 0x2000, 128, 100500, 2), // Different thread
329 ];
330
331 let edges = detect_containers(&allocs, None);
332 assert_eq!(edges.len(), 0);
333 }
334
335 #[test]
336 fn test_detect_containers_size_ratio_filter() {
337 let allocs = vec![
338 make_container(0, 64, 1000, 1),
339 make_heap_owner(1, 0x2000, 1280, 100500, 1), // 20x container size
340 ];
341
342 let edges = detect_containers(&allocs, None);
343 assert_eq!(edges.len(), 0);
344 }
345
346 #[test]
347 fn test_detect_containers_custom_size_ratio() {
348 let allocs = vec![
349 make_container(0, 64, 1000, 1),
350 make_heap_owner(1, 0x2000, 1280, 100500, 1), // 20x container size
351 ];
352
353 let config = ContainerConfig {
354 size_ratio: 30, // Allow up to 30x ratio
355 ..Default::default()
356 };
357
358 let edges = detect_containers(&allocs, Some(config));
359 assert_eq!(edges.len(), 1);
360 }
361
362 #[test]
363 fn test_detect_containers_multiple_candidates() {
364 let allocs = vec![
365 make_container(0, 64, 1000, 1),
366 make_heap_owner(1, 0x2000, 64, 100500, 1),
367 make_heap_owner(2, 0x3000, 64, 100600, 1),
368 make_heap_owner(3, 0x4000, 64, 100700, 1),
369 ];
370
371 let edges = detect_containers(&allocs, None);
372 assert_eq!(edges.len(), 3);
373 assert!(edges.iter().all(|e| e.from == 0));
374 }
375
376 #[test]
377 fn test_detect_containers_lookahead_limit() {
378 let allocs = vec![
379 make_container(0, 64, 1000, 1),
380 make_heap_owner(1, 0x2000, 64, 100500, 1),
381 make_heap_owner(2, 0x3000, 64, 100600, 1),
382 make_heap_owner(3, 0x4000, 64, 100700, 1),
383 make_heap_owner(4, 0x5000, 64, 100800, 1),
384 make_heap_owner(5, 0x6000, 64, 100900, 1), // Beyond default lookahead of 5
385 ];
386
387 let edges = detect_containers(&allocs, None);
388 // Should only find 5 edges (lookahead limit)
389 assert_eq!(edges.len(), 5);
390 }
391
392 #[test]
393 fn test_detect_containers_multiple_containers() {
394 let allocs = vec![
395 make_container(0, 64, 1000, 1),
396 make_container(1, 64, 2_000_000, 1), // 2ms later (beyond 1ms window)
397 make_heap_owner(2, 0x2000, 64, 100500, 1), // Near container 0 (within 1ms)
398 make_heap_owner(3, 0x3000, 64, 2_100_000, 1), // Near container 1 (within 1ms)
399 ];
400
401 let edges = detect_containers(&allocs, None);
402 assert_eq!(edges.len(), 2);
403
404 // Check which edges were created
405 let contains_0_2 = edges.iter().any(|e| e.from == 0 && e.to == 2);
406 let contains_1_3 = edges.iter().any(|e| e.from == 1 && e.to == 3);
407 assert!(contains_0_2);
408 assert!(contains_1_3);
409 }
410
411 #[test]
412 fn test_detect_containers_ignore_value_types() {
413 let allocs = vec![
414 make_container(0, 64, 1000, 1),
415 ActiveAllocation {
416 ptr: None,
417 size: 32,
418 kind: TrackKind::Value,
419 allocated_at: 100500,
420 var_name: None,
421 type_name: None,
422 thread_id: 1,
423 call_stack_hash: None,
424 },
425 ];
426
427 let edges = detect_containers(&allocs, None);
428 assert_eq!(edges.len(), 0);
429 }
430
431 #[test]
432 fn test_detect_containers_unsorted_input() {
433 let allocs = vec![
434 make_heap_owner(1, 0x2000, 64, 100500, 1), // Later in time (original index 0)
435 make_container(0, 64, 1000, 1), // Earlier in time (original index 1)
436 ];
437
438 let edges = detect_containers(&allocs, None);
439 assert_eq!(edges.len(), 1);
440 // Container (original index 1) → HeapOwner (original index 0)
441 assert_eq!(edges[0].from, 1);
442 assert_eq!(edges[0].to, 0);
443 }
444
445 #[test]
446 fn test_config_default() {
447 let config = ContainerConfig::default();
448 assert_eq!(config.time_window_ns, 1_000_000);
449 assert_eq!(config.size_ratio, 10);
450 assert_eq!(config.lookahead, 5);
451 }
452
453 #[test]
454 fn test_config_custom() {
455 let config = ContainerConfig {
456 time_window_ns: 5_000_000,
457 size_ratio: 20,
458 lookahead: 10,
459 };
460 assert_eq!(config.time_window_ns, 5_000_000);
461 assert_eq!(config.size_ratio, 20);
462 assert_eq!(config.lookahead, 10);
463 }
464}