Skip to main content

memscope_rs/analysis/relation_inference/
graph_builder.rs

1//! Graph Builder — main pipeline for relation inference.
2//!
3//! Orchestrates HeapScanner, UTI Engine, and all relation detectors
4//! to produce a complete `RelationGraph` from active allocations.
5
6use tracing::debug;
7
8use crate::analysis::{
9    heap_scanner::{HeapScanner, ScanResult},
10    relation_inference::{
11        clone_detector::{detect_clones, CloneConfig},
12        container_detector::{detect_containers, ContainerConfig},
13        pointer_scan::{detect_owner, InferenceRecord},
14        shared_detector::detect_shared,
15        slice_detector::detect_slice,
16        RangeMap, RelationEdge, RelationGraph,
17    },
18    unsafe_inference::{MemoryView, OwnedMemoryView, TypeKind, UnsafeInferenceEngine},
19};
20
21use crate::snapshot::types::ActiveAllocation;
22
23/// Configuration for the relation graph builder.
24///
25/// This struct controls the behavior of the relation inference pipeline,
26/// allowing fine-tuning of detection thresholds and parameters.
27///
28/// # Example
29///
30/// ```ignore
31/// use memscope_rs::analysis::relation_inference::{
32///     GraphBuilderConfig, CloneConfig,
33/// };
34///
35/// // Default configuration
36/// let config = GraphBuilderConfig::default();
37///
38/// // Custom configuration for stricter clone detection
39/// let config = GraphBuilderConfig {
40///     clone_config: CloneConfig {
41///         min_similarity: 0.95,  // Require 95% content match
42///         max_time_diff_ns: 5_000_000,  // 5ms window
43///         ..Default::default()
44///     },
45/// };
46///
47/// let graph = RelationGraphBuilder::build(&allocations, Some(config));
48/// ```
49///
50/// # Fields
51///
52/// * `clone_config` - Controls clone detection behavior. See [`CloneConfig`]
53///   for details on similarity thresholds and time windows.
54#[derive(Debug, Clone, Default)]
55pub struct GraphBuilderConfig {
56    /// Configuration for clone detection.
57    ///
58    /// Controls how the system identifies cloned allocations based on:
59    /// - Content similarity ratio (`min_similarity`)
60    /// - Maximum time difference between allocations (`max_time_diff_ns`)
61    /// - Number of bytes to compare (`compare_bytes`)
62    ///
63    /// Default values are tuned for typical Rust workloads:
64    /// - 80% similarity threshold
65    /// - 10ms time window
66    /// - 64 bytes comparison
67    pub clone_config: CloneConfig,
68
69    /// Configuration for container relation detection.
70    ///
71    /// Controls how the system infers `Contains` relationships between
72    /// Container types (HashMap, BTreeMap) and HeapOwner types (Vec, Box).
73    ///
74    /// Default values use temporal locality with 1ms time window:
75    /// - 1ms time window
76    /// - 10x size ratio limit
77    /// - 5 candidate lookahead
78    pub container_config: ContainerConfig,
79}
80
81/// Builds a relation graph from active allocations.
82///
83/// # Pipeline
84///
85/// ```text
86/// ActiveAllocation[]
87///     │
88///     ▼
89/// HeapScanner → ScanResult[]
90///     │
91///     ▼
92/// UTI Engine → InferenceRecord[]
93///     │
94///     ▼
95/// RangeMap + Owner/Slice/Clone detectors
96///     │
97///     ▼
98/// RelationGraph
99/// ```
100pub struct RelationGraphBuilder;
101
102impl RelationGraphBuilder {
103    /// Build a relation graph from a list of active allocations.
104    ///
105    /// # Arguments
106    ///
107    /// * `allocations` - List of active allocations from a snapshot.
108    /// * `config` - Builder configuration (optional, uses defaults if None).
109    ///
110    /// # Returns
111    ///
112    /// A `RelationGraph` containing all inferred relationships.
113    pub fn build(
114        allocations: &[ActiveAllocation],
115        config: Option<GraphBuilderConfig>,
116    ) -> RelationGraph {
117        let config = config.unwrap_or_default();
118
119        if allocations.is_empty() {
120            return RelationGraph::new();
121        }
122
123        // Step 1: Scan heap memory for all allocations.
124        debug!(step = 1, "HeapScanner::scan");
125        let scan_results = HeapScanner::scan(allocations);
126        debug!(
127            step = 1,
128            scan_results = scan_results.len(),
129            "HeapScanner completed"
130        );
131
132        // Create a mapping from (ptr, size) to scan result
133        debug!(step = 2, "Building scan_map");
134        let scan_map: std::collections::HashMap<(usize, usize), &ScanResult> = scan_results
135            .iter()
136            .map(|scan| ((scan.ptr, scan.size), scan))
137            .collect();
138        debug!(step = 2, "scan_map completed");
139
140        // Step 2: Run UTI Engine on each allocation.
141        // We ensure records has the same length as allocations to maintain index consistency.
142        debug!(step = 3, "Running UTI Engine");
143        let records: Vec<InferenceRecord> = allocations
144            .iter()
145            .enumerate()
146            .map(|(id, alloc)| {
147                let scan = scan_map.get(&(alloc.ptr.unwrap_or(0), alloc.size));
148
149                let (type_kind, confidence) =
150                    if let Some(memory) = scan.and_then(|s| s.memory.as_deref()) {
151                        let view = MemoryView::new(memory);
152                        let guess = UnsafeInferenceEngine::infer_single(&view, alloc.size);
153                        (guess.kind, guess.confidence)
154                    } else {
155                        (TypeKind::Unknown, 0)
156                    };
157
158                InferenceRecord {
159                    id,
160                    ptr: alloc.ptr.unwrap_or(0),
161                    size: alloc.size,
162                    memory: scan
163                        .and_then(|s| s.memory.clone())
164                        .map(OwnedMemoryView::new),
165                    type_kind,
166                    confidence,
167                    call_stack_hash: alloc.call_stack_hash,
168                    alloc_time: alloc.allocated_at,
169                    stack_ptr: alloc.stack_ptr,
170                }
171            })
172            .collect();
173        debug!(step = 3, records = records.len(), "UTI Engine completed");
174
175        // Step 3: Build RangeMap for address → allocation lookup.
176        debug!(step = 4, "Building RangeMap");
177        let range_map = RangeMap::new(allocations);
178        debug!(step = 4, "RangeMap completed");
179
180        // Step 4: Run relation detectors.
181        let mut graph = RelationGraph::new();
182
183        // Owner detection: scan each allocation's memory for pointers.
184        debug!(step = 5, "detect_owner");
185        for record in &records {
186            let edges = detect_owner(record, &range_map);
187            graph.add_edges(edges);
188        }
189        debug!(
190            step = 5,
191            edges = graph.edge_count(),
192            "detect_owner completed"
193        );
194
195        // Slice detection: check if each allocation points into another's interior.
196        debug!(step = 6, "detect_slice");
197        let slice_edges = detect_slice(&records, allocations, &range_map);
198        graph.add_edges(slice_edges);
199        debug!(
200            step = 6,
201            edges = graph.edge_count(),
202            "detect_slice completed"
203        );
204
205        // Clone detection: batch comparison by (type, size, stack_hash).
206        debug!(step = 7, "detect_clones");
207        let clone_edges = detect_clones(&records, &config.clone_config);
208        graph.add_edges(clone_edges);
209        debug!(
210            step = 7,
211            edges = graph.edge_count(),
212            "detect_clones completed"
213        );
214
215        // Container detection: infer Contains relationships using temporal locality.
216        debug!(step = 8, "detect_containers");
217        let container_edges = detect_containers(allocations, Some(config.container_config));
218        graph.add_edges(container_edges);
219        debug!(
220            step = 8,
221            edges = graph.edge_count(),
222            "detect_containers completed"
223        );
224
225        // Variable name evolution detection: infer relationships for allocations with same variable name
226        debug!(step = 9, "detect_variable_evolution");
227        let var_evolution_edges = detect_variable_evolution(allocations);
228        graph.add_edges(var_evolution_edges);
229        debug!(
230            step = 9,
231            edges = graph.edge_count(),
232            "detect_variable_evolution completed"
233        );
234
235        // Shared detection: find Arc/Rc shared ownership via Owner graph analysis.
236        debug!(step = 10, "detect_shared");
237        let shared_edges = detect_shared(&records, &graph.edges);
238        graph.add_edges(shared_edges);
239        debug!(
240            step = 10,
241            edges = graph.edge_count(),
242            "detect_shared completed"
243        );
244
245        debug!("RelationGraphBuilder all steps completed");
246        graph
247    }
248}
249
250/// Detect variable evolution relationships based on variable names.
251///
252/// For allocations with the same variable name, this function infers
253/// evolution relationships indicating that the same variable was
254/// tracked multiple times (e.g., growing HashMap, reallocated Vec).
255fn detect_variable_evolution(allocations: &[ActiveAllocation]) -> Vec<RelationEdge> {
256    use crate::analysis::relation_inference::Relation;
257    use std::collections::HashMap;
258
259    // Group allocations by variable name
260    let mut var_groups: HashMap<String, Vec<usize>> = HashMap::new();
261    for (i, alloc) in allocations.iter().enumerate() {
262        if let Some(ref var_name) = alloc.var_name {
263            if !var_name.is_empty() && var_name != "unknown" {
264                var_groups.entry(var_name.clone()).or_default().push(i);
265            }
266        }
267    }
268
269    let mut edges = Vec::new();
270
271    // For each variable with multiple allocations, create evolution edges
272    for indices in var_groups.values() {
273        if indices.len() < 2 {
274            continue;
275        }
276
277        // Sort by allocation time
278        let mut sorted_indices: Vec<(usize, u64)> = indices
279            .iter()
280            .map(|&i| (i, allocations[i].allocated_at))
281            .collect();
282        sorted_indices.sort_by_key(|&(_, time)| time);
283
284        // Create edges from earlier to later allocations
285        for window in sorted_indices.windows(2) {
286            let (from_idx, _) = window[0];
287            let (to_idx, _) = window[1];
288
289            edges.push(RelationEdge {
290                from: from_idx,
291                to: to_idx,
292                relation: Relation::Evolution,
293            });
294        }
295    }
296
297    edges
298}
299
300#[cfg(test)]
301mod tests {
302    use super::*;
303    use crate::analysis::relation_inference::Relation;
304    use crate::core::types::TrackKind;
305
306    fn make_alloc(ptr: usize, size: usize) -> ActiveAllocation {
307        ActiveAllocation {
308            ptr: Some(ptr),
309            size,
310            kind: TrackKind::HeapOwner { ptr, size },
311            allocated_at: 1000,
312            var_name: None,
313            type_name: None,
314            thread_id: 0,
315            call_stack_hash: None,
316            module_path: None,
317            stack_ptr: None,
318        }
319    }
320
321    #[test]
322    fn test_build_empty() {
323        let graph = RelationGraphBuilder::build(&[], None);
324        assert_eq!(graph.edge_count(), 0);
325    }
326
327    #[test]
328    fn test_build_basic_owner_relationship() {
329        // Use real heap allocations via Vec to ensure addresses are valid.
330        let buf1 = [0u8; 24];
331        let buf2 = vec![0u8; 1024];
332        let ptr1 = buf1.as_ptr() as usize;
333        let ptr2 = buf2.as_ptr() as usize;
334
335        let allocs = vec![make_alloc(ptr1, 24), make_alloc(ptr2, 1024)];
336        let graph = RelationGraphBuilder::build(&allocs, None);
337
338        // Graph should be built without panicking.
339        // The content is all zeros so no Owner relationships will be found.
340        assert_eq!(graph.edge_count(), 0);
341    }
342
343    #[test]
344    fn test_build_with_real_vec_metadata() {
345        // Create a real Vec-like metadata pattern: [ptr, len, cap]
346        let inner = vec![42u8; 256];
347        let ptr = inner.as_ptr() as usize;
348        let len = inner.len();
349        let cap = inner.capacity();
350
351        let mut metadata = [0u8; 24];
352        metadata[0..8].copy_from_slice(&ptr.to_le_bytes());
353        metadata[8..16].copy_from_slice(&len.to_le_bytes());
354        metadata[16..24].copy_from_slice(&cap.to_le_bytes());
355
356        let meta_ptr = metadata.as_ptr() as usize;
357        let inner_ptr = inner.as_ptr() as usize;
358
359        let allocs = vec![make_alloc(meta_ptr, 24), make_alloc(inner_ptr, 256)];
360        let graph = RelationGraphBuilder::build(&allocs, None);
361
362        // Should not crash. Actual edge depends on whether metadata is readable
363        // at meta_ptr (it's on the heap, so should be).
364        assert!(graph.edge_count() <= 2);
365    }
366
367    #[test]
368    fn test_build_single_allocation() {
369        let allocs = vec![make_alloc(0x1000, 64)];
370        let graph = RelationGraphBuilder::build(&allocs, None);
371
372        // Single allocation should not produce any relationships.
373        assert_eq!(graph.edge_count(), 0);
374    }
375
376    #[test]
377    fn test_builder_with_default_config() {
378        let allocs = vec![make_alloc(0x1000, 64)];
379        let graph = RelationGraphBuilder::build(&allocs, Some(GraphBuilderConfig::default()));
380        assert_eq!(graph.edge_count(), 0);
381    }
382
383    #[test]
384    fn test_graph_node_count() {
385        let mut graph = RelationGraph::new();
386        graph.add_edge(0, 1, Relation::Owns);
387        graph.add_edge(1, 2, Relation::Slice);
388
389        let nodes = graph.all_nodes();
390        assert_eq!(nodes, vec![0, 1, 2]);
391    }
392}