Skip to main content

memscope_rs/analysis/relation_inference/
graph_builder.rs

1//! Graph Builder — main pipeline for relation inference.
2//!
3//! Orchestrates HeapScanner, UTI Engine, and all relation detectors
4//! to produce a complete `RelationGraph` from active allocations.
5
6use tracing::debug;
7
8use crate::analysis::{
9    heap_scanner::{HeapScanner, ScanResult},
10    relation_inference::{
11        clone_detector::{detect_clones, CloneConfig},
12        container_detector::{detect_containers, ContainerConfig},
13        pointer_scan::{detect_owner, InferenceRecord},
14        shared_detector::detect_shared,
15        slice_detector::detect_slice,
16        RangeMap, RelationEdge, RelationGraph,
17    },
18    unsafe_inference::{MemoryView, OwnedMemoryView, TypeKind, UnsafeInferenceEngine},
19};
20
21use crate::snapshot::types::ActiveAllocation;
22
23/// Configuration for the relation graph builder.
24///
25/// This struct controls the behavior of the relation inference pipeline,
26/// allowing fine-tuning of detection thresholds and parameters.
27///
28/// # Example
29///
30/// ```ignore
31/// use memscope_rs::analysis::relation_inference::{
32///     GraphBuilderConfig, CloneConfig,
33/// };
34///
35/// // Default configuration
36/// let config = GraphBuilderConfig::default();
37///
38/// // Custom configuration for stricter clone detection
39/// let config = GraphBuilderConfig {
40///     clone_config: CloneConfig {
41///         min_similarity: 0.95,  // Require 95% content match
42///         max_time_diff_ns: 5_000_000,  // 5ms window
43///         ..Default::default()
44///     },
45/// };
46///
47/// let graph = RelationGraphBuilder::build(&allocations, Some(config));
48/// ```
49///
50/// # Fields
51///
52/// * `clone_config` - Controls clone detection behavior. See [`CloneConfig`]
53///   for details on similarity thresholds and time windows.
54#[derive(Debug, Clone, Default)]
55pub struct GraphBuilderConfig {
56    /// Configuration for clone detection.
57    ///
58    /// Controls how the system identifies cloned allocations based on:
59    /// - Content similarity ratio (`min_similarity`)
60    /// - Maximum time difference between allocations (`max_time_diff_ns`)
61    /// - Number of bytes to compare (`compare_bytes`)
62    ///
63    /// Default values are tuned for typical Rust workloads:
64    /// - 80% similarity threshold
65    /// - 10ms time window
66    /// - 64 bytes comparison
67    pub clone_config: CloneConfig,
68
69    /// Configuration for container relation detection.
70    ///
71    /// Controls how the system infers `Contains` relationships between
72    /// Container types (HashMap, BTreeMap) and HeapOwner types (Vec, Box).
73    ///
74    /// Default values use temporal locality with 1ms time window:
75    /// - 1ms time window
76    /// - 10x size ratio limit
77    /// - 5 candidate lookahead
78    pub container_config: ContainerConfig,
79}
80
81/// Builds a relation graph from active allocations.
82///
83/// # Pipeline
84///
85/// ```text
86/// ActiveAllocation[]
87///     │
88///     ▼
89/// HeapScanner → ScanResult[]
90///     │
91///     ▼
92/// UTI Engine → InferenceRecord[]
93///     │
94///     ▼
95/// RangeMap + Owner/Slice/Clone detectors
96///     │
97///     ▼
98/// RelationGraph
99/// ```
100pub struct RelationGraphBuilder;
101
102impl RelationGraphBuilder {
103    /// Build a relation graph from a list of active allocations.
104    ///
105    /// # Arguments
106    ///
107    /// * `allocations` - List of active allocations from a snapshot.
108    /// * `config` - Builder configuration (optional, uses defaults if None).
109    ///
110    /// # Returns
111    ///
112    /// A `RelationGraph` containing all inferred relationships.
113    pub fn build(
114        allocations: &[ActiveAllocation],
115        config: Option<GraphBuilderConfig>,
116    ) -> RelationGraph {
117        let config = config.unwrap_or_default();
118
119        if allocations.is_empty() {
120            return RelationGraph::new();
121        }
122
123        // Step 1: Scan heap memory for all allocations.
124        debug!(step = 1, "HeapScanner::scan");
125        let scan_results = HeapScanner::scan(allocations);
126        debug!(
127            step = 1,
128            scan_results = scan_results.len(),
129            "HeapScanner completed"
130        );
131
132        // Create a mapping from (ptr, size) to scan result
133        debug!(step = 2, "Building scan_map");
134        let scan_map: std::collections::HashMap<(usize, usize), &ScanResult> = scan_results
135            .iter()
136            .map(|scan| ((scan.ptr, scan.size), scan))
137            .collect();
138        debug!(step = 2, "scan_map completed");
139
140        // Step 2: Run UTI Engine on each allocation.
141        // We ensure records has the same length as allocations to maintain index consistency.
142        debug!(step = 3, "Running UTI Engine");
143        let records: Vec<InferenceRecord> = allocations
144            .iter()
145            .enumerate()
146            .map(|(id, alloc)| {
147                let scan = scan_map.get(&(alloc.ptr.unwrap_or(0), alloc.size));
148
149                let (type_kind, confidence) =
150                    if let Some(memory) = scan.and_then(|s| s.memory.as_deref()) {
151                        let view = MemoryView::new(memory);
152                        let guess = UnsafeInferenceEngine::infer_single(&view, alloc.size);
153                        (guess.kind, guess.confidence)
154                    } else {
155                        (TypeKind::Unknown, 0)
156                    };
157
158                InferenceRecord {
159                    id,
160                    ptr: alloc.ptr.unwrap_or(0),
161                    size: alloc.size,
162                    memory: scan
163                        .and_then(|s| s.memory.clone())
164                        .map(OwnedMemoryView::new),
165                    type_kind,
166                    confidence,
167                    call_stack_hash: alloc.call_stack_hash,
168                    alloc_time: alloc.allocated_at,
169                }
170            })
171            .collect();
172        debug!(step = 3, records = records.len(), "UTI Engine completed");
173
174        // Step 3: Build RangeMap for address → allocation lookup.
175        debug!(step = 4, "Building RangeMap");
176        let range_map = RangeMap::new(allocations);
177        debug!(step = 4, "RangeMap completed");
178
179        // Step 4: Run relation detectors.
180        let mut graph = RelationGraph::new();
181
182        // Owner detection: scan each allocation's memory for pointers.
183        debug!(step = 5, "detect_owner");
184        for record in &records {
185            let edges = detect_owner(record, &range_map);
186            graph.add_edges(edges);
187        }
188        debug!(
189            step = 5,
190            edges = graph.edge_count(),
191            "detect_owner completed"
192        );
193
194        // Slice detection: check if each allocation points into another's interior.
195        debug!(step = 6, "detect_slice");
196        let slice_edges = detect_slice(&records, allocations, &range_map);
197        graph.add_edges(slice_edges);
198        debug!(
199            step = 6,
200            edges = graph.edge_count(),
201            "detect_slice completed"
202        );
203
204        // Clone detection: batch comparison by (type, size, stack_hash).
205        debug!(step = 7, "detect_clones");
206        let clone_edges = detect_clones(&records, &config.clone_config);
207        graph.add_edges(clone_edges);
208        debug!(
209            step = 7,
210            edges = graph.edge_count(),
211            "detect_clones completed"
212        );
213
214        // Container detection: infer Contains relationships using temporal locality.
215        debug!(step = 8, "detect_containers");
216        let container_edges = detect_containers(allocations, Some(config.container_config));
217        graph.add_edges(container_edges);
218        debug!(
219            step = 8,
220            edges = graph.edge_count(),
221            "detect_containers completed"
222        );
223
224        // Variable name evolution detection: infer relationships for allocations with same variable name
225        debug!(step = 9, "detect_variable_evolution");
226        let var_evolution_edges = detect_variable_evolution(allocations);
227        graph.add_edges(var_evolution_edges);
228        debug!(
229            step = 9,
230            edges = graph.edge_count(),
231            "detect_variable_evolution completed"
232        );
233
234        // Shared detection: find Arc/Rc shared ownership via Owner graph analysis.
235        debug!(step = 10, "detect_shared");
236        let shared_edges = detect_shared(&records, &graph.edges);
237        graph.add_edges(shared_edges);
238        debug!(
239            step = 10,
240            edges = graph.edge_count(),
241            "detect_shared completed"
242        );
243
244        debug!("RelationGraphBuilder all steps completed");
245        graph
246    }
247}
248
249/// Detect variable evolution relationships based on variable names.
250///
251/// For allocations with the same variable name, this function infers
252/// evolution relationships indicating that the same variable was
253/// tracked multiple times (e.g., growing HashMap, reallocated Vec).
254fn detect_variable_evolution(allocations: &[ActiveAllocation]) -> Vec<RelationEdge> {
255    use crate::analysis::relation_inference::Relation;
256    use std::collections::HashMap;
257
258    // Group allocations by variable name
259    let mut var_groups: HashMap<String, Vec<usize>> = HashMap::new();
260    for (i, alloc) in allocations.iter().enumerate() {
261        if let Some(ref var_name) = alloc.var_name {
262            if !var_name.is_empty() && var_name != "unknown" {
263                var_groups.entry(var_name.clone()).or_default().push(i);
264            }
265        }
266    }
267
268    let mut edges = Vec::new();
269
270    // For each variable with multiple allocations, create evolution edges
271    for indices in var_groups.values() {
272        if indices.len() < 2 {
273            continue;
274        }
275
276        // Sort by allocation time
277        let mut sorted_indices: Vec<(usize, u64)> = indices
278            .iter()
279            .map(|&i| (i, allocations[i].allocated_at))
280            .collect();
281        sorted_indices.sort_by_key(|&(_, time)| time);
282
283        // Create edges from earlier to later allocations
284        for window in sorted_indices.windows(2) {
285            let (from_idx, _) = window[0];
286            let (to_idx, _) = window[1];
287
288            edges.push(RelationEdge {
289                from: from_idx,
290                to: to_idx,
291                relation: Relation::Evolution,
292            });
293        }
294    }
295
296    edges
297}
298
299#[cfg(test)]
300mod tests {
301    use super::*;
302    use crate::analysis::relation_inference::Relation;
303    use crate::core::types::TrackKind;
304
305    fn make_alloc(ptr: usize, size: usize) -> ActiveAllocation {
306        ActiveAllocation {
307            ptr: Some(ptr),
308            size,
309            kind: TrackKind::HeapOwner { ptr, size },
310            allocated_at: 1000,
311            var_name: None,
312            type_name: None,
313            thread_id: 0,
314            call_stack_hash: None,
315        }
316    }
317
318    #[test]
319    fn test_build_empty() {
320        let graph = RelationGraphBuilder::build(&[], None);
321        assert_eq!(graph.edge_count(), 0);
322    }
323
324    #[test]
325    fn test_build_basic_owner_relationship() {
326        // Use real heap allocations via Vec to ensure addresses are valid.
327        let buf1 = [0u8; 24];
328        let buf2 = vec![0u8; 1024];
329        let ptr1 = buf1.as_ptr() as usize;
330        let ptr2 = buf2.as_ptr() as usize;
331
332        let allocs = vec![make_alloc(ptr1, 24), make_alloc(ptr2, 1024)];
333        let graph = RelationGraphBuilder::build(&allocs, None);
334
335        // Graph should be built without panicking.
336        // The content is all zeros so no Owner relationships will be found.
337        assert_eq!(graph.edge_count(), 0);
338    }
339
340    #[test]
341    fn test_build_with_real_vec_metadata() {
342        // Create a real Vec-like metadata pattern: [ptr, len, cap]
343        let inner = vec![42u8; 256];
344        let ptr = inner.as_ptr() as usize;
345        let len = inner.len();
346        let cap = inner.capacity();
347
348        let mut metadata = [0u8; 24];
349        metadata[0..8].copy_from_slice(&ptr.to_le_bytes());
350        metadata[8..16].copy_from_slice(&len.to_le_bytes());
351        metadata[16..24].copy_from_slice(&cap.to_le_bytes());
352
353        let meta_ptr = metadata.as_ptr() as usize;
354        let inner_ptr = inner.as_ptr() as usize;
355
356        let allocs = vec![make_alloc(meta_ptr, 24), make_alloc(inner_ptr, 256)];
357        let graph = RelationGraphBuilder::build(&allocs, None);
358
359        // Should not crash. Actual edge depends on whether metadata is readable
360        // at meta_ptr (it's on the heap, so should be).
361        assert!(graph.edge_count() <= 2);
362    }
363
364    #[test]
365    fn test_build_single_allocation() {
366        let allocs = vec![make_alloc(0x1000, 64)];
367        let graph = RelationGraphBuilder::build(&allocs, None);
368
369        // Single allocation should not produce any relationships.
370        assert_eq!(graph.edge_count(), 0);
371    }
372
373    #[test]
374    fn test_builder_with_default_config() {
375        let allocs = vec![make_alloc(0x1000, 64)];
376        let graph = RelationGraphBuilder::build(&allocs, Some(GraphBuilderConfig::default()));
377        assert_eq!(graph.edge_count(), 0);
378    }
379
380    #[test]
381    fn test_graph_node_count() {
382        let mut graph = RelationGraph::new();
383        graph.add_edge(0, 1, Relation::Owns);
384        graph.add_edge(1, 2, Relation::Slice);
385
386        let nodes = graph.all_nodes();
387        assert_eq!(nodes, vec![0, 1, 2]);
388    }
389}