go_brrr/dfg/
types.rs

1//! DFG type definitions.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::OnceLock;
6
7/// Kind of data flow.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum DataflowKind {
10    /// Variable is defined
11    Definition,
12    /// Variable is read
13    Use,
14    /// Variable is mutated
15    Mutation,
16    /// Value is returned
17    Return,
18    /// Function parameter
19    Param,
20    /// Value is yielded from generator
21    Yield,
22    /// Comprehension-scoped variable (does not leak to outer scope in Python 3)
23    ComprehensionVar,
24    /// Lambda parameter (scoped to lambda body)
25    LambdaParam,
26    /// BUG-017 FIX: Variable declared as global (references module-level scope)
27    Global,
28    /// BUG-017 FIX: Variable declared as nonlocal (references enclosing function scope)
29    Nonlocal,
30    /// Nested function parameter (scoped to nested function body, used for closure analysis)
31    NestedParam,
32    /// Closure capture: use of outer variable from within nested function
33    ClosureCapture,
34    /// Go: Variable captured/used in goroutine (go func() {...})
35    Goroutine,
36    /// Go: Channel send operation (ch <- val)
37    ChannelSend,
38    /// Go: Channel receive operation (val := <-ch)
39    ChannelReceive,
40    /// Go: Deferred call (defer f())
41    Defer,
42    /// Go: Type assertion (x.(Type))
43    TypeAssertion,
44    // =========================================================================
45    // Go: Panic/Recover/Error handling flow
46    // =========================================================================
47    /// Go: Panic call (panic(err))
48    Panic,
49    /// Go: Recover call in deferred function (recover())
50    Recover,
51    /// Go: Error value assigned (err := ...)
52    ErrorAssign,
53    /// Go: Error check (if err != nil)
54    ErrorCheck,
55    /// Go: Named return value modified in defer
56    NamedReturnModify,
57    // =========================================================================
58    // Go: Channel and sync primitive operations
59    // =========================================================================
60    /// Go: Channel creation (make(chan T) or make(chan T, size))
61    ChannelMake,
62    /// Go: Channel close operation (close(ch))
63    ChannelCloseDfg,
64    /// Go: Select statement receive (case val := <-ch:)
65    SelectReceive,
66    /// Go: Select statement send (case ch <- val:)
67    SelectSend,
68    /// Go: Mutex lock (mu.Lock() or mu.RLock())
69    MutexLock,
70    /// Go: Mutex unlock (mu.Unlock() or mu.RUnlock())
71    MutexUnlock,
72    /// Go: WaitGroup.Add() operation
73    WaitGroupAdd,
74    /// Go: WaitGroup.Done() operation
75    WaitGroupDone,
76    /// Go: WaitGroup.Wait() synchronization point
77    WaitGroupWait,
78    /// Go: sync.Once.Do() call
79    OnceDo,
80    /// Go: Context.Done() check for cancellation
81    ContextDone,
82    /// Go: Context.Err() check for cancellation reason
83    ContextErr,
84    /// Go: Context.Value() retrieval
85    ContextValue,
86    /// Go: sync.Pool.Get()
87    PoolGet,
88    /// Go: sync.Pool.Put()
89    PoolPut,
90}
91
92/// A single data flow edge.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct DataflowEdge {
95    /// Variable name
96    pub variable: String,
97    /// Source line (where data comes from)
98    pub from_line: usize,
99    /// Target line (where data flows to)
100    pub to_line: usize,
101    /// Kind of data flow
102    pub kind: DataflowKind,
103}
104
105/// Cached adjacency lists for efficient graph traversal.
106///
107/// Pre-computed once on first access, then reused for all subsequent
108/// slice operations. This avoids O(E) rebuild per slice call.
109///
110/// This is an internal implementation detail. Users interact with it through
111/// the `DFGInfo::get_adjacency_cache()` method.
112#[derive(Debug)]
113pub struct AdjacencyCache {
114    /// Incoming edges: to_line -> [from_lines]
115    /// Used for backward slicing.
116    pub incoming: HashMap<usize, Vec<usize>>,
117    /// Outgoing edges: from_line -> [to_lines]
118    /// Used for forward slicing.
119    pub outgoing: HashMap<usize, Vec<usize>>,
120}
121
122/// Per-variable adjacency cache for variable-filtered slicing.
123///
124/// Pre-computed once on first variable-filtered slice, then reused.
125/// This eliminates O(E) string comparisons per variable-filtered slice call.
126///
127/// This is an internal implementation detail. Users interact with it through
128/// the `DFGInfo::get_variable_adjacency_cache()` method.
129#[derive(Debug)]
130pub struct VariableAdjacencyCache {
131    /// Per-variable incoming edges: variable -> (to_line -> [from_lines])
132    /// Used for variable-specific backward slicing.
133    pub var_incoming: HashMap<String, HashMap<usize, Vec<usize>>>,
134    /// Per-variable outgoing edges: variable -> (from_line -> [to_lines])
135    /// Used for variable-specific forward slicing.
136    pub var_outgoing: HashMap<String, HashMap<usize, Vec<usize>>>,
137}
138
139/// Complete data flow graph for a function.
140///
141/// The DFG tracks variable definitions, uses, and the data flow edges between them.
142/// It provides efficient slicing operations through cached adjacency lists that are
143/// computed lazily on first access using interior mutability (OnceLock).
144///
145/// # Performance
146///
147/// The adjacency cache is built once on first slice operation and reused for all
148/// subsequent operations. This amortizes the O(E) cost of building adjacency lists
149/// across multiple slice calls, providing up to 15900x speedup for repeated queries.
150#[derive(Debug, Serialize, Deserialize)]
151pub struct DFGInfo {
152    /// Function name
153    pub function_name: String,
154    /// All data flow edges
155    pub edges: Vec<DataflowEdge>,
156    /// Where each variable is defined
157    pub definitions: HashMap<String, Vec<usize>>,
158    /// Where each variable is used
159    pub uses: HashMap<String, Vec<usize>>,
160    /// Cached adjacency lists for O(1) edge lookups during slicing.
161    /// Lazily initialized on first access via `get_adjacency_cache()`.
162    #[serde(skip)]
163    adjacency_cache: OnceLock<AdjacencyCache>,
164    /// Per-variable adjacency cache for variable-filtered slicing.
165    /// Lazily initialized on first variable-filtered slice via `get_variable_adjacency_cache()`.
166    #[serde(skip)]
167    variable_adjacency_cache: OnceLock<VariableAdjacencyCache>,
168}
169
170impl Clone for DFGInfo {
171    fn clone(&self) -> Self {
172        Self {
173            function_name: self.function_name.clone(),
174            edges: self.edges.clone(),
175            definitions: self.definitions.clone(),
176            uses: self.uses.clone(),
177            // Fresh caches for clone - will be rebuilt on first access if needed
178            adjacency_cache: OnceLock::new(),
179            variable_adjacency_cache: OnceLock::new(),
180        }
181    }
182}
183
184// Public API methods - used by library consumers for advanced DFG queries
185#[allow(dead_code)]
186impl DFGInfo {
187    /// Create a new DFGInfo with the given data.
188    ///
189    /// Both adjacency caches are initialized empty and will be lazily built
190    /// on first slice operation.
191    pub fn new(
192        function_name: String,
193        edges: Vec<DataflowEdge>,
194        definitions: HashMap<String, Vec<usize>>,
195        uses: HashMap<String, Vec<usize>>,
196    ) -> Self {
197        Self {
198            function_name,
199            edges,
200            definitions,
201            uses,
202            adjacency_cache: OnceLock::new(),
203            variable_adjacency_cache: OnceLock::new(),
204        }
205    }
206
207    /// Get or lazily build the adjacency cache.
208    ///
209    /// This method uses interior mutability (OnceLock) to build the cache
210    /// exactly once, even when called from multiple threads. The cache is
211    /// then reused for all subsequent slice operations.
212    ///
213    /// # Performance
214    ///
215    /// - First call: O(E) to build both incoming and outgoing adjacency maps
216    /// - Subsequent calls: O(1) to return cached reference
217    ///
218    /// This provides up to 15900x speedup for repeated slice operations
219    /// compared to rebuilding adjacency maps on every call.
220    #[inline]
221    pub fn get_adjacency_cache(&self) -> &AdjacencyCache {
222        self.adjacency_cache.get_or_init(|| {
223            let edge_count = self.edges.len();
224            let mut incoming: HashMap<usize, Vec<usize>> = HashMap::with_capacity(edge_count);
225            let mut outgoing: HashMap<usize, Vec<usize>> = HashMap::with_capacity(edge_count);
226
227            for edge in &self.edges {
228                incoming
229                    .entry(edge.to_line)
230                    .or_default()
231                    .push(edge.from_line);
232                outgoing
233                    .entry(edge.from_line)
234                    .or_default()
235                    .push(edge.to_line);
236            }
237
238            AdjacencyCache { incoming, outgoing }
239        })
240    }
241
242    /// Get incoming edges for a line (sources that flow TO this line).
243    ///
244    /// Returns `None` if no edges flow to this line.
245    /// Uses the cached adjacency list for O(1) lookup.
246    #[inline]
247    pub fn get_incoming(&self, line: usize) -> Option<&Vec<usize>> {
248        self.get_adjacency_cache().incoming.get(&line)
249    }
250
251    /// Get outgoing edges for a line (targets that this line flows TO).
252    ///
253    /// Returns `None` if no edges flow from this line.
254    /// Uses the cached adjacency list for O(1) lookup.
255    #[inline]
256    pub fn get_outgoing(&self, line: usize) -> Option<&Vec<usize>> {
257        self.get_adjacency_cache().outgoing.get(&line)
258    }
259
260    /// Check if the adjacency cache has been built.
261    ///
262    /// Returns true if `get_adjacency_cache()` has been called at least once.
263    #[inline]
264    pub fn has_adjacency_cache(&self) -> bool {
265        self.adjacency_cache.get().is_some()
266    }
267
268    /// Get or lazily build the per-variable adjacency cache.
269    ///
270    /// This method uses interior mutability (OnceLock) to build the cache
271    /// exactly once, even when called from multiple threads. The cache is
272    /// then reused for all subsequent variable-filtered slice operations.
273    ///
274    /// # Performance
275    ///
276    /// - First call: O(E) to build both incoming and outgoing per-variable adjacency maps
277    /// - Subsequent calls: O(1) to return cached reference
278    ///
279    /// This eliminates repeated O(E) string comparisons when performing
280    /// multiple variable-filtered slices on the same DFG.
281    #[inline]
282    pub fn get_variable_adjacency_cache(&self) -> &VariableAdjacencyCache {
283        self.variable_adjacency_cache.get_or_init(|| {
284            let mut var_incoming: HashMap<String, HashMap<usize, Vec<usize>>> = HashMap::new();
285            let mut var_outgoing: HashMap<String, HashMap<usize, Vec<usize>>> = HashMap::new();
286
287            for edge in &self.edges {
288                var_incoming
289                    .entry(edge.variable.clone())
290                    .or_default()
291                    .entry(edge.to_line)
292                    .or_default()
293                    .push(edge.from_line);
294
295                var_outgoing
296                    .entry(edge.variable.clone())
297                    .or_default()
298                    .entry(edge.from_line)
299                    .or_default()
300                    .push(edge.to_line);
301            }
302
303            VariableAdjacencyCache {
304                var_incoming,
305                var_outgoing,
306            }
307        })
308    }
309
310    /// Get incoming edges for a specific variable at a line.
311    ///
312    /// Returns the list of source lines that flow data for `variable` TO `line`.
313    /// Returns `None` if no edges for this variable flow to this line.
314    /// Uses the cached per-variable adjacency list for O(1) lookup.
315    ///
316    /// # Performance
317    /// First call builds the variable cache O(E), subsequent calls are O(1).
318    #[inline]
319    pub fn get_var_incoming(&self, variable: &str, line: usize) -> Option<&Vec<usize>> {
320        self.get_variable_adjacency_cache()
321            .var_incoming
322            .get(variable)?
323            .get(&line)
324    }
325
326    /// Get outgoing edges for a specific variable from a line.
327    ///
328    /// Returns the list of target lines that receive data for `variable` FROM `line`.
329    /// Returns `None` if no edges for this variable flow from this line.
330    /// Uses the cached per-variable adjacency list for O(1) lookup.
331    ///
332    /// # Performance
333    /// First call builds the variable cache O(E), subsequent calls are O(1).
334    #[inline]
335    pub fn get_var_outgoing(&self, variable: &str, line: usize) -> Option<&Vec<usize>> {
336        self.get_variable_adjacency_cache()
337            .var_outgoing
338            .get(variable)?
339            .get(&line)
340    }
341
342    /// Get all lines where a specific variable has incoming edges.
343    ///
344    /// Returns `None` if the variable has no incoming edges in the DFG.
345    #[inline]
346    pub fn get_var_incoming_lines(&self, variable: &str) -> Option<&HashMap<usize, Vec<usize>>> {
347        self.get_variable_adjacency_cache()
348            .var_incoming
349            .get(variable)
350    }
351
352    /// Get all lines where a specific variable has outgoing edges.
353    ///
354    /// Returns `None` if the variable has no outgoing edges in the DFG.
355    #[inline]
356    pub fn get_var_outgoing_lines(&self, variable: &str) -> Option<&HashMap<usize, Vec<usize>>> {
357        self.get_variable_adjacency_cache()
358            .var_outgoing
359            .get(variable)
360    }
361
362    /// Check if the per-variable adjacency cache has been built.
363    ///
364    /// Returns true if `get_variable_adjacency_cache()` has been called at least once.
365    #[inline]
366    pub fn has_variable_adjacency_cache(&self) -> bool {
367        self.variable_adjacency_cache.get().is_some()
368    }
369
370    /// Backward slice: find all lines that affect the target line.
371    ///
372    /// Delegates to the optimized O(V + E) implementation in slice module.
373    /// Uses cached adjacency list for efficient BFS traversal.
374    #[inline]
375    pub fn backward_slice(&self, target_line: usize) -> Vec<usize> {
376        crate::dfg::slice::backward_slice_all(self, target_line)
377    }
378
379    /// Forward slice: find all lines affected by the source line.
380    ///
381    /// Delegates to the optimized O(V + E) implementation in slice module.
382    /// Uses cached adjacency list for efficient BFS traversal.
383    #[inline]
384    pub fn forward_slice(&self, source_line: usize) -> Vec<usize> {
385        crate::dfg::slice::forward_slice_all(self, source_line)
386    }
387
388    /// Get all variables used in the function.
389    pub fn variables(&self) -> Vec<&str> {
390        let mut vars: Vec<_> = self.definitions.keys().map(|s| s.as_str()).collect();
391        vars.sort();
392        vars.dedup();
393        vars
394    }
395}
396
397#[cfg(test)]
398mod tests {
399    use super::*;
400
401    /// Create a test DFG with multiple variables.
402    fn create_test_dfg() -> DFGInfo {
403        DFGInfo::new(
404            "test_func".to_string(),
405            vec![
406                // x flows: 1 -> 2, 1 -> 4
407                DataflowEdge {
408                    variable: "x".to_string(),
409                    from_line: 1,
410                    to_line: 2,
411                    kind: DataflowKind::Definition,
412                },
413                DataflowEdge {
414                    variable: "x".to_string(),
415                    from_line: 1,
416                    to_line: 4,
417                    kind: DataflowKind::Use,
418                },
419                // y flows: 2 -> 3
420                DataflowEdge {
421                    variable: "y".to_string(),
422                    from_line: 2,
423                    to_line: 3,
424                    kind: DataflowKind::Definition,
425                },
426                // z flows: 3 -> 4
427                DataflowEdge {
428                    variable: "z".to_string(),
429                    from_line: 3,
430                    to_line: 4,
431                    kind: DataflowKind::Definition,
432                },
433            ],
434            [
435                ("x".to_string(), vec![1]),
436                ("y".to_string(), vec![2]),
437                ("z".to_string(), vec![3]),
438            ]
439            .into_iter()
440            .collect(),
441            [
442                ("x".to_string(), vec![2, 4]),
443                ("y".to_string(), vec![3]),
444                ("z".to_string(), vec![4]),
445            ]
446            .into_iter()
447            .collect(),
448        )
449    }
450
451    #[test]
452    fn test_variable_adjacency_cache_lazy_init() {
453        let dfg = create_test_dfg();
454
455        // Cache should not be built initially
456        assert!(!dfg.has_variable_adjacency_cache());
457
458        // Trigger cache build
459        let _ = dfg.get_variable_adjacency_cache();
460
461        // Cache should now be built
462        assert!(dfg.has_variable_adjacency_cache());
463    }
464
465    #[test]
466    fn test_get_var_incoming() {
467        let dfg = create_test_dfg();
468
469        // x at line 2 has incoming from line 1
470        let incoming = dfg.get_var_incoming("x", 2);
471        assert!(incoming.is_some());
472        assert!(incoming.unwrap().contains(&1));
473
474        // x at line 4 has incoming from line 1
475        let incoming = dfg.get_var_incoming("x", 4);
476        assert!(incoming.is_some());
477        assert!(incoming.unwrap().contains(&1));
478
479        // y at line 3 has incoming from line 2
480        let incoming = dfg.get_var_incoming("y", 3);
481        assert!(incoming.is_some());
482        assert!(incoming.unwrap().contains(&2));
483
484        // No x incoming at line 3
485        let incoming = dfg.get_var_incoming("x", 3);
486        assert!(incoming.is_none());
487
488        // Nonexistent variable
489        let incoming = dfg.get_var_incoming("nonexistent", 1);
490        assert!(incoming.is_none());
491    }
492
493    #[test]
494    fn test_get_var_outgoing() {
495        let dfg = create_test_dfg();
496
497        // x at line 1 flows to lines 2 and 4
498        let outgoing = dfg.get_var_outgoing("x", 1);
499        assert!(outgoing.is_some());
500        let outgoing = outgoing.unwrap();
501        assert!(outgoing.contains(&2));
502        assert!(outgoing.contains(&4));
503
504        // y at line 2 flows to line 3
505        let outgoing = dfg.get_var_outgoing("y", 2);
506        assert!(outgoing.is_some());
507        assert!(outgoing.unwrap().contains(&3));
508
509        // No x outgoing from line 3
510        let outgoing = dfg.get_var_outgoing("x", 3);
511        assert!(outgoing.is_none());
512
513        // Nonexistent variable
514        let outgoing = dfg.get_var_outgoing("nonexistent", 1);
515        assert!(outgoing.is_none());
516    }
517
518    #[test]
519    fn test_get_var_incoming_lines() {
520        let dfg = create_test_dfg();
521
522        // All incoming edges for x
523        let x_incoming = dfg.get_var_incoming_lines("x");
524        assert!(x_incoming.is_some());
525        let x_incoming = x_incoming.unwrap();
526
527        // x has incoming edges at lines 2 and 4
528        assert!(x_incoming.contains_key(&2));
529        assert!(x_incoming.contains_key(&4));
530        assert!(!x_incoming.contains_key(&1)); // No incoming to line 1
531
532        // Nonexistent variable
533        assert!(dfg.get_var_incoming_lines("nonexistent").is_none());
534    }
535
536    #[test]
537    fn test_get_var_outgoing_lines() {
538        let dfg = create_test_dfg();
539
540        // All outgoing edges for x
541        let x_outgoing = dfg.get_var_outgoing_lines("x");
542        assert!(x_outgoing.is_some());
543        let x_outgoing = x_outgoing.unwrap();
544
545        // x has outgoing edge from line 1 only
546        assert!(x_outgoing.contains_key(&1));
547        assert!(!x_outgoing.contains_key(&2)); // No outgoing from line 2
548
549        // Nonexistent variable
550        assert!(dfg.get_var_outgoing_lines("nonexistent").is_none());
551    }
552
553    #[test]
554    fn test_variable_cache_clone_independent() {
555        let dfg = create_test_dfg();
556
557        // Build cache on original
558        let _ = dfg.get_variable_adjacency_cache();
559        assert!(dfg.has_variable_adjacency_cache());
560
561        // Clone should have fresh cache
562        let cloned = dfg.clone();
563        assert!(!cloned.has_variable_adjacency_cache());
564
565        // Building cache on clone should work independently
566        let _ = cloned.get_variable_adjacency_cache();
567        assert!(cloned.has_variable_adjacency_cache());
568    }
569
570    #[test]
571    fn test_empty_dfg_variable_cache() {
572        let dfg = DFGInfo::new(
573            "empty".to_string(),
574            vec![],
575            HashMap::new(),
576            HashMap::new(),
577        );
578
579        // Should not panic on empty DFG
580        let cache = dfg.get_variable_adjacency_cache();
581        assert!(cache.var_incoming.is_empty());
582        assert!(cache.var_outgoing.is_empty());
583
584        // All lookups should return None
585        assert!(dfg.get_var_incoming("x", 1).is_none());
586        assert!(dfg.get_var_outgoing("x", 1).is_none());
587        assert!(dfg.get_var_incoming_lines("x").is_none());
588        assert!(dfg.get_var_outgoing_lines("x").is_none());
589    }
590}