go_brrr/dfg/types.rs
1//! DFG type definitions.
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5use std::sync::OnceLock;
6
7/// Kind of data flow.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum DataflowKind {
10 /// Variable is defined
11 Definition,
12 /// Variable is read
13 Use,
14 /// Variable is mutated
15 Mutation,
16 /// Value is returned
17 Return,
18 /// Function parameter
19 Param,
20 /// Value is yielded from generator
21 Yield,
22 /// Comprehension-scoped variable (does not leak to outer scope in Python 3)
23 ComprehensionVar,
24 /// Lambda parameter (scoped to lambda body)
25 LambdaParam,
26 /// BUG-017 FIX: Variable declared as global (references module-level scope)
27 Global,
28 /// BUG-017 FIX: Variable declared as nonlocal (references enclosing function scope)
29 Nonlocal,
30 /// Nested function parameter (scoped to nested function body, used for closure analysis)
31 NestedParam,
32 /// Closure capture: use of outer variable from within nested function
33 ClosureCapture,
34 /// Go: Variable captured/used in goroutine (go func() {...})
35 Goroutine,
36 /// Go: Channel send operation (ch <- val)
37 ChannelSend,
38 /// Go: Channel receive operation (val := <-ch)
39 ChannelReceive,
40 /// Go: Deferred call (defer f())
41 Defer,
42 /// Go: Type assertion (x.(Type))
43 TypeAssertion,
44 // =========================================================================
45 // Go: Panic/Recover/Error handling flow
46 // =========================================================================
47 /// Go: Panic call (panic(err))
48 Panic,
49 /// Go: Recover call in deferred function (recover())
50 Recover,
51 /// Go: Error value assigned (err := ...)
52 ErrorAssign,
53 /// Go: Error check (if err != nil)
54 ErrorCheck,
55 /// Go: Named return value modified in defer
56 NamedReturnModify,
57 // =========================================================================
58 // Go: Channel and sync primitive operations
59 // =========================================================================
60 /// Go: Channel creation (make(chan T) or make(chan T, size))
61 ChannelMake,
62 /// Go: Channel close operation (close(ch))
63 ChannelCloseDfg,
64 /// Go: Select statement receive (case val := <-ch:)
65 SelectReceive,
66 /// Go: Select statement send (case ch <- val:)
67 SelectSend,
68 /// Go: Mutex lock (mu.Lock() or mu.RLock())
69 MutexLock,
70 /// Go: Mutex unlock (mu.Unlock() or mu.RUnlock())
71 MutexUnlock,
72 /// Go: WaitGroup.Add() operation
73 WaitGroupAdd,
74 /// Go: WaitGroup.Done() operation
75 WaitGroupDone,
76 /// Go: WaitGroup.Wait() synchronization point
77 WaitGroupWait,
78 /// Go: sync.Once.Do() call
79 OnceDo,
80 /// Go: Context.Done() check for cancellation
81 ContextDone,
82 /// Go: Context.Err() check for cancellation reason
83 ContextErr,
84 /// Go: Context.Value() retrieval
85 ContextValue,
86 /// Go: sync.Pool.Get()
87 PoolGet,
88 /// Go: sync.Pool.Put()
89 PoolPut,
90}
91
92/// A single data flow edge.
93#[derive(Debug, Clone, Serialize, Deserialize)]
94pub struct DataflowEdge {
95 /// Variable name
96 pub variable: String,
97 /// Source line (where data comes from)
98 pub from_line: usize,
99 /// Target line (where data flows to)
100 pub to_line: usize,
101 /// Kind of data flow
102 pub kind: DataflowKind,
103}
104
105/// Cached adjacency lists for efficient graph traversal.
106///
107/// Pre-computed once on first access, then reused for all subsequent
108/// slice operations. This avoids O(E) rebuild per slice call.
109///
110/// This is an internal implementation detail. Users interact with it through
111/// the `DFGInfo::get_adjacency_cache()` method.
112#[derive(Debug)]
113pub struct AdjacencyCache {
114 /// Incoming edges: to_line -> [from_lines]
115 /// Used for backward slicing.
116 pub incoming: HashMap<usize, Vec<usize>>,
117 /// Outgoing edges: from_line -> [to_lines]
118 /// Used for forward slicing.
119 pub outgoing: HashMap<usize, Vec<usize>>,
120}
121
122/// Per-variable adjacency cache for variable-filtered slicing.
123///
124/// Pre-computed once on first variable-filtered slice, then reused.
125/// This eliminates O(E) string comparisons per variable-filtered slice call.
126///
127/// This is an internal implementation detail. Users interact with it through
128/// the `DFGInfo::get_variable_adjacency_cache()` method.
129#[derive(Debug)]
130pub struct VariableAdjacencyCache {
131 /// Per-variable incoming edges: variable -> (to_line -> [from_lines])
132 /// Used for variable-specific backward slicing.
133 pub var_incoming: HashMap<String, HashMap<usize, Vec<usize>>>,
134 /// Per-variable outgoing edges: variable -> (from_line -> [to_lines])
135 /// Used for variable-specific forward slicing.
136 pub var_outgoing: HashMap<String, HashMap<usize, Vec<usize>>>,
137}
138
139/// Complete data flow graph for a function.
140///
141/// The DFG tracks variable definitions, uses, and the data flow edges between them.
142/// It provides efficient slicing operations through cached adjacency lists that are
143/// computed lazily on first access using interior mutability (OnceLock).
144///
145/// # Performance
146///
147/// The adjacency cache is built once on first slice operation and reused for all
148/// subsequent operations. This amortizes the O(E) cost of building adjacency lists
149/// across multiple slice calls, providing up to 15900x speedup for repeated queries.
150#[derive(Debug, Serialize, Deserialize)]
151pub struct DFGInfo {
152 /// Function name
153 pub function_name: String,
154 /// All data flow edges
155 pub edges: Vec<DataflowEdge>,
156 /// Where each variable is defined
157 pub definitions: HashMap<String, Vec<usize>>,
158 /// Where each variable is used
159 pub uses: HashMap<String, Vec<usize>>,
160 /// Cached adjacency lists for O(1) edge lookups during slicing.
161 /// Lazily initialized on first access via `get_adjacency_cache()`.
162 #[serde(skip)]
163 adjacency_cache: OnceLock<AdjacencyCache>,
164 /// Per-variable adjacency cache for variable-filtered slicing.
165 /// Lazily initialized on first variable-filtered slice via `get_variable_adjacency_cache()`.
166 #[serde(skip)]
167 variable_adjacency_cache: OnceLock<VariableAdjacencyCache>,
168}
169
170impl Clone for DFGInfo {
171 fn clone(&self) -> Self {
172 Self {
173 function_name: self.function_name.clone(),
174 edges: self.edges.clone(),
175 definitions: self.definitions.clone(),
176 uses: self.uses.clone(),
177 // Fresh caches for clone - will be rebuilt on first access if needed
178 adjacency_cache: OnceLock::new(),
179 variable_adjacency_cache: OnceLock::new(),
180 }
181 }
182}
183
184// Public API methods - used by library consumers for advanced DFG queries
185#[allow(dead_code)]
186impl DFGInfo {
187 /// Create a new DFGInfo with the given data.
188 ///
189 /// Both adjacency caches are initialized empty and will be lazily built
190 /// on first slice operation.
191 pub fn new(
192 function_name: String,
193 edges: Vec<DataflowEdge>,
194 definitions: HashMap<String, Vec<usize>>,
195 uses: HashMap<String, Vec<usize>>,
196 ) -> Self {
197 Self {
198 function_name,
199 edges,
200 definitions,
201 uses,
202 adjacency_cache: OnceLock::new(),
203 variable_adjacency_cache: OnceLock::new(),
204 }
205 }
206
207 /// Get or lazily build the adjacency cache.
208 ///
209 /// This method uses interior mutability (OnceLock) to build the cache
210 /// exactly once, even when called from multiple threads. The cache is
211 /// then reused for all subsequent slice operations.
212 ///
213 /// # Performance
214 ///
215 /// - First call: O(E) to build both incoming and outgoing adjacency maps
216 /// - Subsequent calls: O(1) to return cached reference
217 ///
218 /// This provides up to 15900x speedup for repeated slice operations
219 /// compared to rebuilding adjacency maps on every call.
220 #[inline]
221 pub fn get_adjacency_cache(&self) -> &AdjacencyCache {
222 self.adjacency_cache.get_or_init(|| {
223 let edge_count = self.edges.len();
224 let mut incoming: HashMap<usize, Vec<usize>> = HashMap::with_capacity(edge_count);
225 let mut outgoing: HashMap<usize, Vec<usize>> = HashMap::with_capacity(edge_count);
226
227 for edge in &self.edges {
228 incoming
229 .entry(edge.to_line)
230 .or_default()
231 .push(edge.from_line);
232 outgoing
233 .entry(edge.from_line)
234 .or_default()
235 .push(edge.to_line);
236 }
237
238 AdjacencyCache { incoming, outgoing }
239 })
240 }
241
242 /// Get incoming edges for a line (sources that flow TO this line).
243 ///
244 /// Returns `None` if no edges flow to this line.
245 /// Uses the cached adjacency list for O(1) lookup.
246 #[inline]
247 pub fn get_incoming(&self, line: usize) -> Option<&Vec<usize>> {
248 self.get_adjacency_cache().incoming.get(&line)
249 }
250
251 /// Get outgoing edges for a line (targets that this line flows TO).
252 ///
253 /// Returns `None` if no edges flow from this line.
254 /// Uses the cached adjacency list for O(1) lookup.
255 #[inline]
256 pub fn get_outgoing(&self, line: usize) -> Option<&Vec<usize>> {
257 self.get_adjacency_cache().outgoing.get(&line)
258 }
259
260 /// Check if the adjacency cache has been built.
261 ///
262 /// Returns true if `get_adjacency_cache()` has been called at least once.
263 #[inline]
264 pub fn has_adjacency_cache(&self) -> bool {
265 self.adjacency_cache.get().is_some()
266 }
267
268 /// Get or lazily build the per-variable adjacency cache.
269 ///
270 /// This method uses interior mutability (OnceLock) to build the cache
271 /// exactly once, even when called from multiple threads. The cache is
272 /// then reused for all subsequent variable-filtered slice operations.
273 ///
274 /// # Performance
275 ///
276 /// - First call: O(E) to build both incoming and outgoing per-variable adjacency maps
277 /// - Subsequent calls: O(1) to return cached reference
278 ///
279 /// This eliminates repeated O(E) string comparisons when performing
280 /// multiple variable-filtered slices on the same DFG.
281 #[inline]
282 pub fn get_variable_adjacency_cache(&self) -> &VariableAdjacencyCache {
283 self.variable_adjacency_cache.get_or_init(|| {
284 let mut var_incoming: HashMap<String, HashMap<usize, Vec<usize>>> = HashMap::new();
285 let mut var_outgoing: HashMap<String, HashMap<usize, Vec<usize>>> = HashMap::new();
286
287 for edge in &self.edges {
288 var_incoming
289 .entry(edge.variable.clone())
290 .or_default()
291 .entry(edge.to_line)
292 .or_default()
293 .push(edge.from_line);
294
295 var_outgoing
296 .entry(edge.variable.clone())
297 .or_default()
298 .entry(edge.from_line)
299 .or_default()
300 .push(edge.to_line);
301 }
302
303 VariableAdjacencyCache {
304 var_incoming,
305 var_outgoing,
306 }
307 })
308 }
309
310 /// Get incoming edges for a specific variable at a line.
311 ///
312 /// Returns the list of source lines that flow data for `variable` TO `line`.
313 /// Returns `None` if no edges for this variable flow to this line.
314 /// Uses the cached per-variable adjacency list for O(1) lookup.
315 ///
316 /// # Performance
317 /// First call builds the variable cache O(E), subsequent calls are O(1).
318 #[inline]
319 pub fn get_var_incoming(&self, variable: &str, line: usize) -> Option<&Vec<usize>> {
320 self.get_variable_adjacency_cache()
321 .var_incoming
322 .get(variable)?
323 .get(&line)
324 }
325
326 /// Get outgoing edges for a specific variable from a line.
327 ///
328 /// Returns the list of target lines that receive data for `variable` FROM `line`.
329 /// Returns `None` if no edges for this variable flow from this line.
330 /// Uses the cached per-variable adjacency list for O(1) lookup.
331 ///
332 /// # Performance
333 /// First call builds the variable cache O(E), subsequent calls are O(1).
334 #[inline]
335 pub fn get_var_outgoing(&self, variable: &str, line: usize) -> Option<&Vec<usize>> {
336 self.get_variable_adjacency_cache()
337 .var_outgoing
338 .get(variable)?
339 .get(&line)
340 }
341
342 /// Get all lines where a specific variable has incoming edges.
343 ///
344 /// Returns `None` if the variable has no incoming edges in the DFG.
345 #[inline]
346 pub fn get_var_incoming_lines(&self, variable: &str) -> Option<&HashMap<usize, Vec<usize>>> {
347 self.get_variable_adjacency_cache()
348 .var_incoming
349 .get(variable)
350 }
351
352 /// Get all lines where a specific variable has outgoing edges.
353 ///
354 /// Returns `None` if the variable has no outgoing edges in the DFG.
355 #[inline]
356 pub fn get_var_outgoing_lines(&self, variable: &str) -> Option<&HashMap<usize, Vec<usize>>> {
357 self.get_variable_adjacency_cache()
358 .var_outgoing
359 .get(variable)
360 }
361
362 /// Check if the per-variable adjacency cache has been built.
363 ///
364 /// Returns true if `get_variable_adjacency_cache()` has been called at least once.
365 #[inline]
366 pub fn has_variable_adjacency_cache(&self) -> bool {
367 self.variable_adjacency_cache.get().is_some()
368 }
369
370 /// Backward slice: find all lines that affect the target line.
371 ///
372 /// Delegates to the optimized O(V + E) implementation in slice module.
373 /// Uses cached adjacency list for efficient BFS traversal.
374 #[inline]
375 pub fn backward_slice(&self, target_line: usize) -> Vec<usize> {
376 crate::dfg::slice::backward_slice_all(self, target_line)
377 }
378
379 /// Forward slice: find all lines affected by the source line.
380 ///
381 /// Delegates to the optimized O(V + E) implementation in slice module.
382 /// Uses cached adjacency list for efficient BFS traversal.
383 #[inline]
384 pub fn forward_slice(&self, source_line: usize) -> Vec<usize> {
385 crate::dfg::slice::forward_slice_all(self, source_line)
386 }
387
388 /// Get all variables used in the function.
389 pub fn variables(&self) -> Vec<&str> {
390 let mut vars: Vec<_> = self.definitions.keys().map(|s| s.as_str()).collect();
391 vars.sort();
392 vars.dedup();
393 vars
394 }
395}
396
397#[cfg(test)]
398mod tests {
399 use super::*;
400
401 /// Create a test DFG with multiple variables.
402 fn create_test_dfg() -> DFGInfo {
403 DFGInfo::new(
404 "test_func".to_string(),
405 vec![
406 // x flows: 1 -> 2, 1 -> 4
407 DataflowEdge {
408 variable: "x".to_string(),
409 from_line: 1,
410 to_line: 2,
411 kind: DataflowKind::Definition,
412 },
413 DataflowEdge {
414 variable: "x".to_string(),
415 from_line: 1,
416 to_line: 4,
417 kind: DataflowKind::Use,
418 },
419 // y flows: 2 -> 3
420 DataflowEdge {
421 variable: "y".to_string(),
422 from_line: 2,
423 to_line: 3,
424 kind: DataflowKind::Definition,
425 },
426 // z flows: 3 -> 4
427 DataflowEdge {
428 variable: "z".to_string(),
429 from_line: 3,
430 to_line: 4,
431 kind: DataflowKind::Definition,
432 },
433 ],
434 [
435 ("x".to_string(), vec![1]),
436 ("y".to_string(), vec![2]),
437 ("z".to_string(), vec![3]),
438 ]
439 .into_iter()
440 .collect(),
441 [
442 ("x".to_string(), vec![2, 4]),
443 ("y".to_string(), vec![3]),
444 ("z".to_string(), vec![4]),
445 ]
446 .into_iter()
447 .collect(),
448 )
449 }
450
451 #[test]
452 fn test_variable_adjacency_cache_lazy_init() {
453 let dfg = create_test_dfg();
454
455 // Cache should not be built initially
456 assert!(!dfg.has_variable_adjacency_cache());
457
458 // Trigger cache build
459 let _ = dfg.get_variable_adjacency_cache();
460
461 // Cache should now be built
462 assert!(dfg.has_variable_adjacency_cache());
463 }
464
465 #[test]
466 fn test_get_var_incoming() {
467 let dfg = create_test_dfg();
468
469 // x at line 2 has incoming from line 1
470 let incoming = dfg.get_var_incoming("x", 2);
471 assert!(incoming.is_some());
472 assert!(incoming.unwrap().contains(&1));
473
474 // x at line 4 has incoming from line 1
475 let incoming = dfg.get_var_incoming("x", 4);
476 assert!(incoming.is_some());
477 assert!(incoming.unwrap().contains(&1));
478
479 // y at line 3 has incoming from line 2
480 let incoming = dfg.get_var_incoming("y", 3);
481 assert!(incoming.is_some());
482 assert!(incoming.unwrap().contains(&2));
483
484 // No x incoming at line 3
485 let incoming = dfg.get_var_incoming("x", 3);
486 assert!(incoming.is_none());
487
488 // Nonexistent variable
489 let incoming = dfg.get_var_incoming("nonexistent", 1);
490 assert!(incoming.is_none());
491 }
492
493 #[test]
494 fn test_get_var_outgoing() {
495 let dfg = create_test_dfg();
496
497 // x at line 1 flows to lines 2 and 4
498 let outgoing = dfg.get_var_outgoing("x", 1);
499 assert!(outgoing.is_some());
500 let outgoing = outgoing.unwrap();
501 assert!(outgoing.contains(&2));
502 assert!(outgoing.contains(&4));
503
504 // y at line 2 flows to line 3
505 let outgoing = dfg.get_var_outgoing("y", 2);
506 assert!(outgoing.is_some());
507 assert!(outgoing.unwrap().contains(&3));
508
509 // No x outgoing from line 3
510 let outgoing = dfg.get_var_outgoing("x", 3);
511 assert!(outgoing.is_none());
512
513 // Nonexistent variable
514 let outgoing = dfg.get_var_outgoing("nonexistent", 1);
515 assert!(outgoing.is_none());
516 }
517
518 #[test]
519 fn test_get_var_incoming_lines() {
520 let dfg = create_test_dfg();
521
522 // All incoming edges for x
523 let x_incoming = dfg.get_var_incoming_lines("x");
524 assert!(x_incoming.is_some());
525 let x_incoming = x_incoming.unwrap();
526
527 // x has incoming edges at lines 2 and 4
528 assert!(x_incoming.contains_key(&2));
529 assert!(x_incoming.contains_key(&4));
530 assert!(!x_incoming.contains_key(&1)); // No incoming to line 1
531
532 // Nonexistent variable
533 assert!(dfg.get_var_incoming_lines("nonexistent").is_none());
534 }
535
536 #[test]
537 fn test_get_var_outgoing_lines() {
538 let dfg = create_test_dfg();
539
540 // All outgoing edges for x
541 let x_outgoing = dfg.get_var_outgoing_lines("x");
542 assert!(x_outgoing.is_some());
543 let x_outgoing = x_outgoing.unwrap();
544
545 // x has outgoing edge from line 1 only
546 assert!(x_outgoing.contains_key(&1));
547 assert!(!x_outgoing.contains_key(&2)); // No outgoing from line 2
548
549 // Nonexistent variable
550 assert!(dfg.get_var_outgoing_lines("nonexistent").is_none());
551 }
552
553 #[test]
554 fn test_variable_cache_clone_independent() {
555 let dfg = create_test_dfg();
556
557 // Build cache on original
558 let _ = dfg.get_variable_adjacency_cache();
559 assert!(dfg.has_variable_adjacency_cache());
560
561 // Clone should have fresh cache
562 let cloned = dfg.clone();
563 assert!(!cloned.has_variable_adjacency_cache());
564
565 // Building cache on clone should work independently
566 let _ = cloned.get_variable_adjacency_cache();
567 assert!(cloned.has_variable_adjacency_cache());
568 }
569
570 #[test]
571 fn test_empty_dfg_variable_cache() {
572 let dfg = DFGInfo::new(
573 "empty".to_string(),
574 vec![],
575 HashMap::new(),
576 HashMap::new(),
577 );
578
579 // Should not panic on empty DFG
580 let cache = dfg.get_variable_adjacency_cache();
581 assert!(cache.var_incoming.is_empty());
582 assert!(cache.var_outgoing.is_empty());
583
584 // All lookups should return None
585 assert!(dfg.get_var_incoming("x", 1).is_none());
586 assert!(dfg.get_var_outgoing("x", 1).is_none());
587 assert!(dfg.get_var_incoming_lines("x").is_none());
588 assert!(dfg.get_var_outgoing_lines("x").is_none());
589 }
590}