go_brrr/cfg/types.rs
1//! CFG type definitions.
2
3use once_cell::sync::OnceCell;
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7/// Cached adjacency lists for O(1) successor/predecessor lookups.
8///
9/// Built lazily on first access to avoid overhead when not needed.
10/// Uses separate HashMaps for outgoing (successors) and incoming (predecessors) edges.
11/// NOTE: This type is public for `CFGInfo.adjacency_cache` but is an internal implementation detail.
12#[derive(Debug)]
13pub struct AdjacencyCache {
14 /// BlockId -> list of successor BlockIds (outgoing edges)
15 successors: HashMap<BlockId, Vec<BlockId>>,
16 /// BlockId -> list of predecessor BlockIds (incoming edges)
17 predecessors: HashMap<BlockId, Vec<BlockId>>,
18}
19
20/// Errors that can occur during CFG validation.
21///
22/// These errors indicate structural inconsistencies in the control flow graph
23/// that would cause issues during analysis or rendering.
24#[derive(Debug, thiserror::Error)]
25#[allow(dead_code)]
26pub enum CFGError {
27 /// Entry block ID does not exist in the blocks map.
28 #[error("Entry block {0:?} not found in blocks")]
29 InvalidEntry(BlockId),
30
31 /// An exit block ID does not exist in the blocks map.
32 #[error("Exit block {0:?} not found in blocks")]
33 InvalidExit(BlockId),
34
35 /// Duplicate block ID was inserted (would silently overwrite).
36 #[error("Duplicate block ID {0:?}")]
37 DuplicateBlockId(BlockId),
38
39 /// An edge references a block that does not exist.
40 #[error("Edge references non-existent block {0:?}")]
41 InvalidEdgeBlock(BlockId),
42}
43
44/// Unique identifier for a basic block.
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
46pub struct BlockId(pub usize);
47
48/// Type of a basic block in the control flow graph.
49///
50/// Categorizes blocks by their role in the control flow:
51/// - Entry/Exit: Function boundaries
52/// - Branch: Conditional decision points
53/// - LoopHeader/LoopBody: Loop constructs
54/// - Return: Explicit return statements
55/// - Body: Regular sequential code
56/// - Exception/Finally: Error handling constructs
57/// - TryOperator/ErrorReturn/PanicSite: Rust Result/Option handling
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
59#[serde(rename_all = "snake_case")]
60pub enum BlockType {
61 /// Function entry point
62 Entry,
63 /// Function exit point
64 Exit,
65 /// Conditional branch (if, elif, match case)
66 Branch,
67 /// Loop header (condition evaluation)
68 LoopHeader,
69 /// Loop body
70 LoopBody,
71 /// Return statement block
72 Return,
73 /// Regular code block (default)
74 #[default]
75 Body,
76 /// Exception handler (except, catch)
77 Exception,
78 /// Finally block
79 Finally,
80 /// Rust ? operator - may propagate error/none (Result/Option)
81 TryOperator,
82 /// Rust early return from ? operator on Err/None
83 ErrorReturn,
84 /// Potential panic site (.unwrap(), .expect(), panic!(), unreachable!(), todo!())
85 PanicSite,
86 /// Pattern match block (if let, while let, match arm)
87 PatternMatch,
88 /// Closure body (separate control flow context)
89 Closure,
90 // =========================================================================
91 // Go-specific block types for defer/panic/recover flow
92 // =========================================================================
93 /// Go: Deferred function call (executes on function exit in LIFO order)
94 DeferredCall,
95 /// Go: Recover call site (recover() in deferred function)
96 RecoverCall,
97 /// Go: Error check pattern (if err != nil { return err })
98 ErrorCheck,
99 /// Go: Panic site (explicit panic() or potential runtime panic)
100 GoPanicSite,
101 // =========================================================================
102 // Go-specific block types for concurrency (goroutines, channels, sync)
103 // =========================================================================
104 /// Go: Goroutine spawn (go func() {...} or go existingFunc())
105 GoroutineSpawn,
106 /// Go: Channel send operation (ch <- value) - may block
107 ChannelSend,
108 /// Go: Channel receive operation (val := <-ch or <-ch) - may block
109 ChannelReceive,
110 /// Go: Select statement block (multi-way channel operation)
111 SelectBlock,
112 /// Go: Select case (case <-ch: or case ch <-:)
113 SelectCase,
114 /// Go: Mutex lock operation (mu.Lock() or mu.RLock())
115 MutexLock,
116 /// Go: Mutex unlock operation (mu.Unlock() or mu.RUnlock())
117 MutexUnlock,
118 /// Go: WaitGroup wait operation (wg.Wait()) - synchronization point
119 WaitGroupWait,
120 /// Go: WaitGroup add/done operation (wg.Add(), wg.Done())
121 WaitGroupOp,
122 /// Go: sync.Once.Do() call - one-time initialization
123 OnceCall,
124 /// Go: Channel make/creation (make(chan T) or make(chan T, size))
125 ChannelMake,
126 /// Go: Channel close operation (close(ch))
127 ChannelClose,
128 /// Go: Context cancellation check (ctx.Done(), ctx.Err())
129 ContextCheck,
130 // =========================================================================
131 // Python async/await block types
132 // =========================================================================
133 /// Python: Await expression - suspension point where execution yields to event loop
134 AwaitPoint,
135 /// Python: Async for loop header
136 AsyncForLoop,
137 /// Python: Async with block (async context manager)
138 AsyncWithBlock,
139 /// Python: Task spawn point (asyncio.create_task, asyncio.gather, etc.)
140 TaskSpawn,
141 // =========================================================================
142 // JavaScript/TypeScript async/await and Promise block types
143 // =========================================================================
144 /// JS/TS: Promise.then() callback block - executes on promise resolution
145 PromiseThen,
146 /// JS/TS: Promise.catch() callback block - executes on promise rejection
147 PromiseCatch,
148 /// JS/TS: Promise.finally() callback block - executes regardless of outcome
149 PromiseFinally,
150 /// JS/TS: Promise.all() - parallel execution, waits for all promises
151 PromiseAll,
152 /// JS/TS: Promise.race() - parallel execution, resolves with first settled
153 PromiseRace,
154 /// JS/TS: Promise.allSettled() - parallel execution, waits for all to settle
155 PromiseAllSettled,
156 /// JS/TS: Promise.any() - parallel execution, resolves with first fulfilled
157 PromiseAny,
158 /// JS/TS: Async generator yield point (yield* in async generator)
159 AsyncGeneratorYield,
160 /// JS/TS: for await...of loop header (async iteration)
161 ForAwaitOf,
162 // =========================================================================
163 // Rust async/await block types
164 // =========================================================================
165 /// Rust: Await expression (.await) - suspension point where Future yields
166 /// Critical for tracking async state machine transitions and cancellation safety.
167 RustAwaitPoint,
168 /// Rust: Task spawn point (tokio::spawn, async_std::spawn, etc.)
169 /// Creates a concurrent task that runs independently.
170 RustTaskSpawn,
171 /// Rust: tokio::join! - concurrent execution, waits for all futures
172 /// All branches run concurrently, function continues when all complete.
173 RustJoin,
174 /// Rust: tokio::select! - concurrent execution, first completion wins
175 /// All branches race, first to complete is taken, others are cancelled.
176 RustSelect,
177 /// Rust: select! branch (individual arm in select!)
178 RustSelectBranch,
179 /// Rust: Blocking call in async context (std::thread::sleep, blocking I/O)
180 /// ANTI-PATTERN: Blocks the executor thread, starving other tasks.
181 RustBlockingCall,
182 /// Rust: Lock held across await (MutexGuard, RwLockGuard across .await)
183 /// ANTI-PATTERN: Causes deadlocks and breaks Send bounds.
184 RustLockAcrossAwait,
185 /// Rust: Async block (async { ... }) - creates anonymous Future
186 RustAsyncBlock,
187}
188
189/// A basic block in the control flow graph.
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct CFGBlock {
192 /// Unique block identifier
193 pub id: BlockId,
194 /// Human-readable label
195 pub label: String,
196 /// Type of block (entry, exit, branch, loop, etc.)
197 #[serde(default)]
198 pub block_type: BlockType,
199 /// Statements in this block
200 pub statements: Vec<String>,
201 /// Functions called within this block
202 #[serde(default)]
203 pub func_calls: Vec<String>,
204 /// Starting line number
205 pub start_line: usize,
206 /// Ending line number
207 pub end_line: usize,
208}
209
210/// Semantic type of a CFG edge.
211///
212/// This enum represents the control flow semantics, while `condition`
213/// provides human-readable display text (e.g., "x > 0").
214#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
215#[serde(rename_all = "snake_case")]
216pub enum EdgeType {
217 /// Unconditional edge (fallthrough, sequential)
218 Unconditional,
219 /// True branch of a conditional
220 True,
221 /// False branch of a conditional
222 False,
223 /// Back edge in a loop
224 BackEdge,
225 /// Break statement exiting a loop
226 Break,
227 /// Continue statement restarting loop iteration
228 Continue,
229 /// Exception handling path
230 Exception,
231 /// Finally block execution
232 Finally,
233 /// Loop iteration edge
234 Iterate,
235 /// Return from function
236 Return,
237 /// Loop entry edge
238 Enter,
239 /// Loop exit edge (done/exit)
240 Exit,
241 /// Switch/match case
242 Case,
243 /// Pattern match in match statement
244 PatternMatch,
245 /// Goto statement (C/C++)
246 Goto,
247 /// Goroutine spawn (Go)
248 Spawn,
249 /// Deferred execution (Go)
250 Defer,
251 /// Channel send (Go)
252 Send,
253 /// Resource acquire (Java try-with-resources)
254 Acquire,
255 /// Successful completion
256 Success,
257 /// Resource close (Java try-with-resources)
258 Close,
259 /// Else branch
260 Else,
261 /// Result/Option Ok/Some variant (Rust)
262 OkSome,
263 /// Result/Option Err/None variant (Rust)
264 ErrNone,
265 /// Loop next iteration
266 Next,
267 /// Match expression dispatch
268 Match,
269 /// Loop continuation (back to condition)
270 Loop,
271 /// Assert pass (condition true)
272 Pass,
273 /// Assert fail (condition false, raises AssertionError)
274 Fail,
275 /// Loop exhausted (iteration complete)
276 Exhausted,
277 /// No exception occurred (try/else)
278 NoException,
279 /// Exception group handling (except*)
280 ExceptionGroup,
281 /// Context manager enter success
282 EnterSuccess,
283 /// Context manager enter exception
284 EnterException,
285 /// Cleanup path
286 Cleanup,
287 /// Exception propagation
288 Propagate,
289 /// Re-raise current exception (bare `raise` statement)
290 Reraise,
291 /// Typed exception edge (edge to specific exception handler)
292 TypedException,
293 // =========================================================================
294 // Go-specific edge types for defer/panic/recover flow
295 // =========================================================================
296 /// Go: Panic propagation (control flow on panic)
297 Panic,
298 /// Go: Recover success (panic caught by recover())
299 Recover,
300 /// Go: Deferred execution chain (between deferred calls)
301 DeferChain,
302 /// Go: Error return path (err != nil -> return)
303 ErrorPath,
304 /// Go: Success path (err == nil -> continue)
305 SuccessPath,
306 // =========================================================================
307 // Go-specific edge types for concurrency (goroutines, channels, sync)
308 // =========================================================================
309 /// Go: Channel receive operation edge
310 Receive,
311 /// Go: Select case branch (case <-ch: or case ch <- val:)
312 SelectCase,
313 /// Go: Select default branch (non-blocking)
314 SelectDefault,
315 /// Go: Mutex lock acquisition
316 Lock,
317 /// Go: Mutex unlock release
318 Unlock,
319 /// Go: WaitGroup synchronization
320 WaitSync,
321 /// Go: Channel close operation
322 ChannelClose,
323 /// Go: Context cancellation path (ctx.Done() triggered)
324 ContextCancel,
325 /// Go: Context continue path (ctx not cancelled)
326 ContextContinue,
327 // =========================================================================
328 // Python async/await edge types
329 // =========================================================================
330 /// Python: Await suspension point - execution yields to event loop
331 Await,
332 /// Python: Async for iteration (like Iterate but async)
333 AsyncIterate,
334 /// Python: Async for exhausted (like Exhausted but async)
335 AsyncExhausted,
336 /// Python: Async with enter success
337 AsyncEnterSuccess,
338 /// Python: Async with enter exception
339 AsyncEnterException,
340 /// Python: Task spawn (concurrent execution begins)
341 TaskSpawn,
342 /// Python: Task join (await on task result)
343 TaskJoin,
344 // =========================================================================
345 // JavaScript/TypeScript async/await and Promise edge types
346 // =========================================================================
347 /// JS/TS: Promise resolved - .then() callback path
348 PromiseResolved,
349 /// JS/TS: Promise rejected - .catch() callback path
350 PromiseRejected,
351 /// JS/TS: Promise settled - .finally() callback path (always executes)
352 PromiseSettled,
353 /// JS/TS: Promise.all parallel branch - edge to each task
354 PromiseAllBranch,
355 /// JS/TS: Promise.all join - all tasks completed
356 PromiseAllJoin,
357 /// JS/TS: Promise.race branch - edge to each racing task
358 PromiseRaceBranch,
359 /// JS/TS: Promise.race winner - first settled wins
360 PromiseRaceWinner,
361 /// JS/TS: for await...of iteration edge
362 ForAwaitIterate,
363 /// JS/TS: for await...of exhausted edge
364 ForAwaitExhausted,
365 /// JS/TS: Async generator yield edge
366 AsyncYield,
367 /// JS/TS: Async generator resume edge (after yield)
368 AsyncResume,
369 // =========================================================================
370 // Rust async/await edge types
371 // =========================================================================
372 /// Rust: .await suspension point - Future yields to executor
373 RustAwait,
374 /// Rust: .await resumption - Future resumes after poll returns Ready
375 RustResume,
376 /// Rust: tokio::spawn - new task spawned (concurrent execution)
377 RustSpawn,
378 /// Rust: JoinHandle.await - waiting for spawned task result
379 RustJoinTask,
380 /// Rust: join! branch - edge to each concurrent future
381 RustJoinBranch,
382 /// Rust: join! complete - all futures completed
383 RustJoinComplete,
384 /// Rust: select! branch - edge to each racing future
385 RustSelectBranch,
386 /// Rust: select! winner - first future to complete
387 RustSelectWinner,
388 /// Rust: select! cancelled - future cancelled (not selected)
389 RustSelectCancelled,
390 /// Rust: Blocking operation in async context (anti-pattern)
391 RustBlocking,
392 /// Rust: Lock acquisition in async context (potential issue)
393 RustLockAcquire,
394 /// Rust: Await while holding lock (anti-pattern)
395 RustAwaitWithLock,
396}
397
398impl EdgeType {
399 /// Get the default display label for this edge type.
400 pub fn default_label(&self) -> &'static str {
401 match self {
402 EdgeType::Unconditional => "",
403 EdgeType::True => "true",
404 EdgeType::False => "false",
405 EdgeType::BackEdge => "back_edge",
406 EdgeType::Break => "break",
407 EdgeType::Continue => "continue",
408 EdgeType::Exception => "exception",
409 EdgeType::Finally => "finally",
410 EdgeType::Iterate => "iterate",
411 EdgeType::Return => "return",
412 EdgeType::Enter => "enter",
413 EdgeType::Exit => "exit",
414 EdgeType::Case => "case",
415 EdgeType::PatternMatch => "pattern_match",
416 EdgeType::Goto => "goto",
417 EdgeType::Spawn => "spawn",
418 EdgeType::Defer => "defer",
419 EdgeType::Send => "send",
420 EdgeType::Acquire => "acquire",
421 EdgeType::Success => "success",
422 EdgeType::Close => "close",
423 EdgeType::Else => "else",
424 EdgeType::OkSome => "Ok/Some",
425 EdgeType::ErrNone => "Err/None",
426 EdgeType::Next => "next",
427 EdgeType::Match => "match",
428 EdgeType::Loop => "loop",
429 EdgeType::Pass => "pass",
430 EdgeType::Fail => "fail",
431 EdgeType::Exhausted => "exhausted",
432 EdgeType::NoException => "no_exception",
433 EdgeType::ExceptionGroup => "exception_group",
434 EdgeType::EnterSuccess => "enter_success",
435 EdgeType::EnterException => "enter_exception",
436 EdgeType::Cleanup => "cleanup",
437 EdgeType::Propagate => "propagate",
438 EdgeType::Reraise => "reraise",
439 EdgeType::TypedException => "typed_exception",
440 // Go-specific edge labels
441 EdgeType::Panic => "panic",
442 EdgeType::Recover => "recover",
443 EdgeType::DeferChain => "defer_chain",
444 EdgeType::ErrorPath => "err != nil",
445 EdgeType::SuccessPath => "err == nil",
446 // Go concurrency edge labels
447 EdgeType::Receive => "receive",
448 EdgeType::SelectCase => "select_case",
449 EdgeType::SelectDefault => "select_default",
450 EdgeType::Lock => "lock",
451 EdgeType::Unlock => "unlock",
452 EdgeType::WaitSync => "wait",
453 EdgeType::ChannelClose => "close_chan",
454 EdgeType::ContextCancel => "ctx_cancel",
455 EdgeType::ContextContinue => "ctx_continue",
456 // Python async/await edge labels
457 EdgeType::Await => "await",
458 EdgeType::AsyncIterate => "async_iterate",
459 EdgeType::AsyncExhausted => "async_exhausted",
460 EdgeType::AsyncEnterSuccess => "async_enter_success",
461 EdgeType::AsyncEnterException => "async_enter_exception",
462 EdgeType::TaskSpawn => "spawn_task",
463 EdgeType::TaskJoin => "join_task",
464 // JavaScript/TypeScript async/await edge labels
465 EdgeType::PromiseResolved => "resolved",
466 EdgeType::PromiseRejected => "rejected",
467 EdgeType::PromiseSettled => "settled",
468 EdgeType::PromiseAllBranch => "all_branch",
469 EdgeType::PromiseAllJoin => "all_join",
470 EdgeType::PromiseRaceBranch => "race_branch",
471 EdgeType::PromiseRaceWinner => "race_winner",
472 EdgeType::ForAwaitIterate => "for_await_iterate",
473 EdgeType::ForAwaitExhausted => "for_await_exhausted",
474 EdgeType::AsyncYield => "async_yield",
475 EdgeType::AsyncResume => "async_resume",
476 // Rust async/await edge labels
477 EdgeType::RustAwait => ".await",
478 EdgeType::RustResume => "resume",
479 EdgeType::RustSpawn => "rust_spawn",
480 EdgeType::RustJoinTask => "join_task",
481 EdgeType::RustJoinBranch => "join_branch",
482 EdgeType::RustJoinComplete => "join_complete",
483 EdgeType::RustSelectBranch => "select_branch",
484 EdgeType::RustSelectWinner => "select_winner",
485 EdgeType::RustSelectCancelled => "select_cancel",
486 EdgeType::RustBlocking => "blocking",
487 EdgeType::RustLockAcquire => "lock_acquire",
488 EdgeType::RustAwaitWithLock => "await_with_lock",
489 }
490 }
491}
492
493/// An edge in the control flow graph.
494#[derive(Debug, Clone, Serialize, Deserialize)]
495pub struct CFGEdge {
496 /// Source block
497 #[serde(rename = "source_id")]
498 pub from: BlockId,
499 /// Target block
500 #[serde(rename = "target_id")]
501 pub to: BlockId,
502 /// Semantic edge type
503 #[serde(rename = "type")]
504 pub edge_type: EdgeType,
505 /// Human-readable condition (e.g., "x > 0")
506 #[serde(skip_serializing_if = "Option::is_none")]
507 pub condition: Option<String>,
508}
509
510impl CFGEdge {
511 /// Create a new unconditional edge.
512 pub fn unconditional(from: BlockId, to: BlockId) -> Self {
513 Self {
514 from,
515 to,
516 edge_type: EdgeType::Unconditional,
517 condition: None,
518 }
519 }
520
521 /// Create a new edge with a specific type.
522 pub fn new(from: BlockId, to: BlockId, edge_type: EdgeType) -> Self {
523 Self {
524 from,
525 to,
526 edge_type,
527 condition: None,
528 }
529 }
530
531 /// Create a new edge with type and condition.
532 pub fn with_condition(from: BlockId, to: BlockId, edge_type: EdgeType, condition: String) -> Self {
533 Self {
534 from,
535 to,
536 edge_type,
537 condition: Some(condition),
538 }
539 }
540
541 /// Create edge from legacy label format (backward compatibility).
542 ///
543 /// This method allows gradual migration from the old `label: Option<String>`
544 /// format to the new `edge_type` + `condition` format.
545 pub fn from_label(from: BlockId, to: BlockId, label: Option<String>) -> Self {
546 match label.as_deref() {
547 None => Self::unconditional(from, to),
548 Some("true") => Self::new(from, to, EdgeType::True),
549 Some("false") => Self::new(from, to, EdgeType::False),
550 Some("break") => Self::new(from, to, EdgeType::Break),
551 Some("continue") => Self::new(from, to, EdgeType::Continue),
552 Some("exception") => Self::new(from, to, EdgeType::Exception),
553 Some("finally") => Self::new(from, to, EdgeType::Finally),
554 Some("iterate") => Self::new(from, to, EdgeType::Iterate),
555 Some("return") => Self::new(from, to, EdgeType::Return),
556 Some("enter") => Self::new(from, to, EdgeType::Enter),
557 Some("exit") => Self::new(from, to, EdgeType::Exit),
558 Some("case") => Self::new(from, to, EdgeType::Case),
559 Some("pattern_match") => Self::new(from, to, EdgeType::PatternMatch),
560 Some("spawn") => Self::new(from, to, EdgeType::Spawn),
561 Some("defer") => Self::new(from, to, EdgeType::Defer),
562 Some("send") => Self::new(from, to, EdgeType::Send),
563 Some("acquire") => Self::new(from, to, EdgeType::Acquire),
564 Some("success") => Self::new(from, to, EdgeType::Success),
565 Some("close") => Self::new(from, to, EdgeType::Close),
566 Some("else") => Self::new(from, to, EdgeType::Else),
567 Some("Ok/Some") => Self::new(from, to, EdgeType::OkSome),
568 Some("Err/None") => Self::new(from, to, EdgeType::ErrNone),
569 Some("next") => Self::new(from, to, EdgeType::Next),
570 Some("match") => Self::new(from, to, EdgeType::Match),
571 Some("loop") => Self::new(from, to, EdgeType::Loop),
572 Some("back_edge") => Self::new(from, to, EdgeType::BackEdge),
573 Some("done") => Self::new(from, to, EdgeType::Exit),
574 Some("reraise") => Self::new(from, to, EdgeType::Reraise),
575 Some("typed_exception") => Self::new(from, to, EdgeType::TypedException),
576 Some("propagate") => Self::new(from, to, EdgeType::Propagate),
577 Some("cleanup") => Self::new(from, to, EdgeType::Cleanup),
578 Some("no_exception") => Self::new(from, to, EdgeType::NoException),
579 Some("enter_success") => Self::new(from, to, EdgeType::EnterSuccess),
580 Some("enter_exception") => Self::new(from, to, EdgeType::EnterException),
581 // Go-specific edge labels
582 Some("panic") => Self::new(from, to, EdgeType::Panic),
583 Some("recover") => Self::new(from, to, EdgeType::Recover),
584 Some("defer_chain") => Self::new(from, to, EdgeType::DeferChain),
585 Some("err != nil") => Self::new(from, to, EdgeType::ErrorPath),
586 Some("err == nil") => Self::new(from, to, EdgeType::SuccessPath),
587 // Go concurrency edge labels
588 Some("receive") => Self::new(from, to, EdgeType::Receive),
589 Some("select_case") => Self::new(from, to, EdgeType::SelectCase),
590 Some("select_default") => Self::new(from, to, EdgeType::SelectDefault),
591 Some("lock") => Self::new(from, to, EdgeType::Lock),
592 Some("unlock") => Self::new(from, to, EdgeType::Unlock),
593 Some("wait") => Self::new(from, to, EdgeType::WaitSync),
594 Some("close_chan") => Self::new(from, to, EdgeType::ChannelClose),
595 Some("ctx_cancel") => Self::new(from, to, EdgeType::ContextCancel),
596 Some("ctx_continue") => Self::new(from, to, EdgeType::ContextContinue),
597 // Python async/await edge labels
598 Some("await") => Self::new(from, to, EdgeType::Await),
599 Some("async_iterate") => Self::new(from, to, EdgeType::AsyncIterate),
600 Some("async_exhausted") => Self::new(from, to, EdgeType::AsyncExhausted),
601 Some("async_enter_success") => Self::new(from, to, EdgeType::AsyncEnterSuccess),
602 Some("async_enter_exception") => Self::new(from, to, EdgeType::AsyncEnterException),
603 Some("spawn_task") => Self::new(from, to, EdgeType::TaskSpawn),
604 Some("join_task") => Self::new(from, to, EdgeType::TaskJoin),
605 // JavaScript/TypeScript async/await edge labels
606 Some("resolved") => Self::new(from, to, EdgeType::PromiseResolved),
607 Some("rejected") => Self::new(from, to, EdgeType::PromiseRejected),
608 Some("settled") => Self::new(from, to, EdgeType::PromiseSettled),
609 Some("all_branch") => Self::new(from, to, EdgeType::PromiseAllBranch),
610 Some("all_join") => Self::new(from, to, EdgeType::PromiseAllJoin),
611 Some("race_branch") => Self::new(from, to, EdgeType::PromiseRaceBranch),
612 Some("race_winner") => Self::new(from, to, EdgeType::PromiseRaceWinner),
613 Some("for_await_iterate") => Self::new(from, to, EdgeType::ForAwaitIterate),
614 Some("for_await_exhausted") => Self::new(from, to, EdgeType::ForAwaitExhausted),
615 Some("async_yield") => Self::new(from, to, EdgeType::AsyncYield),
616 Some("async_resume") => Self::new(from, to, EdgeType::AsyncResume),
617 // Rust async/await edge labels
618 Some(".await") => Self::new(from, to, EdgeType::RustAwait),
619 Some("resume") => Self::new(from, to, EdgeType::RustResume),
620 Some("rust_spawn") => Self::new(from, to, EdgeType::RustSpawn),
621 Some("rust_join_task") => Self::new(from, to, EdgeType::RustJoinTask),
622 Some("join_branch") => Self::new(from, to, EdgeType::RustJoinBranch),
623 Some("join_complete") => Self::new(from, to, EdgeType::RustJoinComplete),
624 Some("select_branch") => Self::new(from, to, EdgeType::RustSelectBranch),
625 Some("select_winner") => Self::new(from, to, EdgeType::RustSelectWinner),
626 Some("select_cancel") => Self::new(from, to, EdgeType::RustSelectCancelled),
627 Some("blocking") => Self::new(from, to, EdgeType::RustBlocking),
628 Some("lock_acquire") => Self::new(from, to, EdgeType::RustLockAcquire),
629 Some("await_with_lock") => Self::new(from, to, EdgeType::RustAwaitWithLock),
630 Some(s) if s.starts_with("goto ") => {
631 Self::with_condition(from, to, EdgeType::Goto, s.to_string())
632 }
633 Some(other) => {
634 // Unknown label: store as condition with Unconditional type
635 Self::with_condition(from, to, EdgeType::Unconditional, other.to_string())
636 }
637 }
638 }
639
640 /// Get the display label for this edge.
641 ///
642 /// Returns the condition if present, otherwise the default label for the edge type.
643 pub fn label(&self) -> String {
644 self.condition.clone().unwrap_or_else(|| {
645 self.edge_type.default_label().to_string()
646 })
647 }
648}
649
650/// Complete control flow graph for a function.
651#[derive(Debug, Serialize, Deserialize)]
652pub struct CFGInfo {
653 /// Function name
654 pub function_name: String,
655 /// All blocks indexed by ID
656 pub blocks: HashMap<BlockId, CFGBlock>,
657 /// All edges
658 pub edges: Vec<CFGEdge>,
659 /// Entry block
660 pub entry: BlockId,
661 /// Exit blocks
662 pub exits: Vec<BlockId>,
663 /// Total decision points in the function.
664 ///
665 /// Tracks all control flow decisions:
666 /// - if statements (+1)
667 /// - elif clauses (+1 each)
668 /// - while loops (+1)
669 /// - for loops (+1)
670 /// - except handlers (+1 each)
671 /// - assert statements (+1)
672 /// - match cases (+1 each)
673 /// - match guards (+1 each)
674 /// - comprehension if clauses (+1 each)
675 ///
676 /// Used for accurate cyclomatic complexity: decision_points + 1
677 #[serde(default)]
678 pub decision_points: usize,
679 /// Extra decision points from comprehensions (if_clause, for_in_clause).
680 /// These affect cyclomatic complexity but don't create separate CFG blocks
681 /// since comprehensions are expressions with internal iteration/filtering.
682 /// NOTE: Kept for backward compatibility, but now included in decision_points.
683 #[serde(default)]
684 pub comprehension_decision_points: usize,
685 /// Nested function/closure CFGs (name -> CFG).
686 ///
687 /// Stores control flow graphs for inner functions and closures defined
688 /// within this function. Maps the nested function name to its CFGInfo.
689 /// This matches Python's `nested_cfgs: dict[str, CFGInfo]` field.
690 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
691 pub nested_cfgs: HashMap<String, Box<CFGInfo>>,
692 // =========================================================================
693 // Python async/await tracking
694 // =========================================================================
695 /// Whether this is an async function (async def in Python).
696 /// Important for async flow analysis and detecting blocking calls.
697 #[serde(default)]
698 pub is_async: bool,
699 /// Number of await suspension points in this function.
700 /// Each await creates a point where execution yields to the event loop.
701 /// Higher counts indicate more complex async interleaving potential.
702 #[serde(default)]
703 pub await_points: usize,
704 /// Detected blocking calls in async context (e.g., time.sleep in async def).
705 /// These are potential bugs that block the event loop.
706 /// Format: Vec<(function_name, line_number)>
707 #[serde(default, skip_serializing_if = "Vec::is_empty")]
708 pub blocking_calls: Vec<(String, usize)>,
709 // =========================================================================
710 // Performance optimization: Lazy adjacency list cache
711 // =========================================================================
712 /// Lazily-built adjacency cache for O(1) successor/predecessor lookups.
713 ///
714 /// Built on first call to `successors()` or `predecessors()`.
715 /// Amortizes O(E) edge scan across all subsequent lookups.
716 /// Skipped during serialization; rebuilt on demand after deserialization.
717 /// NOTE: This field is public for struct literal construction but should
718 /// be treated as internal. Use `OnceCell::new()` when constructing.
719 #[serde(skip)]
720 pub adjacency_cache: OnceCell<AdjacencyCache>,
721}
722
723impl Clone for CFGInfo {
724 fn clone(&self) -> Self {
725 Self {
726 function_name: self.function_name.clone(),
727 blocks: self.blocks.clone(),
728 edges: self.edges.clone(),
729 entry: self.entry,
730 exits: self.exits.clone(),
731 decision_points: self.decision_points,
732 comprehension_decision_points: self.comprehension_decision_points,
733 nested_cfgs: self.nested_cfgs.clone(),
734 is_async: self.is_async,
735 await_points: self.await_points,
736 blocking_calls: self.blocking_calls.clone(),
737 // Reset cache on clone - will be rebuilt lazily if needed
738 adjacency_cache: OnceCell::new(),
739 }
740 }
741}
742
743impl Default for CFGInfo {
744 fn default() -> Self {
745 Self {
746 function_name: String::new(),
747 blocks: HashMap::new(),
748 edges: Vec::new(),
749 entry: BlockId(0),
750 exits: Vec::new(),
751 decision_points: 0,
752 comprehension_decision_points: 0,
753 nested_cfgs: HashMap::new(),
754 is_async: false,
755 await_points: 0,
756 blocking_calls: Vec::new(),
757 adjacency_cache: OnceCell::new(),
758 }
759 }
760}
761
762impl CFGInfo {
763 /// Create a new CFGInfo with the essential fields.
764 ///
765 /// This constructor handles the internal adjacency cache initialization.
766 /// Use this instead of struct literal syntax.
767 ///
768 /// # Arguments
769 /// * `function_name` - Name of the function this CFG represents
770 /// * `blocks` - Map of block IDs to blocks
771 /// * `edges` - List of edges connecting blocks
772 /// * `entry` - Entry block ID
773 /// * `exits` - List of exit block IDs
774 #[must_use]
775 pub fn new(
776 function_name: String,
777 blocks: HashMap<BlockId, CFGBlock>,
778 edges: Vec<CFGEdge>,
779 entry: BlockId,
780 exits: Vec<BlockId>,
781 ) -> Self {
782 Self {
783 function_name,
784 blocks,
785 edges,
786 entry,
787 exits,
788 decision_points: 0,
789 comprehension_decision_points: 0,
790 nested_cfgs: HashMap::new(),
791 is_async: false,
792 await_points: 0,
793 blocking_calls: Vec::new(),
794 adjacency_cache: OnceCell::new(),
795 }
796 }
797
798 /// Create a CFGInfo with all fields specified.
799 ///
800 /// Use this when you need to set decision points, async info, etc.
801 #[must_use]
802 #[allow(clippy::too_many_arguments)]
803 pub fn with_details(
804 function_name: String,
805 blocks: HashMap<BlockId, CFGBlock>,
806 edges: Vec<CFGEdge>,
807 entry: BlockId,
808 exits: Vec<BlockId>,
809 decision_points: usize,
810 comprehension_decision_points: usize,
811 is_async: bool,
812 await_points: usize,
813 blocking_calls: Vec<(String, usize)>,
814 ) -> Self {
815 Self {
816 function_name,
817 blocks,
818 edges,
819 entry,
820 exits,
821 decision_points,
822 comprehension_decision_points,
823 nested_cfgs: HashMap::new(),
824 is_async,
825 await_points,
826 blocking_calls,
827 adjacency_cache: OnceCell::new(),
828 }
829 }
830}
831
832impl CFGInfo {
833 /// Calculate cyclomatic complexity using decision points.
834 ///
835 /// Uses the formula: M = decision_points + 1
836 ///
837 /// This matches Python's implementation and accurately counts:
838 /// - if statements (+1)
839 /// - elif clauses (+1 each)
840 /// - while loops (+1)
841 /// - for loops (+1)
842 /// - except handlers (+1 each)
843 /// - assert statements (+1)
844 /// - match cases (+1 each)
845 /// - match guards (+1 each)
846 /// - comprehension if clauses (+1 each)
847 ///
848 /// Higher values indicate more complex control flow.
849 /// General guidelines:
850 /// - 1-10: Simple, low risk
851 /// - 11-20: Moderate complexity
852 /// - 21-50: Complex, higher risk
853 /// - 50+: Very high risk, consider refactoring
854 pub fn cyclomatic_complexity(&self) -> usize {
855 self.decision_points + 1
856 }
857
858 /// Calculate cyclomatic complexity using graph formula.
859 ///
860 /// Uses the classic formula: M = E - N + 2P + C where:
861 /// - E = number of edges
862 /// - N = number of nodes (blocks)
863 /// - P = number of connected components (always 1 for a single function)
864 /// - C = comprehension decision points (for internal comprehension control flow)
865 ///
866 /// NOTE: This formula may give different results than `cyclomatic_complexity()`
867 /// due to unreachable blocks (after return/raise) and graph structure variations.
868 /// The decision-point-based method is recommended for consistency with Python.
869 #[allow(dead_code)]
870 pub fn cyclomatic_complexity_graph(&self) -> usize {
871 let edges = self.edges.len();
872 let nodes = self.blocks.len();
873 edges.saturating_sub(nodes) + 2 + self.comprehension_decision_points
874 }
875
876 /// Build adjacency cache from edges (called once, lazily).
877 ///
878 /// Scans all edges once to build both successor and predecessor maps.
879 /// Time: O(E) where E = number of edges
880 /// Space: O(E) for storing all edge endpoints
881 fn build_adjacency(&self) -> AdjacencyCache {
882 let mut successors: HashMap<BlockId, Vec<BlockId>> =
883 HashMap::with_capacity(self.blocks.len());
884 let mut predecessors: HashMap<BlockId, Vec<BlockId>> =
885 HashMap::with_capacity(self.blocks.len());
886
887 for edge in &self.edges {
888 successors.entry(edge.from).or_default().push(edge.to);
889 predecessors.entry(edge.to).or_default().push(edge.from);
890 }
891
892 AdjacencyCache {
893 successors,
894 predecessors,
895 }
896 }
897
898 /// Get the adjacency cache, building it if necessary.
899 ///
900 /// Thread-safe: uses `OnceCell` for lazy initialization.
901 /// First call: O(E) to build cache
902 /// Subsequent calls: O(1)
903 #[inline]
904 fn adjacency(&self) -> &AdjacencyCache {
905 self.adjacency_cache.get_or_init(|| self.build_adjacency())
906 }
907
908 /// Get successors of a block (outgoing edges).
909 ///
910 /// Returns a slice of successor block IDs for O(1) lookup after cache is built.
911 /// First call triggers O(E) cache construction; subsequent calls are O(1).
912 ///
913 /// # Arguments
914 /// * `block_id` - The block to get successors for
915 ///
916 /// # Returns
917 /// Slice of successor BlockIds (empty if block has no outgoing edges)
918 #[allow(dead_code)]
919 pub fn successors(&self, block_id: BlockId) -> &[BlockId] {
920 self.adjacency()
921 .successors
922 .get(&block_id)
923 .map(Vec::as_slice)
924 .unwrap_or(&[])
925 }
926
927 /// Get predecessors of a block (incoming edges).
928 ///
929 /// Returns a slice of predecessor block IDs for O(1) lookup after cache is built.
930 /// First call triggers O(E) cache construction; subsequent calls are O(1).
931 ///
932 /// # Arguments
933 /// * `block_id` - The block to get predecessors for
934 ///
935 /// # Returns
936 /// Slice of predecessor BlockIds (empty if block has no incoming edges)
937 #[allow(dead_code)]
938 pub fn predecessors(&self, block_id: BlockId) -> &[BlockId] {
939 self.adjacency()
940 .predecessors
941 .get(&block_id)
942 .map(Vec::as_slice)
943 .unwrap_or(&[])
944 }
945
946 /// Invalidate the adjacency cache (call after modifying edges).
947 ///
948 /// This method is useful if edges are modified after CFG construction.
949 /// The cache will be rebuilt on the next call to `successors()` or `predecessors()`.
950 ///
951 /// Note: This requires mutable access. If the CFG is shared immutably,
952 /// prefer creating a new CFGInfo instead of modifying edges.
953 #[allow(dead_code)]
954 pub fn invalidate_adjacency_cache(&mut self) {
955 self.adjacency_cache = OnceCell::new();
956 }
957
958 /// Add a nested CFG for an inner function or closure.
959 ///
960 /// # Arguments
961 /// * `name` - The name of the nested function/closure
962 /// * `cfg` - The control flow graph for the nested function
963 #[allow(dead_code)]
964 pub fn add_nested(&mut self, name: String, cfg: CFGInfo) {
965 self.nested_cfgs.insert(name, Box::new(cfg));
966 }
967
968 /// Get nested CFG by name.
969 ///
970 /// # Arguments
971 /// * `name` - The name of the nested function to retrieve
972 ///
973 /// # Returns
974 /// Reference to the nested CFGInfo if found, None otherwise
975 #[allow(dead_code)]
976 pub fn get_nested(&self, name: &str) -> Option<&CFGInfo> {
977 self.nested_cfgs.get(name).map(|boxed| boxed.as_ref())
978 }
979
980 /// Get mutable reference to nested CFG by name.
981 ///
982 /// # Arguments
983 /// * `name` - The name of the nested function to retrieve
984 ///
985 /// # Returns
986 /// Mutable reference to the nested CFGInfo if found, None otherwise
987 #[allow(dead_code)]
988 pub fn get_nested_mut(&mut self, name: &str) -> Option<&mut CFGInfo> {
989 self.nested_cfgs.get_mut(name).map(|boxed| boxed.as_mut())
990 }
991
992 /// Iterate over all nested CFGs.
993 ///
994 /// # Returns
995 /// Iterator yielding (name, cfg) pairs for all nested functions
996 #[allow(dead_code)]
997 pub fn nested_iter(&self) -> impl Iterator<Item = (&String, &CFGInfo)> {
998 self.nested_cfgs.iter().map(|(k, v)| (k, v.as_ref()))
999 }
1000
1001 /// Check if this CFG has any nested functions/closures.
1002 #[allow(dead_code)]
1003 pub fn has_nested(&self) -> bool {
1004 !self.nested_cfgs.is_empty()
1005 }
1006
1007 /// Get the count of nested CFGs.
1008 #[allow(dead_code)]
1009 pub fn nested_count(&self) -> usize {
1010 self.nested_cfgs.len()
1011 }
1012
1013 /// Validate CFG structural invariants.
1014 ///
1015 /// Checks that:
1016 /// - Entry block exists in the blocks map
1017 /// - All exit blocks exist in the blocks map
1018 /// - All edge source and target blocks exist in the blocks map
1019 ///
1020 /// # Errors
1021 ///
1022 /// Returns `CFGError` if any invariant is violated:
1023 /// - `InvalidEntry` if entry block is not in blocks
1024 /// - `InvalidExit` if any exit block is not in blocks
1025 /// - `InvalidEdgeBlock` if any edge references a non-existent block
1026 ///
1027 /// # Example
1028 ///
1029 /// ```
1030 /// use go_brrr::cfg::types::{CFGInfo, CFGBlock, BlockId, BlockType};
1031 /// use std::collections::HashMap;
1032 ///
1033 /// let entry_id = BlockId(0);
1034 /// let mut blocks = HashMap::new();
1035 /// blocks.insert(entry_id, CFGBlock {
1036 /// id: entry_id,
1037 /// label: "entry".to_string(),
1038 /// block_type: BlockType::Entry,
1039 /// statements: vec![],
1040 /// func_calls: vec![],
1041 /// start_line: 1,
1042 /// end_line: 1,
1043 /// });
1044 ///
1045 /// let cfg = CFGInfo::new(
1046 /// "test".to_string(),
1047 /// blocks,
1048 /// vec![],
1049 /// entry_id,
1050 /// vec![entry_id],
1051 /// );
1052 ///
1053 /// assert!(cfg.validate().is_ok());
1054 /// ```
1055 #[allow(dead_code)]
1056 pub fn validate(&self) -> Result<(), CFGError> {
1057 // Check entry block exists
1058 if !self.blocks.contains_key(&self.entry) {
1059 return Err(CFGError::InvalidEntry(self.entry));
1060 }
1061
1062 // Check all exit blocks exist
1063 for exit in &self.exits {
1064 if !self.blocks.contains_key(exit) {
1065 return Err(CFGError::InvalidExit(*exit));
1066 }
1067 }
1068
1069 // Check all edge endpoints exist
1070 for edge in &self.edges {
1071 if !self.blocks.contains_key(&edge.from) {
1072 return Err(CFGError::InvalidEdgeBlock(edge.from));
1073 }
1074 if !self.blocks.contains_key(&edge.to) {
1075 return Err(CFGError::InvalidEdgeBlock(edge.to));
1076 }
1077 }
1078
1079 Ok(())
1080 }
1081
1082 /// Insert a block with duplicate detection.
1083 ///
1084 /// Unlike direct `blocks.insert()`, this method returns an error if
1085 /// a block with the same ID already exists, preventing silent overwrites.
1086 ///
1087 /// # Errors
1088 ///
1089 /// Returns `CFGError::DuplicateBlockId` if a block with the same ID
1090 /// already exists in the blocks map.
1091 ///
1092 /// # Example
1093 ///
1094 /// ```
1095 /// use go_brrr::cfg::types::{CFGInfo, CFGBlock, BlockId, BlockType};
1096 /// use std::collections::HashMap;
1097 ///
1098 /// let mut cfg = CFGInfo::new(
1099 /// "test".to_string(),
1100 /// HashMap::new(),
1101 /// vec![],
1102 /// BlockId(0),
1103 /// vec![],
1104 /// );
1105 ///
1106 /// let block = CFGBlock {
1107 /// id: BlockId(0),
1108 /// label: "entry".to_string(),
1109 /// block_type: BlockType::Entry,
1110 /// statements: vec![],
1111 /// func_calls: vec![],
1112 /// start_line: 1,
1113 /// end_line: 1,
1114 /// };
1115 ///
1116 /// // First insert succeeds
1117 /// assert!(cfg.insert_block(block.clone()).is_ok());
1118 ///
1119 /// // Duplicate insert fails
1120 /// assert!(cfg.insert_block(block).is_err());
1121 /// ```
1122 #[allow(dead_code)]
1123 pub fn insert_block(&mut self, block: CFGBlock) -> Result<(), CFGError> {
1124 if self.blocks.contains_key(&block.id) {
1125 return Err(CFGError::DuplicateBlockId(block.id));
1126 }
1127 self.blocks.insert(block.id, block);
1128 Ok(())
1129 }
1130
1131 /// Safe getter for a block by ID.
1132 ///
1133 /// Returns a reference to the block if it exists, None otherwise.
1134 /// Prefer this over direct `blocks.get()` for API consistency.
1135 ///
1136 /// # Arguments
1137 /// * `id` - The block ID to look up
1138 ///
1139 /// # Returns
1140 /// `Some(&CFGBlock)` if found, `None` otherwise
1141 #[allow(dead_code)]
1142 pub fn get_block(&self, id: BlockId) -> Option<&CFGBlock> {
1143 self.blocks.get(&id)
1144 }
1145
1146 /// Check if a block is the entry block.
1147 ///
1148 /// # Arguments
1149 /// * `id` - The block ID to check
1150 ///
1151 /// # Returns
1152 /// `true` if the block is the entry point, `false` otherwise
1153 #[allow(dead_code)]
1154 pub fn is_entry(&self, id: BlockId) -> bool {
1155 self.entry == id
1156 }
1157
1158 /// Check if a block is an exit block.
1159 ///
1160 /// # Arguments
1161 /// * `id` - The block ID to check
1162 ///
1163 /// # Returns
1164 /// `true` if the block is in the exits list, `false` otherwise
1165 #[allow(dead_code)]
1166 pub fn is_exit(&self, id: BlockId) -> bool {
1167 self.exits.contains(&id)
1168 }
1169
1170 /// Detect if the CFG has any back edges (indicating loops).
1171 ///
1172 /// A back edge is an edge from a node to one of its ancestors in the
1173 /// DFS tree, which indicates a cycle (loop) in the control flow.
1174 ///
1175 /// # Returns
1176 /// `true` if the CFG contains at least one back edge (loop), `false` otherwise
1177 ///
1178 /// # Algorithm
1179 /// Uses depth-first search with a recursion stack to detect cycles.
1180 /// An edge to a node currently in the recursion stack is a back edge.
1181 #[allow(dead_code)]
1182 pub fn has_back_edge(&self) -> bool {
1183 let mut visited = HashSet::new();
1184 let mut stack = HashSet::new();
1185 self.has_back_edge_dfs(self.entry, &mut visited, &mut stack)
1186 }
1187
1188 /// Internal DFS helper for back edge detection.
1189 ///
1190 /// Traverses the graph maintaining both visited set and current recursion stack.
1191 /// A back edge is detected when we encounter a successor already in the stack.
1192 #[allow(dead_code)]
1193 fn has_back_edge_dfs(
1194 &self,
1195 node: BlockId,
1196 visited: &mut HashSet<BlockId>,
1197 stack: &mut HashSet<BlockId>,
1198 ) -> bool {
1199 visited.insert(node);
1200 stack.insert(node);
1201
1202 for &succ in self.successors(node) {
1203 if !visited.contains(&succ) {
1204 if self.has_back_edge_dfs(succ, visited, stack) {
1205 return true;
1206 }
1207 } else if stack.contains(&succ) {
1208 // Found a back edge: edge to ancestor in DFS tree
1209 return true;
1210 }
1211 }
1212
1213 stack.remove(&node);
1214 false
1215 }
1216
1217 /// Check if the graph is connected from the entry block.
1218 ///
1219 /// A CFG is considered connected if all blocks are reachable from the entry.
1220 /// Unreachable blocks indicate dead code or a malformed graph.
1221 ///
1222 /// # Returns
1223 /// `true` if all blocks are reachable from entry, `false` otherwise
1224 #[allow(dead_code)]
1225 pub fn is_connected(&self) -> bool {
1226 let reachable = self.reachable_from(self.entry);
1227 reachable.len() == self.blocks.len()
1228 }
1229
1230 /// Compute the set of blocks reachable from a given start block.
1231 ///
1232 /// Uses breadth-first traversal to find all blocks reachable via
1233 /// forward edges from the start block.
1234 ///
1235 /// # Arguments
1236 /// * `start` - The block ID to start traversal from
1237 ///
1238 /// # Returns
1239 /// Set of all reachable block IDs (includes the start block)
1240 #[allow(dead_code)]
1241 pub fn reachable_from(&self, start: BlockId) -> HashSet<BlockId> {
1242 let mut reachable = HashSet::new();
1243 let mut queue = vec![start];
1244
1245 while let Some(node) = queue.pop() {
1246 if reachable.insert(node) {
1247 queue.extend(self.successors(node).iter().copied());
1248 }
1249 }
1250
1251 reachable
1252 }
1253
1254 /// Compute a topological ordering of blocks for dataflow analysis.
1255 ///
1256 /// Uses Kahn's algorithm to produce an ordering where each block appears
1257 /// after all its predecessors (excluding back edges). This is essential
1258 /// for correct dataflow analysis where we need to process definitions
1259 /// before their uses.
1260 ///
1261 /// # Returns
1262 /// Vector of BlockIds in topological order. For cyclic graphs (loops),
1263 /// back edges are ignored to produce a valid ordering.
1264 ///
1265 /// # Algorithm
1266 /// 1. Build in-degree map (counting non-back-edge predecessors)
1267 /// 2. Start with blocks that have zero in-degree
1268 /// 3. Process each block, decrementing in-degree of successors
1269 /// 4. Add successors to queue when their in-degree reaches zero
1270 pub fn topological_order(&self) -> Vec<BlockId> {
1271 use std::collections::VecDeque;
1272
1273 // Identify back edges using DFS
1274 let back_edges = self.find_back_edges();
1275
1276 // Build in-degree map (excluding back edges)
1277 let mut in_degree: HashMap<BlockId, usize> = HashMap::new();
1278 for block_id in self.blocks.keys() {
1279 in_degree.insert(*block_id, 0);
1280 }
1281
1282 for edge in &self.edges {
1283 if !back_edges.contains(&(edge.from, edge.to)) {
1284 *in_degree.entry(edge.to).or_insert(0) += 1;
1285 }
1286 }
1287
1288 // Start with zero in-degree blocks (should include entry)
1289 let mut queue: VecDeque<BlockId> = in_degree
1290 .iter()
1291 .filter(|(_, °)| deg == 0)
1292 .map(|(&id, _)| id)
1293 .collect();
1294
1295 // Ensure entry is processed first if it has zero in-degree
1296 if let Some(pos) = queue.iter().position(|&id| id == self.entry) {
1297 queue.swap(0, pos);
1298 }
1299
1300 let mut result = Vec::with_capacity(self.blocks.len());
1301
1302 while let Some(block_id) = queue.pop_front() {
1303 result.push(block_id);
1304
1305 // Process successors (excluding back edges)
1306 for edge in &self.edges {
1307 if edge.from == block_id && !back_edges.contains(&(edge.from, edge.to)) {
1308 if let Some(deg) = in_degree.get_mut(&edge.to) {
1309 *deg = deg.saturating_sub(1);
1310 if *deg == 0 {
1311 queue.push_back(edge.to);
1312 }
1313 }
1314 }
1315 }
1316 }
1317
1318 result
1319 }
1320
1321 /// Find all back edges in the CFG using DFS.
1322 ///
1323 /// A back edge is an edge from a node to one of its ancestors in the
1324 /// DFS tree. These indicate loops in the control flow.
1325 ///
1326 /// # Returns
1327 /// Set of (from, to) block ID pairs representing back edges.
1328 fn find_back_edges(&self) -> HashSet<(BlockId, BlockId)> {
1329 let mut back_edges = HashSet::new();
1330 let mut visited = HashSet::new();
1331 let mut stack = HashSet::new();
1332 self.find_back_edges_dfs(self.entry, &mut visited, &mut stack, &mut back_edges);
1333 back_edges
1334 }
1335
1336 /// Internal DFS helper for finding back edges.
1337 fn find_back_edges_dfs(
1338 &self,
1339 node: BlockId,
1340 visited: &mut HashSet<BlockId>,
1341 stack: &mut HashSet<BlockId>,
1342 back_edges: &mut HashSet<(BlockId, BlockId)>,
1343 ) {
1344 visited.insert(node);
1345 stack.insert(node);
1346
1347 for &succ in self.successors(node) {
1348 if !visited.contains(&succ) {
1349 self.find_back_edges_dfs(succ, visited, stack, back_edges);
1350 } else if stack.contains(&succ) {
1351 // Found a back edge
1352 back_edges.insert((node, succ));
1353 }
1354 }
1355
1356 stack.remove(&node);
1357 }
1358
1359 /// Get the block containing a specific line number.
1360 ///
1361 /// Searches through all blocks to find which block contains the given line.
1362 /// When multiple blocks contain the same line (overlapping ranges), prefers:
1363 /// 1. The block with the smallest range (most specific)
1364 /// 2. If equal range, the block that ends at the line (definitions belong to ending block)
1365 ///
1366 /// This is important for CFG-aware DFG analysis where variable definitions
1367 /// at boundary lines need to be assigned to the correct block.
1368 ///
1369 /// # Arguments
1370 /// * `line` - The line number to look up (1-indexed)
1371 ///
1372 /// # Returns
1373 /// The BlockId of the block containing the line, or None if not found.
1374 pub fn block_for_line(&self, line: usize) -> Option<BlockId> {
1375 // Find all blocks that contain this line
1376 let mut candidates: Vec<(BlockId, &CFGBlock)> = self
1377 .blocks
1378 .iter()
1379 .filter(|(_, block)| line >= block.start_line && line <= block.end_line)
1380 .map(|(id, block)| (*id, block))
1381 .collect();
1382
1383 if candidates.is_empty() {
1384 // Fallback: find the closest block that starts at or before the line
1385 let mut best_match: Option<(BlockId, usize)> = None;
1386 for (id, block) in &self.blocks {
1387 if block.start_line <= line {
1388 match best_match {
1389 None => best_match = Some((*id, block.start_line)),
1390 Some((_, best_start)) if block.start_line > best_start => {
1391 best_match = Some((*id, block.start_line));
1392 }
1393 _ => {}
1394 }
1395 }
1396 }
1397 return best_match.map(|(id, _)| id);
1398 }
1399
1400 if candidates.len() == 1 {
1401 return Some(candidates[0].0);
1402 }
1403
1404 // Multiple blocks contain this line - need to pick the most appropriate one
1405 // For DFG analysis, we want definitions at a line to belong to the block
1406 // where they're executed, not merge/join blocks which are conceptual points.
1407 //
1408 // Priority:
1409 // 1. Prefer blocks that end at this line (the definition belongs there)
1410 // 2. Prefer blocks with later start_line (more specific to this location)
1411 // 3. Prefer larger ranges (actual code blocks over phantom merge points)
1412 candidates.sort_by(|(_, a), (_, b)| {
1413 // First priority: prefer block that ends at this line (definitions belong to ending block)
1414 let ends_at_line_a = a.end_line == line;
1415 let ends_at_line_b = b.end_line == line;
1416 let starts_at_line_a = a.start_line == line;
1417 let starts_at_line_b = b.start_line == line;
1418
1419 // If one block only starts at this line while another ends at it,
1420 // prefer the one that ends (the definition is in the ending block)
1421 match (ends_at_line_a && !starts_at_line_a, ends_at_line_b && !starts_at_line_b) {
1422 (true, false) => return std::cmp::Ordering::Less,
1423 (false, true) => return std::cmp::Ordering::Greater,
1424 _ => {}
1425 }
1426
1427 let range_a = a.end_line.saturating_sub(a.start_line);
1428 let range_b = b.end_line.saturating_sub(b.start_line);
1429
1430 // Prefer larger range (actual code blocks over single-line merge points)
1431 match range_b.cmp(&range_a) {
1432 std::cmp::Ordering::Equal => {
1433 // If same range, prefer block with later start
1434 b.start_line.cmp(&a.start_line)
1435 }
1436 other => other,
1437 }
1438 });
1439
1440 Some(candidates[0].0)
1441 }
1442}