go_brrr/cfg/
types.rs

1//! CFG type definitions.
2
3use once_cell::sync::OnceCell;
4use serde::{Deserialize, Serialize};
5use std::collections::{HashMap, HashSet};
6
7/// Cached adjacency lists for O(1) successor/predecessor lookups.
8///
9/// Built lazily on first access to avoid overhead when not needed.
10/// Uses separate HashMaps for outgoing (successors) and incoming (predecessors) edges.
11/// NOTE: This type is public for `CFGInfo.adjacency_cache` but is an internal implementation detail.
12#[derive(Debug)]
13pub struct AdjacencyCache {
14    /// BlockId -> list of successor BlockIds (outgoing edges)
15    successors: HashMap<BlockId, Vec<BlockId>>,
16    /// BlockId -> list of predecessor BlockIds (incoming edges)
17    predecessors: HashMap<BlockId, Vec<BlockId>>,
18}
19
20/// Errors that can occur during CFG validation.
21///
22/// These errors indicate structural inconsistencies in the control flow graph
23/// that would cause issues during analysis or rendering.
24#[derive(Debug, thiserror::Error)]
25#[allow(dead_code)]
26pub enum CFGError {
27    /// Entry block ID does not exist in the blocks map.
28    #[error("Entry block {0:?} not found in blocks")]
29    InvalidEntry(BlockId),
30
31    /// An exit block ID does not exist in the blocks map.
32    #[error("Exit block {0:?} not found in blocks")]
33    InvalidExit(BlockId),
34
35    /// Duplicate block ID was inserted (would silently overwrite).
36    #[error("Duplicate block ID {0:?}")]
37    DuplicateBlockId(BlockId),
38
39    /// An edge references a block that does not exist.
40    #[error("Edge references non-existent block {0:?}")]
41    InvalidEdgeBlock(BlockId),
42}
43
44/// Unique identifier for a basic block.
45#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
46pub struct BlockId(pub usize);
47
48/// Type of a basic block in the control flow graph.
49///
50/// Categorizes blocks by their role in the control flow:
51/// - Entry/Exit: Function boundaries
52/// - Branch: Conditional decision points
53/// - LoopHeader/LoopBody: Loop constructs
54/// - Return: Explicit return statements
55/// - Body: Regular sequential code
56/// - Exception/Finally: Error handling constructs
57/// - TryOperator/ErrorReturn/PanicSite: Rust Result/Option handling
58#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
59#[serde(rename_all = "snake_case")]
60pub enum BlockType {
61    /// Function entry point
62    Entry,
63    /// Function exit point
64    Exit,
65    /// Conditional branch (if, elif, match case)
66    Branch,
67    /// Loop header (condition evaluation)
68    LoopHeader,
69    /// Loop body
70    LoopBody,
71    /// Return statement block
72    Return,
73    /// Regular code block (default)
74    #[default]
75    Body,
76    /// Exception handler (except, catch)
77    Exception,
78    /// Finally block
79    Finally,
80    /// Rust ? operator - may propagate error/none (Result/Option)
81    TryOperator,
82    /// Rust early return from ? operator on Err/None
83    ErrorReturn,
84    /// Potential panic site (.unwrap(), .expect(), panic!(), unreachable!(), todo!())
85    PanicSite,
86    /// Pattern match block (if let, while let, match arm)
87    PatternMatch,
88    /// Closure body (separate control flow context)
89    Closure,
90    // =========================================================================
91    // Go-specific block types for defer/panic/recover flow
92    // =========================================================================
93    /// Go: Deferred function call (executes on function exit in LIFO order)
94    DeferredCall,
95    /// Go: Recover call site (recover() in deferred function)
96    RecoverCall,
97    /// Go: Error check pattern (if err != nil { return err })
98    ErrorCheck,
99    /// Go: Panic site (explicit panic() or potential runtime panic)
100    GoPanicSite,
101    // =========================================================================
102    // Go-specific block types for concurrency (goroutines, channels, sync)
103    // =========================================================================
104    /// Go: Goroutine spawn (go func() {...} or go existingFunc())
105    GoroutineSpawn,
106    /// Go: Channel send operation (ch <- value) - may block
107    ChannelSend,
108    /// Go: Channel receive operation (val := <-ch or <-ch) - may block
109    ChannelReceive,
110    /// Go: Select statement block (multi-way channel operation)
111    SelectBlock,
112    /// Go: Select case (case <-ch: or case ch <-:)
113    SelectCase,
114    /// Go: Mutex lock operation (mu.Lock() or mu.RLock())
115    MutexLock,
116    /// Go: Mutex unlock operation (mu.Unlock() or mu.RUnlock())
117    MutexUnlock,
118    /// Go: WaitGroup wait operation (wg.Wait()) - synchronization point
119    WaitGroupWait,
120    /// Go: WaitGroup add/done operation (wg.Add(), wg.Done())
121    WaitGroupOp,
122    /// Go: sync.Once.Do() call - one-time initialization
123    OnceCall,
124    /// Go: Channel make/creation (make(chan T) or make(chan T, size))
125    ChannelMake,
126    /// Go: Channel close operation (close(ch))
127    ChannelClose,
128    /// Go: Context cancellation check (ctx.Done(), ctx.Err())
129    ContextCheck,
130    // =========================================================================
131    // Python async/await block types
132    // =========================================================================
133    /// Python: Await expression - suspension point where execution yields to event loop
134    AwaitPoint,
135    /// Python: Async for loop header
136    AsyncForLoop,
137    /// Python: Async with block (async context manager)
138    AsyncWithBlock,
139    /// Python: Task spawn point (asyncio.create_task, asyncio.gather, etc.)
140    TaskSpawn,
141    // =========================================================================
142    // JavaScript/TypeScript async/await and Promise block types
143    // =========================================================================
144    /// JS/TS: Promise.then() callback block - executes on promise resolution
145    PromiseThen,
146    /// JS/TS: Promise.catch() callback block - executes on promise rejection
147    PromiseCatch,
148    /// JS/TS: Promise.finally() callback block - executes regardless of outcome
149    PromiseFinally,
150    /// JS/TS: Promise.all() - parallel execution, waits for all promises
151    PromiseAll,
152    /// JS/TS: Promise.race() - parallel execution, resolves with first settled
153    PromiseRace,
154    /// JS/TS: Promise.allSettled() - parallel execution, waits for all to settle
155    PromiseAllSettled,
156    /// JS/TS: Promise.any() - parallel execution, resolves with first fulfilled
157    PromiseAny,
158    /// JS/TS: Async generator yield point (yield* in async generator)
159    AsyncGeneratorYield,
160    /// JS/TS: for await...of loop header (async iteration)
161    ForAwaitOf,
162    // =========================================================================
163    // Rust async/await block types
164    // =========================================================================
165    /// Rust: Await expression (.await) - suspension point where Future yields
166    /// Critical for tracking async state machine transitions and cancellation safety.
167    RustAwaitPoint,
168    /// Rust: Task spawn point (tokio::spawn, async_std::spawn, etc.)
169    /// Creates a concurrent task that runs independently.
170    RustTaskSpawn,
171    /// Rust: tokio::join! - concurrent execution, waits for all futures
172    /// All branches run concurrently, function continues when all complete.
173    RustJoin,
174    /// Rust: tokio::select! - concurrent execution, first completion wins
175    /// All branches race, first to complete is taken, others are cancelled.
176    RustSelect,
177    /// Rust: select! branch (individual arm in select!)
178    RustSelectBranch,
179    /// Rust: Blocking call in async context (std::thread::sleep, blocking I/O)
180    /// ANTI-PATTERN: Blocks the executor thread, starving other tasks.
181    RustBlockingCall,
182    /// Rust: Lock held across await (MutexGuard, RwLockGuard across .await)
183    /// ANTI-PATTERN: Causes deadlocks and breaks Send bounds.
184    RustLockAcrossAwait,
185    /// Rust: Async block (async { ... }) - creates anonymous Future
186    RustAsyncBlock,
187}
188
189/// A basic block in the control flow graph.
190#[derive(Debug, Clone, Serialize, Deserialize)]
191pub struct CFGBlock {
192    /// Unique block identifier
193    pub id: BlockId,
194    /// Human-readable label
195    pub label: String,
196    /// Type of block (entry, exit, branch, loop, etc.)
197    #[serde(default)]
198    pub block_type: BlockType,
199    /// Statements in this block
200    pub statements: Vec<String>,
201    /// Functions called within this block
202    #[serde(default)]
203    pub func_calls: Vec<String>,
204    /// Starting line number
205    pub start_line: usize,
206    /// Ending line number
207    pub end_line: usize,
208}
209
210/// Semantic type of a CFG edge.
211///
212/// This enum represents the control flow semantics, while `condition`
213/// provides human-readable display text (e.g., "x > 0").
214#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
215#[serde(rename_all = "snake_case")]
216pub enum EdgeType {
217    /// Unconditional edge (fallthrough, sequential)
218    Unconditional,
219    /// True branch of a conditional
220    True,
221    /// False branch of a conditional
222    False,
223    /// Back edge in a loop
224    BackEdge,
225    /// Break statement exiting a loop
226    Break,
227    /// Continue statement restarting loop iteration
228    Continue,
229    /// Exception handling path
230    Exception,
231    /// Finally block execution
232    Finally,
233    /// Loop iteration edge
234    Iterate,
235    /// Return from function
236    Return,
237    /// Loop entry edge
238    Enter,
239    /// Loop exit edge (done/exit)
240    Exit,
241    /// Switch/match case
242    Case,
243    /// Pattern match in match statement
244    PatternMatch,
245    /// Goto statement (C/C++)
246    Goto,
247    /// Goroutine spawn (Go)
248    Spawn,
249    /// Deferred execution (Go)
250    Defer,
251    /// Channel send (Go)
252    Send,
253    /// Resource acquire (Java try-with-resources)
254    Acquire,
255    /// Successful completion
256    Success,
257    /// Resource close (Java try-with-resources)
258    Close,
259    /// Else branch
260    Else,
261    /// Result/Option Ok/Some variant (Rust)
262    OkSome,
263    /// Result/Option Err/None variant (Rust)
264    ErrNone,
265    /// Loop next iteration
266    Next,
267    /// Match expression dispatch
268    Match,
269    /// Loop continuation (back to condition)
270    Loop,
271    /// Assert pass (condition true)
272    Pass,
273    /// Assert fail (condition false, raises AssertionError)
274    Fail,
275    /// Loop exhausted (iteration complete)
276    Exhausted,
277    /// No exception occurred (try/else)
278    NoException,
279    /// Exception group handling (except*)
280    ExceptionGroup,
281    /// Context manager enter success
282    EnterSuccess,
283    /// Context manager enter exception
284    EnterException,
285    /// Cleanup path
286    Cleanup,
287    /// Exception propagation
288    Propagate,
289    /// Re-raise current exception (bare `raise` statement)
290    Reraise,
291    /// Typed exception edge (edge to specific exception handler)
292    TypedException,
293    // =========================================================================
294    // Go-specific edge types for defer/panic/recover flow
295    // =========================================================================
296    /// Go: Panic propagation (control flow on panic)
297    Panic,
298    /// Go: Recover success (panic caught by recover())
299    Recover,
300    /// Go: Deferred execution chain (between deferred calls)
301    DeferChain,
302    /// Go: Error return path (err != nil -> return)
303    ErrorPath,
304    /// Go: Success path (err == nil -> continue)
305    SuccessPath,
306    // =========================================================================
307    // Go-specific edge types for concurrency (goroutines, channels, sync)
308    // =========================================================================
309    /// Go: Channel receive operation edge
310    Receive,
311    /// Go: Select case branch (case <-ch: or case ch <- val:)
312    SelectCase,
313    /// Go: Select default branch (non-blocking)
314    SelectDefault,
315    /// Go: Mutex lock acquisition
316    Lock,
317    /// Go: Mutex unlock release
318    Unlock,
319    /// Go: WaitGroup synchronization
320    WaitSync,
321    /// Go: Channel close operation
322    ChannelClose,
323    /// Go: Context cancellation path (ctx.Done() triggered)
324    ContextCancel,
325    /// Go: Context continue path (ctx not cancelled)
326    ContextContinue,
327    // =========================================================================
328    // Python async/await edge types
329    // =========================================================================
330    /// Python: Await suspension point - execution yields to event loop
331    Await,
332    /// Python: Async for iteration (like Iterate but async)
333    AsyncIterate,
334    /// Python: Async for exhausted (like Exhausted but async)
335    AsyncExhausted,
336    /// Python: Async with enter success
337    AsyncEnterSuccess,
338    /// Python: Async with enter exception
339    AsyncEnterException,
340    /// Python: Task spawn (concurrent execution begins)
341    TaskSpawn,
342    /// Python: Task join (await on task result)
343    TaskJoin,
344    // =========================================================================
345    // JavaScript/TypeScript async/await and Promise edge types
346    // =========================================================================
347    /// JS/TS: Promise resolved - .then() callback path
348    PromiseResolved,
349    /// JS/TS: Promise rejected - .catch() callback path
350    PromiseRejected,
351    /// JS/TS: Promise settled - .finally() callback path (always executes)
352    PromiseSettled,
353    /// JS/TS: Promise.all parallel branch - edge to each task
354    PromiseAllBranch,
355    /// JS/TS: Promise.all join - all tasks completed
356    PromiseAllJoin,
357    /// JS/TS: Promise.race branch - edge to each racing task
358    PromiseRaceBranch,
359    /// JS/TS: Promise.race winner - first settled wins
360    PromiseRaceWinner,
361    /// JS/TS: for await...of iteration edge
362    ForAwaitIterate,
363    /// JS/TS: for await...of exhausted edge
364    ForAwaitExhausted,
365    /// JS/TS: Async generator yield edge
366    AsyncYield,
367    /// JS/TS: Async generator resume edge (after yield)
368    AsyncResume,
369    // =========================================================================
370    // Rust async/await edge types
371    // =========================================================================
372    /// Rust: .await suspension point - Future yields to executor
373    RustAwait,
374    /// Rust: .await resumption - Future resumes after poll returns Ready
375    RustResume,
376    /// Rust: tokio::spawn - new task spawned (concurrent execution)
377    RustSpawn,
378    /// Rust: JoinHandle.await - waiting for spawned task result
379    RustJoinTask,
380    /// Rust: join! branch - edge to each concurrent future
381    RustJoinBranch,
382    /// Rust: join! complete - all futures completed
383    RustJoinComplete,
384    /// Rust: select! branch - edge to each racing future
385    RustSelectBranch,
386    /// Rust: select! winner - first future to complete
387    RustSelectWinner,
388    /// Rust: select! cancelled - future cancelled (not selected)
389    RustSelectCancelled,
390    /// Rust: Blocking operation in async context (anti-pattern)
391    RustBlocking,
392    /// Rust: Lock acquisition in async context (potential issue)
393    RustLockAcquire,
394    /// Rust: Await while holding lock (anti-pattern)
395    RustAwaitWithLock,
396}
397
398impl EdgeType {
399    /// Get the default display label for this edge type.
400    pub fn default_label(&self) -> &'static str {
401        match self {
402            EdgeType::Unconditional => "",
403            EdgeType::True => "true",
404            EdgeType::False => "false",
405            EdgeType::BackEdge => "back_edge",
406            EdgeType::Break => "break",
407            EdgeType::Continue => "continue",
408            EdgeType::Exception => "exception",
409            EdgeType::Finally => "finally",
410            EdgeType::Iterate => "iterate",
411            EdgeType::Return => "return",
412            EdgeType::Enter => "enter",
413            EdgeType::Exit => "exit",
414            EdgeType::Case => "case",
415            EdgeType::PatternMatch => "pattern_match",
416            EdgeType::Goto => "goto",
417            EdgeType::Spawn => "spawn",
418            EdgeType::Defer => "defer",
419            EdgeType::Send => "send",
420            EdgeType::Acquire => "acquire",
421            EdgeType::Success => "success",
422            EdgeType::Close => "close",
423            EdgeType::Else => "else",
424            EdgeType::OkSome => "Ok/Some",
425            EdgeType::ErrNone => "Err/None",
426            EdgeType::Next => "next",
427            EdgeType::Match => "match",
428            EdgeType::Loop => "loop",
429            EdgeType::Pass => "pass",
430            EdgeType::Fail => "fail",
431            EdgeType::Exhausted => "exhausted",
432            EdgeType::NoException => "no_exception",
433            EdgeType::ExceptionGroup => "exception_group",
434            EdgeType::EnterSuccess => "enter_success",
435            EdgeType::EnterException => "enter_exception",
436            EdgeType::Cleanup => "cleanup",
437            EdgeType::Propagate => "propagate",
438            EdgeType::Reraise => "reraise",
439            EdgeType::TypedException => "typed_exception",
440            // Go-specific edge labels
441            EdgeType::Panic => "panic",
442            EdgeType::Recover => "recover",
443            EdgeType::DeferChain => "defer_chain",
444            EdgeType::ErrorPath => "err != nil",
445            EdgeType::SuccessPath => "err == nil",
446            // Go concurrency edge labels
447            EdgeType::Receive => "receive",
448            EdgeType::SelectCase => "select_case",
449            EdgeType::SelectDefault => "select_default",
450            EdgeType::Lock => "lock",
451            EdgeType::Unlock => "unlock",
452            EdgeType::WaitSync => "wait",
453            EdgeType::ChannelClose => "close_chan",
454            EdgeType::ContextCancel => "ctx_cancel",
455            EdgeType::ContextContinue => "ctx_continue",
456            // Python async/await edge labels
457            EdgeType::Await => "await",
458            EdgeType::AsyncIterate => "async_iterate",
459            EdgeType::AsyncExhausted => "async_exhausted",
460            EdgeType::AsyncEnterSuccess => "async_enter_success",
461            EdgeType::AsyncEnterException => "async_enter_exception",
462            EdgeType::TaskSpawn => "spawn_task",
463            EdgeType::TaskJoin => "join_task",
464            // JavaScript/TypeScript async/await edge labels
465            EdgeType::PromiseResolved => "resolved",
466            EdgeType::PromiseRejected => "rejected",
467            EdgeType::PromiseSettled => "settled",
468            EdgeType::PromiseAllBranch => "all_branch",
469            EdgeType::PromiseAllJoin => "all_join",
470            EdgeType::PromiseRaceBranch => "race_branch",
471            EdgeType::PromiseRaceWinner => "race_winner",
472            EdgeType::ForAwaitIterate => "for_await_iterate",
473            EdgeType::ForAwaitExhausted => "for_await_exhausted",
474            EdgeType::AsyncYield => "async_yield",
475            EdgeType::AsyncResume => "async_resume",
476            // Rust async/await edge labels
477            EdgeType::RustAwait => ".await",
478            EdgeType::RustResume => "resume",
479            EdgeType::RustSpawn => "rust_spawn",
480            EdgeType::RustJoinTask => "join_task",
481            EdgeType::RustJoinBranch => "join_branch",
482            EdgeType::RustJoinComplete => "join_complete",
483            EdgeType::RustSelectBranch => "select_branch",
484            EdgeType::RustSelectWinner => "select_winner",
485            EdgeType::RustSelectCancelled => "select_cancel",
486            EdgeType::RustBlocking => "blocking",
487            EdgeType::RustLockAcquire => "lock_acquire",
488            EdgeType::RustAwaitWithLock => "await_with_lock",
489        }
490    }
491}
492
493/// An edge in the control flow graph.
494#[derive(Debug, Clone, Serialize, Deserialize)]
495pub struct CFGEdge {
496    /// Source block
497    #[serde(rename = "source_id")]
498    pub from: BlockId,
499    /// Target block
500    #[serde(rename = "target_id")]
501    pub to: BlockId,
502    /// Semantic edge type
503    #[serde(rename = "type")]
504    pub edge_type: EdgeType,
505    /// Human-readable condition (e.g., "x > 0")
506    #[serde(skip_serializing_if = "Option::is_none")]
507    pub condition: Option<String>,
508}
509
510impl CFGEdge {
511    /// Create a new unconditional edge.
512    pub fn unconditional(from: BlockId, to: BlockId) -> Self {
513        Self {
514            from,
515            to,
516            edge_type: EdgeType::Unconditional,
517            condition: None,
518        }
519    }
520
521    /// Create a new edge with a specific type.
522    pub fn new(from: BlockId, to: BlockId, edge_type: EdgeType) -> Self {
523        Self {
524            from,
525            to,
526            edge_type,
527            condition: None,
528        }
529    }
530
531    /// Create a new edge with type and condition.
532    pub fn with_condition(from: BlockId, to: BlockId, edge_type: EdgeType, condition: String) -> Self {
533        Self {
534            from,
535            to,
536            edge_type,
537            condition: Some(condition),
538        }
539    }
540
541    /// Create edge from legacy label format (backward compatibility).
542    ///
543    /// This method allows gradual migration from the old `label: Option<String>`
544    /// format to the new `edge_type` + `condition` format.
545    pub fn from_label(from: BlockId, to: BlockId, label: Option<String>) -> Self {
546        match label.as_deref() {
547            None => Self::unconditional(from, to),
548            Some("true") => Self::new(from, to, EdgeType::True),
549            Some("false") => Self::new(from, to, EdgeType::False),
550            Some("break") => Self::new(from, to, EdgeType::Break),
551            Some("continue") => Self::new(from, to, EdgeType::Continue),
552            Some("exception") => Self::new(from, to, EdgeType::Exception),
553            Some("finally") => Self::new(from, to, EdgeType::Finally),
554            Some("iterate") => Self::new(from, to, EdgeType::Iterate),
555            Some("return") => Self::new(from, to, EdgeType::Return),
556            Some("enter") => Self::new(from, to, EdgeType::Enter),
557            Some("exit") => Self::new(from, to, EdgeType::Exit),
558            Some("case") => Self::new(from, to, EdgeType::Case),
559            Some("pattern_match") => Self::new(from, to, EdgeType::PatternMatch),
560            Some("spawn") => Self::new(from, to, EdgeType::Spawn),
561            Some("defer") => Self::new(from, to, EdgeType::Defer),
562            Some("send") => Self::new(from, to, EdgeType::Send),
563            Some("acquire") => Self::new(from, to, EdgeType::Acquire),
564            Some("success") => Self::new(from, to, EdgeType::Success),
565            Some("close") => Self::new(from, to, EdgeType::Close),
566            Some("else") => Self::new(from, to, EdgeType::Else),
567            Some("Ok/Some") => Self::new(from, to, EdgeType::OkSome),
568            Some("Err/None") => Self::new(from, to, EdgeType::ErrNone),
569            Some("next") => Self::new(from, to, EdgeType::Next),
570            Some("match") => Self::new(from, to, EdgeType::Match),
571            Some("loop") => Self::new(from, to, EdgeType::Loop),
572            Some("back_edge") => Self::new(from, to, EdgeType::BackEdge),
573            Some("done") => Self::new(from, to, EdgeType::Exit),
574            Some("reraise") => Self::new(from, to, EdgeType::Reraise),
575            Some("typed_exception") => Self::new(from, to, EdgeType::TypedException),
576            Some("propagate") => Self::new(from, to, EdgeType::Propagate),
577            Some("cleanup") => Self::new(from, to, EdgeType::Cleanup),
578            Some("no_exception") => Self::new(from, to, EdgeType::NoException),
579            Some("enter_success") => Self::new(from, to, EdgeType::EnterSuccess),
580            Some("enter_exception") => Self::new(from, to, EdgeType::EnterException),
581            // Go-specific edge labels
582            Some("panic") => Self::new(from, to, EdgeType::Panic),
583            Some("recover") => Self::new(from, to, EdgeType::Recover),
584            Some("defer_chain") => Self::new(from, to, EdgeType::DeferChain),
585            Some("err != nil") => Self::new(from, to, EdgeType::ErrorPath),
586            Some("err == nil") => Self::new(from, to, EdgeType::SuccessPath),
587            // Go concurrency edge labels
588            Some("receive") => Self::new(from, to, EdgeType::Receive),
589            Some("select_case") => Self::new(from, to, EdgeType::SelectCase),
590            Some("select_default") => Self::new(from, to, EdgeType::SelectDefault),
591            Some("lock") => Self::new(from, to, EdgeType::Lock),
592            Some("unlock") => Self::new(from, to, EdgeType::Unlock),
593            Some("wait") => Self::new(from, to, EdgeType::WaitSync),
594            Some("close_chan") => Self::new(from, to, EdgeType::ChannelClose),
595            Some("ctx_cancel") => Self::new(from, to, EdgeType::ContextCancel),
596            Some("ctx_continue") => Self::new(from, to, EdgeType::ContextContinue),
597            // Python async/await edge labels
598            Some("await") => Self::new(from, to, EdgeType::Await),
599            Some("async_iterate") => Self::new(from, to, EdgeType::AsyncIterate),
600            Some("async_exhausted") => Self::new(from, to, EdgeType::AsyncExhausted),
601            Some("async_enter_success") => Self::new(from, to, EdgeType::AsyncEnterSuccess),
602            Some("async_enter_exception") => Self::new(from, to, EdgeType::AsyncEnterException),
603            Some("spawn_task") => Self::new(from, to, EdgeType::TaskSpawn),
604            Some("join_task") => Self::new(from, to, EdgeType::TaskJoin),
605            // JavaScript/TypeScript async/await edge labels
606            Some("resolved") => Self::new(from, to, EdgeType::PromiseResolved),
607            Some("rejected") => Self::new(from, to, EdgeType::PromiseRejected),
608            Some("settled") => Self::new(from, to, EdgeType::PromiseSettled),
609            Some("all_branch") => Self::new(from, to, EdgeType::PromiseAllBranch),
610            Some("all_join") => Self::new(from, to, EdgeType::PromiseAllJoin),
611            Some("race_branch") => Self::new(from, to, EdgeType::PromiseRaceBranch),
612            Some("race_winner") => Self::new(from, to, EdgeType::PromiseRaceWinner),
613            Some("for_await_iterate") => Self::new(from, to, EdgeType::ForAwaitIterate),
614            Some("for_await_exhausted") => Self::new(from, to, EdgeType::ForAwaitExhausted),
615            Some("async_yield") => Self::new(from, to, EdgeType::AsyncYield),
616            Some("async_resume") => Self::new(from, to, EdgeType::AsyncResume),
617            // Rust async/await edge labels
618            Some(".await") => Self::new(from, to, EdgeType::RustAwait),
619            Some("resume") => Self::new(from, to, EdgeType::RustResume),
620            Some("rust_spawn") => Self::new(from, to, EdgeType::RustSpawn),
621            Some("rust_join_task") => Self::new(from, to, EdgeType::RustJoinTask),
622            Some("join_branch") => Self::new(from, to, EdgeType::RustJoinBranch),
623            Some("join_complete") => Self::new(from, to, EdgeType::RustJoinComplete),
624            Some("select_branch") => Self::new(from, to, EdgeType::RustSelectBranch),
625            Some("select_winner") => Self::new(from, to, EdgeType::RustSelectWinner),
626            Some("select_cancel") => Self::new(from, to, EdgeType::RustSelectCancelled),
627            Some("blocking") => Self::new(from, to, EdgeType::RustBlocking),
628            Some("lock_acquire") => Self::new(from, to, EdgeType::RustLockAcquire),
629            Some("await_with_lock") => Self::new(from, to, EdgeType::RustAwaitWithLock),
630            Some(s) if s.starts_with("goto ") => {
631                Self::with_condition(from, to, EdgeType::Goto, s.to_string())
632            }
633            Some(other) => {
634                // Unknown label: store as condition with Unconditional type
635                Self::with_condition(from, to, EdgeType::Unconditional, other.to_string())
636            }
637        }
638    }
639
640    /// Get the display label for this edge.
641    ///
642    /// Returns the condition if present, otherwise the default label for the edge type.
643    pub fn label(&self) -> String {
644        self.condition.clone().unwrap_or_else(|| {
645            self.edge_type.default_label().to_string()
646        })
647    }
648}
649
650/// Complete control flow graph for a function.
651#[derive(Debug, Serialize, Deserialize)]
652pub struct CFGInfo {
653    /// Function name
654    pub function_name: String,
655    /// All blocks indexed by ID
656    pub blocks: HashMap<BlockId, CFGBlock>,
657    /// All edges
658    pub edges: Vec<CFGEdge>,
659    /// Entry block
660    pub entry: BlockId,
661    /// Exit blocks
662    pub exits: Vec<BlockId>,
663    /// Total decision points in the function.
664    ///
665    /// Tracks all control flow decisions:
666    /// - if statements (+1)
667    /// - elif clauses (+1 each)
668    /// - while loops (+1)
669    /// - for loops (+1)
670    /// - except handlers (+1 each)
671    /// - assert statements (+1)
672    /// - match cases (+1 each)
673    /// - match guards (+1 each)
674    /// - comprehension if clauses (+1 each)
675    ///
676    /// Used for accurate cyclomatic complexity: decision_points + 1
677    #[serde(default)]
678    pub decision_points: usize,
679    /// Extra decision points from comprehensions (if_clause, for_in_clause).
680    /// These affect cyclomatic complexity but don't create separate CFG blocks
681    /// since comprehensions are expressions with internal iteration/filtering.
682    /// NOTE: Kept for backward compatibility, but now included in decision_points.
683    #[serde(default)]
684    pub comprehension_decision_points: usize,
685    /// Nested function/closure CFGs (name -> CFG).
686    ///
687    /// Stores control flow graphs for inner functions and closures defined
688    /// within this function. Maps the nested function name to its CFGInfo.
689    /// This matches Python's `nested_cfgs: dict[str, CFGInfo]` field.
690    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
691    pub nested_cfgs: HashMap<String, Box<CFGInfo>>,
692    // =========================================================================
693    // Python async/await tracking
694    // =========================================================================
695    /// Whether this is an async function (async def in Python).
696    /// Important for async flow analysis and detecting blocking calls.
697    #[serde(default)]
698    pub is_async: bool,
699    /// Number of await suspension points in this function.
700    /// Each await creates a point where execution yields to the event loop.
701    /// Higher counts indicate more complex async interleaving potential.
702    #[serde(default)]
703    pub await_points: usize,
704    /// Detected blocking calls in async context (e.g., time.sleep in async def).
705    /// These are potential bugs that block the event loop.
706    /// Format: Vec<(function_name, line_number)>
707    #[serde(default, skip_serializing_if = "Vec::is_empty")]
708    pub blocking_calls: Vec<(String, usize)>,
709    // =========================================================================
710    // Performance optimization: Lazy adjacency list cache
711    // =========================================================================
712    /// Lazily-built adjacency cache for O(1) successor/predecessor lookups.
713    ///
714    /// Built on first call to `successors()` or `predecessors()`.
715    /// Amortizes O(E) edge scan across all subsequent lookups.
716    /// Skipped during serialization; rebuilt on demand after deserialization.
717    /// NOTE: This field is public for struct literal construction but should
718    /// be treated as internal. Use `OnceCell::new()` when constructing.
719    #[serde(skip)]
720    pub adjacency_cache: OnceCell<AdjacencyCache>,
721}
722
723impl Clone for CFGInfo {
724    fn clone(&self) -> Self {
725        Self {
726            function_name: self.function_name.clone(),
727            blocks: self.blocks.clone(),
728            edges: self.edges.clone(),
729            entry: self.entry,
730            exits: self.exits.clone(),
731            decision_points: self.decision_points,
732            comprehension_decision_points: self.comprehension_decision_points,
733            nested_cfgs: self.nested_cfgs.clone(),
734            is_async: self.is_async,
735            await_points: self.await_points,
736            blocking_calls: self.blocking_calls.clone(),
737            // Reset cache on clone - will be rebuilt lazily if needed
738            adjacency_cache: OnceCell::new(),
739        }
740    }
741}
742
743impl Default for CFGInfo {
744    fn default() -> Self {
745        Self {
746            function_name: String::new(),
747            blocks: HashMap::new(),
748            edges: Vec::new(),
749            entry: BlockId(0),
750            exits: Vec::new(),
751            decision_points: 0,
752            comprehension_decision_points: 0,
753            nested_cfgs: HashMap::new(),
754            is_async: false,
755            await_points: 0,
756            blocking_calls: Vec::new(),
757            adjacency_cache: OnceCell::new(),
758        }
759    }
760}
761
762impl CFGInfo {
763    /// Create a new CFGInfo with the essential fields.
764    ///
765    /// This constructor handles the internal adjacency cache initialization.
766    /// Use this instead of struct literal syntax.
767    ///
768    /// # Arguments
769    /// * `function_name` - Name of the function this CFG represents
770    /// * `blocks` - Map of block IDs to blocks
771    /// * `edges` - List of edges connecting blocks
772    /// * `entry` - Entry block ID
773    /// * `exits` - List of exit block IDs
774    #[must_use]
775    pub fn new(
776        function_name: String,
777        blocks: HashMap<BlockId, CFGBlock>,
778        edges: Vec<CFGEdge>,
779        entry: BlockId,
780        exits: Vec<BlockId>,
781    ) -> Self {
782        Self {
783            function_name,
784            blocks,
785            edges,
786            entry,
787            exits,
788            decision_points: 0,
789            comprehension_decision_points: 0,
790            nested_cfgs: HashMap::new(),
791            is_async: false,
792            await_points: 0,
793            blocking_calls: Vec::new(),
794            adjacency_cache: OnceCell::new(),
795        }
796    }
797
798    /// Create a CFGInfo with all fields specified.
799    ///
800    /// Use this when you need to set decision points, async info, etc.
801    #[must_use]
802    #[allow(clippy::too_many_arguments)]
803    pub fn with_details(
804        function_name: String,
805        blocks: HashMap<BlockId, CFGBlock>,
806        edges: Vec<CFGEdge>,
807        entry: BlockId,
808        exits: Vec<BlockId>,
809        decision_points: usize,
810        comprehension_decision_points: usize,
811        is_async: bool,
812        await_points: usize,
813        blocking_calls: Vec<(String, usize)>,
814    ) -> Self {
815        Self {
816            function_name,
817            blocks,
818            edges,
819            entry,
820            exits,
821            decision_points,
822            comprehension_decision_points,
823            nested_cfgs: HashMap::new(),
824            is_async,
825            await_points,
826            blocking_calls,
827            adjacency_cache: OnceCell::new(),
828        }
829    }
830}
831
832impl CFGInfo {
833    /// Calculate cyclomatic complexity using decision points.
834    ///
835    /// Uses the formula: M = decision_points + 1
836    ///
837    /// This matches Python's implementation and accurately counts:
838    /// - if statements (+1)
839    /// - elif clauses (+1 each)
840    /// - while loops (+1)
841    /// - for loops (+1)
842    /// - except handlers (+1 each)
843    /// - assert statements (+1)
844    /// - match cases (+1 each)
845    /// - match guards (+1 each)
846    /// - comprehension if clauses (+1 each)
847    ///
848    /// Higher values indicate more complex control flow.
849    /// General guidelines:
850    /// - 1-10: Simple, low risk
851    /// - 11-20: Moderate complexity
852    /// - 21-50: Complex, higher risk
853    /// - 50+: Very high risk, consider refactoring
854    pub fn cyclomatic_complexity(&self) -> usize {
855        self.decision_points + 1
856    }
857
858    /// Calculate cyclomatic complexity using graph formula.
859    ///
860    /// Uses the classic formula: M = E - N + 2P + C where:
861    /// - E = number of edges
862    /// - N = number of nodes (blocks)
863    /// - P = number of connected components (always 1 for a single function)
864    /// - C = comprehension decision points (for internal comprehension control flow)
865    ///
866    /// NOTE: This formula may give different results than `cyclomatic_complexity()`
867    /// due to unreachable blocks (after return/raise) and graph structure variations.
868    /// The decision-point-based method is recommended for consistency with Python.
869    #[allow(dead_code)]
870    pub fn cyclomatic_complexity_graph(&self) -> usize {
871        let edges = self.edges.len();
872        let nodes = self.blocks.len();
873        edges.saturating_sub(nodes) + 2 + self.comprehension_decision_points
874    }
875
876    /// Build adjacency cache from edges (called once, lazily).
877    ///
878    /// Scans all edges once to build both successor and predecessor maps.
879    /// Time: O(E) where E = number of edges
880    /// Space: O(E) for storing all edge endpoints
881    fn build_adjacency(&self) -> AdjacencyCache {
882        let mut successors: HashMap<BlockId, Vec<BlockId>> =
883            HashMap::with_capacity(self.blocks.len());
884        let mut predecessors: HashMap<BlockId, Vec<BlockId>> =
885            HashMap::with_capacity(self.blocks.len());
886
887        for edge in &self.edges {
888            successors.entry(edge.from).or_default().push(edge.to);
889            predecessors.entry(edge.to).or_default().push(edge.from);
890        }
891
892        AdjacencyCache {
893            successors,
894            predecessors,
895        }
896    }
897
898    /// Get the adjacency cache, building it if necessary.
899    ///
900    /// Thread-safe: uses `OnceCell` for lazy initialization.
901    /// First call: O(E) to build cache
902    /// Subsequent calls: O(1)
903    #[inline]
904    fn adjacency(&self) -> &AdjacencyCache {
905        self.adjacency_cache.get_or_init(|| self.build_adjacency())
906    }
907
908    /// Get successors of a block (outgoing edges).
909    ///
910    /// Returns a slice of successor block IDs for O(1) lookup after cache is built.
911    /// First call triggers O(E) cache construction; subsequent calls are O(1).
912    ///
913    /// # Arguments
914    /// * `block_id` - The block to get successors for
915    ///
916    /// # Returns
917    /// Slice of successor BlockIds (empty if block has no outgoing edges)
918    #[allow(dead_code)]
919    pub fn successors(&self, block_id: BlockId) -> &[BlockId] {
920        self.adjacency()
921            .successors
922            .get(&block_id)
923            .map(Vec::as_slice)
924            .unwrap_or(&[])
925    }
926
927    /// Get predecessors of a block (incoming edges).
928    ///
929    /// Returns a slice of predecessor block IDs for O(1) lookup after cache is built.
930    /// First call triggers O(E) cache construction; subsequent calls are O(1).
931    ///
932    /// # Arguments
933    /// * `block_id` - The block to get predecessors for
934    ///
935    /// # Returns
936    /// Slice of predecessor BlockIds (empty if block has no incoming edges)
937    #[allow(dead_code)]
938    pub fn predecessors(&self, block_id: BlockId) -> &[BlockId] {
939        self.adjacency()
940            .predecessors
941            .get(&block_id)
942            .map(Vec::as_slice)
943            .unwrap_or(&[])
944    }
945
946    /// Invalidate the adjacency cache (call after modifying edges).
947    ///
948    /// This method is useful if edges are modified after CFG construction.
949    /// The cache will be rebuilt on the next call to `successors()` or `predecessors()`.
950    ///
951    /// Note: This requires mutable access. If the CFG is shared immutably,
952    /// prefer creating a new CFGInfo instead of modifying edges.
953    #[allow(dead_code)]
954    pub fn invalidate_adjacency_cache(&mut self) {
955        self.adjacency_cache = OnceCell::new();
956    }
957
958    /// Add a nested CFG for an inner function or closure.
959    ///
960    /// # Arguments
961    /// * `name` - The name of the nested function/closure
962    /// * `cfg` - The control flow graph for the nested function
963    #[allow(dead_code)]
964    pub fn add_nested(&mut self, name: String, cfg: CFGInfo) {
965        self.nested_cfgs.insert(name, Box::new(cfg));
966    }
967
968    /// Get nested CFG by name.
969    ///
970    /// # Arguments
971    /// * `name` - The name of the nested function to retrieve
972    ///
973    /// # Returns
974    /// Reference to the nested CFGInfo if found, None otherwise
975    #[allow(dead_code)]
976    pub fn get_nested(&self, name: &str) -> Option<&CFGInfo> {
977        self.nested_cfgs.get(name).map(|boxed| boxed.as_ref())
978    }
979
980    /// Get mutable reference to nested CFG by name.
981    ///
982    /// # Arguments
983    /// * `name` - The name of the nested function to retrieve
984    ///
985    /// # Returns
986    /// Mutable reference to the nested CFGInfo if found, None otherwise
987    #[allow(dead_code)]
988    pub fn get_nested_mut(&mut self, name: &str) -> Option<&mut CFGInfo> {
989        self.nested_cfgs.get_mut(name).map(|boxed| boxed.as_mut())
990    }
991
992    /// Iterate over all nested CFGs.
993    ///
994    /// # Returns
995    /// Iterator yielding (name, cfg) pairs for all nested functions
996    #[allow(dead_code)]
997    pub fn nested_iter(&self) -> impl Iterator<Item = (&String, &CFGInfo)> {
998        self.nested_cfgs.iter().map(|(k, v)| (k, v.as_ref()))
999    }
1000
1001    /// Check if this CFG has any nested functions/closures.
1002    #[allow(dead_code)]
1003    pub fn has_nested(&self) -> bool {
1004        !self.nested_cfgs.is_empty()
1005    }
1006
1007    /// Get the count of nested CFGs.
1008    #[allow(dead_code)]
1009    pub fn nested_count(&self) -> usize {
1010        self.nested_cfgs.len()
1011    }
1012
1013    /// Validate CFG structural invariants.
1014    ///
1015    /// Checks that:
1016    /// - Entry block exists in the blocks map
1017    /// - All exit blocks exist in the blocks map
1018    /// - All edge source and target blocks exist in the blocks map
1019    ///
1020    /// # Errors
1021    ///
1022    /// Returns `CFGError` if any invariant is violated:
1023    /// - `InvalidEntry` if entry block is not in blocks
1024    /// - `InvalidExit` if any exit block is not in blocks
1025    /// - `InvalidEdgeBlock` if any edge references a non-existent block
1026    ///
1027    /// # Example
1028    ///
1029    /// ```
1030    /// use go_brrr::cfg::types::{CFGInfo, CFGBlock, BlockId, BlockType};
1031    /// use std::collections::HashMap;
1032    ///
1033    /// let entry_id = BlockId(0);
1034    /// let mut blocks = HashMap::new();
1035    /// blocks.insert(entry_id, CFGBlock {
1036    ///     id: entry_id,
1037    ///     label: "entry".to_string(),
1038    ///     block_type: BlockType::Entry,
1039    ///     statements: vec![],
1040    ///     func_calls: vec![],
1041    ///     start_line: 1,
1042    ///     end_line: 1,
1043    /// });
1044    ///
1045    /// let cfg = CFGInfo::new(
1046    ///     "test".to_string(),
1047    ///     blocks,
1048    ///     vec![],
1049    ///     entry_id,
1050    ///     vec![entry_id],
1051    /// );
1052    ///
1053    /// assert!(cfg.validate().is_ok());
1054    /// ```
1055    #[allow(dead_code)]
1056    pub fn validate(&self) -> Result<(), CFGError> {
1057        // Check entry block exists
1058        if !self.blocks.contains_key(&self.entry) {
1059            return Err(CFGError::InvalidEntry(self.entry));
1060        }
1061
1062        // Check all exit blocks exist
1063        for exit in &self.exits {
1064            if !self.blocks.contains_key(exit) {
1065                return Err(CFGError::InvalidExit(*exit));
1066            }
1067        }
1068
1069        // Check all edge endpoints exist
1070        for edge in &self.edges {
1071            if !self.blocks.contains_key(&edge.from) {
1072                return Err(CFGError::InvalidEdgeBlock(edge.from));
1073            }
1074            if !self.blocks.contains_key(&edge.to) {
1075                return Err(CFGError::InvalidEdgeBlock(edge.to));
1076            }
1077        }
1078
1079        Ok(())
1080    }
1081
1082    /// Insert a block with duplicate detection.
1083    ///
1084    /// Unlike direct `blocks.insert()`, this method returns an error if
1085    /// a block with the same ID already exists, preventing silent overwrites.
1086    ///
1087    /// # Errors
1088    ///
1089    /// Returns `CFGError::DuplicateBlockId` if a block with the same ID
1090    /// already exists in the blocks map.
1091    ///
1092    /// # Example
1093    ///
1094    /// ```
1095    /// use go_brrr::cfg::types::{CFGInfo, CFGBlock, BlockId, BlockType};
1096    /// use std::collections::HashMap;
1097    ///
1098    /// let mut cfg = CFGInfo::new(
1099    ///     "test".to_string(),
1100    ///     HashMap::new(),
1101    ///     vec![],
1102    ///     BlockId(0),
1103    ///     vec![],
1104    /// );
1105    ///
1106    /// let block = CFGBlock {
1107    ///     id: BlockId(0),
1108    ///     label: "entry".to_string(),
1109    ///     block_type: BlockType::Entry,
1110    ///     statements: vec![],
1111    ///     func_calls: vec![],
1112    ///     start_line: 1,
1113    ///     end_line: 1,
1114    /// };
1115    ///
1116    /// // First insert succeeds
1117    /// assert!(cfg.insert_block(block.clone()).is_ok());
1118    ///
1119    /// // Duplicate insert fails
1120    /// assert!(cfg.insert_block(block).is_err());
1121    /// ```
1122    #[allow(dead_code)]
1123    pub fn insert_block(&mut self, block: CFGBlock) -> Result<(), CFGError> {
1124        if self.blocks.contains_key(&block.id) {
1125            return Err(CFGError::DuplicateBlockId(block.id));
1126        }
1127        self.blocks.insert(block.id, block);
1128        Ok(())
1129    }
1130
1131    /// Safe getter for a block by ID.
1132    ///
1133    /// Returns a reference to the block if it exists, None otherwise.
1134    /// Prefer this over direct `blocks.get()` for API consistency.
1135    ///
1136    /// # Arguments
1137    /// * `id` - The block ID to look up
1138    ///
1139    /// # Returns
1140    /// `Some(&CFGBlock)` if found, `None` otherwise
1141    #[allow(dead_code)]
1142    pub fn get_block(&self, id: BlockId) -> Option<&CFGBlock> {
1143        self.blocks.get(&id)
1144    }
1145
1146    /// Check if a block is the entry block.
1147    ///
1148    /// # Arguments
1149    /// * `id` - The block ID to check
1150    ///
1151    /// # Returns
1152    /// `true` if the block is the entry point, `false` otherwise
1153    #[allow(dead_code)]
1154    pub fn is_entry(&self, id: BlockId) -> bool {
1155        self.entry == id
1156    }
1157
1158    /// Check if a block is an exit block.
1159    ///
1160    /// # Arguments
1161    /// * `id` - The block ID to check
1162    ///
1163    /// # Returns
1164    /// `true` if the block is in the exits list, `false` otherwise
1165    #[allow(dead_code)]
1166    pub fn is_exit(&self, id: BlockId) -> bool {
1167        self.exits.contains(&id)
1168    }
1169
1170    /// Detect if the CFG has any back edges (indicating loops).
1171    ///
1172    /// A back edge is an edge from a node to one of its ancestors in the
1173    /// DFS tree, which indicates a cycle (loop) in the control flow.
1174    ///
1175    /// # Returns
1176    /// `true` if the CFG contains at least one back edge (loop), `false` otherwise
1177    ///
1178    /// # Algorithm
1179    /// Uses depth-first search with a recursion stack to detect cycles.
1180    /// An edge to a node currently in the recursion stack is a back edge.
1181    #[allow(dead_code)]
1182    pub fn has_back_edge(&self) -> bool {
1183        let mut visited = HashSet::new();
1184        let mut stack = HashSet::new();
1185        self.has_back_edge_dfs(self.entry, &mut visited, &mut stack)
1186    }
1187
1188    /// Internal DFS helper for back edge detection.
1189    ///
1190    /// Traverses the graph maintaining both visited set and current recursion stack.
1191    /// A back edge is detected when we encounter a successor already in the stack.
1192    #[allow(dead_code)]
1193    fn has_back_edge_dfs(
1194        &self,
1195        node: BlockId,
1196        visited: &mut HashSet<BlockId>,
1197        stack: &mut HashSet<BlockId>,
1198    ) -> bool {
1199        visited.insert(node);
1200        stack.insert(node);
1201
1202        for &succ in self.successors(node) {
1203            if !visited.contains(&succ) {
1204                if self.has_back_edge_dfs(succ, visited, stack) {
1205                    return true;
1206                }
1207            } else if stack.contains(&succ) {
1208                // Found a back edge: edge to ancestor in DFS tree
1209                return true;
1210            }
1211        }
1212
1213        stack.remove(&node);
1214        false
1215    }
1216
1217    /// Check if the graph is connected from the entry block.
1218    ///
1219    /// A CFG is considered connected if all blocks are reachable from the entry.
1220    /// Unreachable blocks indicate dead code or a malformed graph.
1221    ///
1222    /// # Returns
1223    /// `true` if all blocks are reachable from entry, `false` otherwise
1224    #[allow(dead_code)]
1225    pub fn is_connected(&self) -> bool {
1226        let reachable = self.reachable_from(self.entry);
1227        reachable.len() == self.blocks.len()
1228    }
1229
1230    /// Compute the set of blocks reachable from a given start block.
1231    ///
1232    /// Uses breadth-first traversal to find all blocks reachable via
1233    /// forward edges from the start block.
1234    ///
1235    /// # Arguments
1236    /// * `start` - The block ID to start traversal from
1237    ///
1238    /// # Returns
1239    /// Set of all reachable block IDs (includes the start block)
1240    #[allow(dead_code)]
1241    pub fn reachable_from(&self, start: BlockId) -> HashSet<BlockId> {
1242        let mut reachable = HashSet::new();
1243        let mut queue = vec![start];
1244
1245        while let Some(node) = queue.pop() {
1246            if reachable.insert(node) {
1247                queue.extend(self.successors(node).iter().copied());
1248            }
1249        }
1250
1251        reachable
1252    }
1253
1254    /// Compute a topological ordering of blocks for dataflow analysis.
1255    ///
1256    /// Uses Kahn's algorithm to produce an ordering where each block appears
1257    /// after all its predecessors (excluding back edges). This is essential
1258    /// for correct dataflow analysis where we need to process definitions
1259    /// before their uses.
1260    ///
1261    /// # Returns
1262    /// Vector of BlockIds in topological order. For cyclic graphs (loops),
1263    /// back edges are ignored to produce a valid ordering.
1264    ///
1265    /// # Algorithm
1266    /// 1. Build in-degree map (counting non-back-edge predecessors)
1267    /// 2. Start with blocks that have zero in-degree
1268    /// 3. Process each block, decrementing in-degree of successors
1269    /// 4. Add successors to queue when their in-degree reaches zero
1270    pub fn topological_order(&self) -> Vec<BlockId> {
1271        use std::collections::VecDeque;
1272
1273        // Identify back edges using DFS
1274        let back_edges = self.find_back_edges();
1275
1276        // Build in-degree map (excluding back edges)
1277        let mut in_degree: HashMap<BlockId, usize> = HashMap::new();
1278        for block_id in self.blocks.keys() {
1279            in_degree.insert(*block_id, 0);
1280        }
1281
1282        for edge in &self.edges {
1283            if !back_edges.contains(&(edge.from, edge.to)) {
1284                *in_degree.entry(edge.to).or_insert(0) += 1;
1285            }
1286        }
1287
1288        // Start with zero in-degree blocks (should include entry)
1289        let mut queue: VecDeque<BlockId> = in_degree
1290            .iter()
1291            .filter(|(_, &deg)| deg == 0)
1292            .map(|(&id, _)| id)
1293            .collect();
1294
1295        // Ensure entry is processed first if it has zero in-degree
1296        if let Some(pos) = queue.iter().position(|&id| id == self.entry) {
1297            queue.swap(0, pos);
1298        }
1299
1300        let mut result = Vec::with_capacity(self.blocks.len());
1301
1302        while let Some(block_id) = queue.pop_front() {
1303            result.push(block_id);
1304
1305            // Process successors (excluding back edges)
1306            for edge in &self.edges {
1307                if edge.from == block_id && !back_edges.contains(&(edge.from, edge.to)) {
1308                    if let Some(deg) = in_degree.get_mut(&edge.to) {
1309                        *deg = deg.saturating_sub(1);
1310                        if *deg == 0 {
1311                            queue.push_back(edge.to);
1312                        }
1313                    }
1314                }
1315            }
1316        }
1317
1318        result
1319    }
1320
1321    /// Find all back edges in the CFG using DFS.
1322    ///
1323    /// A back edge is an edge from a node to one of its ancestors in the
1324    /// DFS tree. These indicate loops in the control flow.
1325    ///
1326    /// # Returns
1327    /// Set of (from, to) block ID pairs representing back edges.
1328    fn find_back_edges(&self) -> HashSet<(BlockId, BlockId)> {
1329        let mut back_edges = HashSet::new();
1330        let mut visited = HashSet::new();
1331        let mut stack = HashSet::new();
1332        self.find_back_edges_dfs(self.entry, &mut visited, &mut stack, &mut back_edges);
1333        back_edges
1334    }
1335
1336    /// Internal DFS helper for finding back edges.
1337    fn find_back_edges_dfs(
1338        &self,
1339        node: BlockId,
1340        visited: &mut HashSet<BlockId>,
1341        stack: &mut HashSet<BlockId>,
1342        back_edges: &mut HashSet<(BlockId, BlockId)>,
1343    ) {
1344        visited.insert(node);
1345        stack.insert(node);
1346
1347        for &succ in self.successors(node) {
1348            if !visited.contains(&succ) {
1349                self.find_back_edges_dfs(succ, visited, stack, back_edges);
1350            } else if stack.contains(&succ) {
1351                // Found a back edge
1352                back_edges.insert((node, succ));
1353            }
1354        }
1355
1356        stack.remove(&node);
1357    }
1358
1359    /// Get the block containing a specific line number.
1360    ///
1361    /// Searches through all blocks to find which block contains the given line.
1362    /// When multiple blocks contain the same line (overlapping ranges), prefers:
1363    /// 1. The block with the smallest range (most specific)
1364    /// 2. If equal range, the block that ends at the line (definitions belong to ending block)
1365    ///
1366    /// This is important for CFG-aware DFG analysis where variable definitions
1367    /// at boundary lines need to be assigned to the correct block.
1368    ///
1369    /// # Arguments
1370    /// * `line` - The line number to look up (1-indexed)
1371    ///
1372    /// # Returns
1373    /// The BlockId of the block containing the line, or None if not found.
1374    pub fn block_for_line(&self, line: usize) -> Option<BlockId> {
1375        // Find all blocks that contain this line
1376        let mut candidates: Vec<(BlockId, &CFGBlock)> = self
1377            .blocks
1378            .iter()
1379            .filter(|(_, block)| line >= block.start_line && line <= block.end_line)
1380            .map(|(id, block)| (*id, block))
1381            .collect();
1382
1383        if candidates.is_empty() {
1384            // Fallback: find the closest block that starts at or before the line
1385            let mut best_match: Option<(BlockId, usize)> = None;
1386            for (id, block) in &self.blocks {
1387                if block.start_line <= line {
1388                    match best_match {
1389                        None => best_match = Some((*id, block.start_line)),
1390                        Some((_, best_start)) if block.start_line > best_start => {
1391                            best_match = Some((*id, block.start_line));
1392                        }
1393                        _ => {}
1394                    }
1395                }
1396            }
1397            return best_match.map(|(id, _)| id);
1398        }
1399
1400        if candidates.len() == 1 {
1401            return Some(candidates[0].0);
1402        }
1403
1404        // Multiple blocks contain this line - need to pick the most appropriate one
1405        // For DFG analysis, we want definitions at a line to belong to the block
1406        // where they're executed, not merge/join blocks which are conceptual points.
1407        //
1408        // Priority:
1409        // 1. Prefer blocks that end at this line (the definition belongs there)
1410        // 2. Prefer blocks with later start_line (more specific to this location)
1411        // 3. Prefer larger ranges (actual code blocks over phantom merge points)
1412        candidates.sort_by(|(_, a), (_, b)| {
1413            // First priority: prefer block that ends at this line (definitions belong to ending block)
1414            let ends_at_line_a = a.end_line == line;
1415            let ends_at_line_b = b.end_line == line;
1416            let starts_at_line_a = a.start_line == line;
1417            let starts_at_line_b = b.start_line == line;
1418
1419            // If one block only starts at this line while another ends at it,
1420            // prefer the one that ends (the definition is in the ending block)
1421            match (ends_at_line_a && !starts_at_line_a, ends_at_line_b && !starts_at_line_b) {
1422                (true, false) => return std::cmp::Ordering::Less,
1423                (false, true) => return std::cmp::Ordering::Greater,
1424                _ => {}
1425            }
1426
1427            let range_a = a.end_line.saturating_sub(a.start_line);
1428            let range_b = b.end_line.saturating_sub(b.start_line);
1429
1430            // Prefer larger range (actual code blocks over single-line merge points)
1431            match range_b.cmp(&range_a) {
1432                std::cmp::Ordering::Equal => {
1433                    // If same range, prefer block with later start
1434                    b.start_line.cmp(&a.start_line)
1435                }
1436                other => other,
1437            }
1438        });
1439
1440        Some(candidates[0].0)
1441    }
1442}