Skip to main content

tldr_core/dataflow/
available.rs

1//! Available Expressions Analysis Types
2//!
3//! This module provides the core types for available expressions dataflow analysis:
4//!
5//! - `Expression`: A computed expression that can be tracked for CSE
6//! - `BlockExpressions`: Gen/kill sets for a single CFG block
7//! - `AvailableExprsInfo`: Analysis results with query methods
8//!
9//! # Capabilities Implemented
10//!
11//! - CAP-AE-01: Expression struct (frozen/hashable by text only)
12//! - CAP-AE-02: Commutative expression normalization
13//! - CAP-AE-03 to CAP-AE-07: BlockExpressions (gen/kill per block)
14//! - CAP-AE-08 to CAP-AE-11: AvailableExprsInfo with query methods
15//! - CAP-AE-06, CAP-AE-07: redundant_computations with intra-block kill precision
16//! - CAP-AE-10: get_available_at_line query method
17//!
18//! # TIGER Mitigations
19//!
20//! - TIGER-PASS2-4: Use IndexMap or sort keys for deterministic JSON output
21//! - ELEPHANT-PASS1-5: Apply trim() to operands in normalize_expression
22//! - TIGER-PASS1-3: Commutative ops conflate bitwise/logical (documented - safe
23//!   when side-effects filtered via function call exclusion in CAP-AE-12)
24//! - TIGER-PASS1-12: Intra-block kill tracking algorithm for redundant_computations
25//! - TIGER-PASS3-3: Stop processing statements after return/throw in gen/kill
26
27use std::collections::{HashMap, HashSet};
28use std::hash::{Hash, Hasher};
29
30use indexmap::IndexMap;
31use serde::{Deserialize, Serialize};
32
33use super::types::{build_predecessors, reverse_postorder, validate_cfg, BlockId, DataflowError};
34use crate::types::{CfgInfo, DfgInfo, RefType, VarRef};
35
36// =============================================================================
37// Confidence and Uncertain Types
38// =============================================================================
39
40/// Confidence level for analysis results.
41///
42/// Indicates how much trust should be placed in the analysis output:
43/// - `Low`: Analysis couldn't determine much (many uncertain items)
44/// - `Medium`: Analysis produced some results but with gaps
45/// - `High`: Analysis produced comprehensive results with few uncertainties
46#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
47#[serde(rename_all = "lowercase")]
48pub enum Confidence {
49    /// Low confidence - many items uncertain
50    #[default]
51    Low,
52    /// Medium confidence - some items uncertain
53    Medium,
54    /// High confidence - few or no items uncertain
55    High,
56}
57
58/// An expression that was skipped from confirmed results but might be relevant.
59///
60/// Instead of silently discarding expressions that contain function calls or
61/// other impure constructs, we collect them here so consumers can see what
62/// was skipped and why.
63///
64/// # Example
65///
66/// ```rust,ignore
67/// UncertainFinding {
68///     expr: "obj.length + x".to_string(),
69///     line: 15,
70///     reason: "contains method access - purity unknown".to_string(),
71/// }
72/// ```
73#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
74pub struct UncertainFinding {
75    /// The expression text that was skipped
76    pub expr: String,
77    /// Source line where the expression appears
78    pub line: usize,
79    /// Why this expression couldn't be confirmed
80    pub reason: String,
81}
82
83// =============================================================================
84// Constants
85// =============================================================================
86
87/// CAP-AE-02: Commutative operators that allow operand reordering.
88///
89/// For these operators, `normalize_expression` will sort operands alphabetically
90/// to ensure "a + b" and "b + a" produce the same normalized text.
91///
92/// # TIGER-PASS1-3 Note
93///
94/// This list conflates bitwise and logical operators (e.g., `&` vs `and`).
95/// This is safe because:
96/// 1. Function calls are excluded from CSE (CAP-AE-12)
97/// 2. Side effects are already filtered out
98/// 3. The expression text will still differ in practice
99pub const COMMUTATIVE_OPS: &[&str] = &["+", "*", "==", "!=", "and", "or", "&", "|", "^"];
100
101// =============================================================================
102// Expression
103// =============================================================================
104
105/// Represents a computed expression for availability analysis.
106///
107/// An expression is a binary operation like `a + b` that can be tracked
108/// to detect Common Subexpression Elimination (CSE) opportunities.
109///
110/// # CAP-AE-01: Equality and Hashing
111///
112/// **Critical**: Equality and hashing are based on `text` only, not `line`.
113/// This allows expressions with the same normalized text to be considered
114/// equal regardless of where they appear in the code.
115///
116/// # Example
117///
118/// ```rust,ignore
119/// let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
120/// let expr2 = Expression::new("a + b", vec!["a", "b"], 100);
121///
122/// // Same text means equal, even with different lines
123/// assert_eq!(expr1, expr2);
124///
125/// // Can be used in HashSet (only one entry)
126/// let mut set = HashSet::new();
127/// set.insert(expr1);
128/// set.insert(expr2);
129/// assert_eq!(set.len(), 1);
130/// ```
131#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct Expression {
133    /// Normalized expression string (e.g., "a + b").
134    /// This is the canonical form after applying normalization rules.
135    pub text: String,
136
137    /// Variables used in this expression (sorted alphabetically).
138    /// Used for kill detection - if any operand is redefined, the expression is killed.
139    pub operands: Vec<String>,
140
141    /// Source line where this expression first appears.
142    /// Note: This field is NOT used in equality or hashing (CAP-AE-01).
143    pub line: usize,
144}
145
146impl PartialEq for Expression {
147    fn eq(&self, other: &Self) -> bool {
148        // CAP-AE-01: Equality based on text only
149        self.text == other.text
150    }
151}
152
153impl Eq for Expression {}
154
155impl Hash for Expression {
156    fn hash<H: Hasher>(&self, state: &mut H) {
157        // CAP-AE-01: Hash based on text only
158        self.text.hash(state);
159    }
160}
161
162impl Expression {
163    /// Create a new Expression with the given components.
164    ///
165    /// # Arguments
166    ///
167    /// * `text` - The normalized expression text
168    /// * `operands` - Variables used in the expression (will be sorted)
169    /// * `line` - Source line where expression appears
170    ///
171    /// # Example
172    ///
173    /// ```rust,ignore
174    /// let expr = Expression::new("a + b", vec!["a", "b"], 5);
175    /// assert_eq!(expr.text, "a + b");
176    /// assert_eq!(expr.operands, vec!["a", "b"]);
177    /// assert_eq!(expr.line, 5);
178    /// ```
179    pub fn new(text: impl Into<String>, operands: Vec<impl Into<String>>, line: usize) -> Self {
180        let mut ops: Vec<String> = operands.into_iter().map(|s| s.into()).collect();
181        ops.sort();
182        Self {
183            text: text.into(),
184            operands: ops,
185            line,
186        }
187    }
188
189    /// Check if redefining the given variable kills this expression.
190    ///
191    /// An expression is killed when any of its operands is redefined.
192    /// This is the foundation of the "kill" set in dataflow analysis.
193    ///
194    /// # Arguments
195    ///
196    /// * `var` - The variable being redefined
197    ///
198    /// # Returns
199    ///
200    /// `true` if this expression uses `var` and would be killed by its redefinition.
201    ///
202    /// # Example
203    ///
204    /// ```rust,ignore
205    /// let expr = Expression::new("a + b", vec!["a", "b"], 1);
206    /// assert!(expr.is_killed_by("a"));  // Uses 'a'
207    /// assert!(expr.is_killed_by("b"));  // Uses 'b'
208    /// assert!(!expr.is_killed_by("c")); // Doesn't use 'c'
209    /// ```
210    pub fn is_killed_by(&self, var: &str) -> bool {
211        self.operands.iter().any(|op| op == var)
212    }
213}
214
215// =============================================================================
216// Normalization
217// =============================================================================
218
219/// CAP-AE-02: Normalize binary expression to canonical form.
220///
221/// For commutative operators, operands are sorted alphabetically to ensure
222/// that expressions like "a + b" and "b + a" produce the same normalized text.
223///
224/// # TIGER Mitigations
225///
226/// - ELEPHANT-PASS1-5: Applies `trim()` to operands for whitespace normalization
227/// - TIGER-PASS1-3: Documents that bitwise/logical conflation is safe (see COMMUTATIVE_OPS)
228///
229/// # Arguments
230///
231/// * `left` - Left operand (will be trimmed)
232/// * `op` - The binary operator
233/// * `right` - Right operand (will be trimmed)
234///
235/// # Returns
236///
237/// Normalized expression string in the form "left op right" or "right op left"
238/// (for commutative operators, alphabetically sorted).
239///
240/// # Examples
241///
242/// ```rust,ignore
243/// // Commutative operators - operands sorted
244/// assert_eq!(normalize_expression("b", "+", "a"), "a + b");
245/// assert_eq!(normalize_expression("y", "*", "x"), "x * y");
246/// assert_eq!(normalize_expression("foo", "==", "bar"), "bar == foo");
247///
248/// // Non-commutative operators - order preserved
249/// assert_eq!(normalize_expression("a", "-", "b"), "a - b");
250/// assert_eq!(normalize_expression("x", "/", "y"), "x / y");
251///
252/// // Whitespace is trimmed (ELEPHANT-PASS1-5)
253/// assert_eq!(normalize_expression("  a  ", "+", "  b  "), "a + b");
254/// ```
255pub fn normalize_expression(left: &str, op: &str, right: &str) -> String {
256    // ELEPHANT-PASS1-5: Apply trim() for whitespace normalization
257    let left = left.trim();
258    let right = right.trim();
259
260    if COMMUTATIVE_OPS.contains(&op) {
261        // Sort operands alphabetically for commutative operators
262        let mut operands = [left, right];
263        operands.sort();
264        format!("{} {} {}", operands[0], op, operands[1])
265    } else {
266        // Preserve order for non-commutative operators
267        format!("{} {} {}", left, op, right)
268    }
269}
270
271// =============================================================================
272// BlockExpressions
273// =============================================================================
274
275/// Expressions generated and killed in a single CFG block.
276///
277/// This is the per-block analysis result used during fixpoint iteration:
278///
279/// - **gen**: Expressions computed in this block (before any operand is killed)
280/// - **kill**: Variables redefined in this block (kills expressions using that var)
281///
282/// # Dataflow Equations
283///
284/// ```text
285/// avail_out[B] = gen[B] | (avail_in[B] - killed_by[B])
286/// ```
287///
288/// where `killed_by[B]` = expressions whose operands are in `kill[B]`
289#[derive(Debug, Clone, Default)]
290pub struct BlockExpressions {
291    /// Expressions computed in this block (before any operand is killed).
292    ///
293    /// An expression is in `gen` if it's computed and none of its operands
294    /// are redefined between the start of the block and its computation.
295    pub gen: HashSet<Expression>,
296
297    /// Variables redefined in this block.
298    ///
299    /// Used to compute the kill set: any expression using a variable in
300    /// `kill` is no longer available after this block.
301    pub kill: HashSet<String>,
302}
303
304impl BlockExpressions {
305    /// Create a new empty BlockExpressions.
306    pub fn new() -> Self {
307        Self::default()
308    }
309}
310
311// =============================================================================
312// ExprInstance - Expression with Block Context
313// =============================================================================
314
315/// An expression instance with its block context for intra-block tracking.
316///
317/// This struct pairs an expression with the block it appears in,
318/// enabling proper kill tracking within blocks.
319///
320/// # CAP-AE-07 Support
321///
322/// Used by `redundant_computations()` to track expressions with their
323/// block IDs for intra-block kill handling.
324#[derive(Debug, Clone)]
325pub struct ExprInstance {
326    /// The expression itself
327    pub expr: Expression,
328    /// Block ID where this expression instance appears
329    pub block_id: BlockId,
330}
331
332impl ExprInstance {
333    /// Create a new expression instance.
334    pub fn new(expr: Expression, block_id: BlockId) -> Self {
335        Self { expr, block_id }
336    }
337}
338
339// =============================================================================
340// AvailableExprsInfo
341// =============================================================================
342
343/// Available expressions analysis results.
344///
345/// An expression is available at a program point if:
346/// 1. It has been computed on EVERY path reaching that point (MUST analysis)
347/// 2. None of its operands have been redefined since computation
348///
349/// # Capabilities
350///
351/// - CAP-AE-08: `is_available()` - check availability at block entry
352/// - CAP-AE-09: `is_available_at_exit()` - check availability at block exit
353/// - CAP-AE-10: `get_available_at_line()` - query by source line (placeholder)
354/// - CAP-AE-11: `to_json()` - serialize to JSON-compatible structure
355///
356/// # Example
357///
358/// ```rust,ignore
359/// let info = compute_available_exprs(&cfg, &dfg)?;
360///
361/// // Check if expression is available at block entry
362/// let expr = Expression::new("a + b", vec!["a", "b"], 1);
363/// if info.is_available(2, &expr) {
364///     println!("a + b is available at entry to block 2");
365/// }
366///
367/// // Find CSE opportunities
368/// for (text, first_line, redundant_line) in info.redundant_computations() {
369///     println!("{} first at line {}, redundant at line {}", text, first_line, redundant_line);
370/// }
371/// ```
372#[derive(Debug, Clone, Default, Serialize, Deserialize)]
373pub struct AvailableExprsInfo {
374    /// Expressions available at block entry.
375    ///
376    /// `avail_in[B] = intersection(avail_out[P] for P in predecessors[B])`
377    ///
378    /// For the entry block, this is always empty.
379    #[serde(
380        serialize_with = "serialize_avail_map",
381        deserialize_with = "deserialize_avail_map"
382    )]
383    pub avail_in: HashMap<BlockId, HashSet<Expression>>,
384
385    /// Expressions available at block exit.
386    ///
387    /// `avail_out[B] = gen[B] | (avail_in[B] - killed_by_block[B])`
388    #[serde(
389        serialize_with = "serialize_avail_map",
390        deserialize_with = "deserialize_avail_map"
391    )]
392    pub avail_out: HashMap<BlockId, HashSet<Expression>>,
393
394    /// All unique expressions found in the function.
395    pub all_exprs: HashSet<Expression>,
396
397    /// Entry block ID.
398    pub entry_block: BlockId,
399
400    /// All expression instances including duplicates (for CSE detection).
401    ///
402    /// This preserves the order expressions appear in the code,
403    /// which is needed for `redundant_computations()`.
404    #[serde(skip)]
405    pub expr_instances: Vec<Expression>,
406
407    /// Expression instances with block context for intra-block tracking.
408    ///
409    /// CAP-AE-07: Used for proper intra-block kill handling in redundant_computations().
410    #[serde(skip)]
411    pub expr_instances_with_blocks: Vec<ExprInstance>,
412
413    /// Definitions (variable assignments) per line for intra-block kill tracking.
414    ///
415    /// TIGER-PASS1-12: Maps line number to set of variables defined on that line.
416    #[serde(skip)]
417    pub defs_per_line: HashMap<usize, HashSet<String>>,
418
419    /// Maps lines to their containing block IDs.
420    ///
421    /// Used by get_available_at_line to find the containing block.
422    #[serde(skip)]
423    pub line_to_block: HashMap<usize, BlockId>,
424
425    /// Expressions that were skipped from confirmed results but might be relevant.
426    ///
427    /// Contains expressions that were filtered out (e.g., function calls) but
428    /// could still be informative for consumers.
429    #[serde(default, skip_serializing_if = "Vec::is_empty")]
430    pub uncertain_exprs: Vec<UncertainFinding>,
431
432    /// Overall confidence level for this analysis result.
433    ///
434    /// Computed based on the ratio of confirmed vs uncertain expressions.
435    #[serde(default)]
436    pub confidence: Confidence,
437}
438
439/// Custom serializer for avail_in/avail_out maps.
440///
441/// TIGER-PASS2-4: Ensures deterministic JSON output by sorting keys.
442fn serialize_avail_map<S>(
443    map: &HashMap<BlockId, HashSet<Expression>>,
444    serializer: S,
445) -> Result<S::Ok, S::Error>
446where
447    S: serde::Serializer,
448{
449    // Sort by block ID for deterministic output
450    let sorted: IndexMap<String, Vec<&Expression>> = map
451        .iter()
452        .map(|(k, v)| {
453            let mut exprs: Vec<_> = v.iter().collect();
454            // Also sort expressions by text for determinism
455            exprs.sort_by_key(|e| &e.text);
456            (k.to_string(), exprs)
457        })
458        .collect::<Vec<_>>()
459        .into_iter()
460        .collect();
461
462    // Use IndexMap's ordered serialization
463    let ordered: IndexMap<_, _> = sorted.into_iter().collect();
464    ordered.serialize(serializer)
465}
466
467/// Custom deserializer for avail_in/avail_out maps.
468fn deserialize_avail_map<'de, D>(
469    deserializer: D,
470) -> Result<HashMap<BlockId, HashSet<Expression>>, D::Error>
471where
472    D: serde::Deserializer<'de>,
473{
474    let map: HashMap<String, Vec<Expression>> = HashMap::deserialize(deserializer)?;
475    Ok(map
476        .into_iter()
477        .map(|(k, v)| {
478            let block_id: BlockId = k.parse().unwrap_or(0);
479            let exprs: HashSet<Expression> = v.into_iter().collect();
480            (block_id, exprs)
481        })
482        .collect())
483}
484
485impl AvailableExprsInfo {
486    /// Create an empty AvailableExprsInfo for the given entry block.
487    ///
488    /// This is used when there are no expressions to analyze.
489    pub fn empty(entry_block: BlockId) -> Self {
490        Self {
491            avail_in: HashMap::new(),
492            avail_out: HashMap::new(),
493            all_exprs: HashSet::new(),
494            entry_block,
495            expr_instances: Vec::new(),
496            expr_instances_with_blocks: Vec::new(),
497            defs_per_line: HashMap::new(),
498            line_to_block: HashMap::new(),
499            uncertain_exprs: Vec::new(),
500            confidence: Confidence::default(),
501        }
502    }
503
504    /// Create a new AvailableExprsInfo with the given entry block.
505    pub fn new(entry_block: BlockId) -> Self {
506        Self::empty(entry_block)
507    }
508
509    /// CAP-AE-08: Check if expression is available at entry to block.
510    ///
511    /// # Arguments
512    ///
513    /// * `block` - The block ID to check
514    /// * `expr` - The expression to check for availability
515    ///
516    /// # Returns
517    ///
518    /// `true` if the expression is in `avail_in[block]`, `false` otherwise.
519    pub fn is_available(&self, block: BlockId, expr: &Expression) -> bool {
520        self.avail_in
521            .get(&block)
522            .is_some_and(|set| set.contains(expr))
523    }
524
525    /// CAP-AE-09: Check if expression is available at exit of block.
526    ///
527    /// # Arguments
528    ///
529    /// * `block` - The block ID to check
530    /// * `expr` - The expression to check for availability
531    ///
532    /// # Returns
533    ///
534    /// `true` if the expression is in `avail_out[block]`, `false` otherwise.
535    pub fn is_available_at_exit(&self, block: BlockId, expr: &Expression) -> bool {
536        self.avail_out
537            .get(&block)
538            .is_some_and(|set| set.contains(expr))
539    }
540
541    /// CAP-AE-06, CAP-AE-07: Find expressions computed when already available (CSE opportunities).
542    ///
543    /// Returns a list of redundant computations where an expression is
544    /// recomputed when it's already available from a previous computation.
545    ///
546    /// # Algorithm (TIGER-PASS1-12: Intra-block Precision)
547    ///
548    /// 1. Track `killed_so_far` set within each block
549    /// 2. For each expr_instance in source order:
550    ///    a. Check if expr in `avail_in[block]` AND not `killed_so_far`
551    ///    b. If redundant, add to result
552    ///    c. Update `killed_so_far` with defs after this line
553    ///
554    /// This correctly handles cases like:
555    /// ```text
556    /// x = a + b;  // first computation
557    /// a = 5;      // kills "a + b"
558    /// y = a + b;  // NOT redundant (killed between)
559    /// ```
560    ///
561    /// # Returns
562    ///
563    /// `Vec<(expr_text, first_line, redundant_line)>` sorted by redundant_line.
564    ///
565    /// # Example
566    ///
567    /// ```rust,ignore
568    /// let redundant = info.redundant_computations();
569    /// for (text, first, second) in redundant {
570    ///     println!("{} at line {} is redundant (first at {})", text, second, first);
571    /// }
572    /// ```
573    pub fn redundant_computations(&self) -> Vec<(String, usize, usize)> {
574        let mut redundant = Vec::new();
575
576        // If we have expr_instances_with_blocks (Phase 4 data), use intra-block precision
577        if !self.expr_instances_with_blocks.is_empty() {
578            // Track first occurrence of each expression (text -> (line, block))
579            let mut first_seen: HashMap<String, (usize, BlockId)> = HashMap::new();
580
581            // Group expressions by block for intra-block processing
582            let mut block_exprs: HashMap<BlockId, Vec<&ExprInstance>> = HashMap::new();
583            for inst in &self.expr_instances_with_blocks {
584                block_exprs.entry(inst.block_id).or_default().push(inst);
585            }
586
587            // Sort expressions within each block by line
588            for exprs in block_exprs.values_mut() {
589                exprs.sort_by_key(|e| e.expr.line);
590            }
591
592            // Process blocks in order (by entry block first, then others)
593            let mut block_ids: Vec<_> = block_exprs.keys().copied().collect();
594            block_ids.sort();
595
596            for &block_id in &block_ids {
597                if let Some(exprs) = block_exprs.get(&block_id) {
598                    // Track what expressions have been generated in this block with their line
599                    let mut gen_in_block: HashMap<String, usize> = HashMap::new();
600
601                    for inst in exprs {
602                        let expr = &inst.expr;
603                        let line = expr.line;
604
605                        // TIGER-PASS1-12: Check if any operand was killed between
606                        // first occurrence and this line
607                        let mut is_redundant = false;
608
609                        if let Some(&(first_line, first_block)) = first_seen.get(&expr.text) {
610                            // Expression was seen before - check for kills in between
611                            let start_line = if first_block == block_id {
612                                // Same block - check from first occurrence to now
613                                first_line + 1
614                            } else {
615                                // Different block - we'd need inter-block analysis
616                                // For now, trust avail_in and check from block start
617                                1 // Conservative: check all lines
618                            };
619
620                            // Check if any operand was killed between first and current
621                            let mut killed = false;
622                            for check_line in start_line..line {
623                                if let Some(defs) = self.defs_per_line.get(&check_line) {
624                                    if expr.operands.iter().any(|op| defs.contains(op)) {
625                                        killed = true;
626                                        break;
627                                    }
628                                }
629                            }
630
631                            if !killed && first_line != line {
632                                // Expression is available (not killed) - this is redundant
633                                is_redundant = true;
634                                redundant.push((expr.text.clone(), first_line, line));
635                            }
636                        }
637
638                        // Also check if expression was generated earlier in this block
639                        // and no kills happened since then
640                        if !is_redundant {
641                            if let Some(&gen_line) = gen_in_block.get(&expr.text) {
642                                let mut killed = false;
643                                for check_line in (gen_line + 1)..line {
644                                    if let Some(defs) = self.defs_per_line.get(&check_line) {
645                                        if expr.operands.iter().any(|op| defs.contains(op)) {
646                                            killed = true;
647                                            break;
648                                        }
649                                    }
650                                }
651
652                                if !killed && gen_line != line {
653                                    // Check also if first_seen exists for proper first_line
654                                    let first_line = first_seen
655                                        .get(&expr.text)
656                                        .map(|(l, _)| *l)
657                                        .unwrap_or(gen_line);
658                                    if first_line != line {
659                                        redundant.push((expr.text.clone(), first_line, line));
660                                    }
661                                }
662                            }
663                        }
664
665                        // Record first occurrence globally
666                        if !first_seen.contains_key(&expr.text) {
667                            first_seen.insert(expr.text.clone(), (line, block_id));
668                        }
669
670                        // Record in this block's gen set
671                        gen_in_block.entry(expr.text.clone()).or_insert(line);
672                    }
673                }
674            }
675        } else {
676            // Fall back to simple implementation for backward compatibility
677            let mut seen: HashMap<String, usize> = HashMap::new();
678
679            for expr in &self.expr_instances {
680                if let Some(&first_line) = seen.get(&expr.text) {
681                    redundant.push((expr.text.clone(), first_line, expr.line));
682                } else {
683                    seen.insert(expr.text.clone(), expr.line);
684                }
685            }
686        }
687
688        // Sort by redundant line for deterministic output
689        redundant.sort_by_key(|(_, _, line)| *line);
690        redundant
691    }
692
693    /// CAP-AE-10: Get expressions available at a specific source line.
694    ///
695    /// Returns the set of expressions that are available at the given source line.
696    /// This is determined by finding the containing block and returning its `avail_in`,
697    /// adjusted for any kills that occur before the line within the block.
698    ///
699    /// # Arguments
700    ///
701    /// * `line` - The source line number to query
702    /// * `cfg` - The control flow graph (used to find the containing block)
703    ///
704    /// # Returns
705    ///
706    /// `HashSet<Expression>` containing all expressions available at that line.
707    ///
708    /// # Example
709    ///
710    /// ```rust,ignore
711    /// let available = info.get_available_at_line(5, &cfg);
712    /// for expr in &available {
713    ///     println!("Available at line 5: {}", expr.text);
714    /// }
715    /// ```
716    pub fn get_available_at_line(&self, line: usize, cfg: &CfgInfo) -> HashSet<Expression> {
717        // Find which block contains this line
718        let block_id = if let Some(&bid) = self.line_to_block.get(&line) {
719            bid
720        } else {
721            // Fall back to searching the CFG
722            let found = cfg.blocks.iter().find(|b| {
723                let (start, end) = b.lines;
724                (start as usize) <= line && line <= (end as usize)
725            });
726            match found {
727                Some(block) => block.id,
728                None => return HashSet::new(),
729            }
730        };
731
732        // Start with avail_in for this block
733        let mut available = self.avail_in.get(&block_id).cloned().unwrap_or_default();
734
735        // Find the block's start line to compute intra-block kills
736        let block_start = cfg
737            .blocks
738            .iter()
739            .find(|b| b.id == block_id)
740            .map(|b| b.lines.0 as usize)
741            .unwrap_or(line);
742
743        // Remove expressions killed by definitions between block start and this line
744        // (TIGER-PASS1-12: Intra-block precision)
745        let mut killed: HashSet<String> = HashSet::new();
746        for check_line in block_start..line {
747            if let Some(defs) = self.defs_per_line.get(&check_line) {
748                for def in defs {
749                    killed.insert(def.clone());
750                }
751            }
752        }
753
754        // Remove killed expressions
755        available.retain(|expr| !expr.operands.iter().any(|op| killed.contains(op)));
756
757        available
758    }
759
760    /// CAP-AE-11: Serialize to JSON-compatible structure.
761    ///
762    /// # TIGER-PASS2-4 Mitigation
763    ///
764    /// Keys are sorted for deterministic output:
765    /// - Block IDs are converted to strings and sorted numerically
766    /// - Expressions within each block are sorted by text
767    ///
768    /// # Output Format
769    ///
770    /// ```json
771    /// {
772    ///   "avail_in": {
773    ///     "0": [],
774    ///     "1": [{"text": "a + b", "operands": ["a", "b"], "line": 2}]
775    ///   },
776    ///   "avail_out": {...},
777    ///   "all_expressions": [...],
778    ///   "entry_block": 0,
779    ///   "redundant_computations": [{"expr": "a + b", "first_at": 2, "redundant_at": 4}]
780    /// }
781    /// ```
782    pub fn to_json(&self) -> serde_json::Value {
783        // TIGER-PASS2-4: Sort keys for deterministic output
784
785        // Helper to convert avail map to sorted JSON
786        let avail_map_to_json = |map: &HashMap<BlockId, HashSet<Expression>>| -> IndexMap<String, Vec<serde_json::Value>> {
787            let mut sorted: Vec<_> = map.iter().collect();
788            sorted.sort_by_key(|(k, _)| *k);
789            sorted
790                .into_iter()
791                .map(|(k, v)| {
792                    let mut exprs: Vec<_> = v.iter().collect();
793                    exprs.sort_by_key(|e| &e.text);
794                    let expr_values: Vec<_> = exprs
795                        .into_iter()
796                        .map(|e| {
797                            serde_json::json!({
798                                "text": e.text,
799                                "operands": e.operands,
800                                "line": e.line,
801                            })
802                        })
803                        .collect();
804                    (k.to_string(), expr_values)
805                })
806                .collect()
807        };
808
809        let avail_in = avail_map_to_json(&self.avail_in);
810        let avail_out = avail_map_to_json(&self.avail_out);
811
812        // Sort all_expressions by text
813        let mut all_exprs: Vec<_> = self.all_exprs.iter().collect();
814        all_exprs.sort_by_key(|e| &e.text);
815        let all_expressions: Vec<_> = all_exprs
816            .into_iter()
817            .map(|e| {
818                serde_json::json!({
819                    "text": e.text,
820                    "operands": e.operands,
821                    "line": e.line,
822                })
823            })
824            .collect();
825
826        // Redundant computations (already sorted by redundant_line)
827        let redundant: Vec<_> = self
828            .redundant_computations()
829            .into_iter()
830            .map(|(expr, first, redundant)| {
831                serde_json::json!({
832                    "expr": expr,
833                    "first_at": first,
834                    "redundant_at": redundant,
835                })
836            })
837            .collect();
838
839        // Uncertain expressions
840        let uncertain: Vec<_> = self
841            .uncertain_exprs
842            .iter()
843            .map(|uf| {
844                serde_json::json!({
845                    "expr": uf.expr,
846                    "line": uf.line,
847                    "reason": uf.reason,
848                })
849            })
850            .collect();
851
852        let confidence_str = match self.confidence {
853            Confidence::Low => "low",
854            Confidence::Medium => "medium",
855            Confidence::High => "high",
856        };
857
858        serde_json::json!({
859            "avail_in": avail_in,
860            "avail_out": avail_out,
861            "all_expressions": all_expressions,
862            "entry_block": self.entry_block,
863            "redundant_computations": redundant,
864            "uncertain_exprs": uncertain,
865            "confidence": confidence_str,
866        })
867    }
868}
869
870// =============================================================================
871// Phase 2: Expression Extraction from DFG
872// =============================================================================
873
874/// Binary operators that can form expressions for CSE analysis.
875///
876/// These are the operators we'll look for when parsing source lines
877/// to extract binary expressions like `a + b`, `x * y`, etc.
878const BINARY_OPS: &[&str] = &[
879    // Arithmetic
880    "+", "-", "*", "/", "%", "//", "**", // Comparison
881    "==", "!=", "<", ">", "<=", ">=", // Logical
882    "and", "or", "&&", "||", // Bitwise
883    "&", "|", "^", "<<", ">>",
884];
885
886/// CAP-AE-12: Check if text looks like a function call.
887///
888/// Function calls have side effects and should be excluded from CSE analysis.
889/// This checks for patterns like:
890/// - `foo(`
891/// - `bar.baz(`
892/// - `obj.method(`
893///
894/// # Arguments
895///
896/// * `text` - The text to check for function call patterns
897///
898/// # Returns
899///
900/// `true` if the text appears to contain a function call.
901///
902/// # TIGER Mitigations
903///
904/// - TIGER-PASS1-10: Part of expression extraction algorithm
905///
906/// # Examples
907///
908/// ```rust,ignore
909/// assert!(is_function_call("foo(x)"));
910/// assert!(is_function_call("bar.baz(1, 2)"));
911/// assert!(!is_function_call("a + b"));
912/// assert!(!is_function_call("x * y"));
913/// ```
914pub fn is_function_call(text: &str) -> bool {
915    let trimmed = text.trim();
916
917    // Look for function call pattern: identifier followed by (
918    // This handles: foo(, bar.baz(, obj.method(
919
920    // Find opening paren
921    if let Some(paren_idx) = trimmed.find('(') {
922        // Check if what comes before the paren looks like an identifier/method access
923        let before_paren = &trimmed[..paren_idx];
924        let before_trimmed = before_paren.trim_end();
925
926        if before_trimmed.is_empty() {
927            return false;
928        }
929
930        // Check if it ends with an identifier character (alphanumeric or underscore)
931        // This would indicate a function call like `foo(` or `bar.baz(`
932        if let Some(last_char) = before_trimmed.chars().last() {
933            if last_char.is_alphanumeric() || last_char == '_' {
934                return true;
935            }
936        }
937    }
938
939    false
940}
941
942/// Detect if a source line contains an expression that was skipped due to
943/// function calls or method accesses, and return it as an uncertain finding.
944///
945/// This is called when `parse_expression_from_line` returns `None`, to determine
946/// if the line was skipped because it contained impure constructs (function calls,
947/// method accesses) rather than simply not being a binary expression.
948///
949/// # Arguments
950///
951/// * `line` - The source line to check
952/// * `line_num` - The source line number (1-indexed)
953///
954/// # Returns
955///
956/// `Some(UncertainFinding)` if the line contains a skipped expression, `None` otherwise.
957fn detect_uncertain_expression(line: &str, line_num: usize) -> Option<UncertainFinding> {
958    let trimmed = line.trim();
959
960    // Skip empty lines, comments, and structural/declaration lines
961    const SKIP_PREFIXES: &[&str] = &[
962        "#",
963        "//",
964        "/*",
965        "@",
966        "import ",
967        "from ",
968        "use ",
969        "class ",
970        "def ",
971        "fn ",
972        "func ",
973        "function ",
974        "pub ",
975        "struct ",
976        "enum ",
977        "trait ",
978        "impl ",
979        "interface ",
980    ];
981    if trimmed.is_empty() || SKIP_PREFIXES.iter().any(|p| trimmed.starts_with(p)) {
982        return None;
983    }
984
985    /// Check whether text contains an arithmetic binary operator.
986    fn has_binary_operator(s: &str) -> bool {
987        s.contains(" + ") || s.contains(" - ") || s.contains(" * ") || s.contains(" / ")
988    }
989
990    // Check for assignment lines with function calls on the RHS
991    // Pattern: `var = expr` where expr contains a function call
992    if let Some(eq_idx) = trimmed.find('=') {
993        // Skip comparison operators
994        if eq_idx > 0 {
995            let before = trimmed.as_bytes().get(eq_idx.wrapping_sub(1));
996            let after = trimmed.as_bytes().get(eq_idx + 1);
997            let is_comparison =
998                matches!(before, Some(b'!' | b'<' | b'>' | b'=')) || matches!(after, Some(b'='));
999            if !is_comparison {
1000                let rhs = trimmed[eq_idx + 1..].trim();
1001                // Check if RHS contains a function call mixed with an operator
1002                if is_function_call(rhs) && has_binary_operator(rhs) {
1003                    return Some(UncertainFinding {
1004                        expr: rhs.to_string(),
1005                        line: line_num,
1006                        reason: "contains function call - purity unknown".to_string(),
1007                    });
1008                }
1009                // Check for standalone function call on RHS (method access)
1010                if is_function_call(rhs) && rhs.contains('.') {
1011                    return Some(UncertainFinding {
1012                        expr: rhs.to_string(),
1013                        line: line_num,
1014                        reason: "contains method access - purity unknown".to_string(),
1015                    });
1016                }
1017            }
1018        }
1019    }
1020
1021    // Check for standalone expressions with function calls that look like they
1022    // could be binary expressions
1023    if is_function_call(trimmed) && has_binary_operator(trimmed) {
1024        return Some(UncertainFinding {
1025            expr: trimmed.to_string(),
1026            line: line_num,
1027            reason: "function calls may have side effects".to_string(),
1028        });
1029    }
1030
1031    None
1032}
1033
1034/// Parse expression from source line like "x = a + b".
1035///
1036/// Extracts the left operand, operator, and right operand from an assignment
1037/// statement containing a binary expression.
1038///
1039/// # Arguments
1040///
1041/// * `line` - The source line to parse
1042///
1043/// # Returns
1044///
1045/// `Some((left, op, right))` if a binary expression is found, `None` otherwise.
1046///
1047/// # TIGER Mitigations
1048///
1049/// - TIGER-PASS2-1: Only process rightmost assignment on line
1050/// - TIGER-PASS3-5: Limit operand extraction to base variable only
1051///
1052/// # Examples
1053///
1054/// ```rust,ignore
1055/// assert_eq!(
1056///     parse_expression_from_line("x = a + b"),
1057///     Some(("a".to_string(), "+".to_string(), "b".to_string()))
1058/// );
1059/// assert_eq!(
1060///     parse_expression_from_line("result = foo * bar"),
1061///     Some(("foo".to_string(), "*".to_string(), "bar".to_string()))
1062/// );
1063/// assert_eq!(parse_expression_from_line("x = foo()"), None);
1064/// ```
1065pub fn parse_expression_from_line(line: &str) -> Option<(String, String, String)> {
1066    // Strip comments (simple approach - single-line comments only)
1067    let line = if let Some(idx) = line.find('#') {
1068        &line[..idx]
1069    } else if let Some(idx) = line.find("//") {
1070        &line[..idx]
1071    } else {
1072        line
1073    };
1074
1075    // TIGER-PASS2-1: Find rightmost assignment (for a = b = 5, we want b = 5)
1076    // Look for '=' that's not part of ==, !=, <=, >=, :=
1077    let mut rhs_start = None;
1078    let chars: Vec<char> = line.chars().collect();
1079
1080    for i in (0..chars.len()).rev() {
1081        if chars[i] == '=' {
1082            // Check it's not ==, !=, <=, >=, :=
1083            let is_comparison = (i > 0 && matches!(chars[i - 1], '=' | '!' | '<' | '>' | ':'))
1084                || (i + 1 < chars.len() && chars[i + 1] == '=');
1085
1086            if !is_comparison {
1087                rhs_start = Some(i + 1);
1088                break;
1089            }
1090        }
1091    }
1092
1093    // Determine expression region to search for binary operators
1094    let expr_text = if let Some(start) = rhs_start {
1095        // Found assignment - search RHS
1096        line[start..].trim()
1097    } else {
1098        // No assignment found - try the whole line (for return, if, standalone exprs)
1099        let trimmed = line.trim();
1100        // Strip common prefixes: return, if, elif, while, for, yield, assert, etc.
1101        let stripped = if let Some(rest) = trimmed.strip_prefix("return ") {
1102            rest.trim()
1103        } else if let Some(rest) = trimmed.strip_prefix("return(") {
1104            // Handle return(expr)
1105            rest.trim()
1106        } else if let Some(rest) = trimmed.strip_prefix("if ") {
1107            // Strip trailing colon for Python if
1108            let r = rest.trim();
1109            let r = r.strip_suffix(':').unwrap_or(r);
1110            // Strip trailing brace for C-like if
1111            let r = r.strip_suffix('{').unwrap_or(r);
1112            // Strip surrounding parens if present
1113            let r = r.strip_prefix('(').unwrap_or(r);
1114            let r = r.strip_suffix(')').unwrap_or(r);
1115            r.trim()
1116        } else if let Some(rest) = trimmed.strip_prefix("elif ") {
1117            let r = rest.trim();
1118            r.strip_suffix(':').unwrap_or(r).trim()
1119        } else if let Some(rest) = trimmed.strip_prefix("else if ") {
1120            let r = rest.trim();
1121            let r = r.strip_suffix('{').unwrap_or(r);
1122            let r = r.strip_prefix('(').unwrap_or(r);
1123            let r = r.strip_suffix(')').unwrap_or(r);
1124            r.trim()
1125        } else if let Some(rest) = trimmed.strip_prefix("while ") {
1126            let r = rest.trim();
1127            let r = r.strip_suffix(':').unwrap_or(r);
1128            let r = r.strip_suffix('{').unwrap_or(r);
1129            let r = r.strip_prefix('(').unwrap_or(r);
1130            let r = r.strip_suffix(')').unwrap_or(r);
1131            r.trim()
1132        } else if let Some(rest) = trimmed.strip_prefix("assert ") {
1133            rest.trim()
1134        } else if let Some(rest) = trimmed.strip_prefix("yield ") {
1135            rest.trim()
1136        } else {
1137            // Try the whole line as-is (standalone expression)
1138            trimmed
1139        };
1140        stripped
1141    };
1142
1143    // Skip if the expression is a function call (CAP-AE-12)
1144    if is_function_call(expr_text) {
1145        return None;
1146    }
1147
1148    // Try to find a binary operator in the expression
1149    // Sort operators by length (descending) to match longer operators first
1150    let mut ops_by_len: Vec<&str> = BINARY_OPS.to_vec();
1151    ops_by_len.sort_by_key(|op| std::cmp::Reverse(op.len()));
1152
1153    for op in ops_by_len {
1154        // Look for operator surrounded by spaces or at word boundaries
1155        if let Some(op_idx) = find_operator_in_expr(expr_text, op) {
1156            let left = expr_text[..op_idx].trim();
1157            let right = expr_text[op_idx + op.len()..].trim();
1158
1159            // TIGER-PASS3-5: Limit to base variable (truncate deeply nested field access)
1160            let left = extract_base_variable(left);
1161            let right = extract_base_variable(right);
1162
1163            // Skip if either operand looks like a function call
1164            if is_function_call(&left) || is_function_call(&right) {
1165                return None;
1166            }
1167
1168            // Skip if operands are empty or are just literals
1169            if left.is_empty() || right.is_empty() {
1170                continue;
1171            }
1172
1173            // Skip if both operands are numeric literals (constant folding, not CSE)
1174            if is_numeric_literal(&left) && is_numeric_literal(&right) {
1175                continue;
1176            }
1177
1178            return Some((left, op.to_string(), right));
1179        }
1180    }
1181
1182    None
1183}
1184
1185/// Find operator in expression, avoiding operators inside parentheses or strings.
1186fn find_operator_in_expr(expr: &str, op: &str) -> Option<usize> {
1187    let chars: Vec<char> = expr.chars().collect();
1188    let op_chars: Vec<char> = op.chars().collect();
1189    let mut paren_depth: usize = 0;
1190    let mut in_string = false;
1191    let mut string_char = '"';
1192
1193    let mut i = 0;
1194    while i < chars.len() {
1195        let c = chars[i];
1196
1197        // Track string state
1198        if !in_string && (c == '"' || c == '\'') {
1199            in_string = true;
1200            string_char = c;
1201            i += 1;
1202            continue;
1203        }
1204        if in_string && c == string_char && (i == 0 || chars[i - 1] != '\\') {
1205            in_string = false;
1206            i += 1;
1207            continue;
1208        }
1209        if in_string {
1210            i += 1;
1211            continue;
1212        }
1213
1214        // Track paren depth
1215        if c == '(' || c == '[' || c == '{' {
1216            paren_depth += 1;
1217            i += 1;
1218            continue;
1219        }
1220        if c == ')' || c == ']' || c == '}' {
1221            paren_depth = paren_depth.saturating_sub(1);
1222            i += 1;
1223            continue;
1224        }
1225
1226        // Only look for operators at top level (not inside parens)
1227        if paren_depth == 0 {
1228            // Check if operator matches at this position
1229            if i + op_chars.len() <= chars.len() {
1230                let matches = op_chars
1231                    .iter()
1232                    .enumerate()
1233                    .all(|(j, &oc)| chars[i + j] == oc);
1234
1235                if matches {
1236                    // For word operators (and, or), check word boundaries
1237                    if op.chars().all(|c| c.is_alphabetic()) {
1238                        let before_ok = i == 0 || !chars[i - 1].is_alphanumeric();
1239                        let after_ok =
1240                            i + op.len() >= chars.len() || !chars[i + op.len()].is_alphanumeric();
1241                        if before_ok && after_ok {
1242                            return Some(i);
1243                        }
1244                    } else {
1245                        return Some(i);
1246                    }
1247                }
1248            }
1249        }
1250
1251        i += 1;
1252    }
1253
1254    None
1255}
1256
1257/// Extract base variable from potentially nested field access.
1258///
1259/// TIGER-PASS3-5: Limit operand extraction to base variable only
1260/// to avoid issues with circular field references.
1261///
1262/// # Examples
1263///
1264/// ```rust,ignore
1265/// assert_eq!(extract_base_variable("x"), "x");
1266/// assert_eq!(extract_base_variable("x.a"), "x.a");
1267/// assert_eq!(extract_base_variable("x.a.b.c.d.e"), "x.a.b"); // Truncated
1268/// ```
1269fn extract_base_variable(text: &str) -> String {
1270    let trimmed = text.trim();
1271
1272    // Count dots to detect deep nesting
1273    let parts: Vec<&str> = trimmed.split('.').collect();
1274
1275    // TIGER-PASS3-5: Limit to 3 levels of field access
1276    if parts.len() > 3 {
1277        parts[..3].join(".")
1278    } else {
1279        trimmed.to_string()
1280    }
1281}
1282
1283/// Check if text is a numeric literal.
1284fn is_numeric_literal(text: &str) -> bool {
1285    let trimmed = text.trim();
1286
1287    // Integer
1288    if trimmed.parse::<i64>().is_ok() {
1289        return true;
1290    }
1291
1292    // Float
1293    if trimmed.parse::<f64>().is_ok() {
1294        return true;
1295    }
1296
1297    // Hex, octal, binary
1298    if trimmed.starts_with("0x") || trimmed.starts_with("0o") || trimmed.starts_with("0b") {
1299        return true;
1300    }
1301
1302    false
1303}
1304
1305/// Result of expression extraction including Phase 4 data.
1306///
1307/// Contains all data needed for both Phase 3 MUST analysis and
1308/// Phase 4 intra-block CSE detection.
1309#[derive(Debug, Clone, Default)]
1310pub struct ExtractionResult {
1311    /// All unique expressions found
1312    pub all_exprs: HashSet<Expression>,
1313    /// Per-block gen/kill sets
1314    pub block_info: HashMap<BlockId, BlockExpressions>,
1315    /// Expression instances in order (legacy)
1316    pub expr_instances: Vec<Expression>,
1317    /// Expression instances with block context (Phase 4)
1318    pub expr_instances_with_blocks: Vec<ExprInstance>,
1319    /// Definitions per line for intra-block kill tracking (Phase 4)
1320    pub defs_per_line: HashMap<usize, HashSet<String>>,
1321    /// Maps lines to their containing block IDs
1322    pub line_to_block: HashMap<usize, BlockId>,
1323    /// Expressions skipped during extraction (function calls, impure constructs)
1324    pub uncertain_exprs: Vec<UncertainFinding>,
1325}
1326
1327/// Check if text is a valid identifier (variable name).
1328#[allow(dead_code)]
1329fn is_identifier(text: &str) -> bool {
1330    let trimmed = text.trim();
1331    if trimmed.is_empty() {
1332        return false;
1333    }
1334
1335    let mut chars = trimmed.chars();
1336
1337    // First character must be letter or underscore
1338    match chars.next() {
1339        Some(c) if c.is_alphabetic() || c == '_' => {}
1340        _ => return false,
1341    }
1342
1343    // Rest must be alphanumeric, underscore, or dot (for field access)
1344    chars.all(|c| c.is_alphanumeric() || c == '_' || c == '.')
1345}
1346
1347/// Extract expressions from DFG variable references.
1348///
1349/// This function bridges the gap between the DFG (which tracks variable references)
1350/// and available expressions analysis (which tracks binary expressions).
1351///
1352/// # Algorithm
1353///
1354/// 1. Group VarRefs by (block_id, line)
1355/// 2. For groups with 2+ Uses on same line as 1 Def, extract expr
1356/// 3. Parse line text to extract operator
1357/// 4. Apply whitespace normalization
1358/// 5. Filter out function calls (CAP-AE-12)
1359///
1360/// # Arguments
1361///
1362/// * `cfg` - The control flow graph
1363/// * `dfg` - The data flow graph with variable references
1364///
1365/// # Returns
1366///
1367/// A tuple of:
1368/// - `HashSet<Expression>`: All unique expressions found
1369/// - `HashMap<BlockId, BlockExpressions>`: Per-block gen/kill sets
1370/// - `Vec<Expression>`: All expression instances (including duplicates) in order
1371///
1372/// # TIGER Mitigations
1373///
1374/// - TIGER-PASS1-10: Algorithm detailed in steps above
1375/// - TIGER-PASS2-1: Multiple assignment targets (a = b = 5) - only process rightmost
1376/// - TIGER-PASS3-5: Circular field references - limit to base variable
1377///
1378/// # Example
1379///
1380/// ```rust,ignore
1381/// let (all_exprs, block_info, expr_instances) = extract_expressions_from_refs(&cfg, &dfg);
1382///
1383/// // all_exprs contains unique expressions like "a + b"
1384/// // block_info maps block IDs to their gen/kill sets
1385/// // expr_instances preserves order for CSE detection
1386/// ```
1387pub fn extract_expressions_from_refs(
1388    cfg: &CfgInfo,
1389    dfg: &DfgInfo,
1390) -> (
1391    HashSet<Expression>,
1392    HashMap<BlockId, BlockExpressions>,
1393    Vec<Expression>,
1394) {
1395    let mut all_exprs: HashSet<Expression> = HashSet::new();
1396    let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
1397    let mut expr_instances: Vec<Expression> = Vec::new();
1398
1399    // Initialize block_info for all blocks
1400    for block in &cfg.blocks {
1401        block_info.insert(block.id, BlockExpressions::new());
1402    }
1403
1404    // Group VarRefs by (block, line) to find expressions
1405    // An expression typically has: 1 Def (the assignment target) and 2+ Uses (the operands)
1406    let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
1407
1408    for var_ref in &dfg.refs {
1409        // Find which block this ref belongs to
1410        let block_id = find_block_for_line(cfg, var_ref.line);
1411        if let Some(bid) = block_id {
1412            refs_by_line
1413                .entry((bid, var_ref.line))
1414                .or_default()
1415                .push(var_ref);
1416        }
1417    }
1418
1419    // Process each (block, line) group
1420    for ((block_id, line), refs) in refs_by_line {
1421        // Count defs and uses
1422        let defs: Vec<_> = refs
1423            .iter()
1424            .filter(|r| matches!(r.ref_type, RefType::Definition))
1425            .collect();
1426        let uses: Vec<_> = refs
1427            .iter()
1428            .filter(|r| matches!(r.ref_type, RefType::Use))
1429            .collect();
1430
1431        // Skip if no assignment (no def) or not enough uses for a binary expression
1432        if defs.is_empty() || uses.len() < 2 {
1433            // Even without a def, we might have a use of an expression
1434            // but for Phase 2 we focus on assignments like x = a + b
1435            continue;
1436        }
1437
1438        // We have an assignment with 2+ uses - likely a binary expression
1439        // Try to extract the expression from the source line
1440
1441        // Get the variable names that are used
1442        let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
1443
1444        // If we have exactly 2 distinct uses, this could be a binary expression
1445        let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
1446
1447        if unique_uses.len() >= 2 {
1448            // Create expression from the uses
1449            // We need to determine the operator - for now, use a heuristic based on ordering
1450            let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
1451            operands.sort();
1452
1453            // Create a placeholder expression - we'll need source code to get the actual operator
1454            // For now, create expression with uses and infer operator pattern
1455            if let Some(op) = infer_operator_from_uses(&use_names) {
1456                let text = normalize_expression(&operands[0], &op, &operands[1]);
1457                let expr = Expression::new(text, operands.clone(), line as usize);
1458
1459                // Add to all_exprs
1460                all_exprs.insert(expr.clone());
1461
1462                // Add to expr_instances
1463                expr_instances.push(expr.clone());
1464
1465                // Update block's gen set
1466                if let Some(block_expr) = block_info.get_mut(&block_id) {
1467                    block_expr.gen.insert(expr);
1468                }
1469            }
1470        }
1471
1472        // Update kill set for defs
1473        for def in &defs {
1474            if let Some(block_expr) = block_info.get_mut(&block_id) {
1475                block_expr.kill.insert(def.name.clone());
1476            }
1477        }
1478    }
1479
1480    // Sort expr_instances by line for deterministic order
1481    expr_instances.sort_by_key(|e| e.line);
1482
1483    (all_exprs, block_info, expr_instances)
1484}
1485
1486/// Extract expressions from DFG with source code for better accuracy.
1487///
1488/// This is an enhanced version that uses source code to accurately parse
1489/// the operator from assignment statements.
1490///
1491/// # Arguments
1492///
1493/// * `cfg` - The control flow graph
1494/// * `dfg` - The data flow graph with variable references
1495/// * `source_lines` - Optional source code lines for accurate operator extraction
1496///
1497/// # Returns
1498///
1499/// Same as `extract_expressions_from_refs` but with accurate operators when source is provided.
1500pub fn extract_expressions_from_refs_with_source(
1501    cfg: &CfgInfo,
1502    dfg: &DfgInfo,
1503    source_lines: Option<&[String]>,
1504) -> (
1505    HashSet<Expression>,
1506    HashMap<BlockId, BlockExpressions>,
1507    Vec<Expression>,
1508) {
1509    let mut all_exprs: HashSet<Expression> = HashSet::new();
1510    let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
1511    let mut expr_instances: Vec<Expression> = Vec::new();
1512
1513    // Initialize block_info for all blocks
1514    for block in &cfg.blocks {
1515        block_info.insert(block.id, BlockExpressions::new());
1516    }
1517
1518    // If we have source lines, use them for accurate parsing
1519    if let Some(lines) = source_lines {
1520        // Process each line that might contain an expression
1521        for (line_num, line_text) in lines.iter().enumerate() {
1522            let line = (line_num + 1) as u32; // 1-indexed
1523
1524            // Try to parse expression from line
1525            if let Some((left, op, right)) = parse_expression_from_line(line_text) {
1526                // Validate that the operands appear in the DFG
1527                let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
1528
1529                let has_left = refs_on_line.iter().any(|r| r.name == left);
1530                let has_right = refs_on_line.iter().any(|r| r.name == right);
1531
1532                // Only create expression if we can validate operands from DFG
1533                if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
1534                {
1535                    let text = normalize_expression(&left, &op, &right);
1536                    let operands = vec![left.clone(), right.clone()];
1537                    let expr = Expression::new(text, operands, line as usize);
1538
1539                    // Find block for this line
1540                    if let Some(block_id) = find_block_for_line(cfg, line) {
1541                        // Add to all_exprs
1542                        all_exprs.insert(expr.clone());
1543
1544                        // Add to expr_instances
1545                        expr_instances.push(expr.clone());
1546
1547                        // Update block's gen set
1548                        if let Some(block_expr) = block_info.get_mut(&block_id) {
1549                            // Only add to gen if operands haven't been killed yet in this block
1550                            block_expr.gen.insert(expr);
1551                        }
1552                    }
1553                }
1554            }
1555        }
1556
1557        // Build kill sets from definitions in DFG
1558        for var_ref in &dfg.refs {
1559            if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
1560                if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
1561                    if let Some(block_expr) = block_info.get_mut(&block_id) {
1562                        block_expr.kill.insert(var_ref.name.clone());
1563                    }
1564                }
1565            }
1566        }
1567    } else {
1568        // Fall back to DFG-only extraction
1569        return extract_expressions_from_refs(cfg, dfg);
1570    }
1571
1572    // Sort expr_instances by line for deterministic order
1573    expr_instances.sort_by_key(|e| e.line);
1574
1575    (all_exprs, block_info, expr_instances)
1576}
1577
1578/// Find the block ID containing a given line number.
1579fn find_block_for_line(cfg: &CfgInfo, line: u32) -> Option<BlockId> {
1580    for block in &cfg.blocks {
1581        if block.lines.0 <= line && line <= block.lines.1 {
1582            return Some(block.id);
1583        }
1584    }
1585    // If no block contains the line exactly, find the closest one
1586    // This handles edge cases where line numbers don't perfectly match block ranges
1587    cfg.blocks
1588        .iter()
1589        .min_by_key(|b| {
1590            let dist_start = (b.lines.0 as i64 - line as i64).abs();
1591            let dist_end = (b.lines.1 as i64 - line as i64).abs();
1592            dist_start.min(dist_end)
1593        })
1594        .map(|b| b.id)
1595}
1596
1597/// Infer operator from use patterns when source code is not available.
1598///
1599/// This is a heuristic fallback - it assumes common patterns like
1600/// binary operations when we see exactly 2 uses.
1601fn infer_operator_from_uses(uses: &[&str]) -> Option<String> {
1602    // If we have exactly 2 uses, assume some binary operation
1603    // Default to "+" as a placeholder - this will be normalized anyway
1604    if uses.len() >= 2 {
1605        Some("+".to_string())
1606    } else {
1607        None
1608    }
1609}
1610
1611/// Extract expressions with full Phase 4 data for intra-block CSE detection.
1612///
1613/// This is the Phase 4 version that returns additional data for proper
1614/// intra-block kill tracking in redundant_computations().
1615///
1616/// # Arguments
1617///
1618/// * `cfg` - The control flow graph
1619/// * `dfg` - The data flow graph with variable references
1620/// * `source_lines` - Optional source code lines for accurate operator extraction
1621///
1622/// # Returns
1623///
1624/// `ExtractionResult` containing all data needed for Phase 4 CSE detection.
1625pub fn extract_expressions_full(
1626    cfg: &CfgInfo,
1627    dfg: &DfgInfo,
1628    source_lines: Option<&[String]>,
1629) -> ExtractionResult {
1630    extract_expressions_full_with_lang(cfg, dfg, source_lines, None)
1631}
1632
1633/// Extract expressions with full Phase 4 data and optional language for AST extraction.
1634///
1635/// When `lang` is provided, this function supplements text-based extraction with
1636/// AST-based binary expression extraction using tree-sitter.
1637///
1638/// # Arguments
1639///
1640/// * `cfg` - The control flow graph
1641/// * `dfg` - The data flow graph with variable references
1642/// * `source_lines` - Optional source code lines for accurate operator extraction
1643/// * `lang` - Optional language for AST-based extraction (enhances results)
1644///
1645/// # Returns
1646///
1647/// `ExtractionResult` containing all data needed for Phase 4 CSE detection.
1648pub fn extract_expressions_full_with_lang(
1649    cfg: &CfgInfo,
1650    dfg: &DfgInfo,
1651    source_lines: Option<&[String]>,
1652    lang: Option<Language>,
1653) -> ExtractionResult {
1654    let mut result = ExtractionResult::default();
1655
1656    // Initialize block_info for all blocks and build line_to_block mapping
1657    for block in &cfg.blocks {
1658        result.block_info.insert(block.id, BlockExpressions::new());
1659        // Map each line in this block to the block ID
1660        for line in block.lines.0..=block.lines.1 {
1661            result.line_to_block.insert(line as usize, block.id);
1662        }
1663    }
1664
1665    // Build defs_per_line from DFG
1666    for var_ref in &dfg.refs {
1667        if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
1668            result
1669                .defs_per_line
1670                .entry(var_ref.line as usize)
1671                .or_default()
1672                .insert(var_ref.name.clone());
1673
1674            // Also update block's kill set
1675            if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
1676                if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1677                    block_expr.kill.insert(var_ref.name.clone());
1678                }
1679            }
1680        }
1681    }
1682
1683    // If we have source lines, use them for accurate expression parsing
1684    if let Some(lines) = source_lines {
1685        for (line_num, line_text) in lines.iter().enumerate() {
1686            let line = (line_num + 1) as u32; // 1-indexed
1687
1688            if let Some((left, op, right)) = parse_expression_from_line(line_text) {
1689                // Validate operands from DFG
1690                let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
1691
1692                let has_left = refs_on_line.iter().any(|r| r.name == left);
1693                let has_right = refs_on_line.iter().any(|r| r.name == right);
1694
1695                if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
1696                {
1697                    let text = normalize_expression(&left, &op, &right);
1698                    let operands = vec![left.clone(), right.clone()];
1699                    let expr = Expression::new(text, operands, line as usize);
1700
1701                    if let Some(block_id) = find_block_for_line(cfg, line) {
1702                        result.all_exprs.insert(expr.clone());
1703                        result.expr_instances.push(expr.clone());
1704                        result
1705                            .expr_instances_with_blocks
1706                            .push(ExprInstance::new(expr.clone(), block_id));
1707
1708                        if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1709                            block_expr.gen.insert(expr);
1710                        }
1711                    }
1712                }
1713            } else {
1714                // Check if this line was skipped because it contains a function call
1715                // If so, collect it as an uncertain finding
1716                if let Some(uncertain) = detect_uncertain_expression(line_text, line as usize) {
1717                    // Only include if this line is within the function's CFG range
1718                    if find_block_for_line(cfg, line).is_some() {
1719                        result.uncertain_exprs.push(uncertain);
1720                    }
1721                }
1722            }
1723        }
1724    } else {
1725        // Fall back to DFG-only extraction
1726        let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
1727
1728        for var_ref in &dfg.refs {
1729            if let Some(bid) = find_block_for_line(cfg, var_ref.line) {
1730                refs_by_line
1731                    .entry((bid, var_ref.line))
1732                    .or_default()
1733                    .push(var_ref);
1734            }
1735        }
1736
1737        for ((block_id, line), refs) in refs_by_line {
1738            let defs: Vec<_> = refs
1739                .iter()
1740                .filter(|r| matches!(r.ref_type, RefType::Definition))
1741                .collect();
1742            let uses: Vec<_> = refs
1743                .iter()
1744                .filter(|r| matches!(r.ref_type, RefType::Use))
1745                .collect();
1746
1747            if defs.is_empty() || uses.len() < 2 {
1748                continue;
1749            }
1750
1751            let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
1752            let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
1753
1754            if unique_uses.len() >= 2 {
1755                let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
1756                operands.sort();
1757
1758                if let Some(op) = infer_operator_from_uses(&use_names) {
1759                    let text = normalize_expression(&operands[0], &op, &operands[1]);
1760                    let expr = Expression::new(text, operands.clone(), line as usize);
1761
1762                    result.all_exprs.insert(expr.clone());
1763                    result.expr_instances.push(expr.clone());
1764                    result
1765                        .expr_instances_with_blocks
1766                        .push(ExprInstance::new(expr.clone(), block_id));
1767
1768                    if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1769                        block_expr.gen.insert(expr);
1770                    }
1771                }
1772            }
1773        }
1774    }
1775
1776    // AST-based extraction: supplement text-based extraction with tree-sitter
1777    // This catches expressions in non-assignment contexts (return, if, while, etc.)
1778    if let (Some(lines), Some(language)) = (source_lines, lang) {
1779        let full_source = lines.join("\n");
1780        // Determine the line range from the CFG
1781        let min_line = cfg.blocks.iter().map(|b| b.lines.0).min().unwrap_or(1) as usize;
1782        let max_line = cfg.blocks.iter().map(|b| b.lines.1).max().unwrap_or(1) as usize;
1783
1784        let ast_exprs = extract_binary_exprs_from_ast(&full_source, language, min_line, max_line);
1785
1786        for (text, _op, left, right, line) in ast_exprs {
1787            let line_u32 = line as u32;
1788
1789            // Check if this expression was already found by text-based extraction
1790            let already_found = result
1791                .all_exprs
1792                .iter()
1793                .any(|e| e.text == text && e.line == line);
1794            if already_found {
1795                continue;
1796            }
1797
1798            // Create the expression
1799            let operands = vec![left.clone(), right.clone()];
1800            let expr = Expression::new(text.clone(), operands, line);
1801
1802            if let Some(block_id) = find_block_for_line(cfg, line_u32) {
1803                result.all_exprs.insert(expr.clone());
1804                result.expr_instances.push(expr.clone());
1805                result
1806                    .expr_instances_with_blocks
1807                    .push(ExprInstance::new(expr.clone(), block_id));
1808
1809                if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1810                    block_expr.gen.insert(expr);
1811                }
1812            }
1813        }
1814    }
1815
1816    // Sort expr_instances by line for deterministic order
1817    result.expr_instances.sort_by_key(|e| e.line);
1818    result
1819        .expr_instances_with_blocks
1820        .sort_by_key(|e| e.expr.line);
1821
1822    result
1823}
1824
1825// =============================================================================
1826// Phase 3: MUST Analysis - Compute Available Expressions
1827// =============================================================================
1828
1829/// Compute available expressions using MUST (intersection) analysis.
1830///
1831/// MUST semantics: An expression is available at a program point only if it
1832/// has been computed on ALL paths reaching that point.
1833///
1834/// # Algorithm
1835///
1836/// 1. Extract expressions from DFG (Phase 2)
1837/// 2. Initialize: entry = {}, others = ALL_EXPRS (optimistic for MUST)
1838/// 3. Iterate until fixpoint:
1839///    - avail_in[b] = intersection(avail_out[p] for p in preds[b])
1840///    - avail_out[b] = gen[b] | (avail_in[b] - killed[b])
1841/// 4. Return AvailableExprsInfo
1842///
1843/// # Arguments
1844///
1845/// * `cfg` - The control flow graph
1846/// * `dfg` - The data flow graph with variable references
1847///
1848/// # Returns
1849///
1850/// `Result<AvailableExprsInfo, DataflowError>` containing the analysis results.
1851///
1852/// # TIGER Mitigations
1853///
1854/// - TIGER-PASS1-4: Iteration bound = blocks * expressions * 2 + 10
1855/// - TIGER-PASS3-4: Validates CFG doesn't exceed MAX_BLOCKS
1856///
1857/// # Example
1858///
1859/// ```rust,ignore
1860/// let result = compute_available_exprs(&cfg, &dfg)?;
1861///
1862/// // Check if expression is available at a block
1863/// let expr = Expression::new("a + b", vec!["a", "b"], 1);
1864/// if result.is_available(block_id, &expr) {
1865///     println!("Expression is available - can use CSE");
1866/// }
1867///
1868/// // Find redundant computations
1869/// for (text, first, redundant) in result.redundant_computations() {
1870///     println!("CSE opportunity: {} at line {} (first at {})", text, redundant, first);
1871/// }
1872/// ```
1873pub fn compute_available_exprs(
1874    cfg: &CfgInfo,
1875    dfg: &DfgInfo,
1876) -> Result<AvailableExprsInfo, DataflowError> {
1877    // Validate CFG (TIGER-PASS3-4)
1878    validate_cfg(cfg)?;
1879
1880    let entry = cfg.entry_block;
1881
1882    // Step 1: Extract expressions from DFG (Phase 4: use full extraction for intra-block tracking)
1883    let extraction = extract_expressions_full(cfg, dfg, None);
1884    let all_exprs = extraction.all_exprs;
1885    let block_info = extraction.block_info;
1886    let expr_instances = extraction.expr_instances;
1887    let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
1888    let defs_per_line = extraction.defs_per_line;
1889    let line_to_block = extraction.line_to_block;
1890
1891    // Early return if no expressions (spec line 212-214)
1892    if all_exprs.is_empty() {
1893        let mut info = AvailableExprsInfo::empty(entry);
1894        // Initialize avail_in and avail_out for all blocks (even if empty)
1895        for block in &cfg.blocks {
1896            info.avail_in.insert(block.id, HashSet::new());
1897            info.avail_out.insert(block.id, HashSet::new());
1898        }
1899        return Ok(info);
1900    }
1901
1902    // Step 2: Build predecessor map
1903    let predecessors = build_predecessors(cfg);
1904
1905    // Step 3: Initialize (MUST analysis: start optimistic except entry)
1906    // For MUST analysis:
1907    // - Entry block avail_in = {} (nothing available)
1908    // - Other blocks avail_in = ALL_EXPRS (optimistic, will be intersected down)
1909    let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
1910    let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
1911
1912    // Entry block: nothing available at entry
1913    avail_in.insert(entry, HashSet::new());
1914
1915    // Entry block avail_out = gen[entry] (only what's generated in entry)
1916    let entry_gen = block_info
1917        .get(&entry)
1918        .map(|b| b.gen.clone())
1919        .unwrap_or_default();
1920    avail_out.insert(entry, entry_gen);
1921
1922    // Other blocks: initialize to ALL_EXPRS (optimistic)
1923    for block in &cfg.blocks {
1924        if block.id != entry {
1925            avail_in.insert(block.id, all_exprs.clone());
1926            avail_out.insert(block.id, all_exprs.clone());
1927        }
1928    }
1929
1930    // Step 4: Iterate until fixpoint
1931    // TIGER-PASS1-4: Iteration bound
1932    let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
1933    let mut changed = true;
1934    let mut iteration = 0;
1935
1936    // Get iteration order (reverse postorder for efficiency)
1937    let block_order = reverse_postorder(cfg);
1938
1939    while changed && iteration < max_iterations {
1940        changed = false;
1941        iteration += 1;
1942
1943        for &block_id in &block_order {
1944            // Skip entry block (already initialized)
1945            if block_id == entry {
1946                continue;
1947            }
1948
1949            // avail_in[b] = INTERSECTION of all predecessor's avail_out
1950            let preds = predecessors
1951                .get(&block_id)
1952                .map(|v| v.as_slice())
1953                .unwrap_or(&[]);
1954
1955            let new_in: HashSet<Expression> = if preds.is_empty() {
1956                // No predecessors (unreachable block) -> nothing available
1957                HashSet::new()
1958            } else {
1959                // MUST = intersection of all predecessors
1960                let mut result = avail_out
1961                    .get(&preds[0])
1962                    .cloned()
1963                    .unwrap_or_else(|| all_exprs.clone());
1964
1965                for &pred in &preds[1..] {
1966                    let pred_out = avail_out
1967                        .get(&pred)
1968                        .cloned()
1969                        .unwrap_or_else(|| all_exprs.clone());
1970                    // Intersection
1971                    result = result.intersection(&pred_out).cloned().collect();
1972                }
1973                result
1974            };
1975
1976            // avail_out[b] = gen[b] | (avail_in[b] - killed_by_block[b])
1977            let info = block_info.get(&block_id);
1978            let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
1979            let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
1980
1981            // Compute killed expressions: those whose operands are in kill set
1982            let not_killed: HashSet<Expression> = new_in
1983                .iter()
1984                .filter(|expr| !is_killed(expr, &kill))
1985                .cloned()
1986                .collect();
1987
1988            // avail_out = gen | (avail_in - killed)
1989            let new_out: HashSet<Expression> = gen.union(&not_killed).cloned().collect();
1990
1991            // Check for change
1992            if avail_in.get(&block_id) != Some(&new_in)
1993                || avail_out.get(&block_id) != Some(&new_out)
1994            {
1995                changed = true;
1996                avail_in.insert(block_id, new_in);
1997                avail_out.insert(block_id, new_out);
1998            }
1999        }
2000    }
2001
2002    // Check for non-convergence (shouldn't happen with valid CFG)
2003    if iteration >= max_iterations {
2004        return Err(DataflowError::IterationLimit {
2005            iterations: iteration,
2006        });
2007    }
2008
2009    Ok(AvailableExprsInfo {
2010        avail_in,
2011        avail_out,
2012        all_exprs,
2013        entry_block: entry,
2014        expr_instances,
2015        expr_instances_with_blocks,
2016        defs_per_line,
2017        line_to_block,
2018        uncertain_exprs: Vec::new(),
2019        confidence: Confidence::High,
2020    })
2021}
2022
2023/// Compute available expressions with source code for better accuracy.
2024///
2025/// This version uses source code to accurately parse operators from assignment
2026/// statements, providing more precise expression extraction.
2027///
2028/// # Arguments
2029///
2030/// * `cfg` - The control flow graph
2031/// * `dfg` - The data flow graph with variable references
2032/// * `source_lines` - Source code lines for accurate operator extraction
2033///
2034/// # Returns
2035///
2036/// `Result<AvailableExprsInfo, DataflowError>` containing the analysis results.
2037pub fn compute_available_exprs_with_source(
2038    cfg: &CfgInfo,
2039    dfg: &DfgInfo,
2040    source_lines: &[String],
2041) -> Result<AvailableExprsInfo, DataflowError> {
2042    compute_available_exprs_with_source_and_lang(cfg, dfg, source_lines, None)
2043}
2044
2045/// Compute available expressions with source code and language for AST-enhanced accuracy.
2046///
2047/// This version combines text-based and AST-based expression extraction
2048/// for maximum coverage across all programming languages.
2049///
2050/// # Arguments
2051///
2052/// * `cfg` - The control flow graph
2053/// * `dfg` - The data flow graph with variable references
2054/// * `source_lines` - Source code lines for accurate operator extraction
2055/// * `lang` - Optional programming language for AST-based extraction
2056///
2057/// # Returns
2058///
2059/// `Result<AvailableExprsInfo, DataflowError>` containing the analysis results.
2060pub fn compute_available_exprs_with_source_and_lang(
2061    cfg: &CfgInfo,
2062    dfg: &DfgInfo,
2063    source_lines: &[String],
2064    lang: Option<Language>,
2065) -> Result<AvailableExprsInfo, DataflowError> {
2066    // Validate CFG (TIGER-PASS3-4)
2067    validate_cfg(cfg)?;
2068
2069    let entry = cfg.entry_block;
2070
2071    // Step 1: Extract expressions from DFG with source (Phase 4: use full extraction)
2072    let extraction = extract_expressions_full_with_lang(cfg, dfg, Some(source_lines), lang);
2073    let all_exprs = extraction.all_exprs;
2074    let block_info = extraction.block_info;
2075    let expr_instances = extraction.expr_instances;
2076    let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
2077    let defs_per_line = extraction.defs_per_line;
2078    let line_to_block = extraction.line_to_block;
2079    let uncertain_exprs = extraction.uncertain_exprs;
2080
2081    // Early return if no expressions
2082    if all_exprs.is_empty() {
2083        let mut info = AvailableExprsInfo::empty(entry);
2084        info.uncertain_exprs = uncertain_exprs.clone();
2085        // Compute confidence based on what we found
2086        info.confidence = compute_confidence(0, uncertain_exprs.len());
2087        for block in &cfg.blocks {
2088            info.avail_in.insert(block.id, HashSet::new());
2089            info.avail_out.insert(block.id, HashSet::new());
2090        }
2091        return Ok(info);
2092    }
2093
2094    // Build predecessor map
2095    let predecessors = build_predecessors(cfg);
2096
2097    // Initialize (MUST analysis)
2098    let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
2099    let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
2100
2101    avail_in.insert(entry, HashSet::new());
2102    let entry_gen = block_info
2103        .get(&entry)
2104        .map(|b| b.gen.clone())
2105        .unwrap_or_default();
2106    avail_out.insert(entry, entry_gen);
2107
2108    for block in &cfg.blocks {
2109        if block.id != entry {
2110            avail_in.insert(block.id, all_exprs.clone());
2111            avail_out.insert(block.id, all_exprs.clone());
2112        }
2113    }
2114
2115    // Iterate until fixpoint
2116    let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
2117    let mut changed = true;
2118    let mut iteration = 0;
2119    let block_order = reverse_postorder(cfg);
2120
2121    while changed && iteration < max_iterations {
2122        changed = false;
2123        iteration += 1;
2124
2125        for &block_id in &block_order {
2126            if block_id == entry {
2127                continue;
2128            }
2129
2130            let preds = predecessors
2131                .get(&block_id)
2132                .map(|v| v.as_slice())
2133                .unwrap_or(&[]);
2134
2135            let new_in: HashSet<Expression> = if preds.is_empty() {
2136                HashSet::new()
2137            } else {
2138                let mut result = avail_out
2139                    .get(&preds[0])
2140                    .cloned()
2141                    .unwrap_or_else(|| all_exprs.clone());
2142
2143                for &pred in &preds[1..] {
2144                    let pred_out = avail_out
2145                        .get(&pred)
2146                        .cloned()
2147                        .unwrap_or_else(|| all_exprs.clone());
2148                    result = result.intersection(&pred_out).cloned().collect();
2149                }
2150                result
2151            };
2152
2153            let info = block_info.get(&block_id);
2154            let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
2155            let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
2156
2157            let not_killed: HashSet<Expression> = new_in
2158                .iter()
2159                .filter(|expr| !is_killed(expr, &kill))
2160                .cloned()
2161                .collect();
2162
2163            let new_out: HashSet<Expression> = gen.union(&not_killed).cloned().collect();
2164
2165            if avail_in.get(&block_id) != Some(&new_in)
2166                || avail_out.get(&block_id) != Some(&new_out)
2167            {
2168                changed = true;
2169                avail_in.insert(block_id, new_in);
2170                avail_out.insert(block_id, new_out);
2171            }
2172        }
2173    }
2174
2175    if iteration >= max_iterations {
2176        return Err(DataflowError::IterationLimit {
2177            iterations: iteration,
2178        });
2179    }
2180
2181    let confidence = compute_confidence(all_exprs.len(), uncertain_exprs.len());
2182
2183    Ok(AvailableExprsInfo {
2184        avail_in,
2185        avail_out,
2186        all_exprs,
2187        entry_block: entry,
2188        expr_instances,
2189        expr_instances_with_blocks,
2190        defs_per_line,
2191        line_to_block,
2192        uncertain_exprs,
2193        confidence,
2194    })
2195}
2196
2197/// Compute confidence level based on confirmed vs uncertain expression counts.
2198fn compute_confidence(confirmed: usize, uncertain: usize) -> Confidence {
2199    if confirmed == 0 && uncertain == 0 {
2200        Confidence::Low
2201    } else if uncertain == 0 {
2202        Confidence::High
2203    } else if confirmed >= uncertain {
2204        Confidence::Medium
2205    } else {
2206        Confidence::Low
2207    }
2208}
2209
2210/// Check if an expression is killed by any variable in the kill set.
2211///
2212/// An expression is killed if any of its operands is redefined (in the kill set).
2213///
2214/// # Arguments
2215///
2216/// * `expr` - The expression to check
2217/// * `kills` - Set of variables that are redefined (killed)
2218///
2219/// # Returns
2220///
2221/// `true` if the expression uses any variable in the kill set.
2222fn is_killed(expr: &Expression, kills: &HashSet<String>) -> bool {
2223    expr.operands.iter().any(|op| kills.contains(op))
2224}
2225
2226// =============================================================================
2227// AST-based binary expression extraction
2228// =============================================================================
2229
2230use crate::types::Language;
2231
2232/// Binary operator node kind names per language for tree-sitter.
2233/// Returns the node kind(s) that represent binary expressions in each language.
2234fn binary_expr_node_kinds(lang: Language) -> &'static [&'static str] {
2235    match lang {
2236        Language::Python => &["binary_operator", "boolean_operator", "comparison_operator"],
2237        Language::Go => &["binary_expression"],
2238        Language::TypeScript | Language::JavaScript => &["binary_expression"],
2239        Language::Java => &["binary_expression"],
2240        Language::Rust => &["binary_expression"],
2241        Language::C | Language::Cpp => &["binary_expression"],
2242        Language::Ruby => &["binary"],
2243        Language::Php => &["binary_expression"],
2244        Language::Kotlin => &["binary_expression"],
2245        Language::CSharp => &["binary_expression"],
2246        Language::Scala => &["infix_expression"],
2247        Language::Elixir => &["binary_operator"],
2248        Language::Ocaml => &["infix_expression"],
2249        Language::Lua | Language::Luau => &["binary_expression"],
2250        Language::Swift => &["infix_expression"],
2251    }
2252}
2253
2254/// Extract the operator text from a binary expression node.
2255///
2256/// Different languages use different AST structures:
2257/// - Some have an "operator" field (Python, Go, Java, C, C++, C#, Ruby, PHP, Rust)
2258/// - Some embed the operator as a child node
2259/// - Kotlin uses separate node kinds for each operator type
2260fn extract_operator_from_node<'a>(
2261    node: &tree_sitter::Node<'a>,
2262    source: &'a [u8],
2263    _lang: Language,
2264) -> Option<String> {
2265    // Try "operator" field first (most languages)
2266    if let Some(op_node) = node.child_by_field_name("operator") {
2267        let op_text = op_node.utf8_text(source).unwrap_or("").trim().to_string();
2268        if !op_text.is_empty() {
2269            return Some(op_text);
2270        }
2271    }
2272
2273    // For languages that embed operator as unnamed children, scan children
2274    let child_count = node.child_count();
2275    for i in 0..child_count {
2276        if let Some(child) = node.child(i) {
2277            if !child.is_named() {
2278                let text = child.utf8_text(source).unwrap_or("").trim();
2279                // Check if it looks like a binary operator
2280                if matches!(
2281                    text,
2282                    "+" | "-"
2283                        | "*"
2284                        | "/"
2285                        | "%"
2286                        | "//"
2287                        | "**"
2288                        | "=="
2289                        | "!="
2290                        | "<"
2291                        | ">"
2292                        | "<="
2293                        | ">="
2294                        | "&&"
2295                        | "||"
2296                        | "and"
2297                        | "or"
2298                        | "&"
2299                        | "|"
2300                        | "^"
2301                        | "<<"
2302                        | ">>"
2303                        | "<>"
2304                        | "==="
2305                        | "!=="
2306                        | "<=>"
2307                        | ".."
2308                        | "..."
2309                        | "in"
2310                        | "not in"
2311                        | "|>"
2312                        | "<|"
2313                        | "++"
2314                ) {
2315                    return Some(text.to_string());
2316                }
2317            }
2318            // Note: Kotlin (tree-sitter-kotlin-ng) uses standard binary_expression
2319            // with left/operator/right fields, so no special handling needed
2320        }
2321    }
2322
2323    None
2324}
2325
2326/// Extract left and right operand text from a binary expression node.
2327///
2328/// Uses field names ("left"/"right") first, then falls back to first/last named children.
2329fn extract_operands_from_node<'a>(
2330    node: &tree_sitter::Node<'a>,
2331    source: &'a [u8],
2332    lang: Language,
2333) -> Option<(String, String)> {
2334    // Try standard field names
2335    let left = node.child_by_field_name("left").or_else(|| {
2336        // Fallback: first named child
2337        node.named_child(0)
2338    });
2339
2340    let right = node.child_by_field_name("right").or_else(|| {
2341        // Fallback: last named child (if different from first)
2342        let count = node.named_child_count();
2343        if count >= 2 {
2344            node.named_child(count - 1)
2345        } else {
2346            None
2347        }
2348    });
2349
2350    match (left, right) {
2351        (Some(l), Some(r)) if l.id() != r.id() => {
2352            let left_text = l.utf8_text(source).unwrap_or("").trim().to_string();
2353            let right_text = r.utf8_text(source).unwrap_or("").trim().to_string();
2354
2355            if left_text.is_empty() || right_text.is_empty() {
2356                return None;
2357            }
2358
2359            // Extract base variable (limit depth for field access chains)
2360            let left_base = extract_base_variable(&left_text);
2361            let right_base = extract_base_variable(&right_text);
2362
2363            // Skip if either side looks like a function call
2364            if is_function_call(&left_base) || is_function_call(&right_base) {
2365                return None;
2366            }
2367
2368            // Skip if both sides are numeric literals (constant folding, not CSE)
2369            if is_numeric_literal(&left_base) && is_numeric_literal(&right_base) {
2370                return None;
2371            }
2372
2373            // For PHP, strip $ prefix for consistency
2374            let left_final = if matches!(lang, Language::Php) {
2375                left_base.trim_start_matches('$').to_string()
2376            } else {
2377                left_base
2378            };
2379            let right_final = if matches!(lang, Language::Php) {
2380                right_base.trim_start_matches('$').to_string()
2381            } else {
2382                right_base
2383            };
2384
2385            Some((left_final, right_final))
2386        }
2387        _ => None,
2388    }
2389}
2390
2391/// Extract binary expressions from source code using tree-sitter AST.
2392///
2393/// Walks the AST to find all binary expression nodes within the given line range,
2394/// extracting their operator and operands. This is language-aware and handles
2395/// each language's specific AST structure.
2396///
2397/// # Arguments
2398///
2399/// * `source` - The full source code
2400/// * `lang` - The programming language
2401/// * `start_line` - Start line (1-indexed, inclusive)
2402/// * `end_line` - End line (1-indexed, inclusive)
2403///
2404/// # Returns
2405///
2406/// A vector of `(normalized_text, operator, left_operand, right_operand, line)` tuples.
2407pub fn extract_binary_exprs_from_ast(
2408    source: &str,
2409    lang: Language,
2410    start_line: usize,
2411    end_line: usize,
2412) -> Vec<(String, String, String, String, usize)> {
2413    let mut results = Vec::new();
2414
2415    // Parse source with tree-sitter
2416    let tree = match crate::ast::parser::parse(source, lang) {
2417        Ok(t) => t,
2418        Err(_) => return results,
2419    };
2420
2421    let root = tree.root_node();
2422    let source_bytes = source.as_bytes();
2423    let node_kinds = binary_expr_node_kinds(lang);
2424
2425    if node_kinds.is_empty() {
2426        return results;
2427    }
2428
2429    // Walk the AST to find binary expression nodes
2430    let mut cursor = root.walk();
2431    collect_binary_exprs(
2432        &mut cursor,
2433        source_bytes,
2434        lang,
2435        node_kinds,
2436        start_line,
2437        end_line,
2438        &mut results,
2439    );
2440
2441    results
2442}
2443
2444/// Recursively collect binary expressions from the AST.
2445fn collect_binary_exprs(
2446    cursor: &mut tree_sitter::TreeCursor,
2447    source: &[u8],
2448    lang: Language,
2449    node_kinds: &[&str],
2450    start_line: usize,
2451    end_line: usize,
2452    results: &mut Vec<(String, String, String, String, usize)>,
2453) {
2454    let node = cursor.node();
2455    let line = node.start_position().row + 1; // tree-sitter is 0-indexed
2456
2457    // Skip nodes outside line range
2458    let node_start_line = node.start_position().row + 1;
2459    let node_end_line = node.end_position().row + 1;
2460
2461    // If the entire node is outside the range, skip
2462    if node_end_line < start_line || node_start_line > end_line {
2463        return;
2464    }
2465
2466    // Check if this node is a binary expression
2467    let kind = node.kind();
2468    if node_kinds.contains(&kind) && line >= start_line && line <= end_line {
2469        // Try to extract operator and operands
2470        if let Some(op) = extract_operator_from_node(&node, source, lang) {
2471            if let Some((left, right)) = extract_operands_from_node(&node, source, lang) {
2472                // Normalize the expression text
2473                let normalized = normalize_expression(&left, &op, &right);
2474                results.push((normalized, op, left, right, line));
2475            }
2476        }
2477    }
2478
2479    // Recurse into children
2480    if cursor.goto_first_child() {
2481        loop {
2482            collect_binary_exprs(
2483                cursor, source, lang, node_kinds, start_line, end_line, results,
2484            );
2485            if !cursor.goto_next_sibling() {
2486                break;
2487            }
2488        }
2489        cursor.goto_parent();
2490    }
2491}
2492
2493// =============================================================================
2494// Tests
2495// =============================================================================
2496
2497#[cfg(test)]
2498mod tests {
2499    use super::*;
2500
2501    // =========================================================================
2502    // Expression Tests
2503    // =========================================================================
2504
2505    #[test]
2506    fn test_expression_new() {
2507        let expr = Expression::new("a + b", vec!["b", "a"], 5);
2508        assert_eq!(expr.text, "a + b");
2509        // Operands should be sorted
2510        assert_eq!(expr.operands, vec!["a", "b"]);
2511        assert_eq!(expr.line, 5);
2512    }
2513
2514    #[test]
2515    fn test_expression_equality_by_text() {
2516        let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2517        let expr2 = Expression::new("a + b", vec!["a", "b"], 100);
2518        assert_eq!(expr1, expr2);
2519    }
2520
2521    #[test]
2522    fn test_expression_inequality_by_text() {
2523        let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2524        let expr2 = Expression::new("a - b", vec!["a", "b"], 1);
2525        assert_ne!(expr1, expr2);
2526    }
2527
2528    #[test]
2529    fn test_expression_hash_by_text() {
2530        use std::collections::hash_map::DefaultHasher;
2531
2532        let expr1 = Expression::new("x * y", vec!["x", "y"], 1);
2533        let expr2 = Expression::new("x * y", vec!["x", "y"], 999);
2534
2535        let mut hasher1 = DefaultHasher::new();
2536        let mut hasher2 = DefaultHasher::new();
2537        expr1.hash(&mut hasher1);
2538        expr2.hash(&mut hasher2);
2539
2540        assert_eq!(hasher1.finish(), hasher2.finish());
2541    }
2542
2543    #[test]
2544    fn test_expression_is_killed_by() {
2545        let expr = Expression::new("a + b", vec!["a", "b"], 1);
2546        assert!(expr.is_killed_by("a"));
2547        assert!(expr.is_killed_by("b"));
2548        assert!(!expr.is_killed_by("c"));
2549    }
2550
2551    #[test]
2552    fn test_expression_hashset() {
2553        let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2554        let expr2 = Expression::new("a + b", vec!["a", "b"], 5);
2555
2556        let mut set: HashSet<Expression> = HashSet::new();
2557        set.insert(expr1);
2558        set.insert(expr2);
2559
2560        // Same text = only one entry
2561        assert_eq!(set.len(), 1);
2562    }
2563
2564    // =========================================================================
2565    // Normalization Tests
2566    // =========================================================================
2567
2568    #[test]
2569    fn test_normalize_commutative_addition() {
2570        assert_eq!(normalize_expression("a", "+", "b"), "a + b");
2571        assert_eq!(normalize_expression("b", "+", "a"), "a + b");
2572    }
2573
2574    #[test]
2575    fn test_normalize_commutative_multiplication() {
2576        assert_eq!(normalize_expression("x", "*", "y"), "x * y");
2577        assert_eq!(normalize_expression("y", "*", "x"), "x * y");
2578    }
2579
2580    #[test]
2581    fn test_normalize_commutative_equality() {
2582        assert_eq!(normalize_expression("foo", "==", "bar"), "bar == foo");
2583        assert_eq!(normalize_expression("bar", "==", "foo"), "bar == foo");
2584    }
2585
2586    #[test]
2587    fn test_normalize_non_commutative_subtraction() {
2588        assert_eq!(normalize_expression("a", "-", "b"), "a - b");
2589        assert_eq!(normalize_expression("b", "-", "a"), "b - a");
2590    }
2591
2592    #[test]
2593    fn test_normalize_non_commutative_division() {
2594        assert_eq!(normalize_expression("x", "/", "y"), "x / y");
2595        assert_eq!(normalize_expression("y", "/", "x"), "y / x");
2596    }
2597
2598    #[test]
2599    fn test_normalize_whitespace_trimmed() {
2600        // ELEPHANT-PASS1-5: Whitespace should be trimmed
2601        assert_eq!(normalize_expression("  a  ", "+", "  b  "), "a + b");
2602        assert_eq!(normalize_expression("\ta\n", "-", "\tb\n"), "a - b");
2603    }
2604
2605    // =========================================================================
2606    // BlockExpressions Tests
2607    // =========================================================================
2608
2609    #[test]
2610    fn test_block_expressions_default() {
2611        let block = BlockExpressions::new();
2612        assert!(block.gen.is_empty());
2613        assert!(block.kill.is_empty());
2614    }
2615
2616    // =========================================================================
2617    // AvailableExprsInfo Tests
2618    // =========================================================================
2619
2620    #[test]
2621    fn test_available_exprs_info_empty() {
2622        let info = AvailableExprsInfo::empty(0);
2623        assert!(info.avail_in.is_empty());
2624        assert!(info.avail_out.is_empty());
2625        assert!(info.all_exprs.is_empty());
2626        assert_eq!(info.entry_block, 0);
2627        assert!(info.expr_instances.is_empty());
2628    }
2629
2630    #[test]
2631    fn test_is_available_true() {
2632        let mut info = AvailableExprsInfo::new(0);
2633        let expr = Expression::new("a + b", vec!["a", "b"], 1);
2634
2635        let mut block_exprs = HashSet::new();
2636        block_exprs.insert(expr.clone());
2637        info.avail_in.insert(1, block_exprs);
2638
2639        assert!(info.is_available(1, &expr));
2640    }
2641
2642    #[test]
2643    fn test_is_available_false_not_in_set() {
2644        let info = AvailableExprsInfo::new(0);
2645        let expr = Expression::new("a + b", vec!["a", "b"], 1);
2646        assert!(!info.is_available(1, &expr));
2647    }
2648
2649    #[test]
2650    fn test_is_available_false_unknown_block() {
2651        let info = AvailableExprsInfo::new(0);
2652        let expr = Expression::new("a + b", vec!["a", "b"], 1);
2653        assert!(!info.is_available(999, &expr));
2654    }
2655
2656    #[test]
2657    fn test_is_available_at_exit_true() {
2658        let mut info = AvailableExprsInfo::new(0);
2659        let expr = Expression::new("a + b", vec!["a", "b"], 1);
2660
2661        let mut block_exprs = HashSet::new();
2662        block_exprs.insert(expr.clone());
2663        info.avail_out.insert(1, block_exprs);
2664
2665        assert!(info.is_available_at_exit(1, &expr));
2666    }
2667
2668    #[test]
2669    fn test_to_json_serializable() {
2670        let mut info = AvailableExprsInfo::new(0);
2671        let expr = Expression::new("a + b", vec!["a", "b"], 2);
2672
2673        let mut block_exprs = HashSet::new();
2674        block_exprs.insert(expr.clone());
2675        info.avail_in.insert(0, HashSet::new());
2676        info.avail_in.insert(1, block_exprs.clone());
2677        info.avail_out.insert(0, block_exprs);
2678        info.all_exprs.insert(expr);
2679
2680        let json = info.to_json();
2681
2682        // Verify it's valid JSON
2683        assert!(json.is_object());
2684        assert!(json.get("avail_in").is_some());
2685        assert!(json.get("avail_out").is_some());
2686        assert!(json.get("all_expressions").is_some());
2687        assert!(json.get("entry_block").is_some());
2688        assert!(json.get("redundant_computations").is_some());
2689    }
2690
2691    #[test]
2692    fn test_to_json_includes_redundant_computations() {
2693        let mut info = AvailableExprsInfo::new(0);
2694
2695        // Add same expression twice (different lines)
2696        info.expr_instances
2697            .push(Expression::new("a + b", vec!["a", "b"], 2));
2698        info.expr_instances
2699            .push(Expression::new("a + b", vec!["a", "b"], 5));
2700
2701        let json = info.to_json();
2702        let redundant = json
2703            .get("redundant_computations")
2704            .unwrap()
2705            .as_array()
2706            .unwrap();
2707
2708        assert_eq!(redundant.len(), 1);
2709        assert_eq!(redundant[0]["expr"], "a + b");
2710        assert_eq!(redundant[0]["first_at"], 2);
2711        assert_eq!(redundant[0]["redundant_at"], 5);
2712    }
2713
2714    // =========================================================================
2715    // Phase 2: Expression Extraction Tests (CAP-AE-12)
2716    // =========================================================================
2717
2718    #[test]
2719    fn test_is_function_call_simple() {
2720        // Simple function calls
2721        assert!(is_function_call("foo()"));
2722        assert!(is_function_call("bar(x)"));
2723        assert!(is_function_call("baz(1, 2, 3)"));
2724    }
2725
2726    #[test]
2727    fn test_is_function_call_method() {
2728        // Method calls
2729        assert!(is_function_call("obj.method()"));
2730        assert!(is_function_call("x.foo(bar)"));
2731        assert!(is_function_call("self.process(data)"));
2732    }
2733
2734    #[test]
2735    fn test_is_function_call_chained() {
2736        // Chained method calls
2737        assert!(is_function_call("a.b.c()"));
2738        assert!(is_function_call("foo().bar()"));
2739    }
2740
2741    #[test]
2742    fn test_is_function_call_false_for_binary_ops() {
2743        // Binary operations should NOT be function calls
2744        assert!(!is_function_call("a + b"));
2745        assert!(!is_function_call("x * y"));
2746        assert!(!is_function_call("foo - bar"));
2747        assert!(!is_function_call("1 + 2"));
2748    }
2749
2750    #[test]
2751    fn test_is_function_call_false_for_parens_in_expr() {
2752        // Parentheses in expressions (not calls)
2753        assert!(!is_function_call("(a + b)"));
2754        assert!(!is_function_call("(x * y) + z"));
2755    }
2756
2757    #[test]
2758    fn test_parse_expression_simple_addition() {
2759        let result = parse_expression_from_line("x = a + b");
2760        assert!(result.is_some());
2761        let (left, op, right) = result.unwrap();
2762        assert_eq!(left, "a");
2763        assert_eq!(op, "+");
2764        assert_eq!(right, "b");
2765    }
2766
2767    #[test]
2768    fn test_parse_expression_multiplication() {
2769        let result = parse_expression_from_line("result = foo * bar");
2770        assert!(result.is_some());
2771        let (left, op, right) = result.unwrap();
2772        assert_eq!(left, "foo");
2773        assert_eq!(op, "*");
2774        assert_eq!(right, "bar");
2775    }
2776
2777    #[test]
2778    fn test_parse_expression_subtraction() {
2779        let result = parse_expression_from_line("y = x - z");
2780        assert!(result.is_some());
2781        let (left, op, right) = result.unwrap();
2782        assert_eq!(left, "x");
2783        assert_eq!(op, "-");
2784        assert_eq!(right, "z");
2785    }
2786
2787    #[test]
2788    fn test_parse_expression_comparison() {
2789        let result = parse_expression_from_line("flag = a == b");
2790        assert!(result.is_some());
2791        let (left, op, right) = result.unwrap();
2792        assert_eq!(left, "a");
2793        assert_eq!(op, "==");
2794        assert_eq!(right, "b");
2795    }
2796
2797    #[test]
2798    fn test_parse_expression_with_comment() {
2799        // Comment should be stripped
2800        let result = parse_expression_from_line("x = a + b  # compute sum");
2801        assert!(result.is_some());
2802        let (left, op, right) = result.unwrap();
2803        assert_eq!(left, "a");
2804        assert_eq!(op, "+");
2805        assert_eq!(right, "b");
2806    }
2807
2808    #[test]
2809    fn test_parse_expression_function_call_excluded() {
2810        // Function calls should NOT produce expressions (CAP-AE-12)
2811        assert!(parse_expression_from_line("x = foo()").is_none());
2812        assert!(parse_expression_from_line("y = bar.baz()").is_none());
2813        assert!(parse_expression_from_line("z = process(data)").is_none());
2814    }
2815
2816    #[test]
2817    fn test_parse_expression_constant_only_excluded() {
2818        // Pure constant expressions should not be CSE targets
2819        assert!(parse_expression_from_line("x = 1 + 2").is_none());
2820        assert!(parse_expression_from_line("y = 10 * 20").is_none());
2821    }
2822
2823    #[test]
2824    fn test_parse_expression_with_spaces() {
2825        // Whitespace should be handled
2826        let result = parse_expression_from_line("   x   =   a   +   b   ");
2827        assert!(result.is_some());
2828        let (left, op, right) = result.unwrap();
2829        assert_eq!(left, "a");
2830        assert_eq!(op, "+");
2831        assert_eq!(right, "b");
2832    }
2833
2834    #[test]
2835    fn test_parse_expression_rightmost_assignment() {
2836        // TIGER-PASS2-1: Only process rightmost assignment
2837        // For "a = b = c + d", we should get "c + d", not "b = c + d"
2838        let result = parse_expression_from_line("a = b = c + d");
2839        assert!(result.is_some());
2840        let (left, op, right) = result.unwrap();
2841        assert_eq!(left, "c");
2842        assert_eq!(op, "+");
2843        assert_eq!(right, "d");
2844    }
2845
2846    #[test]
2847    fn test_extract_base_variable_simple() {
2848        assert_eq!(extract_base_variable("x"), "x");
2849        assert_eq!(extract_base_variable("foo"), "foo");
2850    }
2851
2852    #[test]
2853    fn test_extract_base_variable_field_access() {
2854        assert_eq!(extract_base_variable("x.a"), "x.a");
2855        assert_eq!(extract_base_variable("obj.field"), "obj.field");
2856    }
2857
2858    #[test]
2859    fn test_extract_base_variable_deep_nesting_truncated() {
2860        // TIGER-PASS3-5: Limit to 3 levels
2861        assert_eq!(extract_base_variable("x.a.b.c.d.e"), "x.a.b");
2862        assert_eq!(extract_base_variable("a.b.c.d"), "a.b.c");
2863    }
2864
2865    #[test]
2866    fn test_is_numeric_literal() {
2867        // Integers
2868        assert!(is_numeric_literal("42"));
2869        assert!(is_numeric_literal("-10"));
2870        assert!(is_numeric_literal("0"));
2871
2872        // Floats
2873        assert!(is_numeric_literal("3.14"));
2874        assert!(is_numeric_literal("-2.5"));
2875
2876        // Hex/octal/binary
2877        assert!(is_numeric_literal("0x1f"));
2878        assert!(is_numeric_literal("0o17"));
2879        assert!(is_numeric_literal("0b1010"));
2880
2881        // Not numeric
2882        assert!(!is_numeric_literal("foo"));
2883        assert!(!is_numeric_literal("x"));
2884        assert!(!is_numeric_literal("a + b"));
2885    }
2886
2887    #[test]
2888    fn test_is_identifier() {
2889        assert!(is_identifier("x"));
2890        assert!(is_identifier("foo"));
2891        assert!(is_identifier("_private"));
2892        assert!(is_identifier("var123"));
2893        assert!(is_identifier("obj.field"));
2894
2895        assert!(!is_identifier(""));
2896        assert!(!is_identifier("123"));
2897        assert!(!is_identifier("a + b"));
2898    }
2899
2900    #[test]
2901    fn test_find_operator_in_expr() {
2902        assert_eq!(find_operator_in_expr("a + b", "+"), Some(2));
2903        assert_eq!(find_operator_in_expr("x * y", "*"), Some(2));
2904        assert_eq!(find_operator_in_expr("foo - bar", "-"), Some(4));
2905
2906        // Should not find operator inside parens
2907        assert_eq!(find_operator_in_expr("(a + b) * c", "+"), None);
2908
2909        // Should find outer operator
2910        assert_eq!(find_operator_in_expr("(a + b) * c", "*"), Some(8));
2911    }
2912
2913    #[test]
2914    fn test_find_operator_not_in_string() {
2915        // Operator inside string should not be found
2916        assert_eq!(find_operator_in_expr("\"a + b\"", "+"), None);
2917        assert_eq!(find_operator_in_expr("'x * y'", "*"), None);
2918    }
2919
2920    // =========================================================================
2921    // Phase 3: compute_available_exprs Tests
2922    // =========================================================================
2923
2924    use crate::types::{BlockType, CfgBlock, CfgEdge, EdgeType};
2925
2926    /// Helper to create a minimal CFG for testing
2927    fn make_test_cfg(
2928        blocks: Vec<(usize, BlockType, (u32, u32))>,
2929        edges: Vec<(usize, usize)>,
2930        entry: usize,
2931    ) -> CfgInfo {
2932        CfgInfo {
2933            function: "test".to_string(),
2934            blocks: blocks
2935                .into_iter()
2936                .map(|(id, block_type, lines)| CfgBlock {
2937                    id,
2938                    block_type,
2939                    lines,
2940                    calls: vec![],
2941                })
2942                .collect(),
2943            edges: edges
2944                .into_iter()
2945                .map(|(from, to)| CfgEdge {
2946                    from,
2947                    to,
2948                    edge_type: EdgeType::Unconditional,
2949                    condition: None,
2950                })
2951                .collect(),
2952            entry_block: entry,
2953            exit_blocks: vec![],
2954            cyclomatic_complexity: 1,
2955            nested_functions: HashMap::new(),
2956        }
2957    }
2958
2959    /// Helper to create an empty DFG
2960    fn make_empty_dfg() -> DfgInfo {
2961        DfgInfo {
2962            function: "test".to_string(),
2963            refs: vec![],
2964            edges: vec![],
2965            variables: vec![],
2966        }
2967    }
2968
2969    /// Helper to create a DFG with specific refs
2970    fn make_dfg_with_refs(refs: Vec<VarRef>) -> DfgInfo {
2971        let variables: Vec<String> = refs.iter().map(|r| r.name.clone()).collect();
2972        DfgInfo {
2973            function: "test".to_string(),
2974            refs,
2975            edges: vec![],
2976            variables,
2977        }
2978    }
2979
2980    /// Helper to create a VarRef
2981    fn make_var_ref(name: &str, line: u32, ref_type: RefType) -> VarRef {
2982        VarRef {
2983            name: name.to_string(),
2984            ref_type,
2985            line,
2986            column: 0,
2987            context: None,
2988            group_id: None,
2989        }
2990    }
2991
2992    #[test]
2993    fn test_compute_empty_cfg_empty_dfg() {
2994        // Empty function - no blocks means NoCfg error
2995        let cfg = CfgInfo {
2996            function: "empty".to_string(),
2997            blocks: vec![],
2998            edges: vec![],
2999            entry_block: 0,
3000            exit_blocks: vec![],
3001            cyclomatic_complexity: 1,
3002            nested_functions: HashMap::new(),
3003        };
3004        let dfg = make_empty_dfg();
3005
3006        let result = compute_available_exprs(&cfg, &dfg);
3007        assert!(result.is_err());
3008    }
3009
3010    #[test]
3011    fn test_compute_single_block_no_exprs() {
3012        // Single entry block, no expressions
3013        let cfg = make_test_cfg(vec![(0, BlockType::Entry, (1, 1))], vec![], 0);
3014        let dfg = make_empty_dfg();
3015
3016        let result = compute_available_exprs(&cfg, &dfg);
3017        assert!(result.is_ok());
3018        let info = result.unwrap();
3019
3020        // No expressions
3021        assert!(info.all_exprs.is_empty());
3022
3023        // avail_in and avail_out should exist for block 0
3024        assert!(info.avail_in.contains_key(&0));
3025        assert!(info.avail_out.contains_key(&0));
3026
3027        // Entry block should have nothing available at entry
3028        assert!(info.avail_in.get(&0).unwrap().is_empty());
3029    }
3030
3031    #[test]
3032    fn test_compute_entry_block_nothing_available() {
3033        // Entry block always has nothing available at entry
3034        let cfg = make_test_cfg(
3035            vec![(0, BlockType::Entry, (1, 5)), (1, BlockType::Exit, (6, 10))],
3036            vec![(0, 1)],
3037            0,
3038        );
3039        let dfg = make_empty_dfg();
3040
3041        let result = compute_available_exprs(&cfg, &dfg).unwrap();
3042
3043        // Entry block avail_in should be empty
3044        assert!(result.avail_in.get(&0).unwrap().is_empty());
3045    }
3046
3047    #[test]
3048    fn test_compute_linear_cfg_expression_propagates() {
3049        // Linear CFG: 0 -> 1 -> 2
3050        // Expression computed in block 0 should be available in blocks 1 and 2
3051        let cfg = make_test_cfg(
3052            vec![
3053                (0, BlockType::Entry, (1, 2)),
3054                (1, BlockType::Body, (3, 4)),
3055                (2, BlockType::Exit, (5, 6)),
3056            ],
3057            vec![(0, 1), (1, 2)],
3058            0,
3059        );
3060
3061        // DFG with a+b expression in block 0
3062        let dfg = make_dfg_with_refs(vec![
3063            make_var_ref("x", 2, RefType::Definition),
3064            make_var_ref("a", 2, RefType::Use),
3065            make_var_ref("b", 2, RefType::Use),
3066        ]);
3067
3068        let result = compute_available_exprs(&cfg, &dfg).unwrap();
3069
3070        // If expressions were extracted, they should propagate downstream
3071        // Note: this depends on expression extraction working
3072        if !result.all_exprs.is_empty() {
3073            let expr = result.all_exprs.iter().next().unwrap();
3074
3075            // Expression generated in block 0 should be available at exit of 0
3076            assert!(result.is_available_at_exit(0, expr));
3077
3078            // Available at entry to block 1
3079            assert!(result.is_available(1, expr));
3080
3081            // Available at entry to block 2
3082            assert!(result.is_available(2, expr));
3083        }
3084    }
3085
3086    #[test]
3087    fn test_compute_diamond_must_intersection() {
3088        // Diamond CFG:
3089        //      [0: entry]
3090        //       /      \
3091        //   [1:x=a+b]  [2:skip]
3092        //       \      /
3093        //        [3:merge]
3094        //
3095        // Expression in only one branch should NOT be available at merge (MUST)
3096        let cfg = make_test_cfg(
3097            vec![
3098                (0, BlockType::Entry, (1, 1)),
3099                (1, BlockType::Body, (2, 2)),
3100                (2, BlockType::Body, (3, 3)),
3101                (3, BlockType::Exit, (4, 4)),
3102            ],
3103            vec![(0, 1), (0, 2), (1, 3), (2, 3)],
3104            0,
3105        );
3106
3107        // Expression only in block 1
3108        let dfg = make_dfg_with_refs(vec![
3109            make_var_ref("x", 2, RefType::Definition),
3110            make_var_ref("a", 2, RefType::Use),
3111            make_var_ref("b", 2, RefType::Use),
3112        ]);
3113
3114        let result = compute_available_exprs(&cfg, &dfg).unwrap();
3115
3116        // If expressions were extracted from block 1 only
3117        if !result.all_exprs.is_empty() {
3118            let expr = result.all_exprs.iter().next().unwrap();
3119
3120            // Expression should be available at exit of block 1
3121            assert!(result.is_available_at_exit(1, expr));
3122
3123            // Block 2 has no expressions, so nothing generated there
3124            // At merge (block 3), MUST analysis (intersection) means:
3125            // avail_in[3] = avail_out[1] ∩ avail_out[2]
3126            // Since block 2 doesn't have the expr in avail_out, it won't be in avail_in[3]
3127
3128            // This test validates MUST semantics
3129            // Note: Block 2's avail_out will be what flows through from avail_in[2]
3130        }
3131    }
3132
3133    #[test]
3134    fn test_compute_self_loop_terminates() {
3135        // CFG with self-loop: 0 -> 1 -> 1 (self-loop)
3136        let cfg = make_test_cfg(
3137            vec![
3138                (0, BlockType::Entry, (1, 1)),
3139                (1, BlockType::LoopHeader, (2, 3)),
3140            ],
3141            vec![(0, 1), (1, 1)],
3142            0,
3143        );
3144        let dfg = make_empty_dfg();
3145
3146        // Should not infinite loop
3147        let result = compute_available_exprs(&cfg, &dfg);
3148        assert!(result.is_ok());
3149    }
3150
3151    #[test]
3152    fn test_compute_unreachable_block() {
3153        // CFG with unreachable block
3154        // 0 -> 1, but block 2 is unreachable
3155        let cfg = make_test_cfg(
3156            vec![
3157                (0, BlockType::Entry, (1, 1)),
3158                (1, BlockType::Exit, (2, 2)),
3159                (2, BlockType::Body, (3, 3)), // No edges to this block
3160            ],
3161            vec![(0, 1)],
3162            0,
3163        );
3164        let dfg = make_empty_dfg();
3165
3166        let result = compute_available_exprs(&cfg, &dfg);
3167        assert!(result.is_ok());
3168
3169        let info = result.unwrap();
3170
3171        // Unreachable block should have empty avail_in (no predecessors)
3172        assert!(info.avail_in.get(&2).unwrap().is_empty());
3173    }
3174
3175    #[test]
3176    fn test_is_killed_function() {
3177        let kills: HashSet<String> = ["a".to_string(), "x".to_string()].into_iter().collect();
3178
3179        let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
3180        let expr2 = Expression::new("c + d", vec!["c", "d"], 2);
3181        let expr3 = Expression::new("x + y", vec!["x", "y"], 3);
3182
3183        // expr1 uses 'a', which is in kills
3184        assert!(is_killed(&expr1, &kills));
3185
3186        // expr2 uses 'c' and 'd', neither in kills
3187        assert!(!is_killed(&expr2, &kills));
3188
3189        // expr3 uses 'x', which is in kills
3190        assert!(is_killed(&expr3, &kills));
3191    }
3192
3193    #[test]
3194    fn test_compute_loop_expression_available_in_body() {
3195        // Loop CFG: 0 -> 1 (header) <-> 2 (body) -> 3 (exit)
3196        let cfg = make_test_cfg(
3197            vec![
3198                (0, BlockType::Entry, (1, 1)),
3199                (1, BlockType::LoopHeader, (2, 2)),
3200                (2, BlockType::LoopBody, (3, 3)),
3201                (3, BlockType::Exit, (4, 4)),
3202            ],
3203            vec![(0, 1), (1, 2), (2, 1), (1, 3)],
3204            0,
3205        );
3206        let dfg = make_empty_dfg();
3207
3208        let result = compute_available_exprs(&cfg, &dfg);
3209        assert!(result.is_ok());
3210    }
3211
3212    // =========================================================================
3213    // AST-based expression extraction tests
3214    // =========================================================================
3215
3216    #[test]
3217    fn test_extract_binary_exprs_from_ast_python() {
3218        let source = r#"
3219def example(a, b, c):
3220    x = a + b
3221    if a * c > 10:
3222        return a + b
3223    y = c - a
3224"#;
3225        let lang = crate::types::Language::Python;
3226        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 6);
3227        // Should find: a + b (line 3), a * c (line 4), a + b (line 5), c - a (line 6)
3228        assert!(
3229            exprs.len() >= 3,
3230            "Expected at least 3 binary exprs from Python, got {}: {:?}",
3231            exprs.len(),
3232            exprs
3233        );
3234        // Check that we found a + b
3235        let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
3236        assert!(
3237            texts
3238                .iter()
3239                .any(|t| t.contains("a") && t.contains("b") && t.contains("+")),
3240            "Should find a + b expression, got: {:?}",
3241            texts
3242        );
3243    }
3244
3245    #[test]
3246    fn test_extract_binary_exprs_from_ast_go() {
3247        let source = r#"
3248package main
3249
3250func example(a int, b int, c int) int {
3251    x := a + b
3252    if a * c > 10 {
3253        return a + b
3254    }
3255    y := c - a
3256    return y
3257}
3258"#;
3259        let lang = crate::types::Language::Go;
3260        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3261        assert!(
3262            exprs.len() >= 3,
3263            "Expected at least 3 binary exprs from Go, got {}: {:?}",
3264            exprs.len(),
3265            exprs
3266        );
3267    }
3268
3269    #[test]
3270    fn test_extract_binary_exprs_from_ast_java() {
3271        let source = r#"
3272class Example {
3273    int example(int a, int b, int c) {
3274        int x = a + b;
3275        if (a * c > 10) {
3276            return a + b;
3277        }
3278        int y = c - a;
3279        return y;
3280    }
3281}
3282"#;
3283        let lang = crate::types::Language::Java;
3284        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3285        assert!(
3286            exprs.len() >= 3,
3287            "Expected at least 3 binary exprs from Java, got {}: {:?}",
3288            exprs.len(),
3289            exprs
3290        );
3291    }
3292
3293    #[test]
3294    fn test_extract_binary_exprs_from_ast_ruby() {
3295        let source = r#"
3296def example(a, b, c)
3297  x = a + b
3298  if a * c > 10
3299    return a + b
3300  end
3301  y = c - a
3302  y
3303end
3304"#;
3305        let lang = crate::types::Language::Ruby;
3306        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3307        assert!(
3308            exprs.len() >= 3,
3309            "Expected at least 3 binary exprs from Ruby, got {}: {:?}",
3310            exprs.len(),
3311            exprs
3312        );
3313    }
3314
3315    #[test]
3316    fn test_extract_binary_exprs_from_ast_cpp() {
3317        let source = r#"
3318int example(int a, int b, int c) {
3319    int x = a + b;
3320    if (a * c > 10) {
3321        return a + b;
3322    }
3323    int y = c - a;
3324    return y;
3325}
3326"#;
3327        let lang = crate::types::Language::Cpp;
3328        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3329        assert!(
3330            exprs.len() >= 3,
3331            "Expected at least 3 binary exprs from C++, got {}: {:?}",
3332            exprs.len(),
3333            exprs
3334        );
3335    }
3336
3337    #[test]
3338    fn test_extract_binary_exprs_from_ast_php() {
3339        let source = r#"<?php
3340function example($a, $b, $c) {
3341    $x = $a + $b;
3342    if ($a * $c > 10) {
3343        return $a + $b;
3344    }
3345    $y = $c - $a;
3346    return $y;
3347}
3348"#;
3349        let lang = crate::types::Language::Php;
3350        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3351        assert!(
3352            exprs.len() >= 3,
3353            "Expected at least 3 binary exprs from PHP, got {}: {:?}",
3354            exprs.len(),
3355            exprs
3356        );
3357    }
3358
3359    #[test]
3360    fn test_extract_binary_exprs_from_ast_csharp() {
3361        let source = r#"
3362class Example {
3363    int DoWork(int a, int b, int c) {
3364        int x = a + b;
3365        if (a * c > 10) {
3366            return a + b;
3367        }
3368        int y = c - a;
3369        return y;
3370    }
3371}
3372"#;
3373        let lang = crate::types::Language::CSharp;
3374        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3375        assert!(
3376            exprs.len() >= 3,
3377            "Expected at least 3 binary exprs from C#, got {}: {:?}",
3378            exprs.len(),
3379            exprs
3380        );
3381    }
3382
3383    #[test]
3384    fn test_extract_binary_exprs_from_ast_kotlin() {
3385        let source = r#"
3386fun example(a: Int, b: Int, c: Int): Int {
3387    val x = a + b
3388    if (a * c > 10) {
3389        return a + b
3390    }
3391    val y = c - a
3392    return y
3393}
3394"#;
3395        let lang = crate::types::Language::Kotlin;
3396        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3397        assert!(
3398            exprs.len() >= 3,
3399            "Expected at least 3 binary exprs from Kotlin, got {}: {:?}",
3400            exprs.len(),
3401            exprs
3402        );
3403    }
3404
3405    #[test]
3406    fn test_extract_binary_exprs_from_ast_elixir() {
3407        let source = r#"
3408defmodule Example do
3409  def example(a, b, c) do
3410    x = a + b
3411    if a * c > 10 do
3412      a + b
3413    end
3414    y = c - a
3415    y
3416  end
3417end
3418"#;
3419        let lang = crate::types::Language::Elixir;
3420        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3421        assert!(
3422            exprs.len() >= 3,
3423            "Expected at least 3 binary exprs from Elixir, got {}: {:?}",
3424            exprs.len(),
3425            exprs
3426        );
3427    }
3428
3429    #[test]
3430    fn test_extract_binary_exprs_from_ast_rust() {
3431        let source = r#"
3432fn example(a: i32, b: i32, c: i32) -> i32 {
3433    let x = a + b;
3434    if a * c > 10 {
3435        return a + b;
3436    }
3437    let y = c - a;
3438    y
3439}
3440"#;
3441        let lang = crate::types::Language::Rust;
3442        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3443        assert!(
3444            exprs.len() >= 3,
3445            "Expected at least 3 binary exprs from Rust, got {}: {:?}",
3446            exprs.len(),
3447            exprs
3448        );
3449    }
3450
3451    #[test]
3452    fn test_extract_binary_exprs_excludes_function_calls() {
3453        let source = r#"
3454def example(a, b):
3455    x = a + b
3456    y = len(a)
3457    z = foo(a, b)
3458"#;
3459        let lang = crate::types::Language::Python;
3460        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 5);
3461        // Should find a + b but NOT the function calls
3462        let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
3463        assert!(
3464            !texts.iter().any(|t| t.contains("len") || t.contains("foo")),
3465            "Should not include function calls, got: {:?}",
3466            texts
3467        );
3468    }
3469
3470    #[test]
3471    fn test_extract_binary_exprs_normalizes_commutative() {
3472        let source = r#"
3473def example(a, b):
3474    x = b + a
3475    y = a + b
3476"#;
3477        let lang = crate::types::Language::Python;
3478        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
3479        // Both should normalize to "a + b"
3480        let texts: Vec<String> = exprs.iter().map(|e| e.0.clone()).collect();
3481        if texts.len() >= 2 {
3482            assert_eq!(
3483                texts[0], texts[1],
3484                "Commutative exprs should normalize to same text: {:?}",
3485                texts
3486            );
3487        }
3488    }
3489
3490    #[test]
3491    fn test_extract_binary_exprs_returns_line_numbers() {
3492        let source = r#"
3493def example(a, b):
3494    x = a + b
3495    y = a - b
3496"#;
3497        let lang = crate::types::Language::Python;
3498        let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
3499        assert!(
3500            exprs.len() >= 2,
3501            "Expected at least 2 exprs, got {}",
3502            exprs.len()
3503        );
3504        // Lines should be > 0
3505        for (text, _op, _left, _right, line) in &exprs {
3506            assert!(*line > 0, "Line should be > 0 for expr: {}", text);
3507        }
3508    }
3509
3510    #[test]
3511    fn test_parse_expression_no_assignment_return() {
3512        // Expressions in return statements (no assignment)
3513        let result = parse_expression_from_line("    return a + b");
3514        assert!(
3515            result.is_some(),
3516            "Should parse expression from return statement"
3517        );
3518        let (left, op, right) = result.unwrap();
3519        assert_eq!(op, "+");
3520        assert!(left == "a" || right == "a");
3521    }
3522
3523    #[test]
3524    fn test_parse_expression_no_assignment_if() {
3525        // Expressions in if conditions
3526        let result = parse_expression_from_line("    if a + b > 10:");
3527        // Should find at least one binary expression
3528        assert!(
3529            result.is_some(),
3530            "Should parse expression from if condition"
3531        );
3532    }
3533
3534    #[test]
3535    fn test_parse_expression_no_assignment_standalone() {
3536        // Standalone expression
3537        let result = parse_expression_from_line("    a + b");
3538        assert!(
3539            result.is_some(),
3540            "Should parse standalone binary expression"
3541        );
3542    }
3543}