use std::collections::{HashMap, HashSet};
use std::hash::{Hash, Hasher};
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use super::types::{build_predecessors, reverse_postorder, validate_cfg, BlockId, DataflowError};
use crate::types::{CfgInfo, DfgInfo, RefType, VarRef};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum Confidence {
#[default]
Low,
Medium,
High,
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct UncertainFinding {
pub expr: String,
pub line: usize,
pub reason: String,
}
pub const COMMUTATIVE_OPS: &[&str] = &["+", "*", "==", "!=", "and", "or", "&", "|", "^"];
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Expression {
pub text: String,
pub operands: Vec<String>,
pub line: usize,
}
impl PartialEq for Expression {
fn eq(&self, other: &Self) -> bool {
self.text == other.text
}
}
impl Eq for Expression {}
impl Hash for Expression {
fn hash<H: Hasher>(&self, state: &mut H) {
self.text.hash(state);
}
}
impl Expression {
pub fn new(text: impl Into<String>, operands: Vec<impl Into<String>>, line: usize) -> Self {
let mut ops: Vec<String> = operands.into_iter().map(|s| s.into()).collect();
ops.sort();
Self {
text: text.into(),
operands: ops,
line,
}
}
pub fn is_killed_by(&self, var: &str) -> bool {
self.operands.iter().any(|op| op == var)
}
}
pub fn normalize_expression(left: &str, op: &str, right: &str) -> String {
let left = left.trim();
let right = right.trim();
if COMMUTATIVE_OPS.contains(&op) {
let mut operands = [left, right];
operands.sort();
format!("{} {} {}", operands[0], op, operands[1])
} else {
format!("{} {} {}", left, op, right)
}
}
#[derive(Debug, Clone, Default)]
pub struct BlockExpressions {
pub gen: HashSet<Expression>,
pub kill: HashSet<String>,
}
impl BlockExpressions {
pub fn new() -> Self {
Self::default()
}
}
#[derive(Debug, Clone)]
pub struct ExprInstance {
pub expr: Expression,
pub block_id: BlockId,
}
impl ExprInstance {
pub fn new(expr: Expression, block_id: BlockId) -> Self {
Self { expr, block_id }
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AvailableExprsInfo {
#[serde(
serialize_with = "serialize_avail_map",
deserialize_with = "deserialize_avail_map"
)]
pub avail_in: HashMap<BlockId, HashSet<Expression>>,
#[serde(
serialize_with = "serialize_avail_map",
deserialize_with = "deserialize_avail_map"
)]
pub avail_out: HashMap<BlockId, HashSet<Expression>>,
pub all_exprs: HashSet<Expression>,
pub entry_block: BlockId,
#[serde(skip)]
pub expr_instances: Vec<Expression>,
#[serde(skip)]
pub expr_instances_with_blocks: Vec<ExprInstance>,
#[serde(skip)]
pub defs_per_line: HashMap<usize, HashSet<String>>,
#[serde(skip)]
pub line_to_block: HashMap<usize, BlockId>,
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub uncertain_exprs: Vec<UncertainFinding>,
#[serde(default)]
pub confidence: Confidence,
}
fn serialize_avail_map<S>(
map: &HashMap<BlockId, HashSet<Expression>>,
serializer: S,
) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let sorted: IndexMap<String, Vec<&Expression>> = map
.iter()
.map(|(k, v)| {
let mut exprs: Vec<_> = v.iter().collect();
exprs.sort_by_key(|e| &e.text);
(k.to_string(), exprs)
})
.collect::<Vec<_>>()
.into_iter()
.collect();
let ordered: IndexMap<_, _> = sorted.into_iter().collect();
ordered.serialize(serializer)
}
fn deserialize_avail_map<'de, D>(
deserializer: D,
) -> Result<HashMap<BlockId, HashSet<Expression>>, D::Error>
where
D: serde::Deserializer<'de>,
{
let map: HashMap<String, Vec<Expression>> = HashMap::deserialize(deserializer)?;
Ok(map
.into_iter()
.map(|(k, v)| {
let block_id: BlockId = k.parse().unwrap_or(0);
let exprs: HashSet<Expression> = v.into_iter().collect();
(block_id, exprs)
})
.collect())
}
impl AvailableExprsInfo {
pub fn empty(entry_block: BlockId) -> Self {
Self {
avail_in: HashMap::new(),
avail_out: HashMap::new(),
all_exprs: HashSet::new(),
entry_block,
expr_instances: Vec::new(),
expr_instances_with_blocks: Vec::new(),
defs_per_line: HashMap::new(),
line_to_block: HashMap::new(),
uncertain_exprs: Vec::new(),
confidence: Confidence::default(),
}
}
pub fn new(entry_block: BlockId) -> Self {
Self::empty(entry_block)
}
pub fn is_available(&self, block: BlockId, expr: &Expression) -> bool {
self.avail_in
.get(&block)
.is_some_and(|set| set.contains(expr))
}
pub fn is_available_at_exit(&self, block: BlockId, expr: &Expression) -> bool {
self.avail_out
.get(&block)
.is_some_and(|set| set.contains(expr))
}
pub fn redundant_computations(&self) -> Vec<(String, usize, usize)> {
let mut redundant = Vec::new();
if !self.expr_instances_with_blocks.is_empty() {
let mut first_seen: HashMap<String, (usize, BlockId)> = HashMap::new();
let mut block_exprs: HashMap<BlockId, Vec<&ExprInstance>> = HashMap::new();
for inst in &self.expr_instances_with_blocks {
block_exprs.entry(inst.block_id).or_default().push(inst);
}
for exprs in block_exprs.values_mut() {
exprs.sort_by_key(|e| e.expr.line);
}
let mut block_ids: Vec<_> = block_exprs.keys().copied().collect();
block_ids.sort();
for &block_id in &block_ids {
if let Some(exprs) = block_exprs.get(&block_id) {
let mut gen_in_block: HashMap<String, usize> = HashMap::new();
for inst in exprs {
let expr = &inst.expr;
let line = expr.line;
let mut is_redundant = false;
if let Some(&(first_line, first_block)) = first_seen.get(&expr.text) {
let start_line = if first_block == block_id {
first_line + 1
} else {
1 };
let mut killed = false;
for check_line in start_line..line {
if let Some(defs) = self.defs_per_line.get(&check_line) {
if expr.operands.iter().any(|op| defs.contains(op)) {
killed = true;
break;
}
}
}
if !killed && first_line != line {
is_redundant = true;
redundant.push((expr.text.clone(), first_line, line));
}
}
if !is_redundant {
if let Some(&gen_line) = gen_in_block.get(&expr.text) {
let mut killed = false;
for check_line in (gen_line + 1)..line {
if let Some(defs) = self.defs_per_line.get(&check_line) {
if expr.operands.iter().any(|op| defs.contains(op)) {
killed = true;
break;
}
}
}
if !killed && gen_line != line {
let first_line = first_seen
.get(&expr.text)
.map(|(l, _)| *l)
.unwrap_or(gen_line);
if first_line != line {
redundant.push((expr.text.clone(), first_line, line));
}
}
}
}
if !first_seen.contains_key(&expr.text) {
first_seen.insert(expr.text.clone(), (line, block_id));
}
gen_in_block.entry(expr.text.clone()).or_insert(line);
}
}
}
} else {
let mut seen: HashMap<String, usize> = HashMap::new();
for expr in &self.expr_instances {
if let Some(&first_line) = seen.get(&expr.text) {
redundant.push((expr.text.clone(), first_line, expr.line));
} else {
seen.insert(expr.text.clone(), expr.line);
}
}
}
redundant.sort_by_key(|(_, _, line)| *line);
redundant
}
pub fn get_available_at_line(&self, line: usize, cfg: &CfgInfo) -> HashSet<Expression> {
let block_id = if let Some(&bid) = self.line_to_block.get(&line) {
bid
} else {
let found = cfg.blocks.iter().find(|b| {
let (start, end) = b.lines;
(start as usize) <= line && line <= (end as usize)
});
match found {
Some(block) => block.id,
None => return HashSet::new(),
}
};
let mut available = self.avail_in.get(&block_id).cloned().unwrap_or_default();
let block_start = cfg
.blocks
.iter()
.find(|b| b.id == block_id)
.map(|b| b.lines.0 as usize)
.unwrap_or(line);
let mut killed: HashSet<String> = HashSet::new();
for check_line in block_start..line {
if let Some(defs) = self.defs_per_line.get(&check_line) {
for def in defs {
killed.insert(def.clone());
}
}
}
available.retain(|expr| !expr.operands.iter().any(|op| killed.contains(op)));
available
}
pub fn to_json(&self) -> serde_json::Value {
let avail_map_to_json = |map: &HashMap<BlockId, HashSet<Expression>>| -> IndexMap<String, Vec<serde_json::Value>> {
let mut sorted: Vec<_> = map.iter().collect();
sorted.sort_by_key(|(k, _)| *k);
sorted
.into_iter()
.map(|(k, v)| {
let mut exprs: Vec<_> = v.iter().collect();
exprs.sort_by_key(|e| &e.text);
let expr_values: Vec<_> = exprs
.into_iter()
.map(|e| {
serde_json::json!({
"text": e.text,
"operands": e.operands,
"line": e.line,
})
})
.collect();
(k.to_string(), expr_values)
})
.collect()
};
let avail_in = avail_map_to_json(&self.avail_in);
let avail_out = avail_map_to_json(&self.avail_out);
let mut all_exprs: Vec<_> = self.all_exprs.iter().collect();
all_exprs.sort_by_key(|e| &e.text);
let all_expressions: Vec<_> = all_exprs
.into_iter()
.map(|e| {
serde_json::json!({
"text": e.text,
"operands": e.operands,
"line": e.line,
})
})
.collect();
let redundant: Vec<_> = self
.redundant_computations()
.into_iter()
.map(|(expr, first, redundant)| {
serde_json::json!({
"expr": expr,
"first_at": first,
"redundant_at": redundant,
})
})
.collect();
let uncertain: Vec<_> = self
.uncertain_exprs
.iter()
.map(|uf| {
serde_json::json!({
"expr": uf.expr,
"line": uf.line,
"reason": uf.reason,
})
})
.collect();
let confidence_str = match self.confidence {
Confidence::Low => "low",
Confidence::Medium => "medium",
Confidence::High => "high",
};
serde_json::json!({
"avail_in": avail_in,
"avail_out": avail_out,
"all_expressions": all_expressions,
"entry_block": self.entry_block,
"redundant_computations": redundant,
"uncertain_exprs": uncertain,
"confidence": confidence_str,
})
}
}
const BINARY_OPS: &[&str] = &[
"+", "-", "*", "/", "%", "//", "**", "==", "!=", "<", ">", "<=", ">=", "and", "or", "&&", "||", "&", "|", "^", "<<", ">>",
];
pub fn is_function_call(text: &str) -> bool {
let trimmed = text.trim();
if let Some(paren_idx) = trimmed.find('(') {
let before_paren = &trimmed[..paren_idx];
let before_trimmed = before_paren.trim_end();
if before_trimmed.is_empty() {
return false;
}
if let Some(last_char) = before_trimmed.chars().last() {
if last_char.is_alphanumeric() || last_char == '_' {
return true;
}
}
}
false
}
fn detect_uncertain_expression(line: &str, line_num: usize) -> Option<UncertainFinding> {
let trimmed = line.trim();
const SKIP_PREFIXES: &[&str] = &[
"#",
"//",
"/*",
"@",
"import ",
"from ",
"use ",
"class ",
"def ",
"fn ",
"func ",
"function ",
"pub ",
"struct ",
"enum ",
"trait ",
"impl ",
"interface ",
];
if trimmed.is_empty() || SKIP_PREFIXES.iter().any(|p| trimmed.starts_with(p)) {
return None;
}
fn has_binary_operator(s: &str) -> bool {
s.contains(" + ") || s.contains(" - ") || s.contains(" * ") || s.contains(" / ")
}
if let Some(eq_idx) = trimmed.find('=') {
if eq_idx > 0 {
let before = trimmed.as_bytes().get(eq_idx.wrapping_sub(1));
let after = trimmed.as_bytes().get(eq_idx + 1);
let is_comparison =
matches!(before, Some(b'!' | b'<' | b'>' | b'=')) || matches!(after, Some(b'='));
if !is_comparison {
let rhs = trimmed[eq_idx + 1..].trim();
if is_function_call(rhs) && has_binary_operator(rhs) {
return Some(UncertainFinding {
expr: rhs.to_string(),
line: line_num,
reason: "contains function call - purity unknown".to_string(),
});
}
if is_function_call(rhs) && rhs.contains('.') {
return Some(UncertainFinding {
expr: rhs.to_string(),
line: line_num,
reason: "contains method access - purity unknown".to_string(),
});
}
}
}
}
if is_function_call(trimmed) && has_binary_operator(trimmed) {
return Some(UncertainFinding {
expr: trimmed.to_string(),
line: line_num,
reason: "function calls may have side effects".to_string(),
});
}
None
}
pub fn parse_expression_from_line(line: &str) -> Option<(String, String, String)> {
let line = if let Some(idx) = line.find('#') {
&line[..idx]
} else if let Some(idx) = line.find("//") {
&line[..idx]
} else {
line
};
let mut rhs_start = None;
let chars: Vec<char> = line.chars().collect();
for i in (0..chars.len()).rev() {
if chars[i] == '=' {
let is_comparison = (i > 0 && matches!(chars[i - 1], '=' | '!' | '<' | '>' | ':'))
|| (i + 1 < chars.len() && chars[i + 1] == '=');
if !is_comparison {
rhs_start = Some(i + 1);
break;
}
}
}
let expr_text = if let Some(start) = rhs_start {
line[start..].trim()
} else {
let trimmed = line.trim();
let stripped = if let Some(rest) = trimmed.strip_prefix("return ") {
rest.trim()
} else if let Some(rest) = trimmed.strip_prefix("return(") {
rest.trim()
} else if let Some(rest) = trimmed.strip_prefix("if ") {
let r = rest.trim();
let r = r.strip_suffix(':').unwrap_or(r);
let r = r.strip_suffix('{').unwrap_or(r);
let r = r.strip_prefix('(').unwrap_or(r);
let r = r.strip_suffix(')').unwrap_or(r);
r.trim()
} else if let Some(rest) = trimmed.strip_prefix("elif ") {
let r = rest.trim();
r.strip_suffix(':').unwrap_or(r).trim()
} else if let Some(rest) = trimmed.strip_prefix("else if ") {
let r = rest.trim();
let r = r.strip_suffix('{').unwrap_or(r);
let r = r.strip_prefix('(').unwrap_or(r);
let r = r.strip_suffix(')').unwrap_or(r);
r.trim()
} else if let Some(rest) = trimmed.strip_prefix("while ") {
let r = rest.trim();
let r = r.strip_suffix(':').unwrap_or(r);
let r = r.strip_suffix('{').unwrap_or(r);
let r = r.strip_prefix('(').unwrap_or(r);
let r = r.strip_suffix(')').unwrap_or(r);
r.trim()
} else if let Some(rest) = trimmed.strip_prefix("assert ") {
rest.trim()
} else if let Some(rest) = trimmed.strip_prefix("yield ") {
rest.trim()
} else {
trimmed
};
stripped
};
if is_function_call(expr_text) {
return None;
}
let mut ops_by_len: Vec<&str> = BINARY_OPS.to_vec();
ops_by_len.sort_by_key(|op| std::cmp::Reverse(op.len()));
for op in ops_by_len {
if let Some(op_idx) = find_operator_in_expr(expr_text, op) {
let left = expr_text[..op_idx].trim();
let right = expr_text[op_idx + op.len()..].trim();
let left = extract_base_variable(left);
let right = extract_base_variable(right);
if is_function_call(&left) || is_function_call(&right) {
return None;
}
if left.is_empty() || right.is_empty() {
continue;
}
if is_numeric_literal(&left) && is_numeric_literal(&right) {
continue;
}
return Some((left, op.to_string(), right));
}
}
None
}
fn find_operator_in_expr(expr: &str, op: &str) -> Option<usize> {
let chars: Vec<char> = expr.chars().collect();
let op_chars: Vec<char> = op.chars().collect();
let mut paren_depth: usize = 0;
let mut in_string = false;
let mut string_char = '"';
let mut i = 0;
while i < chars.len() {
let c = chars[i];
if !in_string && (c == '"' || c == '\'') {
in_string = true;
string_char = c;
i += 1;
continue;
}
if in_string && c == string_char && (i == 0 || chars[i - 1] != '\\') {
in_string = false;
i += 1;
continue;
}
if in_string {
i += 1;
continue;
}
if c == '(' || c == '[' || c == '{' {
paren_depth += 1;
i += 1;
continue;
}
if c == ')' || c == ']' || c == '}' {
paren_depth = paren_depth.saturating_sub(1);
i += 1;
continue;
}
if paren_depth == 0 {
if i + op_chars.len() <= chars.len() {
let matches = op_chars
.iter()
.enumerate()
.all(|(j, &oc)| chars[i + j] == oc);
if matches {
if op.chars().all(|c| c.is_alphabetic()) {
let before_ok = i == 0 || !chars[i - 1].is_alphanumeric();
let after_ok =
i + op.len() >= chars.len() || !chars[i + op.len()].is_alphanumeric();
if before_ok && after_ok {
return Some(i);
}
} else {
return Some(i);
}
}
}
}
i += 1;
}
None
}
fn extract_base_variable(text: &str) -> String {
let trimmed = text.trim();
let parts: Vec<&str> = trimmed.split('.').collect();
if parts.len() > 3 {
parts[..3].join(".")
} else {
trimmed.to_string()
}
}
fn is_numeric_literal(text: &str) -> bool {
let trimmed = text.trim();
if trimmed.parse::<i64>().is_ok() {
return true;
}
if trimmed.parse::<f64>().is_ok() {
return true;
}
if trimmed.starts_with("0x") || trimmed.starts_with("0o") || trimmed.starts_with("0b") {
return true;
}
false
}
#[derive(Debug, Clone, Default)]
pub struct ExtractionResult {
pub all_exprs: HashSet<Expression>,
pub block_info: HashMap<BlockId, BlockExpressions>,
pub expr_instances: Vec<Expression>,
pub expr_instances_with_blocks: Vec<ExprInstance>,
pub defs_per_line: HashMap<usize, HashSet<String>>,
pub line_to_block: HashMap<usize, BlockId>,
pub uncertain_exprs: Vec<UncertainFinding>,
}
#[allow(dead_code)]
fn is_identifier(text: &str) -> bool {
let trimmed = text.trim();
if trimmed.is_empty() {
return false;
}
let mut chars = trimmed.chars();
match chars.next() {
Some(c) if c.is_alphabetic() || c == '_' => {}
_ => return false,
}
chars.all(|c| c.is_alphanumeric() || c == '_' || c == '.')
}
pub fn extract_expressions_from_refs(
cfg: &CfgInfo,
dfg: &DfgInfo,
) -> (
HashSet<Expression>,
HashMap<BlockId, BlockExpressions>,
Vec<Expression>,
) {
let mut all_exprs: HashSet<Expression> = HashSet::new();
let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
let mut expr_instances: Vec<Expression> = Vec::new();
for block in &cfg.blocks {
block_info.insert(block.id, BlockExpressions::new());
}
let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
for var_ref in &dfg.refs {
let block_id = find_block_for_line(cfg, var_ref.line);
if let Some(bid) = block_id {
refs_by_line
.entry((bid, var_ref.line))
.or_default()
.push(var_ref);
}
}
for ((block_id, line), refs) in refs_by_line {
let defs: Vec<_> = refs
.iter()
.filter(|r| matches!(r.ref_type, RefType::Definition))
.collect();
let uses: Vec<_> = refs
.iter()
.filter(|r| matches!(r.ref_type, RefType::Use))
.collect();
if defs.is_empty() || uses.len() < 2 {
continue;
}
let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
if unique_uses.len() >= 2 {
let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
operands.sort();
if let Some(op) = infer_operator_from_uses(&use_names) {
let text = normalize_expression(&operands[0], &op, &operands[1]);
let expr = Expression::new(text, operands.clone(), line as usize);
all_exprs.insert(expr.clone());
expr_instances.push(expr.clone());
if let Some(block_expr) = block_info.get_mut(&block_id) {
block_expr.gen.insert(expr);
}
}
}
for def in &defs {
if let Some(block_expr) = block_info.get_mut(&block_id) {
block_expr.kill.insert(def.name.clone());
}
}
}
expr_instances.sort_by_key(|e| e.line);
(all_exprs, block_info, expr_instances)
}
pub fn extract_expressions_from_refs_with_source(
cfg: &CfgInfo,
dfg: &DfgInfo,
source_lines: Option<&[String]>,
) -> (
HashSet<Expression>,
HashMap<BlockId, BlockExpressions>,
Vec<Expression>,
) {
let mut all_exprs: HashSet<Expression> = HashSet::new();
let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
let mut expr_instances: Vec<Expression> = Vec::new();
for block in &cfg.blocks {
block_info.insert(block.id, BlockExpressions::new());
}
if let Some(lines) = source_lines {
for (line_num, line_text) in lines.iter().enumerate() {
let line = (line_num + 1) as u32;
if let Some((left, op, right)) = parse_expression_from_line(line_text) {
let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
let has_left = refs_on_line.iter().any(|r| r.name == left);
let has_right = refs_on_line.iter().any(|r| r.name == right);
if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
{
let text = normalize_expression(&left, &op, &right);
let operands = vec![left.clone(), right.clone()];
let expr = Expression::new(text, operands, line as usize);
if let Some(block_id) = find_block_for_line(cfg, line) {
all_exprs.insert(expr.clone());
expr_instances.push(expr.clone());
if let Some(block_expr) = block_info.get_mut(&block_id) {
block_expr.gen.insert(expr);
}
}
}
}
}
for var_ref in &dfg.refs {
if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
if let Some(block_expr) = block_info.get_mut(&block_id) {
block_expr.kill.insert(var_ref.name.clone());
}
}
}
}
} else {
return extract_expressions_from_refs(cfg, dfg);
}
expr_instances.sort_by_key(|e| e.line);
(all_exprs, block_info, expr_instances)
}
fn find_block_for_line(cfg: &CfgInfo, line: u32) -> Option<BlockId> {
for block in &cfg.blocks {
if block.lines.0 <= line && line <= block.lines.1 {
return Some(block.id);
}
}
cfg.blocks
.iter()
.min_by_key(|b| {
let dist_start = (b.lines.0 as i64 - line as i64).abs();
let dist_end = (b.lines.1 as i64 - line as i64).abs();
dist_start.min(dist_end)
})
.map(|b| b.id)
}
fn infer_operator_from_uses(uses: &[&str]) -> Option<String> {
if uses.len() >= 2 {
Some("+".to_string())
} else {
None
}
}
pub fn extract_expressions_full(
cfg: &CfgInfo,
dfg: &DfgInfo,
source_lines: Option<&[String]>,
) -> ExtractionResult {
extract_expressions_full_with_lang(cfg, dfg, source_lines, None)
}
pub fn extract_expressions_full_with_lang(
cfg: &CfgInfo,
dfg: &DfgInfo,
source_lines: Option<&[String]>,
lang: Option<Language>,
) -> ExtractionResult {
let mut result = ExtractionResult::default();
for block in &cfg.blocks {
result.block_info.insert(block.id, BlockExpressions::new());
for line in block.lines.0..=block.lines.1 {
result.line_to_block.insert(line as usize, block.id);
}
}
for var_ref in &dfg.refs {
if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
result
.defs_per_line
.entry(var_ref.line as usize)
.or_default()
.insert(var_ref.name.clone());
if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
if let Some(block_expr) = result.block_info.get_mut(&block_id) {
block_expr.kill.insert(var_ref.name.clone());
}
}
}
}
if let Some(lines) = source_lines {
for (line_num, line_text) in lines.iter().enumerate() {
let line = (line_num + 1) as u32;
if let Some((left, op, right)) = parse_expression_from_line(line_text) {
let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
let has_left = refs_on_line.iter().any(|r| r.name == left);
let has_right = refs_on_line.iter().any(|r| r.name == right);
if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
{
let text = normalize_expression(&left, &op, &right);
let operands = vec![left.clone(), right.clone()];
let expr = Expression::new(text, operands, line as usize);
if let Some(block_id) = find_block_for_line(cfg, line) {
result.all_exprs.insert(expr.clone());
result.expr_instances.push(expr.clone());
result
.expr_instances_with_blocks
.push(ExprInstance::new(expr.clone(), block_id));
if let Some(block_expr) = result.block_info.get_mut(&block_id) {
block_expr.gen.insert(expr);
}
}
}
} else {
if let Some(uncertain) = detect_uncertain_expression(line_text, line as usize) {
if find_block_for_line(cfg, line).is_some() {
result.uncertain_exprs.push(uncertain);
}
}
}
}
} else {
let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
for var_ref in &dfg.refs {
if let Some(bid) = find_block_for_line(cfg, var_ref.line) {
refs_by_line
.entry((bid, var_ref.line))
.or_default()
.push(var_ref);
}
}
for ((block_id, line), refs) in refs_by_line {
let defs: Vec<_> = refs
.iter()
.filter(|r| matches!(r.ref_type, RefType::Definition))
.collect();
let uses: Vec<_> = refs
.iter()
.filter(|r| matches!(r.ref_type, RefType::Use))
.collect();
if defs.is_empty() || uses.len() < 2 {
continue;
}
let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
if unique_uses.len() >= 2 {
let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
operands.sort();
if let Some(op) = infer_operator_from_uses(&use_names) {
let text = normalize_expression(&operands[0], &op, &operands[1]);
let expr = Expression::new(text, operands.clone(), line as usize);
result.all_exprs.insert(expr.clone());
result.expr_instances.push(expr.clone());
result
.expr_instances_with_blocks
.push(ExprInstance::new(expr.clone(), block_id));
if let Some(block_expr) = result.block_info.get_mut(&block_id) {
block_expr.gen.insert(expr);
}
}
}
}
}
if let (Some(lines), Some(language)) = (source_lines, lang) {
let full_source = lines.join("\n");
let min_line = cfg.blocks.iter().map(|b| b.lines.0).min().unwrap_or(1) as usize;
let max_line = cfg.blocks.iter().map(|b| b.lines.1).max().unwrap_or(1) as usize;
let ast_exprs = extract_binary_exprs_from_ast(&full_source, language, min_line, max_line);
for (text, _op, left, right, line) in ast_exprs {
let line_u32 = line as u32;
let already_found = result
.all_exprs
.iter()
.any(|e| e.text == text && e.line == line);
if already_found {
continue;
}
let operands = vec![left.clone(), right.clone()];
let expr = Expression::new(text.clone(), operands, line);
if let Some(block_id) = find_block_for_line(cfg, line_u32) {
result.all_exprs.insert(expr.clone());
result.expr_instances.push(expr.clone());
result
.expr_instances_with_blocks
.push(ExprInstance::new(expr.clone(), block_id));
if let Some(block_expr) = result.block_info.get_mut(&block_id) {
block_expr.gen.insert(expr);
}
}
}
}
result.expr_instances.sort_by_key(|e| e.line);
result
.expr_instances_with_blocks
.sort_by_key(|e| e.expr.line);
result
}
pub fn compute_available_exprs(
cfg: &CfgInfo,
dfg: &DfgInfo,
) -> Result<AvailableExprsInfo, DataflowError> {
validate_cfg(cfg)?;
let entry = cfg.entry_block;
let extraction = extract_expressions_full(cfg, dfg, None);
let all_exprs = extraction.all_exprs;
let block_info = extraction.block_info;
let expr_instances = extraction.expr_instances;
let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
let defs_per_line = extraction.defs_per_line;
let line_to_block = extraction.line_to_block;
if all_exprs.is_empty() {
let mut info = AvailableExprsInfo::empty(entry);
for block in &cfg.blocks {
info.avail_in.insert(block.id, HashSet::new());
info.avail_out.insert(block.id, HashSet::new());
}
return Ok(info);
}
let predecessors = build_predecessors(cfg);
let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
avail_in.insert(entry, HashSet::new());
let entry_gen = block_info
.get(&entry)
.map(|b| b.gen.clone())
.unwrap_or_default();
avail_out.insert(entry, entry_gen);
for block in &cfg.blocks {
if block.id != entry {
avail_in.insert(block.id, all_exprs.clone());
avail_out.insert(block.id, all_exprs.clone());
}
}
let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
let mut changed = true;
let mut iteration = 0;
let block_order = reverse_postorder(cfg);
while changed && iteration < max_iterations {
changed = false;
iteration += 1;
for &block_id in &block_order {
if block_id == entry {
continue;
}
let preds = predecessors
.get(&block_id)
.map(|v| v.as_slice())
.unwrap_or(&[]);
let new_in: HashSet<Expression> = if preds.is_empty() {
HashSet::new()
} else {
let mut result = avail_out
.get(&preds[0])
.cloned()
.unwrap_or_else(|| all_exprs.clone());
for &pred in &preds[1..] {
let pred_out = avail_out
.get(&pred)
.cloned()
.unwrap_or_else(|| all_exprs.clone());
result = result.intersection(&pred_out).cloned().collect();
}
result
};
let info = block_info.get(&block_id);
let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
let not_killed: HashSet<Expression> = new_in
.iter()
.filter(|expr| !is_killed(expr, &kill))
.cloned()
.collect();
let new_out: HashSet<Expression> = gen.union(¬_killed).cloned().collect();
if avail_in.get(&block_id) != Some(&new_in)
|| avail_out.get(&block_id) != Some(&new_out)
{
changed = true;
avail_in.insert(block_id, new_in);
avail_out.insert(block_id, new_out);
}
}
}
if iteration >= max_iterations {
return Err(DataflowError::IterationLimit {
iterations: iteration,
});
}
Ok(AvailableExprsInfo {
avail_in,
avail_out,
all_exprs,
entry_block: entry,
expr_instances,
expr_instances_with_blocks,
defs_per_line,
line_to_block,
uncertain_exprs: Vec::new(),
confidence: Confidence::High,
})
}
pub fn compute_available_exprs_with_source(
cfg: &CfgInfo,
dfg: &DfgInfo,
source_lines: &[String],
) -> Result<AvailableExprsInfo, DataflowError> {
compute_available_exprs_with_source_and_lang(cfg, dfg, source_lines, None)
}
pub fn compute_available_exprs_with_source_and_lang(
cfg: &CfgInfo,
dfg: &DfgInfo,
source_lines: &[String],
lang: Option<Language>,
) -> Result<AvailableExprsInfo, DataflowError> {
validate_cfg(cfg)?;
let entry = cfg.entry_block;
let extraction = extract_expressions_full_with_lang(cfg, dfg, Some(source_lines), lang);
let all_exprs = extraction.all_exprs;
let block_info = extraction.block_info;
let expr_instances = extraction.expr_instances;
let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
let defs_per_line = extraction.defs_per_line;
let line_to_block = extraction.line_to_block;
let uncertain_exprs = extraction.uncertain_exprs;
if all_exprs.is_empty() {
let mut info = AvailableExprsInfo::empty(entry);
info.uncertain_exprs = uncertain_exprs.clone();
info.confidence = compute_confidence(0, uncertain_exprs.len());
for block in &cfg.blocks {
info.avail_in.insert(block.id, HashSet::new());
info.avail_out.insert(block.id, HashSet::new());
}
return Ok(info);
}
let predecessors = build_predecessors(cfg);
let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
avail_in.insert(entry, HashSet::new());
let entry_gen = block_info
.get(&entry)
.map(|b| b.gen.clone())
.unwrap_or_default();
avail_out.insert(entry, entry_gen);
for block in &cfg.blocks {
if block.id != entry {
avail_in.insert(block.id, all_exprs.clone());
avail_out.insert(block.id, all_exprs.clone());
}
}
let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
let mut changed = true;
let mut iteration = 0;
let block_order = reverse_postorder(cfg);
while changed && iteration < max_iterations {
changed = false;
iteration += 1;
for &block_id in &block_order {
if block_id == entry {
continue;
}
let preds = predecessors
.get(&block_id)
.map(|v| v.as_slice())
.unwrap_or(&[]);
let new_in: HashSet<Expression> = if preds.is_empty() {
HashSet::new()
} else {
let mut result = avail_out
.get(&preds[0])
.cloned()
.unwrap_or_else(|| all_exprs.clone());
for &pred in &preds[1..] {
let pred_out = avail_out
.get(&pred)
.cloned()
.unwrap_or_else(|| all_exprs.clone());
result = result.intersection(&pred_out).cloned().collect();
}
result
};
let info = block_info.get(&block_id);
let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
let not_killed: HashSet<Expression> = new_in
.iter()
.filter(|expr| !is_killed(expr, &kill))
.cloned()
.collect();
let new_out: HashSet<Expression> = gen.union(¬_killed).cloned().collect();
if avail_in.get(&block_id) != Some(&new_in)
|| avail_out.get(&block_id) != Some(&new_out)
{
changed = true;
avail_in.insert(block_id, new_in);
avail_out.insert(block_id, new_out);
}
}
}
if iteration >= max_iterations {
return Err(DataflowError::IterationLimit {
iterations: iteration,
});
}
let confidence = compute_confidence(all_exprs.len(), uncertain_exprs.len());
Ok(AvailableExprsInfo {
avail_in,
avail_out,
all_exprs,
entry_block: entry,
expr_instances,
expr_instances_with_blocks,
defs_per_line,
line_to_block,
uncertain_exprs,
confidence,
})
}
fn compute_confidence(confirmed: usize, uncertain: usize) -> Confidence {
if confirmed == 0 && uncertain == 0 {
Confidence::Low
} else if uncertain == 0 {
Confidence::High
} else if confirmed >= uncertain {
Confidence::Medium
} else {
Confidence::Low
}
}
fn is_killed(expr: &Expression, kills: &HashSet<String>) -> bool {
expr.operands.iter().any(|op| kills.contains(op))
}
use crate::types::Language;
fn binary_expr_node_kinds(lang: Language) -> &'static [&'static str] {
match lang {
Language::Python => &["binary_operator", "boolean_operator", "comparison_operator"],
Language::Go => &["binary_expression"],
Language::TypeScript | Language::JavaScript => &["binary_expression"],
Language::Java => &["binary_expression"],
Language::Rust => &["binary_expression"],
Language::C | Language::Cpp => &["binary_expression"],
Language::Ruby => &["binary"],
Language::Php => &["binary_expression"],
Language::Kotlin => &["binary_expression"],
Language::CSharp => &["binary_expression"],
Language::Scala => &["infix_expression"],
Language::Elixir => &["binary_operator"],
Language::Ocaml => &["infix_expression"],
Language::Lua | Language::Luau => &["binary_expression"],
Language::Swift => &["infix_expression"],
}
}
fn extract_operator_from_node<'a>(
node: &tree_sitter::Node<'a>,
source: &'a [u8],
_lang: Language,
) -> Option<String> {
if let Some(op_node) = node.child_by_field_name("operator") {
let op_text = op_node.utf8_text(source).unwrap_or("").trim().to_string();
if !op_text.is_empty() {
return Some(op_text);
}
}
let child_count = node.child_count();
for i in 0..child_count {
if let Some(child) = node.child(i) {
if !child.is_named() {
let text = child.utf8_text(source).unwrap_or("").trim();
if matches!(
text,
"+" | "-"
| "*"
| "/"
| "%"
| "//"
| "**"
| "=="
| "!="
| "<"
| ">"
| "<="
| ">="
| "&&"
| "||"
| "and"
| "or"
| "&"
| "|"
| "^"
| "<<"
| ">>"
| "<>"
| "==="
| "!=="
| "<=>"
| ".."
| "..."
| "in"
| "not in"
| "|>"
| "<|"
| "++"
) {
return Some(text.to_string());
}
}
}
}
None
}
fn extract_operands_from_node<'a>(
node: &tree_sitter::Node<'a>,
source: &'a [u8],
lang: Language,
) -> Option<(String, String)> {
let left = node.child_by_field_name("left").or_else(|| {
node.named_child(0)
});
let right = node.child_by_field_name("right").or_else(|| {
let count = node.named_child_count();
if count >= 2 {
node.named_child(count - 1)
} else {
None
}
});
match (left, right) {
(Some(l), Some(r)) if l.id() != r.id() => {
let left_text = l.utf8_text(source).unwrap_or("").trim().to_string();
let right_text = r.utf8_text(source).unwrap_or("").trim().to_string();
if left_text.is_empty() || right_text.is_empty() {
return None;
}
let left_base = extract_base_variable(&left_text);
let right_base = extract_base_variable(&right_text);
if is_function_call(&left_base) || is_function_call(&right_base) {
return None;
}
if is_numeric_literal(&left_base) && is_numeric_literal(&right_base) {
return None;
}
let left_final = if matches!(lang, Language::Php) {
left_base.trim_start_matches('$').to_string()
} else {
left_base
};
let right_final = if matches!(lang, Language::Php) {
right_base.trim_start_matches('$').to_string()
} else {
right_base
};
Some((left_final, right_final))
}
_ => None,
}
}
pub fn extract_binary_exprs_from_ast(
source: &str,
lang: Language,
start_line: usize,
end_line: usize,
) -> Vec<(String, String, String, String, usize)> {
let mut results = Vec::new();
let tree = match crate::ast::parser::parse(source, lang) {
Ok(t) => t,
Err(_) => return results,
};
let root = tree.root_node();
let source_bytes = source.as_bytes();
let node_kinds = binary_expr_node_kinds(lang);
if node_kinds.is_empty() {
return results;
}
let mut cursor = root.walk();
collect_binary_exprs(
&mut cursor,
source_bytes,
lang,
node_kinds,
start_line,
end_line,
&mut results,
);
results
}
fn collect_binary_exprs(
cursor: &mut tree_sitter::TreeCursor,
source: &[u8],
lang: Language,
node_kinds: &[&str],
start_line: usize,
end_line: usize,
results: &mut Vec<(String, String, String, String, usize)>,
) {
let node = cursor.node();
let line = node.start_position().row + 1;
let node_start_line = node.start_position().row + 1;
let node_end_line = node.end_position().row + 1;
if node_end_line < start_line || node_start_line > end_line {
return;
}
let kind = node.kind();
if node_kinds.contains(&kind) && line >= start_line && line <= end_line {
if let Some(op) = extract_operator_from_node(&node, source, lang) {
if let Some((left, right)) = extract_operands_from_node(&node, source, lang) {
let normalized = normalize_expression(&left, &op, &right);
results.push((normalized, op, left, right, line));
}
}
}
if cursor.goto_first_child() {
loop {
collect_binary_exprs(
cursor, source, lang, node_kinds, start_line, end_line, results,
);
if !cursor.goto_next_sibling() {
break;
}
}
cursor.goto_parent();
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_expression_new() {
let expr = Expression::new("a + b", vec!["b", "a"], 5);
assert_eq!(expr.text, "a + b");
assert_eq!(expr.operands, vec!["a", "b"]);
assert_eq!(expr.line, 5);
}
#[test]
fn test_expression_equality_by_text() {
let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
let expr2 = Expression::new("a + b", vec!["a", "b"], 100);
assert_eq!(expr1, expr2);
}
#[test]
fn test_expression_inequality_by_text() {
let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
let expr2 = Expression::new("a - b", vec!["a", "b"], 1);
assert_ne!(expr1, expr2);
}
#[test]
fn test_expression_hash_by_text() {
use std::collections::hash_map::DefaultHasher;
let expr1 = Expression::new("x * y", vec!["x", "y"], 1);
let expr2 = Expression::new("x * y", vec!["x", "y"], 999);
let mut hasher1 = DefaultHasher::new();
let mut hasher2 = DefaultHasher::new();
expr1.hash(&mut hasher1);
expr2.hash(&mut hasher2);
assert_eq!(hasher1.finish(), hasher2.finish());
}
#[test]
fn test_expression_is_killed_by() {
let expr = Expression::new("a + b", vec!["a", "b"], 1);
assert!(expr.is_killed_by("a"));
assert!(expr.is_killed_by("b"));
assert!(!expr.is_killed_by("c"));
}
#[test]
fn test_expression_hashset() {
let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
let expr2 = Expression::new("a + b", vec!["a", "b"], 5);
let mut set: HashSet<Expression> = HashSet::new();
set.insert(expr1);
set.insert(expr2);
assert_eq!(set.len(), 1);
}
#[test]
fn test_normalize_commutative_addition() {
assert_eq!(normalize_expression("a", "+", "b"), "a + b");
assert_eq!(normalize_expression("b", "+", "a"), "a + b");
}
#[test]
fn test_normalize_commutative_multiplication() {
assert_eq!(normalize_expression("x", "*", "y"), "x * y");
assert_eq!(normalize_expression("y", "*", "x"), "x * y");
}
#[test]
fn test_normalize_commutative_equality() {
assert_eq!(normalize_expression("foo", "==", "bar"), "bar == foo");
assert_eq!(normalize_expression("bar", "==", "foo"), "bar == foo");
}
#[test]
fn test_normalize_non_commutative_subtraction() {
assert_eq!(normalize_expression("a", "-", "b"), "a - b");
assert_eq!(normalize_expression("b", "-", "a"), "b - a");
}
#[test]
fn test_normalize_non_commutative_division() {
assert_eq!(normalize_expression("x", "/", "y"), "x / y");
assert_eq!(normalize_expression("y", "/", "x"), "y / x");
}
#[test]
fn test_normalize_whitespace_trimmed() {
assert_eq!(normalize_expression(" a ", "+", " b "), "a + b");
assert_eq!(normalize_expression("\ta\n", "-", "\tb\n"), "a - b");
}
#[test]
fn test_block_expressions_default() {
let block = BlockExpressions::new();
assert!(block.gen.is_empty());
assert!(block.kill.is_empty());
}
#[test]
fn test_available_exprs_info_empty() {
let info = AvailableExprsInfo::empty(0);
assert!(info.avail_in.is_empty());
assert!(info.avail_out.is_empty());
assert!(info.all_exprs.is_empty());
assert_eq!(info.entry_block, 0);
assert!(info.expr_instances.is_empty());
}
#[test]
fn test_is_available_true() {
let mut info = AvailableExprsInfo::new(0);
let expr = Expression::new("a + b", vec!["a", "b"], 1);
let mut block_exprs = HashSet::new();
block_exprs.insert(expr.clone());
info.avail_in.insert(1, block_exprs);
assert!(info.is_available(1, &expr));
}
#[test]
fn test_is_available_false_not_in_set() {
let info = AvailableExprsInfo::new(0);
let expr = Expression::new("a + b", vec!["a", "b"], 1);
assert!(!info.is_available(1, &expr));
}
#[test]
fn test_is_available_false_unknown_block() {
let info = AvailableExprsInfo::new(0);
let expr = Expression::new("a + b", vec!["a", "b"], 1);
assert!(!info.is_available(999, &expr));
}
#[test]
fn test_is_available_at_exit_true() {
let mut info = AvailableExprsInfo::new(0);
let expr = Expression::new("a + b", vec!["a", "b"], 1);
let mut block_exprs = HashSet::new();
block_exprs.insert(expr.clone());
info.avail_out.insert(1, block_exprs);
assert!(info.is_available_at_exit(1, &expr));
}
#[test]
fn test_to_json_serializable() {
let mut info = AvailableExprsInfo::new(0);
let expr = Expression::new("a + b", vec!["a", "b"], 2);
let mut block_exprs = HashSet::new();
block_exprs.insert(expr.clone());
info.avail_in.insert(0, HashSet::new());
info.avail_in.insert(1, block_exprs.clone());
info.avail_out.insert(0, block_exprs);
info.all_exprs.insert(expr);
let json = info.to_json();
assert!(json.is_object());
assert!(json.get("avail_in").is_some());
assert!(json.get("avail_out").is_some());
assert!(json.get("all_expressions").is_some());
assert!(json.get("entry_block").is_some());
assert!(json.get("redundant_computations").is_some());
}
#[test]
fn test_to_json_includes_redundant_computations() {
let mut info = AvailableExprsInfo::new(0);
info.expr_instances
.push(Expression::new("a + b", vec!["a", "b"], 2));
info.expr_instances
.push(Expression::new("a + b", vec!["a", "b"], 5));
let json = info.to_json();
let redundant = json
.get("redundant_computations")
.unwrap()
.as_array()
.unwrap();
assert_eq!(redundant.len(), 1);
assert_eq!(redundant[0]["expr"], "a + b");
assert_eq!(redundant[0]["first_at"], 2);
assert_eq!(redundant[0]["redundant_at"], 5);
}
#[test]
fn test_is_function_call_simple() {
assert!(is_function_call("foo()"));
assert!(is_function_call("bar(x)"));
assert!(is_function_call("baz(1, 2, 3)"));
}
#[test]
fn test_is_function_call_method() {
assert!(is_function_call("obj.method()"));
assert!(is_function_call("x.foo(bar)"));
assert!(is_function_call("self.process(data)"));
}
#[test]
fn test_is_function_call_chained() {
assert!(is_function_call("a.b.c()"));
assert!(is_function_call("foo().bar()"));
}
#[test]
fn test_is_function_call_false_for_binary_ops() {
assert!(!is_function_call("a + b"));
assert!(!is_function_call("x * y"));
assert!(!is_function_call("foo - bar"));
assert!(!is_function_call("1 + 2"));
}
#[test]
fn test_is_function_call_false_for_parens_in_expr() {
assert!(!is_function_call("(a + b)"));
assert!(!is_function_call("(x * y) + z"));
}
#[test]
fn test_parse_expression_simple_addition() {
let result = parse_expression_from_line("x = a + b");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "a");
assert_eq!(op, "+");
assert_eq!(right, "b");
}
#[test]
fn test_parse_expression_multiplication() {
let result = parse_expression_from_line("result = foo * bar");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "foo");
assert_eq!(op, "*");
assert_eq!(right, "bar");
}
#[test]
fn test_parse_expression_subtraction() {
let result = parse_expression_from_line("y = x - z");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "x");
assert_eq!(op, "-");
assert_eq!(right, "z");
}
#[test]
fn test_parse_expression_comparison() {
let result = parse_expression_from_line("flag = a == b");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "a");
assert_eq!(op, "==");
assert_eq!(right, "b");
}
#[test]
fn test_parse_expression_with_comment() {
let result = parse_expression_from_line("x = a + b # compute sum");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "a");
assert_eq!(op, "+");
assert_eq!(right, "b");
}
#[test]
fn test_parse_expression_function_call_excluded() {
assert!(parse_expression_from_line("x = foo()").is_none());
assert!(parse_expression_from_line("y = bar.baz()").is_none());
assert!(parse_expression_from_line("z = process(data)").is_none());
}
#[test]
fn test_parse_expression_constant_only_excluded() {
assert!(parse_expression_from_line("x = 1 + 2").is_none());
assert!(parse_expression_from_line("y = 10 * 20").is_none());
}
#[test]
fn test_parse_expression_with_spaces() {
let result = parse_expression_from_line(" x = a + b ");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "a");
assert_eq!(op, "+");
assert_eq!(right, "b");
}
#[test]
fn test_parse_expression_rightmost_assignment() {
let result = parse_expression_from_line("a = b = c + d");
assert!(result.is_some());
let (left, op, right) = result.unwrap();
assert_eq!(left, "c");
assert_eq!(op, "+");
assert_eq!(right, "d");
}
#[test]
fn test_extract_base_variable_simple() {
assert_eq!(extract_base_variable("x"), "x");
assert_eq!(extract_base_variable("foo"), "foo");
}
#[test]
fn test_extract_base_variable_field_access() {
assert_eq!(extract_base_variable("x.a"), "x.a");
assert_eq!(extract_base_variable("obj.field"), "obj.field");
}
#[test]
fn test_extract_base_variable_deep_nesting_truncated() {
assert_eq!(extract_base_variable("x.a.b.c.d.e"), "x.a.b");
assert_eq!(extract_base_variable("a.b.c.d"), "a.b.c");
}
#[test]
fn test_is_numeric_literal() {
assert!(is_numeric_literal("42"));
assert!(is_numeric_literal("-10"));
assert!(is_numeric_literal("0"));
assert!(is_numeric_literal("3.14"));
assert!(is_numeric_literal("-2.5"));
assert!(is_numeric_literal("0x1f"));
assert!(is_numeric_literal("0o17"));
assert!(is_numeric_literal("0b1010"));
assert!(!is_numeric_literal("foo"));
assert!(!is_numeric_literal("x"));
assert!(!is_numeric_literal("a + b"));
}
#[test]
fn test_is_identifier() {
assert!(is_identifier("x"));
assert!(is_identifier("foo"));
assert!(is_identifier("_private"));
assert!(is_identifier("var123"));
assert!(is_identifier("obj.field"));
assert!(!is_identifier(""));
assert!(!is_identifier("123"));
assert!(!is_identifier("a + b"));
}
#[test]
fn test_find_operator_in_expr() {
assert_eq!(find_operator_in_expr("a + b", "+"), Some(2));
assert_eq!(find_operator_in_expr("x * y", "*"), Some(2));
assert_eq!(find_operator_in_expr("foo - bar", "-"), Some(4));
assert_eq!(find_operator_in_expr("(a + b) * c", "+"), None);
assert_eq!(find_operator_in_expr("(a + b) * c", "*"), Some(8));
}
#[test]
fn test_find_operator_not_in_string() {
assert_eq!(find_operator_in_expr("\"a + b\"", "+"), None);
assert_eq!(find_operator_in_expr("'x * y'", "*"), None);
}
use crate::types::{BlockType, CfgBlock, CfgEdge, EdgeType};
fn make_test_cfg(
blocks: Vec<(usize, BlockType, (u32, u32))>,
edges: Vec<(usize, usize)>,
entry: usize,
) -> CfgInfo {
CfgInfo {
function: "test".to_string(),
blocks: blocks
.into_iter()
.map(|(id, block_type, lines)| CfgBlock {
id,
block_type,
lines,
calls: vec![],
})
.collect(),
edges: edges
.into_iter()
.map(|(from, to)| CfgEdge {
from,
to,
edge_type: EdgeType::Unconditional,
condition: None,
})
.collect(),
entry_block: entry,
exit_blocks: vec![],
cyclomatic_complexity: 1,
nested_functions: HashMap::new(),
}
}
fn make_empty_dfg() -> DfgInfo {
DfgInfo {
function: "test".to_string(),
refs: vec![],
edges: vec![],
variables: vec![],
}
}
fn make_dfg_with_refs(refs: Vec<VarRef>) -> DfgInfo {
let variables: Vec<String> = refs.iter().map(|r| r.name.clone()).collect();
DfgInfo {
function: "test".to_string(),
refs,
edges: vec![],
variables,
}
}
fn make_var_ref(name: &str, line: u32, ref_type: RefType) -> VarRef {
VarRef {
name: name.to_string(),
ref_type,
line,
column: 0,
context: None,
group_id: None,
}
}
#[test]
fn test_compute_empty_cfg_empty_dfg() {
let cfg = CfgInfo {
function: "empty".to_string(),
blocks: vec![],
edges: vec![],
entry_block: 0,
exit_blocks: vec![],
cyclomatic_complexity: 1,
nested_functions: HashMap::new(),
};
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg);
assert!(result.is_err());
}
#[test]
fn test_compute_single_block_no_exprs() {
let cfg = make_test_cfg(vec![(0, BlockType::Entry, (1, 1))], vec![], 0);
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg);
assert!(result.is_ok());
let info = result.unwrap();
assert!(info.all_exprs.is_empty());
assert!(info.avail_in.contains_key(&0));
assert!(info.avail_out.contains_key(&0));
assert!(info.avail_in.get(&0).unwrap().is_empty());
}
#[test]
fn test_compute_entry_block_nothing_available() {
let cfg = make_test_cfg(
vec![(0, BlockType::Entry, (1, 5)), (1, BlockType::Exit, (6, 10))],
vec![(0, 1)],
0,
);
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg).unwrap();
assert!(result.avail_in.get(&0).unwrap().is_empty());
}
#[test]
fn test_compute_linear_cfg_expression_propagates() {
let cfg = make_test_cfg(
vec![
(0, BlockType::Entry, (1, 2)),
(1, BlockType::Body, (3, 4)),
(2, BlockType::Exit, (5, 6)),
],
vec![(0, 1), (1, 2)],
0,
);
let dfg = make_dfg_with_refs(vec![
make_var_ref("x", 2, RefType::Definition),
make_var_ref("a", 2, RefType::Use),
make_var_ref("b", 2, RefType::Use),
]);
let result = compute_available_exprs(&cfg, &dfg).unwrap();
if !result.all_exprs.is_empty() {
let expr = result.all_exprs.iter().next().unwrap();
assert!(result.is_available_at_exit(0, expr));
assert!(result.is_available(1, expr));
assert!(result.is_available(2, expr));
}
}
#[test]
fn test_compute_diamond_must_intersection() {
let cfg = make_test_cfg(
vec![
(0, BlockType::Entry, (1, 1)),
(1, BlockType::Body, (2, 2)),
(2, BlockType::Body, (3, 3)),
(3, BlockType::Exit, (4, 4)),
],
vec![(0, 1), (0, 2), (1, 3), (2, 3)],
0,
);
let dfg = make_dfg_with_refs(vec![
make_var_ref("x", 2, RefType::Definition),
make_var_ref("a", 2, RefType::Use),
make_var_ref("b", 2, RefType::Use),
]);
let result = compute_available_exprs(&cfg, &dfg).unwrap();
if !result.all_exprs.is_empty() {
let expr = result.all_exprs.iter().next().unwrap();
assert!(result.is_available_at_exit(1, expr));
}
}
#[test]
fn test_compute_self_loop_terminates() {
let cfg = make_test_cfg(
vec![
(0, BlockType::Entry, (1, 1)),
(1, BlockType::LoopHeader, (2, 3)),
],
vec![(0, 1), (1, 1)],
0,
);
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg);
assert!(result.is_ok());
}
#[test]
fn test_compute_unreachable_block() {
let cfg = make_test_cfg(
vec![
(0, BlockType::Entry, (1, 1)),
(1, BlockType::Exit, (2, 2)),
(2, BlockType::Body, (3, 3)), ],
vec![(0, 1)],
0,
);
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg);
assert!(result.is_ok());
let info = result.unwrap();
assert!(info.avail_in.get(&2).unwrap().is_empty());
}
#[test]
fn test_is_killed_function() {
let kills: HashSet<String> = ["a".to_string(), "x".to_string()].into_iter().collect();
let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
let expr2 = Expression::new("c + d", vec!["c", "d"], 2);
let expr3 = Expression::new("x + y", vec!["x", "y"], 3);
assert!(is_killed(&expr1, &kills));
assert!(!is_killed(&expr2, &kills));
assert!(is_killed(&expr3, &kills));
}
#[test]
fn test_compute_loop_expression_available_in_body() {
let cfg = make_test_cfg(
vec![
(0, BlockType::Entry, (1, 1)),
(1, BlockType::LoopHeader, (2, 2)),
(2, BlockType::LoopBody, (3, 3)),
(3, BlockType::Exit, (4, 4)),
],
vec![(0, 1), (1, 2), (2, 1), (1, 3)],
0,
);
let dfg = make_empty_dfg();
let result = compute_available_exprs(&cfg, &dfg);
assert!(result.is_ok());
}
#[test]
fn test_extract_binary_exprs_from_ast_python() {
let source = r#"
def example(a, b, c):
x = a + b
if a * c > 10:
return a + b
y = c - a
"#;
let lang = crate::types::Language::Python;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 6);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Python, got {}: {:?}",
exprs.len(),
exprs
);
let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
assert!(
texts
.iter()
.any(|t| t.contains("a") && t.contains("b") && t.contains("+")),
"Should find a + b expression, got: {:?}",
texts
);
}
#[test]
fn test_extract_binary_exprs_from_ast_go() {
let source = r#"
package main
func example(a int, b int, c int) int {
x := a + b
if a * c > 10 {
return a + b
}
y := c - a
return y
}
"#;
let lang = crate::types::Language::Go;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Go, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_java() {
let source = r#"
class Example {
int example(int a, int b, int c) {
int x = a + b;
if (a * c > 10) {
return a + b;
}
int y = c - a;
return y;
}
}
"#;
let lang = crate::types::Language::Java;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Java, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_ruby() {
let source = r#"
def example(a, b, c)
x = a + b
if a * c > 10
return a + b
end
y = c - a
y
end
"#;
let lang = crate::types::Language::Ruby;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Ruby, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_cpp() {
let source = r#"
int example(int a, int b, int c) {
int x = a + b;
if (a * c > 10) {
return a + b;
}
int y = c - a;
return y;
}
"#;
let lang = crate::types::Language::Cpp;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from C++, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_php() {
let source = r#"<?php
function example($a, $b, $c) {
$x = $a + $b;
if ($a * $c > 10) {
return $a + $b;
}
$y = $c - $a;
return $y;
}
"#;
let lang = crate::types::Language::Php;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from PHP, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_csharp() {
let source = r#"
class Example {
int DoWork(int a, int b, int c) {
int x = a + b;
if (a * c > 10) {
return a + b;
}
int y = c - a;
return y;
}
}
"#;
let lang = crate::types::Language::CSharp;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from C#, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_kotlin() {
let source = r#"
fun example(a: Int, b: Int, c: Int): Int {
val x = a + b
if (a * c > 10) {
return a + b
}
val y = c - a
return y
}
"#;
let lang = crate::types::Language::Kotlin;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Kotlin, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_elixir() {
let source = r#"
defmodule Example do
def example(a, b, c) do
x = a + b
if a * c > 10 do
a + b
end
y = c - a
y
end
end
"#;
let lang = crate::types::Language::Elixir;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Elixir, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_from_ast_rust() {
let source = r#"
fn example(a: i32, b: i32, c: i32) -> i32 {
let x = a + b;
if a * c > 10 {
return a + b;
}
let y = c - a;
y
}
"#;
let lang = crate::types::Language::Rust;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
assert!(
exprs.len() >= 3,
"Expected at least 3 binary exprs from Rust, got {}: {:?}",
exprs.len(),
exprs
);
}
#[test]
fn test_extract_binary_exprs_excludes_function_calls() {
let source = r#"
def example(a, b):
x = a + b
y = len(a)
z = foo(a, b)
"#;
let lang = crate::types::Language::Python;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 5);
let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
assert!(
!texts.iter().any(|t| t.contains("len") || t.contains("foo")),
"Should not include function calls, got: {:?}",
texts
);
}
#[test]
fn test_extract_binary_exprs_normalizes_commutative() {
let source = r#"
def example(a, b):
x = b + a
y = a + b
"#;
let lang = crate::types::Language::Python;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
let texts: Vec<String> = exprs.iter().map(|e| e.0.clone()).collect();
if texts.len() >= 2 {
assert_eq!(
texts[0], texts[1],
"Commutative exprs should normalize to same text: {:?}",
texts
);
}
}
#[test]
fn test_extract_binary_exprs_returns_line_numbers() {
let source = r#"
def example(a, b):
x = a + b
y = a - b
"#;
let lang = crate::types::Language::Python;
let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
assert!(
exprs.len() >= 2,
"Expected at least 2 exprs, got {}",
exprs.len()
);
for (text, _op, _left, _right, line) in &exprs {
assert!(*line > 0, "Line should be > 0 for expr: {}", text);
}
}
#[test]
fn test_parse_expression_no_assignment_return() {
let result = parse_expression_from_line(" return a + b");
assert!(
result.is_some(),
"Should parse expression from return statement"
);
let (left, op, right) = result.unwrap();
assert_eq!(op, "+");
assert!(left == "a" || right == "a");
}
#[test]
fn test_parse_expression_no_assignment_if() {
let result = parse_expression_from_line(" if a + b > 10:");
assert!(
result.is_some(),
"Should parse expression from if condition"
);
}
#[test]
fn test_parse_expression_no_assignment_standalone() {
let result = parse_expression_from_line(" a + b");
assert!(
result.is_some(),
"Should parse standalone binary expression"
);
}
}