1use std::collections::{HashMap, HashSet};
28use std::hash::{Hash, Hasher};
29
30use indexmap::IndexMap;
31use serde::{Deserialize, Serialize};
32
33use super::types::{build_predecessors, reverse_postorder, validate_cfg, BlockId, DataflowError};
34use crate::types::{CfgInfo, DfgInfo, RefType, VarRef};
35
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
47#[serde(rename_all = "lowercase")]
48pub enum Confidence {
49 #[default]
51 Low,
52 Medium,
54 High,
56}
57
58#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
74pub struct UncertainFinding {
75 pub expr: String,
77 pub line: usize,
79 pub reason: String,
81}
82
83pub const COMMUTATIVE_OPS: &[&str] = &["+", "*", "==", "!=", "and", "or", "&", "|", "^"];
100
101#[derive(Debug, Clone, Serialize, Deserialize)]
132pub struct Expression {
133 pub text: String,
136
137 pub operands: Vec<String>,
140
141 pub line: usize,
144}
145
146impl PartialEq for Expression {
147 fn eq(&self, other: &Self) -> bool {
148 self.text == other.text
150 }
151}
152
153impl Eq for Expression {}
154
155impl Hash for Expression {
156 fn hash<H: Hasher>(&self, state: &mut H) {
157 self.text.hash(state);
159 }
160}
161
162impl Expression {
163 pub fn new(text: impl Into<String>, operands: Vec<impl Into<String>>, line: usize) -> Self {
180 let mut ops: Vec<String> = operands.into_iter().map(|s| s.into()).collect();
181 ops.sort();
182 Self {
183 text: text.into(),
184 operands: ops,
185 line,
186 }
187 }
188
189 pub fn is_killed_by(&self, var: &str) -> bool {
211 self.operands.iter().any(|op| op == var)
212 }
213}
214
215pub fn normalize_expression(left: &str, op: &str, right: &str) -> String {
256 let left = left.trim();
258 let right = right.trim();
259
260 if COMMUTATIVE_OPS.contains(&op) {
261 let mut operands = [left, right];
263 operands.sort();
264 format!("{} {} {}", operands[0], op, operands[1])
265 } else {
266 format!("{} {} {}", left, op, right)
268 }
269}
270
271#[derive(Debug, Clone, Default)]
290pub struct BlockExpressions {
291 pub gen: HashSet<Expression>,
296
297 pub kill: HashSet<String>,
302}
303
304impl BlockExpressions {
305 pub fn new() -> Self {
307 Self::default()
308 }
309}
310
311#[derive(Debug, Clone)]
325pub struct ExprInstance {
326 pub expr: Expression,
328 pub block_id: BlockId,
330}
331
332impl ExprInstance {
333 pub fn new(expr: Expression, block_id: BlockId) -> Self {
335 Self { expr, block_id }
336 }
337}
338
339#[derive(Debug, Clone, Default, Serialize, Deserialize)]
373pub struct AvailableExprsInfo {
374 #[serde(
380 serialize_with = "serialize_avail_map",
381 deserialize_with = "deserialize_avail_map"
382 )]
383 pub avail_in: HashMap<BlockId, HashSet<Expression>>,
384
385 #[serde(
389 serialize_with = "serialize_avail_map",
390 deserialize_with = "deserialize_avail_map"
391 )]
392 pub avail_out: HashMap<BlockId, HashSet<Expression>>,
393
394 pub all_exprs: HashSet<Expression>,
396
397 pub entry_block: BlockId,
399
400 #[serde(skip)]
405 pub expr_instances: Vec<Expression>,
406
407 #[serde(skip)]
411 pub expr_instances_with_blocks: Vec<ExprInstance>,
412
413 #[serde(skip)]
417 pub defs_per_line: HashMap<usize, HashSet<String>>,
418
419 #[serde(skip)]
423 pub line_to_block: HashMap<usize, BlockId>,
424
425 #[serde(default, skip_serializing_if = "Vec::is_empty")]
430 pub uncertain_exprs: Vec<UncertainFinding>,
431
432 #[serde(default)]
436 pub confidence: Confidence,
437}
438
439fn serialize_avail_map<S>(
443 map: &HashMap<BlockId, HashSet<Expression>>,
444 serializer: S,
445) -> Result<S::Ok, S::Error>
446where
447 S: serde::Serializer,
448{
449 let sorted: IndexMap<String, Vec<&Expression>> = map
451 .iter()
452 .map(|(k, v)| {
453 let mut exprs: Vec<_> = v.iter().collect();
454 exprs.sort_by_key(|e| &e.text);
456 (k.to_string(), exprs)
457 })
458 .collect::<Vec<_>>()
459 .into_iter()
460 .collect();
461
462 let ordered: IndexMap<_, _> = sorted.into_iter().collect();
464 ordered.serialize(serializer)
465}
466
467fn deserialize_avail_map<'de, D>(
469 deserializer: D,
470) -> Result<HashMap<BlockId, HashSet<Expression>>, D::Error>
471where
472 D: serde::Deserializer<'de>,
473{
474 let map: HashMap<String, Vec<Expression>> = HashMap::deserialize(deserializer)?;
475 Ok(map
476 .into_iter()
477 .map(|(k, v)| {
478 let block_id: BlockId = k.parse().unwrap_or(0);
479 let exprs: HashSet<Expression> = v.into_iter().collect();
480 (block_id, exprs)
481 })
482 .collect())
483}
484
485impl AvailableExprsInfo {
486 pub fn empty(entry_block: BlockId) -> Self {
490 Self {
491 avail_in: HashMap::new(),
492 avail_out: HashMap::new(),
493 all_exprs: HashSet::new(),
494 entry_block,
495 expr_instances: Vec::new(),
496 expr_instances_with_blocks: Vec::new(),
497 defs_per_line: HashMap::new(),
498 line_to_block: HashMap::new(),
499 uncertain_exprs: Vec::new(),
500 confidence: Confidence::default(),
501 }
502 }
503
504 pub fn new(entry_block: BlockId) -> Self {
506 Self::empty(entry_block)
507 }
508
509 pub fn is_available(&self, block: BlockId, expr: &Expression) -> bool {
520 self.avail_in
521 .get(&block)
522 .is_some_and(|set| set.contains(expr))
523 }
524
525 pub fn is_available_at_exit(&self, block: BlockId, expr: &Expression) -> bool {
536 self.avail_out
537 .get(&block)
538 .is_some_and(|set| set.contains(expr))
539 }
540
541 pub fn redundant_computations(&self) -> Vec<(String, usize, usize)> {
574 let mut redundant = Vec::new();
575
576 if !self.expr_instances_with_blocks.is_empty() {
578 let mut first_seen: HashMap<String, (usize, BlockId)> = HashMap::new();
580
581 let mut block_exprs: HashMap<BlockId, Vec<&ExprInstance>> = HashMap::new();
583 for inst in &self.expr_instances_with_blocks {
584 block_exprs.entry(inst.block_id).or_default().push(inst);
585 }
586
587 for exprs in block_exprs.values_mut() {
589 exprs.sort_by_key(|e| e.expr.line);
590 }
591
592 let mut block_ids: Vec<_> = block_exprs.keys().copied().collect();
594 block_ids.sort();
595
596 for &block_id in &block_ids {
597 if let Some(exprs) = block_exprs.get(&block_id) {
598 let mut gen_in_block: HashMap<String, usize> = HashMap::new();
600
601 for inst in exprs {
602 let expr = &inst.expr;
603 let line = expr.line;
604
605 let mut is_redundant = false;
608
609 if let Some(&(first_line, first_block)) = first_seen.get(&expr.text) {
610 let start_line = if first_block == block_id {
612 first_line + 1
614 } else {
615 1 };
619
620 let mut killed = false;
622 for check_line in start_line..line {
623 if let Some(defs) = self.defs_per_line.get(&check_line) {
624 if expr.operands.iter().any(|op| defs.contains(op)) {
625 killed = true;
626 break;
627 }
628 }
629 }
630
631 if !killed && first_line != line {
632 is_redundant = true;
634 redundant.push((expr.text.clone(), first_line, line));
635 }
636 }
637
638 if !is_redundant {
641 if let Some(&gen_line) = gen_in_block.get(&expr.text) {
642 let mut killed = false;
643 for check_line in (gen_line + 1)..line {
644 if let Some(defs) = self.defs_per_line.get(&check_line) {
645 if expr.operands.iter().any(|op| defs.contains(op)) {
646 killed = true;
647 break;
648 }
649 }
650 }
651
652 if !killed && gen_line != line {
653 let first_line = first_seen
655 .get(&expr.text)
656 .map(|(l, _)| *l)
657 .unwrap_or(gen_line);
658 if first_line != line {
659 redundant.push((expr.text.clone(), first_line, line));
660 }
661 }
662 }
663 }
664
665 if !first_seen.contains_key(&expr.text) {
667 first_seen.insert(expr.text.clone(), (line, block_id));
668 }
669
670 gen_in_block.entry(expr.text.clone()).or_insert(line);
672 }
673 }
674 }
675 } else {
676 let mut seen: HashMap<String, usize> = HashMap::new();
678
679 for expr in &self.expr_instances {
680 if let Some(&first_line) = seen.get(&expr.text) {
681 redundant.push((expr.text.clone(), first_line, expr.line));
682 } else {
683 seen.insert(expr.text.clone(), expr.line);
684 }
685 }
686 }
687
688 redundant.sort_by_key(|(_, _, line)| *line);
690 redundant
691 }
692
693 pub fn get_available_at_line(&self, line: usize, cfg: &CfgInfo) -> HashSet<Expression> {
717 let block_id = if let Some(&bid) = self.line_to_block.get(&line) {
719 bid
720 } else {
721 let found = cfg.blocks.iter().find(|b| {
723 let (start, end) = b.lines;
724 (start as usize) <= line && line <= (end as usize)
725 });
726 match found {
727 Some(block) => block.id,
728 None => return HashSet::new(),
729 }
730 };
731
732 let mut available = self.avail_in.get(&block_id).cloned().unwrap_or_default();
734
735 let block_start = cfg
737 .blocks
738 .iter()
739 .find(|b| b.id == block_id)
740 .map(|b| b.lines.0 as usize)
741 .unwrap_or(line);
742
743 let mut killed: HashSet<String> = HashSet::new();
746 for check_line in block_start..line {
747 if let Some(defs) = self.defs_per_line.get(&check_line) {
748 for def in defs {
749 killed.insert(def.clone());
750 }
751 }
752 }
753
754 available.retain(|expr| !expr.operands.iter().any(|op| killed.contains(op)));
756
757 available
758 }
759
760 pub fn to_json(&self) -> serde_json::Value {
783 let avail_map_to_json = |map: &HashMap<BlockId, HashSet<Expression>>| -> IndexMap<String, Vec<serde_json::Value>> {
787 let mut sorted: Vec<_> = map.iter().collect();
788 sorted.sort_by_key(|(k, _)| *k);
789 sorted
790 .into_iter()
791 .map(|(k, v)| {
792 let mut exprs: Vec<_> = v.iter().collect();
793 exprs.sort_by_key(|e| &e.text);
794 let expr_values: Vec<_> = exprs
795 .into_iter()
796 .map(|e| {
797 serde_json::json!({
798 "text": e.text,
799 "operands": e.operands,
800 "line": e.line,
801 })
802 })
803 .collect();
804 (k.to_string(), expr_values)
805 })
806 .collect()
807 };
808
809 let avail_in = avail_map_to_json(&self.avail_in);
810 let avail_out = avail_map_to_json(&self.avail_out);
811
812 let mut all_exprs: Vec<_> = self.all_exprs.iter().collect();
814 all_exprs.sort_by_key(|e| &e.text);
815 let all_expressions: Vec<_> = all_exprs
816 .into_iter()
817 .map(|e| {
818 serde_json::json!({
819 "text": e.text,
820 "operands": e.operands,
821 "line": e.line,
822 })
823 })
824 .collect();
825
826 let redundant: Vec<_> = self
828 .redundant_computations()
829 .into_iter()
830 .map(|(expr, first, redundant)| {
831 serde_json::json!({
832 "expr": expr,
833 "first_at": first,
834 "redundant_at": redundant,
835 })
836 })
837 .collect();
838
839 let uncertain: Vec<_> = self
841 .uncertain_exprs
842 .iter()
843 .map(|uf| {
844 serde_json::json!({
845 "expr": uf.expr,
846 "line": uf.line,
847 "reason": uf.reason,
848 })
849 })
850 .collect();
851
852 let confidence_str = match self.confidence {
853 Confidence::Low => "low",
854 Confidence::Medium => "medium",
855 Confidence::High => "high",
856 };
857
858 serde_json::json!({
859 "avail_in": avail_in,
860 "avail_out": avail_out,
861 "all_expressions": all_expressions,
862 "entry_block": self.entry_block,
863 "redundant_computations": redundant,
864 "uncertain_exprs": uncertain,
865 "confidence": confidence_str,
866 })
867 }
868}
869
870const BINARY_OPS: &[&str] = &[
879 "+", "-", "*", "/", "%", "//", "**", "==", "!=", "<", ">", "<=", ">=", "and", "or", "&&", "||", "&", "|", "^", "<<", ">>",
884];
885
886pub fn is_function_call(text: &str) -> bool {
915 let trimmed = text.trim();
916
917 if let Some(paren_idx) = trimmed.find('(') {
922 let before_paren = &trimmed[..paren_idx];
924 let before_trimmed = before_paren.trim_end();
925
926 if before_trimmed.is_empty() {
927 return false;
928 }
929
930 if let Some(last_char) = before_trimmed.chars().last() {
933 if last_char.is_alphanumeric() || last_char == '_' {
934 return true;
935 }
936 }
937 }
938
939 false
940}
941
942fn detect_uncertain_expression(line: &str, line_num: usize) -> Option<UncertainFinding> {
958 let trimmed = line.trim();
959
960 const SKIP_PREFIXES: &[&str] = &[
962 "#",
963 "//",
964 "/*",
965 "@",
966 "import ",
967 "from ",
968 "use ",
969 "class ",
970 "def ",
971 "fn ",
972 "func ",
973 "function ",
974 "pub ",
975 "struct ",
976 "enum ",
977 "trait ",
978 "impl ",
979 "interface ",
980 ];
981 if trimmed.is_empty() || SKIP_PREFIXES.iter().any(|p| trimmed.starts_with(p)) {
982 return None;
983 }
984
985 fn has_binary_operator(s: &str) -> bool {
987 s.contains(" + ") || s.contains(" - ") || s.contains(" * ") || s.contains(" / ")
988 }
989
990 if let Some(eq_idx) = trimmed.find('=') {
993 if eq_idx > 0 {
995 let before = trimmed.as_bytes().get(eq_idx.wrapping_sub(1));
996 let after = trimmed.as_bytes().get(eq_idx + 1);
997 let is_comparison =
998 matches!(before, Some(b'!' | b'<' | b'>' | b'=')) || matches!(after, Some(b'='));
999 if !is_comparison {
1000 let rhs = trimmed[eq_idx + 1..].trim();
1001 if is_function_call(rhs) && has_binary_operator(rhs) {
1003 return Some(UncertainFinding {
1004 expr: rhs.to_string(),
1005 line: line_num,
1006 reason: "contains function call - purity unknown".to_string(),
1007 });
1008 }
1009 if is_function_call(rhs) && rhs.contains('.') {
1011 return Some(UncertainFinding {
1012 expr: rhs.to_string(),
1013 line: line_num,
1014 reason: "contains method access - purity unknown".to_string(),
1015 });
1016 }
1017 }
1018 }
1019 }
1020
1021 if is_function_call(trimmed) && has_binary_operator(trimmed) {
1024 return Some(UncertainFinding {
1025 expr: trimmed.to_string(),
1026 line: line_num,
1027 reason: "function calls may have side effects".to_string(),
1028 });
1029 }
1030
1031 None
1032}
1033
1034pub fn parse_expression_from_line(line: &str) -> Option<(String, String, String)> {
1066 let line = if let Some(idx) = line.find('#') {
1068 &line[..idx]
1069 } else if let Some(idx) = line.find("//") {
1070 &line[..idx]
1071 } else {
1072 line
1073 };
1074
1075 let mut rhs_start = None;
1078 let chars: Vec<char> = line.chars().collect();
1079
1080 for i in (0..chars.len()).rev() {
1081 if chars[i] == '=' {
1082 let is_comparison = (i > 0 && matches!(chars[i - 1], '=' | '!' | '<' | '>' | ':'))
1084 || (i + 1 < chars.len() && chars[i + 1] == '=');
1085
1086 if !is_comparison {
1087 rhs_start = Some(i + 1);
1088 break;
1089 }
1090 }
1091 }
1092
1093 let expr_text = if let Some(start) = rhs_start {
1095 line[start..].trim()
1097 } else {
1098 let trimmed = line.trim();
1100 let stripped = if let Some(rest) = trimmed.strip_prefix("return ") {
1102 rest.trim()
1103 } else if let Some(rest) = trimmed.strip_prefix("return(") {
1104 rest.trim()
1106 } else if let Some(rest) = trimmed.strip_prefix("if ") {
1107 let r = rest.trim();
1109 let r = r.strip_suffix(':').unwrap_or(r);
1110 let r = r.strip_suffix('{').unwrap_or(r);
1112 let r = r.strip_prefix('(').unwrap_or(r);
1114 let r = r.strip_suffix(')').unwrap_or(r);
1115 r.trim()
1116 } else if let Some(rest) = trimmed.strip_prefix("elif ") {
1117 let r = rest.trim();
1118 r.strip_suffix(':').unwrap_or(r).trim()
1119 } else if let Some(rest) = trimmed.strip_prefix("else if ") {
1120 let r = rest.trim();
1121 let r = r.strip_suffix('{').unwrap_or(r);
1122 let r = r.strip_prefix('(').unwrap_or(r);
1123 let r = r.strip_suffix(')').unwrap_or(r);
1124 r.trim()
1125 } else if let Some(rest) = trimmed.strip_prefix("while ") {
1126 let r = rest.trim();
1127 let r = r.strip_suffix(':').unwrap_or(r);
1128 let r = r.strip_suffix('{').unwrap_or(r);
1129 let r = r.strip_prefix('(').unwrap_or(r);
1130 let r = r.strip_suffix(')').unwrap_or(r);
1131 r.trim()
1132 } else if let Some(rest) = trimmed.strip_prefix("assert ") {
1133 rest.trim()
1134 } else if let Some(rest) = trimmed.strip_prefix("yield ") {
1135 rest.trim()
1136 } else {
1137 trimmed
1139 };
1140 stripped
1141 };
1142
1143 if is_function_call(expr_text) {
1145 return None;
1146 }
1147
1148 let mut ops_by_len: Vec<&str> = BINARY_OPS.to_vec();
1151 ops_by_len.sort_by_key(|op| std::cmp::Reverse(op.len()));
1152
1153 for op in ops_by_len {
1154 if let Some(op_idx) = find_operator_in_expr(expr_text, op) {
1156 let left = expr_text[..op_idx].trim();
1157 let right = expr_text[op_idx + op.len()..].trim();
1158
1159 let left = extract_base_variable(left);
1161 let right = extract_base_variable(right);
1162
1163 if is_function_call(&left) || is_function_call(&right) {
1165 return None;
1166 }
1167
1168 if left.is_empty() || right.is_empty() {
1170 continue;
1171 }
1172
1173 if is_numeric_literal(&left) && is_numeric_literal(&right) {
1175 continue;
1176 }
1177
1178 return Some((left, op.to_string(), right));
1179 }
1180 }
1181
1182 None
1183}
1184
1185fn find_operator_in_expr(expr: &str, op: &str) -> Option<usize> {
1187 let chars: Vec<char> = expr.chars().collect();
1188 let op_chars: Vec<char> = op.chars().collect();
1189 let mut paren_depth: usize = 0;
1190 let mut in_string = false;
1191 let mut string_char = '"';
1192
1193 let mut i = 0;
1194 while i < chars.len() {
1195 let c = chars[i];
1196
1197 if !in_string && (c == '"' || c == '\'') {
1199 in_string = true;
1200 string_char = c;
1201 i += 1;
1202 continue;
1203 }
1204 if in_string && c == string_char && (i == 0 || chars[i - 1] != '\\') {
1205 in_string = false;
1206 i += 1;
1207 continue;
1208 }
1209 if in_string {
1210 i += 1;
1211 continue;
1212 }
1213
1214 if c == '(' || c == '[' || c == '{' {
1216 paren_depth += 1;
1217 i += 1;
1218 continue;
1219 }
1220 if c == ')' || c == ']' || c == '}' {
1221 paren_depth = paren_depth.saturating_sub(1);
1222 i += 1;
1223 continue;
1224 }
1225
1226 if paren_depth == 0 {
1228 if i + op_chars.len() <= chars.len() {
1230 let matches = op_chars
1231 .iter()
1232 .enumerate()
1233 .all(|(j, &oc)| chars[i + j] == oc);
1234
1235 if matches {
1236 if op.chars().all(|c| c.is_alphabetic()) {
1238 let before_ok = i == 0 || !chars[i - 1].is_alphanumeric();
1239 let after_ok =
1240 i + op.len() >= chars.len() || !chars[i + op.len()].is_alphanumeric();
1241 if before_ok && after_ok {
1242 return Some(i);
1243 }
1244 } else {
1245 return Some(i);
1246 }
1247 }
1248 }
1249 }
1250
1251 i += 1;
1252 }
1253
1254 None
1255}
1256
1257fn extract_base_variable(text: &str) -> String {
1270 let trimmed = text.trim();
1271
1272 let parts: Vec<&str> = trimmed.split('.').collect();
1274
1275 if parts.len() > 3 {
1277 parts[..3].join(".")
1278 } else {
1279 trimmed.to_string()
1280 }
1281}
1282
1283fn is_numeric_literal(text: &str) -> bool {
1285 let trimmed = text.trim();
1286
1287 if trimmed.parse::<i64>().is_ok() {
1289 return true;
1290 }
1291
1292 if trimmed.parse::<f64>().is_ok() {
1294 return true;
1295 }
1296
1297 if trimmed.starts_with("0x") || trimmed.starts_with("0o") || trimmed.starts_with("0b") {
1299 return true;
1300 }
1301
1302 false
1303}
1304
1305#[derive(Debug, Clone, Default)]
1310pub struct ExtractionResult {
1311 pub all_exprs: HashSet<Expression>,
1313 pub block_info: HashMap<BlockId, BlockExpressions>,
1315 pub expr_instances: Vec<Expression>,
1317 pub expr_instances_with_blocks: Vec<ExprInstance>,
1319 pub defs_per_line: HashMap<usize, HashSet<String>>,
1321 pub line_to_block: HashMap<usize, BlockId>,
1323 pub uncertain_exprs: Vec<UncertainFinding>,
1325}
1326
1327#[allow(dead_code)]
1329fn is_identifier(text: &str) -> bool {
1330 let trimmed = text.trim();
1331 if trimmed.is_empty() {
1332 return false;
1333 }
1334
1335 let mut chars = trimmed.chars();
1336
1337 match chars.next() {
1339 Some(c) if c.is_alphabetic() || c == '_' => {}
1340 _ => return false,
1341 }
1342
1343 chars.all(|c| c.is_alphanumeric() || c == '_' || c == '.')
1345}
1346
1347pub fn extract_expressions_from_refs(
1388 cfg: &CfgInfo,
1389 dfg: &DfgInfo,
1390) -> (
1391 HashSet<Expression>,
1392 HashMap<BlockId, BlockExpressions>,
1393 Vec<Expression>,
1394) {
1395 let mut all_exprs: HashSet<Expression> = HashSet::new();
1396 let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
1397 let mut expr_instances: Vec<Expression> = Vec::new();
1398
1399 for block in &cfg.blocks {
1401 block_info.insert(block.id, BlockExpressions::new());
1402 }
1403
1404 let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
1407
1408 for var_ref in &dfg.refs {
1409 let block_id = find_block_for_line(cfg, var_ref.line);
1411 if let Some(bid) = block_id {
1412 refs_by_line
1413 .entry((bid, var_ref.line))
1414 .or_default()
1415 .push(var_ref);
1416 }
1417 }
1418
1419 for ((block_id, line), refs) in refs_by_line {
1421 let defs: Vec<_> = refs
1423 .iter()
1424 .filter(|r| matches!(r.ref_type, RefType::Definition))
1425 .collect();
1426 let uses: Vec<_> = refs
1427 .iter()
1428 .filter(|r| matches!(r.ref_type, RefType::Use))
1429 .collect();
1430
1431 if defs.is_empty() || uses.len() < 2 {
1433 continue;
1436 }
1437
1438 let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
1443
1444 let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
1446
1447 if unique_uses.len() >= 2 {
1448 let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
1451 operands.sort();
1452
1453 if let Some(op) = infer_operator_from_uses(&use_names) {
1456 let text = normalize_expression(&operands[0], &op, &operands[1]);
1457 let expr = Expression::new(text, operands.clone(), line as usize);
1458
1459 all_exprs.insert(expr.clone());
1461
1462 expr_instances.push(expr.clone());
1464
1465 if let Some(block_expr) = block_info.get_mut(&block_id) {
1467 block_expr.gen.insert(expr);
1468 }
1469 }
1470 }
1471
1472 for def in &defs {
1474 if let Some(block_expr) = block_info.get_mut(&block_id) {
1475 block_expr.kill.insert(def.name.clone());
1476 }
1477 }
1478 }
1479
1480 expr_instances.sort_by_key(|e| e.line);
1482
1483 (all_exprs, block_info, expr_instances)
1484}
1485
1486pub fn extract_expressions_from_refs_with_source(
1501 cfg: &CfgInfo,
1502 dfg: &DfgInfo,
1503 source_lines: Option<&[String]>,
1504) -> (
1505 HashSet<Expression>,
1506 HashMap<BlockId, BlockExpressions>,
1507 Vec<Expression>,
1508) {
1509 let mut all_exprs: HashSet<Expression> = HashSet::new();
1510 let mut block_info: HashMap<BlockId, BlockExpressions> = HashMap::new();
1511 let mut expr_instances: Vec<Expression> = Vec::new();
1512
1513 for block in &cfg.blocks {
1515 block_info.insert(block.id, BlockExpressions::new());
1516 }
1517
1518 if let Some(lines) = source_lines {
1520 for (line_num, line_text) in lines.iter().enumerate() {
1522 let line = (line_num + 1) as u32; if let Some((left, op, right)) = parse_expression_from_line(line_text) {
1526 let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
1528
1529 let has_left = refs_on_line.iter().any(|r| r.name == left);
1530 let has_right = refs_on_line.iter().any(|r| r.name == right);
1531
1532 if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
1534 {
1535 let text = normalize_expression(&left, &op, &right);
1536 let operands = vec![left.clone(), right.clone()];
1537 let expr = Expression::new(text, operands, line as usize);
1538
1539 if let Some(block_id) = find_block_for_line(cfg, line) {
1541 all_exprs.insert(expr.clone());
1543
1544 expr_instances.push(expr.clone());
1546
1547 if let Some(block_expr) = block_info.get_mut(&block_id) {
1549 block_expr.gen.insert(expr);
1551 }
1552 }
1553 }
1554 }
1555 }
1556
1557 for var_ref in &dfg.refs {
1559 if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
1560 if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
1561 if let Some(block_expr) = block_info.get_mut(&block_id) {
1562 block_expr.kill.insert(var_ref.name.clone());
1563 }
1564 }
1565 }
1566 }
1567 } else {
1568 return extract_expressions_from_refs(cfg, dfg);
1570 }
1571
1572 expr_instances.sort_by_key(|e| e.line);
1574
1575 (all_exprs, block_info, expr_instances)
1576}
1577
1578fn find_block_for_line(cfg: &CfgInfo, line: u32) -> Option<BlockId> {
1580 for block in &cfg.blocks {
1581 if block.lines.0 <= line && line <= block.lines.1 {
1582 return Some(block.id);
1583 }
1584 }
1585 cfg.blocks
1588 .iter()
1589 .min_by_key(|b| {
1590 let dist_start = (b.lines.0 as i64 - line as i64).abs();
1591 let dist_end = (b.lines.1 as i64 - line as i64).abs();
1592 dist_start.min(dist_end)
1593 })
1594 .map(|b| b.id)
1595}
1596
1597fn infer_operator_from_uses(uses: &[&str]) -> Option<String> {
1602 if uses.len() >= 2 {
1605 Some("+".to_string())
1606 } else {
1607 None
1608 }
1609}
1610
1611pub fn extract_expressions_full(
1626 cfg: &CfgInfo,
1627 dfg: &DfgInfo,
1628 source_lines: Option<&[String]>,
1629) -> ExtractionResult {
1630 extract_expressions_full_with_lang(cfg, dfg, source_lines, None)
1631}
1632
1633pub fn extract_expressions_full_with_lang(
1649 cfg: &CfgInfo,
1650 dfg: &DfgInfo,
1651 source_lines: Option<&[String]>,
1652 lang: Option<Language>,
1653) -> ExtractionResult {
1654 let mut result = ExtractionResult::default();
1655
1656 for block in &cfg.blocks {
1658 result.block_info.insert(block.id, BlockExpressions::new());
1659 for line in block.lines.0..=block.lines.1 {
1661 result.line_to_block.insert(line as usize, block.id);
1662 }
1663 }
1664
1665 for var_ref in &dfg.refs {
1667 if matches!(var_ref.ref_type, RefType::Definition | RefType::Update) {
1668 result
1669 .defs_per_line
1670 .entry(var_ref.line as usize)
1671 .or_default()
1672 .insert(var_ref.name.clone());
1673
1674 if let Some(block_id) = find_block_for_line(cfg, var_ref.line) {
1676 if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1677 block_expr.kill.insert(var_ref.name.clone());
1678 }
1679 }
1680 }
1681 }
1682
1683 if let Some(lines) = source_lines {
1685 for (line_num, line_text) in lines.iter().enumerate() {
1686 let line = (line_num + 1) as u32; if let Some((left, op, right)) = parse_expression_from_line(line_text) {
1689 let refs_on_line: Vec<_> = dfg.refs.iter().filter(|r| r.line == line).collect();
1691
1692 let has_left = refs_on_line.iter().any(|r| r.name == left);
1693 let has_right = refs_on_line.iter().any(|r| r.name == right);
1694
1695 if has_left || has_right || is_numeric_literal(&left) || is_numeric_literal(&right)
1696 {
1697 let text = normalize_expression(&left, &op, &right);
1698 let operands = vec![left.clone(), right.clone()];
1699 let expr = Expression::new(text, operands, line as usize);
1700
1701 if let Some(block_id) = find_block_for_line(cfg, line) {
1702 result.all_exprs.insert(expr.clone());
1703 result.expr_instances.push(expr.clone());
1704 result
1705 .expr_instances_with_blocks
1706 .push(ExprInstance::new(expr.clone(), block_id));
1707
1708 if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1709 block_expr.gen.insert(expr);
1710 }
1711 }
1712 }
1713 } else {
1714 if let Some(uncertain) = detect_uncertain_expression(line_text, line as usize) {
1717 if find_block_for_line(cfg, line).is_some() {
1719 result.uncertain_exprs.push(uncertain);
1720 }
1721 }
1722 }
1723 }
1724 } else {
1725 let mut refs_by_line: HashMap<(BlockId, u32), Vec<&VarRef>> = HashMap::new();
1727
1728 for var_ref in &dfg.refs {
1729 if let Some(bid) = find_block_for_line(cfg, var_ref.line) {
1730 refs_by_line
1731 .entry((bid, var_ref.line))
1732 .or_default()
1733 .push(var_ref);
1734 }
1735 }
1736
1737 for ((block_id, line), refs) in refs_by_line {
1738 let defs: Vec<_> = refs
1739 .iter()
1740 .filter(|r| matches!(r.ref_type, RefType::Definition))
1741 .collect();
1742 let uses: Vec<_> = refs
1743 .iter()
1744 .filter(|r| matches!(r.ref_type, RefType::Use))
1745 .collect();
1746
1747 if defs.is_empty() || uses.len() < 2 {
1748 continue;
1749 }
1750
1751 let use_names: Vec<&str> = uses.iter().map(|r| r.name.as_str()).collect();
1752 let unique_uses: HashSet<&str> = use_names.iter().copied().collect();
1753
1754 if unique_uses.len() >= 2 {
1755 let mut operands: Vec<String> = unique_uses.iter().map(|s| s.to_string()).collect();
1756 operands.sort();
1757
1758 if let Some(op) = infer_operator_from_uses(&use_names) {
1759 let text = normalize_expression(&operands[0], &op, &operands[1]);
1760 let expr = Expression::new(text, operands.clone(), line as usize);
1761
1762 result.all_exprs.insert(expr.clone());
1763 result.expr_instances.push(expr.clone());
1764 result
1765 .expr_instances_with_blocks
1766 .push(ExprInstance::new(expr.clone(), block_id));
1767
1768 if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1769 block_expr.gen.insert(expr);
1770 }
1771 }
1772 }
1773 }
1774 }
1775
1776 if let (Some(lines), Some(language)) = (source_lines, lang) {
1779 let full_source = lines.join("\n");
1780 let min_line = cfg.blocks.iter().map(|b| b.lines.0).min().unwrap_or(1) as usize;
1782 let max_line = cfg.blocks.iter().map(|b| b.lines.1).max().unwrap_or(1) as usize;
1783
1784 let ast_exprs = extract_binary_exprs_from_ast(&full_source, language, min_line, max_line);
1785
1786 for (text, _op, left, right, line) in ast_exprs {
1787 let line_u32 = line as u32;
1788
1789 let already_found = result
1791 .all_exprs
1792 .iter()
1793 .any(|e| e.text == text && e.line == line);
1794 if already_found {
1795 continue;
1796 }
1797
1798 let operands = vec![left.clone(), right.clone()];
1800 let expr = Expression::new(text.clone(), operands, line);
1801
1802 if let Some(block_id) = find_block_for_line(cfg, line_u32) {
1803 result.all_exprs.insert(expr.clone());
1804 result.expr_instances.push(expr.clone());
1805 result
1806 .expr_instances_with_blocks
1807 .push(ExprInstance::new(expr.clone(), block_id));
1808
1809 if let Some(block_expr) = result.block_info.get_mut(&block_id) {
1810 block_expr.gen.insert(expr);
1811 }
1812 }
1813 }
1814 }
1815
1816 result.expr_instances.sort_by_key(|e| e.line);
1818 result
1819 .expr_instances_with_blocks
1820 .sort_by_key(|e| e.expr.line);
1821
1822 result
1823}
1824
1825pub fn compute_available_exprs(
1874 cfg: &CfgInfo,
1875 dfg: &DfgInfo,
1876) -> Result<AvailableExprsInfo, DataflowError> {
1877 validate_cfg(cfg)?;
1879
1880 let entry = cfg.entry_block;
1881
1882 let extraction = extract_expressions_full(cfg, dfg, None);
1884 let all_exprs = extraction.all_exprs;
1885 let block_info = extraction.block_info;
1886 let expr_instances = extraction.expr_instances;
1887 let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
1888 let defs_per_line = extraction.defs_per_line;
1889 let line_to_block = extraction.line_to_block;
1890
1891 if all_exprs.is_empty() {
1893 let mut info = AvailableExprsInfo::empty(entry);
1894 for block in &cfg.blocks {
1896 info.avail_in.insert(block.id, HashSet::new());
1897 info.avail_out.insert(block.id, HashSet::new());
1898 }
1899 return Ok(info);
1900 }
1901
1902 let predecessors = build_predecessors(cfg);
1904
1905 let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
1910 let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
1911
1912 avail_in.insert(entry, HashSet::new());
1914
1915 let entry_gen = block_info
1917 .get(&entry)
1918 .map(|b| b.gen.clone())
1919 .unwrap_or_default();
1920 avail_out.insert(entry, entry_gen);
1921
1922 for block in &cfg.blocks {
1924 if block.id != entry {
1925 avail_in.insert(block.id, all_exprs.clone());
1926 avail_out.insert(block.id, all_exprs.clone());
1927 }
1928 }
1929
1930 let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
1933 let mut changed = true;
1934 let mut iteration = 0;
1935
1936 let block_order = reverse_postorder(cfg);
1938
1939 while changed && iteration < max_iterations {
1940 changed = false;
1941 iteration += 1;
1942
1943 for &block_id in &block_order {
1944 if block_id == entry {
1946 continue;
1947 }
1948
1949 let preds = predecessors
1951 .get(&block_id)
1952 .map(|v| v.as_slice())
1953 .unwrap_or(&[]);
1954
1955 let new_in: HashSet<Expression> = if preds.is_empty() {
1956 HashSet::new()
1958 } else {
1959 let mut result = avail_out
1961 .get(&preds[0])
1962 .cloned()
1963 .unwrap_or_else(|| all_exprs.clone());
1964
1965 for &pred in &preds[1..] {
1966 let pred_out = avail_out
1967 .get(&pred)
1968 .cloned()
1969 .unwrap_or_else(|| all_exprs.clone());
1970 result = result.intersection(&pred_out).cloned().collect();
1972 }
1973 result
1974 };
1975
1976 let info = block_info.get(&block_id);
1978 let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
1979 let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
1980
1981 let not_killed: HashSet<Expression> = new_in
1983 .iter()
1984 .filter(|expr| !is_killed(expr, &kill))
1985 .cloned()
1986 .collect();
1987
1988 let new_out: HashSet<Expression> = gen.union(¬_killed).cloned().collect();
1990
1991 if avail_in.get(&block_id) != Some(&new_in)
1993 || avail_out.get(&block_id) != Some(&new_out)
1994 {
1995 changed = true;
1996 avail_in.insert(block_id, new_in);
1997 avail_out.insert(block_id, new_out);
1998 }
1999 }
2000 }
2001
2002 if iteration >= max_iterations {
2004 return Err(DataflowError::IterationLimit {
2005 iterations: iteration,
2006 });
2007 }
2008
2009 Ok(AvailableExprsInfo {
2010 avail_in,
2011 avail_out,
2012 all_exprs,
2013 entry_block: entry,
2014 expr_instances,
2015 expr_instances_with_blocks,
2016 defs_per_line,
2017 line_to_block,
2018 uncertain_exprs: Vec::new(),
2019 confidence: Confidence::High,
2020 })
2021}
2022
2023pub fn compute_available_exprs_with_source(
2038 cfg: &CfgInfo,
2039 dfg: &DfgInfo,
2040 source_lines: &[String],
2041) -> Result<AvailableExprsInfo, DataflowError> {
2042 compute_available_exprs_with_source_and_lang(cfg, dfg, source_lines, None)
2043}
2044
2045pub fn compute_available_exprs_with_source_and_lang(
2061 cfg: &CfgInfo,
2062 dfg: &DfgInfo,
2063 source_lines: &[String],
2064 lang: Option<Language>,
2065) -> Result<AvailableExprsInfo, DataflowError> {
2066 validate_cfg(cfg)?;
2068
2069 let entry = cfg.entry_block;
2070
2071 let extraction = extract_expressions_full_with_lang(cfg, dfg, Some(source_lines), lang);
2073 let all_exprs = extraction.all_exprs;
2074 let block_info = extraction.block_info;
2075 let expr_instances = extraction.expr_instances;
2076 let expr_instances_with_blocks = extraction.expr_instances_with_blocks;
2077 let defs_per_line = extraction.defs_per_line;
2078 let line_to_block = extraction.line_to_block;
2079 let uncertain_exprs = extraction.uncertain_exprs;
2080
2081 if all_exprs.is_empty() {
2083 let mut info = AvailableExprsInfo::empty(entry);
2084 info.uncertain_exprs = uncertain_exprs.clone();
2085 info.confidence = compute_confidence(0, uncertain_exprs.len());
2087 for block in &cfg.blocks {
2088 info.avail_in.insert(block.id, HashSet::new());
2089 info.avail_out.insert(block.id, HashSet::new());
2090 }
2091 return Ok(info);
2092 }
2093
2094 let predecessors = build_predecessors(cfg);
2096
2097 let mut avail_in: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
2099 let mut avail_out: HashMap<BlockId, HashSet<Expression>> = HashMap::new();
2100
2101 avail_in.insert(entry, HashSet::new());
2102 let entry_gen = block_info
2103 .get(&entry)
2104 .map(|b| b.gen.clone())
2105 .unwrap_or_default();
2106 avail_out.insert(entry, entry_gen);
2107
2108 for block in &cfg.blocks {
2109 if block.id != entry {
2110 avail_in.insert(block.id, all_exprs.clone());
2111 avail_out.insert(block.id, all_exprs.clone());
2112 }
2113 }
2114
2115 let max_iterations = cfg.blocks.len() * all_exprs.len() * 2 + 10;
2117 let mut changed = true;
2118 let mut iteration = 0;
2119 let block_order = reverse_postorder(cfg);
2120
2121 while changed && iteration < max_iterations {
2122 changed = false;
2123 iteration += 1;
2124
2125 for &block_id in &block_order {
2126 if block_id == entry {
2127 continue;
2128 }
2129
2130 let preds = predecessors
2131 .get(&block_id)
2132 .map(|v| v.as_slice())
2133 .unwrap_or(&[]);
2134
2135 let new_in: HashSet<Expression> = if preds.is_empty() {
2136 HashSet::new()
2137 } else {
2138 let mut result = avail_out
2139 .get(&preds[0])
2140 .cloned()
2141 .unwrap_or_else(|| all_exprs.clone());
2142
2143 for &pred in &preds[1..] {
2144 let pred_out = avail_out
2145 .get(&pred)
2146 .cloned()
2147 .unwrap_or_else(|| all_exprs.clone());
2148 result = result.intersection(&pred_out).cloned().collect();
2149 }
2150 result
2151 };
2152
2153 let info = block_info.get(&block_id);
2154 let gen = info.map(|i| &i.gen).cloned().unwrap_or_default();
2155 let kill = info.map(|i| &i.kill).cloned().unwrap_or_default();
2156
2157 let not_killed: HashSet<Expression> = new_in
2158 .iter()
2159 .filter(|expr| !is_killed(expr, &kill))
2160 .cloned()
2161 .collect();
2162
2163 let new_out: HashSet<Expression> = gen.union(¬_killed).cloned().collect();
2164
2165 if avail_in.get(&block_id) != Some(&new_in)
2166 || avail_out.get(&block_id) != Some(&new_out)
2167 {
2168 changed = true;
2169 avail_in.insert(block_id, new_in);
2170 avail_out.insert(block_id, new_out);
2171 }
2172 }
2173 }
2174
2175 if iteration >= max_iterations {
2176 return Err(DataflowError::IterationLimit {
2177 iterations: iteration,
2178 });
2179 }
2180
2181 let confidence = compute_confidence(all_exprs.len(), uncertain_exprs.len());
2182
2183 Ok(AvailableExprsInfo {
2184 avail_in,
2185 avail_out,
2186 all_exprs,
2187 entry_block: entry,
2188 expr_instances,
2189 expr_instances_with_blocks,
2190 defs_per_line,
2191 line_to_block,
2192 uncertain_exprs,
2193 confidence,
2194 })
2195}
2196
2197fn compute_confidence(confirmed: usize, uncertain: usize) -> Confidence {
2199 if confirmed == 0 && uncertain == 0 {
2200 Confidence::Low
2201 } else if uncertain == 0 {
2202 Confidence::High
2203 } else if confirmed >= uncertain {
2204 Confidence::Medium
2205 } else {
2206 Confidence::Low
2207 }
2208}
2209
2210fn is_killed(expr: &Expression, kills: &HashSet<String>) -> bool {
2223 expr.operands.iter().any(|op| kills.contains(op))
2224}
2225
2226use crate::types::Language;
2231
2232fn binary_expr_node_kinds(lang: Language) -> &'static [&'static str] {
2235 match lang {
2236 Language::Python => &["binary_operator", "boolean_operator", "comparison_operator"],
2237 Language::Go => &["binary_expression"],
2238 Language::TypeScript | Language::JavaScript => &["binary_expression"],
2239 Language::Java => &["binary_expression"],
2240 Language::Rust => &["binary_expression"],
2241 Language::C | Language::Cpp => &["binary_expression"],
2242 Language::Ruby => &["binary"],
2243 Language::Php => &["binary_expression"],
2244 Language::Kotlin => &["binary_expression"],
2245 Language::CSharp => &["binary_expression"],
2246 Language::Scala => &["infix_expression"],
2247 Language::Elixir => &["binary_operator"],
2248 Language::Ocaml => &["infix_expression"],
2249 Language::Lua | Language::Luau => &["binary_expression"],
2250 Language::Swift => &["infix_expression"],
2251 }
2252}
2253
2254fn extract_operator_from_node<'a>(
2261 node: &tree_sitter::Node<'a>,
2262 source: &'a [u8],
2263 _lang: Language,
2264) -> Option<String> {
2265 if let Some(op_node) = node.child_by_field_name("operator") {
2267 let op_text = op_node.utf8_text(source).unwrap_or("").trim().to_string();
2268 if !op_text.is_empty() {
2269 return Some(op_text);
2270 }
2271 }
2272
2273 let child_count = node.child_count();
2275 for i in 0..child_count {
2276 if let Some(child) = node.child(i) {
2277 if !child.is_named() {
2278 let text = child.utf8_text(source).unwrap_or("").trim();
2279 if matches!(
2281 text,
2282 "+" | "-"
2283 | "*"
2284 | "/"
2285 | "%"
2286 | "//"
2287 | "**"
2288 | "=="
2289 | "!="
2290 | "<"
2291 | ">"
2292 | "<="
2293 | ">="
2294 | "&&"
2295 | "||"
2296 | "and"
2297 | "or"
2298 | "&"
2299 | "|"
2300 | "^"
2301 | "<<"
2302 | ">>"
2303 | "<>"
2304 | "==="
2305 | "!=="
2306 | "<=>"
2307 | ".."
2308 | "..."
2309 | "in"
2310 | "not in"
2311 | "|>"
2312 | "<|"
2313 | "++"
2314 ) {
2315 return Some(text.to_string());
2316 }
2317 }
2318 }
2321 }
2322
2323 None
2324}
2325
2326fn extract_operands_from_node<'a>(
2330 node: &tree_sitter::Node<'a>,
2331 source: &'a [u8],
2332 lang: Language,
2333) -> Option<(String, String)> {
2334 let left = node.child_by_field_name("left").or_else(|| {
2336 node.named_child(0)
2338 });
2339
2340 let right = node.child_by_field_name("right").or_else(|| {
2341 let count = node.named_child_count();
2343 if count >= 2 {
2344 node.named_child(count - 1)
2345 } else {
2346 None
2347 }
2348 });
2349
2350 match (left, right) {
2351 (Some(l), Some(r)) if l.id() != r.id() => {
2352 let left_text = l.utf8_text(source).unwrap_or("").trim().to_string();
2353 let right_text = r.utf8_text(source).unwrap_or("").trim().to_string();
2354
2355 if left_text.is_empty() || right_text.is_empty() {
2356 return None;
2357 }
2358
2359 let left_base = extract_base_variable(&left_text);
2361 let right_base = extract_base_variable(&right_text);
2362
2363 if is_function_call(&left_base) || is_function_call(&right_base) {
2365 return None;
2366 }
2367
2368 if is_numeric_literal(&left_base) && is_numeric_literal(&right_base) {
2370 return None;
2371 }
2372
2373 let left_final = if matches!(lang, Language::Php) {
2375 left_base.trim_start_matches('$').to_string()
2376 } else {
2377 left_base
2378 };
2379 let right_final = if matches!(lang, Language::Php) {
2380 right_base.trim_start_matches('$').to_string()
2381 } else {
2382 right_base
2383 };
2384
2385 Some((left_final, right_final))
2386 }
2387 _ => None,
2388 }
2389}
2390
2391pub fn extract_binary_exprs_from_ast(
2408 source: &str,
2409 lang: Language,
2410 start_line: usize,
2411 end_line: usize,
2412) -> Vec<(String, String, String, String, usize)> {
2413 let mut results = Vec::new();
2414
2415 let tree = match crate::ast::parser::parse(source, lang) {
2417 Ok(t) => t,
2418 Err(_) => return results,
2419 };
2420
2421 let root = tree.root_node();
2422 let source_bytes = source.as_bytes();
2423 let node_kinds = binary_expr_node_kinds(lang);
2424
2425 if node_kinds.is_empty() {
2426 return results;
2427 }
2428
2429 let mut cursor = root.walk();
2431 collect_binary_exprs(
2432 &mut cursor,
2433 source_bytes,
2434 lang,
2435 node_kinds,
2436 start_line,
2437 end_line,
2438 &mut results,
2439 );
2440
2441 results
2442}
2443
2444fn collect_binary_exprs(
2446 cursor: &mut tree_sitter::TreeCursor,
2447 source: &[u8],
2448 lang: Language,
2449 node_kinds: &[&str],
2450 start_line: usize,
2451 end_line: usize,
2452 results: &mut Vec<(String, String, String, String, usize)>,
2453) {
2454 let node = cursor.node();
2455 let line = node.start_position().row + 1; let node_start_line = node.start_position().row + 1;
2459 let node_end_line = node.end_position().row + 1;
2460
2461 if node_end_line < start_line || node_start_line > end_line {
2463 return;
2464 }
2465
2466 let kind = node.kind();
2468 if node_kinds.contains(&kind) && line >= start_line && line <= end_line {
2469 if let Some(op) = extract_operator_from_node(&node, source, lang) {
2471 if let Some((left, right)) = extract_operands_from_node(&node, source, lang) {
2472 let normalized = normalize_expression(&left, &op, &right);
2474 results.push((normalized, op, left, right, line));
2475 }
2476 }
2477 }
2478
2479 if cursor.goto_first_child() {
2481 loop {
2482 collect_binary_exprs(
2483 cursor, source, lang, node_kinds, start_line, end_line, results,
2484 );
2485 if !cursor.goto_next_sibling() {
2486 break;
2487 }
2488 }
2489 cursor.goto_parent();
2490 }
2491}
2492
2493#[cfg(test)]
2498mod tests {
2499 use super::*;
2500
2501 #[test]
2506 fn test_expression_new() {
2507 let expr = Expression::new("a + b", vec!["b", "a"], 5);
2508 assert_eq!(expr.text, "a + b");
2509 assert_eq!(expr.operands, vec!["a", "b"]);
2511 assert_eq!(expr.line, 5);
2512 }
2513
2514 #[test]
2515 fn test_expression_equality_by_text() {
2516 let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2517 let expr2 = Expression::new("a + b", vec!["a", "b"], 100);
2518 assert_eq!(expr1, expr2);
2519 }
2520
2521 #[test]
2522 fn test_expression_inequality_by_text() {
2523 let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2524 let expr2 = Expression::new("a - b", vec!["a", "b"], 1);
2525 assert_ne!(expr1, expr2);
2526 }
2527
2528 #[test]
2529 fn test_expression_hash_by_text() {
2530 use std::collections::hash_map::DefaultHasher;
2531
2532 let expr1 = Expression::new("x * y", vec!["x", "y"], 1);
2533 let expr2 = Expression::new("x * y", vec!["x", "y"], 999);
2534
2535 let mut hasher1 = DefaultHasher::new();
2536 let mut hasher2 = DefaultHasher::new();
2537 expr1.hash(&mut hasher1);
2538 expr2.hash(&mut hasher2);
2539
2540 assert_eq!(hasher1.finish(), hasher2.finish());
2541 }
2542
2543 #[test]
2544 fn test_expression_is_killed_by() {
2545 let expr = Expression::new("a + b", vec!["a", "b"], 1);
2546 assert!(expr.is_killed_by("a"));
2547 assert!(expr.is_killed_by("b"));
2548 assert!(!expr.is_killed_by("c"));
2549 }
2550
2551 #[test]
2552 fn test_expression_hashset() {
2553 let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
2554 let expr2 = Expression::new("a + b", vec!["a", "b"], 5);
2555
2556 let mut set: HashSet<Expression> = HashSet::new();
2557 set.insert(expr1);
2558 set.insert(expr2);
2559
2560 assert_eq!(set.len(), 1);
2562 }
2563
2564 #[test]
2569 fn test_normalize_commutative_addition() {
2570 assert_eq!(normalize_expression("a", "+", "b"), "a + b");
2571 assert_eq!(normalize_expression("b", "+", "a"), "a + b");
2572 }
2573
2574 #[test]
2575 fn test_normalize_commutative_multiplication() {
2576 assert_eq!(normalize_expression("x", "*", "y"), "x * y");
2577 assert_eq!(normalize_expression("y", "*", "x"), "x * y");
2578 }
2579
2580 #[test]
2581 fn test_normalize_commutative_equality() {
2582 assert_eq!(normalize_expression("foo", "==", "bar"), "bar == foo");
2583 assert_eq!(normalize_expression("bar", "==", "foo"), "bar == foo");
2584 }
2585
2586 #[test]
2587 fn test_normalize_non_commutative_subtraction() {
2588 assert_eq!(normalize_expression("a", "-", "b"), "a - b");
2589 assert_eq!(normalize_expression("b", "-", "a"), "b - a");
2590 }
2591
2592 #[test]
2593 fn test_normalize_non_commutative_division() {
2594 assert_eq!(normalize_expression("x", "/", "y"), "x / y");
2595 assert_eq!(normalize_expression("y", "/", "x"), "y / x");
2596 }
2597
2598 #[test]
2599 fn test_normalize_whitespace_trimmed() {
2600 assert_eq!(normalize_expression(" a ", "+", " b "), "a + b");
2602 assert_eq!(normalize_expression("\ta\n", "-", "\tb\n"), "a - b");
2603 }
2604
2605 #[test]
2610 fn test_block_expressions_default() {
2611 let block = BlockExpressions::new();
2612 assert!(block.gen.is_empty());
2613 assert!(block.kill.is_empty());
2614 }
2615
2616 #[test]
2621 fn test_available_exprs_info_empty() {
2622 let info = AvailableExprsInfo::empty(0);
2623 assert!(info.avail_in.is_empty());
2624 assert!(info.avail_out.is_empty());
2625 assert!(info.all_exprs.is_empty());
2626 assert_eq!(info.entry_block, 0);
2627 assert!(info.expr_instances.is_empty());
2628 }
2629
2630 #[test]
2631 fn test_is_available_true() {
2632 let mut info = AvailableExprsInfo::new(0);
2633 let expr = Expression::new("a + b", vec!["a", "b"], 1);
2634
2635 let mut block_exprs = HashSet::new();
2636 block_exprs.insert(expr.clone());
2637 info.avail_in.insert(1, block_exprs);
2638
2639 assert!(info.is_available(1, &expr));
2640 }
2641
2642 #[test]
2643 fn test_is_available_false_not_in_set() {
2644 let info = AvailableExprsInfo::new(0);
2645 let expr = Expression::new("a + b", vec!["a", "b"], 1);
2646 assert!(!info.is_available(1, &expr));
2647 }
2648
2649 #[test]
2650 fn test_is_available_false_unknown_block() {
2651 let info = AvailableExprsInfo::new(0);
2652 let expr = Expression::new("a + b", vec!["a", "b"], 1);
2653 assert!(!info.is_available(999, &expr));
2654 }
2655
2656 #[test]
2657 fn test_is_available_at_exit_true() {
2658 let mut info = AvailableExprsInfo::new(0);
2659 let expr = Expression::new("a + b", vec!["a", "b"], 1);
2660
2661 let mut block_exprs = HashSet::new();
2662 block_exprs.insert(expr.clone());
2663 info.avail_out.insert(1, block_exprs);
2664
2665 assert!(info.is_available_at_exit(1, &expr));
2666 }
2667
2668 #[test]
2669 fn test_to_json_serializable() {
2670 let mut info = AvailableExprsInfo::new(0);
2671 let expr = Expression::new("a + b", vec!["a", "b"], 2);
2672
2673 let mut block_exprs = HashSet::new();
2674 block_exprs.insert(expr.clone());
2675 info.avail_in.insert(0, HashSet::new());
2676 info.avail_in.insert(1, block_exprs.clone());
2677 info.avail_out.insert(0, block_exprs);
2678 info.all_exprs.insert(expr);
2679
2680 let json = info.to_json();
2681
2682 assert!(json.is_object());
2684 assert!(json.get("avail_in").is_some());
2685 assert!(json.get("avail_out").is_some());
2686 assert!(json.get("all_expressions").is_some());
2687 assert!(json.get("entry_block").is_some());
2688 assert!(json.get("redundant_computations").is_some());
2689 }
2690
2691 #[test]
2692 fn test_to_json_includes_redundant_computations() {
2693 let mut info = AvailableExprsInfo::new(0);
2694
2695 info.expr_instances
2697 .push(Expression::new("a + b", vec!["a", "b"], 2));
2698 info.expr_instances
2699 .push(Expression::new("a + b", vec!["a", "b"], 5));
2700
2701 let json = info.to_json();
2702 let redundant = json
2703 .get("redundant_computations")
2704 .unwrap()
2705 .as_array()
2706 .unwrap();
2707
2708 assert_eq!(redundant.len(), 1);
2709 assert_eq!(redundant[0]["expr"], "a + b");
2710 assert_eq!(redundant[0]["first_at"], 2);
2711 assert_eq!(redundant[0]["redundant_at"], 5);
2712 }
2713
2714 #[test]
2719 fn test_is_function_call_simple() {
2720 assert!(is_function_call("foo()"));
2722 assert!(is_function_call("bar(x)"));
2723 assert!(is_function_call("baz(1, 2, 3)"));
2724 }
2725
2726 #[test]
2727 fn test_is_function_call_method() {
2728 assert!(is_function_call("obj.method()"));
2730 assert!(is_function_call("x.foo(bar)"));
2731 assert!(is_function_call("self.process(data)"));
2732 }
2733
2734 #[test]
2735 fn test_is_function_call_chained() {
2736 assert!(is_function_call("a.b.c()"));
2738 assert!(is_function_call("foo().bar()"));
2739 }
2740
2741 #[test]
2742 fn test_is_function_call_false_for_binary_ops() {
2743 assert!(!is_function_call("a + b"));
2745 assert!(!is_function_call("x * y"));
2746 assert!(!is_function_call("foo - bar"));
2747 assert!(!is_function_call("1 + 2"));
2748 }
2749
2750 #[test]
2751 fn test_is_function_call_false_for_parens_in_expr() {
2752 assert!(!is_function_call("(a + b)"));
2754 assert!(!is_function_call("(x * y) + z"));
2755 }
2756
2757 #[test]
2758 fn test_parse_expression_simple_addition() {
2759 let result = parse_expression_from_line("x = a + b");
2760 assert!(result.is_some());
2761 let (left, op, right) = result.unwrap();
2762 assert_eq!(left, "a");
2763 assert_eq!(op, "+");
2764 assert_eq!(right, "b");
2765 }
2766
2767 #[test]
2768 fn test_parse_expression_multiplication() {
2769 let result = parse_expression_from_line("result = foo * bar");
2770 assert!(result.is_some());
2771 let (left, op, right) = result.unwrap();
2772 assert_eq!(left, "foo");
2773 assert_eq!(op, "*");
2774 assert_eq!(right, "bar");
2775 }
2776
2777 #[test]
2778 fn test_parse_expression_subtraction() {
2779 let result = parse_expression_from_line("y = x - z");
2780 assert!(result.is_some());
2781 let (left, op, right) = result.unwrap();
2782 assert_eq!(left, "x");
2783 assert_eq!(op, "-");
2784 assert_eq!(right, "z");
2785 }
2786
2787 #[test]
2788 fn test_parse_expression_comparison() {
2789 let result = parse_expression_from_line("flag = a == b");
2790 assert!(result.is_some());
2791 let (left, op, right) = result.unwrap();
2792 assert_eq!(left, "a");
2793 assert_eq!(op, "==");
2794 assert_eq!(right, "b");
2795 }
2796
2797 #[test]
2798 fn test_parse_expression_with_comment() {
2799 let result = parse_expression_from_line("x = a + b # compute sum");
2801 assert!(result.is_some());
2802 let (left, op, right) = result.unwrap();
2803 assert_eq!(left, "a");
2804 assert_eq!(op, "+");
2805 assert_eq!(right, "b");
2806 }
2807
2808 #[test]
2809 fn test_parse_expression_function_call_excluded() {
2810 assert!(parse_expression_from_line("x = foo()").is_none());
2812 assert!(parse_expression_from_line("y = bar.baz()").is_none());
2813 assert!(parse_expression_from_line("z = process(data)").is_none());
2814 }
2815
2816 #[test]
2817 fn test_parse_expression_constant_only_excluded() {
2818 assert!(parse_expression_from_line("x = 1 + 2").is_none());
2820 assert!(parse_expression_from_line("y = 10 * 20").is_none());
2821 }
2822
2823 #[test]
2824 fn test_parse_expression_with_spaces() {
2825 let result = parse_expression_from_line(" x = a + b ");
2827 assert!(result.is_some());
2828 let (left, op, right) = result.unwrap();
2829 assert_eq!(left, "a");
2830 assert_eq!(op, "+");
2831 assert_eq!(right, "b");
2832 }
2833
2834 #[test]
2835 fn test_parse_expression_rightmost_assignment() {
2836 let result = parse_expression_from_line("a = b = c + d");
2839 assert!(result.is_some());
2840 let (left, op, right) = result.unwrap();
2841 assert_eq!(left, "c");
2842 assert_eq!(op, "+");
2843 assert_eq!(right, "d");
2844 }
2845
2846 #[test]
2847 fn test_extract_base_variable_simple() {
2848 assert_eq!(extract_base_variable("x"), "x");
2849 assert_eq!(extract_base_variable("foo"), "foo");
2850 }
2851
2852 #[test]
2853 fn test_extract_base_variable_field_access() {
2854 assert_eq!(extract_base_variable("x.a"), "x.a");
2855 assert_eq!(extract_base_variable("obj.field"), "obj.field");
2856 }
2857
2858 #[test]
2859 fn test_extract_base_variable_deep_nesting_truncated() {
2860 assert_eq!(extract_base_variable("x.a.b.c.d.e"), "x.a.b");
2862 assert_eq!(extract_base_variable("a.b.c.d"), "a.b.c");
2863 }
2864
2865 #[test]
2866 fn test_is_numeric_literal() {
2867 assert!(is_numeric_literal("42"));
2869 assert!(is_numeric_literal("-10"));
2870 assert!(is_numeric_literal("0"));
2871
2872 assert!(is_numeric_literal("3.14"));
2874 assert!(is_numeric_literal("-2.5"));
2875
2876 assert!(is_numeric_literal("0x1f"));
2878 assert!(is_numeric_literal("0o17"));
2879 assert!(is_numeric_literal("0b1010"));
2880
2881 assert!(!is_numeric_literal("foo"));
2883 assert!(!is_numeric_literal("x"));
2884 assert!(!is_numeric_literal("a + b"));
2885 }
2886
2887 #[test]
2888 fn test_is_identifier() {
2889 assert!(is_identifier("x"));
2890 assert!(is_identifier("foo"));
2891 assert!(is_identifier("_private"));
2892 assert!(is_identifier("var123"));
2893 assert!(is_identifier("obj.field"));
2894
2895 assert!(!is_identifier(""));
2896 assert!(!is_identifier("123"));
2897 assert!(!is_identifier("a + b"));
2898 }
2899
2900 #[test]
2901 fn test_find_operator_in_expr() {
2902 assert_eq!(find_operator_in_expr("a + b", "+"), Some(2));
2903 assert_eq!(find_operator_in_expr("x * y", "*"), Some(2));
2904 assert_eq!(find_operator_in_expr("foo - bar", "-"), Some(4));
2905
2906 assert_eq!(find_operator_in_expr("(a + b) * c", "+"), None);
2908
2909 assert_eq!(find_operator_in_expr("(a + b) * c", "*"), Some(8));
2911 }
2912
2913 #[test]
2914 fn test_find_operator_not_in_string() {
2915 assert_eq!(find_operator_in_expr("\"a + b\"", "+"), None);
2917 assert_eq!(find_operator_in_expr("'x * y'", "*"), None);
2918 }
2919
2920 use crate::types::{BlockType, CfgBlock, CfgEdge, EdgeType};
2925
2926 fn make_test_cfg(
2928 blocks: Vec<(usize, BlockType, (u32, u32))>,
2929 edges: Vec<(usize, usize)>,
2930 entry: usize,
2931 ) -> CfgInfo {
2932 CfgInfo {
2933 function: "test".to_string(),
2934 blocks: blocks
2935 .into_iter()
2936 .map(|(id, block_type, lines)| CfgBlock {
2937 id,
2938 block_type,
2939 lines,
2940 calls: vec![],
2941 })
2942 .collect(),
2943 edges: edges
2944 .into_iter()
2945 .map(|(from, to)| CfgEdge {
2946 from,
2947 to,
2948 edge_type: EdgeType::Unconditional,
2949 condition: None,
2950 })
2951 .collect(),
2952 entry_block: entry,
2953 exit_blocks: vec![],
2954 cyclomatic_complexity: 1,
2955 nested_functions: HashMap::new(),
2956 }
2957 }
2958
2959 fn make_empty_dfg() -> DfgInfo {
2961 DfgInfo {
2962 function: "test".to_string(),
2963 refs: vec![],
2964 edges: vec![],
2965 variables: vec![],
2966 }
2967 }
2968
2969 fn make_dfg_with_refs(refs: Vec<VarRef>) -> DfgInfo {
2971 let variables: Vec<String> = refs.iter().map(|r| r.name.clone()).collect();
2972 DfgInfo {
2973 function: "test".to_string(),
2974 refs,
2975 edges: vec![],
2976 variables,
2977 }
2978 }
2979
2980 fn make_var_ref(name: &str, line: u32, ref_type: RefType) -> VarRef {
2982 VarRef {
2983 name: name.to_string(),
2984 ref_type,
2985 line,
2986 column: 0,
2987 context: None,
2988 group_id: None,
2989 }
2990 }
2991
2992 #[test]
2993 fn test_compute_empty_cfg_empty_dfg() {
2994 let cfg = CfgInfo {
2996 function: "empty".to_string(),
2997 blocks: vec![],
2998 edges: vec![],
2999 entry_block: 0,
3000 exit_blocks: vec![],
3001 cyclomatic_complexity: 1,
3002 nested_functions: HashMap::new(),
3003 };
3004 let dfg = make_empty_dfg();
3005
3006 let result = compute_available_exprs(&cfg, &dfg);
3007 assert!(result.is_err());
3008 }
3009
3010 #[test]
3011 fn test_compute_single_block_no_exprs() {
3012 let cfg = make_test_cfg(vec![(0, BlockType::Entry, (1, 1))], vec![], 0);
3014 let dfg = make_empty_dfg();
3015
3016 let result = compute_available_exprs(&cfg, &dfg);
3017 assert!(result.is_ok());
3018 let info = result.unwrap();
3019
3020 assert!(info.all_exprs.is_empty());
3022
3023 assert!(info.avail_in.contains_key(&0));
3025 assert!(info.avail_out.contains_key(&0));
3026
3027 assert!(info.avail_in.get(&0).unwrap().is_empty());
3029 }
3030
3031 #[test]
3032 fn test_compute_entry_block_nothing_available() {
3033 let cfg = make_test_cfg(
3035 vec![(0, BlockType::Entry, (1, 5)), (1, BlockType::Exit, (6, 10))],
3036 vec![(0, 1)],
3037 0,
3038 );
3039 let dfg = make_empty_dfg();
3040
3041 let result = compute_available_exprs(&cfg, &dfg).unwrap();
3042
3043 assert!(result.avail_in.get(&0).unwrap().is_empty());
3045 }
3046
3047 #[test]
3048 fn test_compute_linear_cfg_expression_propagates() {
3049 let cfg = make_test_cfg(
3052 vec![
3053 (0, BlockType::Entry, (1, 2)),
3054 (1, BlockType::Body, (3, 4)),
3055 (2, BlockType::Exit, (5, 6)),
3056 ],
3057 vec![(0, 1), (1, 2)],
3058 0,
3059 );
3060
3061 let dfg = make_dfg_with_refs(vec![
3063 make_var_ref("x", 2, RefType::Definition),
3064 make_var_ref("a", 2, RefType::Use),
3065 make_var_ref("b", 2, RefType::Use),
3066 ]);
3067
3068 let result = compute_available_exprs(&cfg, &dfg).unwrap();
3069
3070 if !result.all_exprs.is_empty() {
3073 let expr = result.all_exprs.iter().next().unwrap();
3074
3075 assert!(result.is_available_at_exit(0, expr));
3077
3078 assert!(result.is_available(1, expr));
3080
3081 assert!(result.is_available(2, expr));
3083 }
3084 }
3085
3086 #[test]
3087 fn test_compute_diamond_must_intersection() {
3088 let cfg = make_test_cfg(
3097 vec![
3098 (0, BlockType::Entry, (1, 1)),
3099 (1, BlockType::Body, (2, 2)),
3100 (2, BlockType::Body, (3, 3)),
3101 (3, BlockType::Exit, (4, 4)),
3102 ],
3103 vec![(0, 1), (0, 2), (1, 3), (2, 3)],
3104 0,
3105 );
3106
3107 let dfg = make_dfg_with_refs(vec![
3109 make_var_ref("x", 2, RefType::Definition),
3110 make_var_ref("a", 2, RefType::Use),
3111 make_var_ref("b", 2, RefType::Use),
3112 ]);
3113
3114 let result = compute_available_exprs(&cfg, &dfg).unwrap();
3115
3116 if !result.all_exprs.is_empty() {
3118 let expr = result.all_exprs.iter().next().unwrap();
3119
3120 assert!(result.is_available_at_exit(1, expr));
3122
3123 }
3131 }
3132
3133 #[test]
3134 fn test_compute_self_loop_terminates() {
3135 let cfg = make_test_cfg(
3137 vec![
3138 (0, BlockType::Entry, (1, 1)),
3139 (1, BlockType::LoopHeader, (2, 3)),
3140 ],
3141 vec![(0, 1), (1, 1)],
3142 0,
3143 );
3144 let dfg = make_empty_dfg();
3145
3146 let result = compute_available_exprs(&cfg, &dfg);
3148 assert!(result.is_ok());
3149 }
3150
3151 #[test]
3152 fn test_compute_unreachable_block() {
3153 let cfg = make_test_cfg(
3156 vec![
3157 (0, BlockType::Entry, (1, 1)),
3158 (1, BlockType::Exit, (2, 2)),
3159 (2, BlockType::Body, (3, 3)), ],
3161 vec![(0, 1)],
3162 0,
3163 );
3164 let dfg = make_empty_dfg();
3165
3166 let result = compute_available_exprs(&cfg, &dfg);
3167 assert!(result.is_ok());
3168
3169 let info = result.unwrap();
3170
3171 assert!(info.avail_in.get(&2).unwrap().is_empty());
3173 }
3174
3175 #[test]
3176 fn test_is_killed_function() {
3177 let kills: HashSet<String> = ["a".to_string(), "x".to_string()].into_iter().collect();
3178
3179 let expr1 = Expression::new("a + b", vec!["a", "b"], 1);
3180 let expr2 = Expression::new("c + d", vec!["c", "d"], 2);
3181 let expr3 = Expression::new("x + y", vec!["x", "y"], 3);
3182
3183 assert!(is_killed(&expr1, &kills));
3185
3186 assert!(!is_killed(&expr2, &kills));
3188
3189 assert!(is_killed(&expr3, &kills));
3191 }
3192
3193 #[test]
3194 fn test_compute_loop_expression_available_in_body() {
3195 let cfg = make_test_cfg(
3197 vec![
3198 (0, BlockType::Entry, (1, 1)),
3199 (1, BlockType::LoopHeader, (2, 2)),
3200 (2, BlockType::LoopBody, (3, 3)),
3201 (3, BlockType::Exit, (4, 4)),
3202 ],
3203 vec![(0, 1), (1, 2), (2, 1), (1, 3)],
3204 0,
3205 );
3206 let dfg = make_empty_dfg();
3207
3208 let result = compute_available_exprs(&cfg, &dfg);
3209 assert!(result.is_ok());
3210 }
3211
3212 #[test]
3217 fn test_extract_binary_exprs_from_ast_python() {
3218 let source = r#"
3219def example(a, b, c):
3220 x = a + b
3221 if a * c > 10:
3222 return a + b
3223 y = c - a
3224"#;
3225 let lang = crate::types::Language::Python;
3226 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 6);
3227 assert!(
3229 exprs.len() >= 3,
3230 "Expected at least 3 binary exprs from Python, got {}: {:?}",
3231 exprs.len(),
3232 exprs
3233 );
3234 let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
3236 assert!(
3237 texts
3238 .iter()
3239 .any(|t| t.contains("a") && t.contains("b") && t.contains("+")),
3240 "Should find a + b expression, got: {:?}",
3241 texts
3242 );
3243 }
3244
3245 #[test]
3246 fn test_extract_binary_exprs_from_ast_go() {
3247 let source = r#"
3248package main
3249
3250func example(a int, b int, c int) int {
3251 x := a + b
3252 if a * c > 10 {
3253 return a + b
3254 }
3255 y := c - a
3256 return y
3257}
3258"#;
3259 let lang = crate::types::Language::Go;
3260 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3261 assert!(
3262 exprs.len() >= 3,
3263 "Expected at least 3 binary exprs from Go, got {}: {:?}",
3264 exprs.len(),
3265 exprs
3266 );
3267 }
3268
3269 #[test]
3270 fn test_extract_binary_exprs_from_ast_java() {
3271 let source = r#"
3272class Example {
3273 int example(int a, int b, int c) {
3274 int x = a + b;
3275 if (a * c > 10) {
3276 return a + b;
3277 }
3278 int y = c - a;
3279 return y;
3280 }
3281}
3282"#;
3283 let lang = crate::types::Language::Java;
3284 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3285 assert!(
3286 exprs.len() >= 3,
3287 "Expected at least 3 binary exprs from Java, got {}: {:?}",
3288 exprs.len(),
3289 exprs
3290 );
3291 }
3292
3293 #[test]
3294 fn test_extract_binary_exprs_from_ast_ruby() {
3295 let source = r#"
3296def example(a, b, c)
3297 x = a + b
3298 if a * c > 10
3299 return a + b
3300 end
3301 y = c - a
3302 y
3303end
3304"#;
3305 let lang = crate::types::Language::Ruby;
3306 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3307 assert!(
3308 exprs.len() >= 3,
3309 "Expected at least 3 binary exprs from Ruby, got {}: {:?}",
3310 exprs.len(),
3311 exprs
3312 );
3313 }
3314
3315 #[test]
3316 fn test_extract_binary_exprs_from_ast_cpp() {
3317 let source = r#"
3318int example(int a, int b, int c) {
3319 int x = a + b;
3320 if (a * c > 10) {
3321 return a + b;
3322 }
3323 int y = c - a;
3324 return y;
3325}
3326"#;
3327 let lang = crate::types::Language::Cpp;
3328 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3329 assert!(
3330 exprs.len() >= 3,
3331 "Expected at least 3 binary exprs from C++, got {}: {:?}",
3332 exprs.len(),
3333 exprs
3334 );
3335 }
3336
3337 #[test]
3338 fn test_extract_binary_exprs_from_ast_php() {
3339 let source = r#"<?php
3340function example($a, $b, $c) {
3341 $x = $a + $b;
3342 if ($a * $c > 10) {
3343 return $a + $b;
3344 }
3345 $y = $c - $a;
3346 return $y;
3347}
3348"#;
3349 let lang = crate::types::Language::Php;
3350 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3351 assert!(
3352 exprs.len() >= 3,
3353 "Expected at least 3 binary exprs from PHP, got {}: {:?}",
3354 exprs.len(),
3355 exprs
3356 );
3357 }
3358
3359 #[test]
3360 fn test_extract_binary_exprs_from_ast_csharp() {
3361 let source = r#"
3362class Example {
3363 int DoWork(int a, int b, int c) {
3364 int x = a + b;
3365 if (a * c > 10) {
3366 return a + b;
3367 }
3368 int y = c - a;
3369 return y;
3370 }
3371}
3372"#;
3373 let lang = crate::types::Language::CSharp;
3374 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3375 assert!(
3376 exprs.len() >= 3,
3377 "Expected at least 3 binary exprs from C#, got {}: {:?}",
3378 exprs.len(),
3379 exprs
3380 );
3381 }
3382
3383 #[test]
3384 fn test_extract_binary_exprs_from_ast_kotlin() {
3385 let source = r#"
3386fun example(a: Int, b: Int, c: Int): Int {
3387 val x = a + b
3388 if (a * c > 10) {
3389 return a + b
3390 }
3391 val y = c - a
3392 return y
3393}
3394"#;
3395 let lang = crate::types::Language::Kotlin;
3396 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3397 assert!(
3398 exprs.len() >= 3,
3399 "Expected at least 3 binary exprs from Kotlin, got {}: {:?}",
3400 exprs.len(),
3401 exprs
3402 );
3403 }
3404
3405 #[test]
3406 fn test_extract_binary_exprs_from_ast_elixir() {
3407 let source = r#"
3408defmodule Example do
3409 def example(a, b, c) do
3410 x = a + b
3411 if a * c > 10 do
3412 a + b
3413 end
3414 y = c - a
3415 y
3416 end
3417end
3418"#;
3419 let lang = crate::types::Language::Elixir;
3420 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 11);
3421 assert!(
3422 exprs.len() >= 3,
3423 "Expected at least 3 binary exprs from Elixir, got {}: {:?}",
3424 exprs.len(),
3425 exprs
3426 );
3427 }
3428
3429 #[test]
3430 fn test_extract_binary_exprs_from_ast_rust() {
3431 let source = r#"
3432fn example(a: i32, b: i32, c: i32) -> i32 {
3433 let x = a + b;
3434 if a * c > 10 {
3435 return a + b;
3436 }
3437 let y = c - a;
3438 y
3439}
3440"#;
3441 let lang = crate::types::Language::Rust;
3442 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 9);
3443 assert!(
3444 exprs.len() >= 3,
3445 "Expected at least 3 binary exprs from Rust, got {}: {:?}",
3446 exprs.len(),
3447 exprs
3448 );
3449 }
3450
3451 #[test]
3452 fn test_extract_binary_exprs_excludes_function_calls() {
3453 let source = r#"
3454def example(a, b):
3455 x = a + b
3456 y = len(a)
3457 z = foo(a, b)
3458"#;
3459 let lang = crate::types::Language::Python;
3460 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 5);
3461 let texts: Vec<&str> = exprs.iter().map(|e| e.0.as_str()).collect();
3463 assert!(
3464 !texts.iter().any(|t| t.contains("len") || t.contains("foo")),
3465 "Should not include function calls, got: {:?}",
3466 texts
3467 );
3468 }
3469
3470 #[test]
3471 fn test_extract_binary_exprs_normalizes_commutative() {
3472 let source = r#"
3473def example(a, b):
3474 x = b + a
3475 y = a + b
3476"#;
3477 let lang = crate::types::Language::Python;
3478 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
3479 let texts: Vec<String> = exprs.iter().map(|e| e.0.clone()).collect();
3481 if texts.len() >= 2 {
3482 assert_eq!(
3483 texts[0], texts[1],
3484 "Commutative exprs should normalize to same text: {:?}",
3485 texts
3486 );
3487 }
3488 }
3489
3490 #[test]
3491 fn test_extract_binary_exprs_returns_line_numbers() {
3492 let source = r#"
3493def example(a, b):
3494 x = a + b
3495 y = a - b
3496"#;
3497 let lang = crate::types::Language::Python;
3498 let exprs = extract_binary_exprs_from_ast(source, lang, 1, 4);
3499 assert!(
3500 exprs.len() >= 2,
3501 "Expected at least 2 exprs, got {}",
3502 exprs.len()
3503 );
3504 for (text, _op, _left, _right, line) in &exprs {
3506 assert!(*line > 0, "Line should be > 0 for expr: {}", text);
3507 }
3508 }
3509
3510 #[test]
3511 fn test_parse_expression_no_assignment_return() {
3512 let result = parse_expression_from_line(" return a + b");
3514 assert!(
3515 result.is_some(),
3516 "Should parse expression from return statement"
3517 );
3518 let (left, op, right) = result.unwrap();
3519 assert_eq!(op, "+");
3520 assert!(left == "a" || right == "a");
3521 }
3522
3523 #[test]
3524 fn test_parse_expression_no_assignment_if() {
3525 let result = parse_expression_from_line(" if a + b > 10:");
3527 assert!(
3529 result.is_some(),
3530 "Should parse expression from if condition"
3531 );
3532 }
3533
3534 #[test]
3535 fn test_parse_expression_no_assignment_standalone() {
3536 let result = parse_expression_from_line(" a + b");
3538 assert!(
3539 result.is_some(),
3540 "Should parse standalone binary expression"
3541 );
3542 }
3543}