use std::collections::{HashMap, HashSet};
use crate::analysis::{InductionVar, LoopInfo, SsaFunction, SsaOp, SsaVarId};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BlockRole {
Entry,
Init,
Condition,
Body,
Latch,
Exit,
Dispatcher,
StateUpdate,
Unknown,
}
impl BlockRole {
#[must_use]
pub fn is_control_flow(&self) -> bool {
matches!(self, Self::Dispatcher | Self::StateUpdate)
}
#[must_use]
pub fn is_user_code(&self) -> bool {
matches!(self, Self::Init | Self::Body | Self::Latch | Self::Exit)
}
}
#[derive(Debug, Clone)]
pub struct BlockSemantics {
pub block: usize,
pub role: BlockRole,
pub secondary_roles: Vec<BlockRole>,
pub initialized_vars: Vec<SsaVarId>,
pub updated_vars: Vec<SsaVarId>,
pub has_side_effects: bool,
pub has_comparison: bool,
pub confidence: f64,
}
impl BlockSemantics {
#[must_use]
pub fn new(block: usize) -> Self {
Self {
block,
role: BlockRole::Unknown,
secondary_roles: Vec::new(),
initialized_vars: Vec::new(),
updated_vars: Vec::new(),
has_side_effects: false,
has_comparison: false,
confidence: 0.0,
}
}
#[must_use]
pub fn has_role(&self, role: BlockRole) -> bool {
self.role == role || self.secondary_roles.contains(&role)
}
}
#[derive(Debug, Clone)]
pub struct LoopSemantics {
pub init_blocks: Vec<usize>,
pub condition_blocks: Vec<usize>,
pub body_blocks: Vec<usize>,
pub latch_blocks: Vec<usize>,
pub exit_blocks: Vec<usize>,
pub induction_vars: Vec<InductionVar>,
pub execution_order: Vec<usize>,
pub confidence: f64,
}
impl Default for LoopSemantics {
fn default() -> Self {
Self::new()
}
}
impl LoopSemantics {
#[must_use]
pub fn new() -> Self {
Self {
init_blocks: Vec::new(),
condition_blocks: Vec::new(),
body_blocks: Vec::new(),
latch_blocks: Vec::new(),
exit_blocks: Vec::new(),
induction_vars: Vec::new(),
execution_order: Vec::new(),
confidence: 0.0,
}
}
#[must_use]
pub fn ordered_blocks(&self) -> Vec<usize> {
[
self.init_blocks.as_slice(),
self.condition_blocks.as_slice(),
self.body_blocks.as_slice(),
self.latch_blocks.as_slice(),
]
.concat()
}
#[must_use]
pub fn is_valid(&self) -> bool {
!self.condition_blocks.is_empty()
&& (!self.body_blocks.is_empty() || !self.latch_blocks.is_empty())
}
#[must_use]
pub fn is_branching(&self) -> bool {
!self.condition_blocks.is_empty()
&& !self.body_blocks.is_empty()
&& self.latch_blocks.is_empty()
&& self.induction_vars.is_empty()
}
}
pub struct SemanticAnalyzer<'a> {
ssa: &'a SsaFunction,
block_cache: HashMap<usize, BlockSemantics>,
dispatcher_blocks: HashSet<usize>,
}
impl<'a> SemanticAnalyzer<'a> {
#[must_use]
pub fn new(ssa: &'a SsaFunction) -> Self {
Self {
ssa,
block_cache: HashMap::new(),
dispatcher_blocks: HashSet::new(),
}
}
pub fn mark_dispatcher(&mut self, block: usize) {
self.dispatcher_blocks.insert(block);
}
pub fn analyze_block(&mut self, block_idx: usize) -> &BlockSemantics {
if !self.block_cache.contains_key(&block_idx) {
let semantics = self.compute_block_semantics(block_idx);
self.block_cache.insert(block_idx, semantics);
}
&self.block_cache[&block_idx]
}
fn compute_block_semantics(&self, block_idx: usize) -> BlockSemantics {
let mut semantics = BlockSemantics::new(block_idx);
let Some(block) = self.ssa.block(block_idx) else {
return semantics;
};
if self.dispatcher_blocks.contains(&block_idx) {
semantics.role = BlockRole::Dispatcher;
semantics.confidence = 1.0;
return semantics;
}
let mut const_assignments = 0;
let mut add_sub_ops = 0;
let mut call_count = 0;
let mut store_count = 0;
let mut comparison_count = 0;
let mut has_return = false;
let mut has_branch = false;
let mut has_switch = false;
for instr in block.instructions() {
match instr.op() {
SsaOp::Const { dest, .. } => {
const_assignments += 1;
semantics.initialized_vars.push(*dest);
}
SsaOp::Add { dest, .. } | SsaOp::Sub { dest, .. } => {
add_sub_ops += 1;
semantics.updated_vars.push(*dest);
}
SsaOp::Call { .. } | SsaOp::CallVirt { .. } | SsaOp::NewObj { .. } => {
call_count += 1;
semantics.has_side_effects = true;
}
SsaOp::StoreField { .. }
| SsaOp::StoreStaticField { .. }
| SsaOp::StoreElement { .. }
| SsaOp::StoreIndirect { .. } => {
store_count += 1;
semantics.has_side_effects = true;
}
SsaOp::Clt { .. } | SsaOp::Cgt { .. } | SsaOp::Ceq { .. } => {
comparison_count += 1;
semantics.has_comparison = true;
}
SsaOp::Branch { .. } => {
has_branch = true;
}
SsaOp::Switch { .. } => {
has_switch = true;
}
SsaOp::Return { .. } | SsaOp::Throw { .. } | SsaOp::Rethrow => {
has_return = true;
}
_ => {}
}
}
let total_instrs = block.instructions().len();
if block_idx == 0 {
semantics.secondary_roles.push(BlockRole::Entry);
}
if has_return {
semantics.role = BlockRole::Exit;
semantics.confidence = 0.95;
return semantics;
}
if has_switch {
if let Some(SsaOp::Switch { targets, .. }) = block.terminator_op() {
if targets.len() >= 4 {
semantics.role = BlockRole::Dispatcher;
semantics.confidence = 0.8;
return semantics;
}
}
}
if has_branch && (comparison_count > 0 || semantics.has_comparison) {
semantics.role = BlockRole::Condition;
semantics.confidence = 0.85;
return semantics;
}
if has_branch && total_instrs <= 2 {
semantics.role = BlockRole::Condition;
semantics.confidence = 0.7;
return semantics;
}
if const_assignments > 0 && call_count == 0 && store_count == 0 && add_sub_ops == 0 {
semantics.role = BlockRole::Init;
semantics.confidence = 0.75;
return semantics;
}
if add_sub_ops > 0 && call_count == 0 {
let is_likely_latch = self.check_increment_pattern(block_idx);
if is_likely_latch {
semantics.role = BlockRole::Latch;
semantics.confidence = 0.8;
return semantics;
}
}
if semantics.has_side_effects {
semantics.role = BlockRole::Body;
semantics.confidence = 0.7;
return semantics;
}
if self.check_state_update_pattern(block_idx) {
semantics.role = BlockRole::StateUpdate;
semantics.confidence = 0.75;
return semantics;
}
semantics.role = BlockRole::Unknown;
semantics.confidence = 0.3;
semantics
}
fn check_increment_pattern(&self, block_idx: usize) -> bool {
let Some(block) = self.ssa.block(block_idx) else {
return false;
};
for instr in block.instructions() {
if let SsaOp::Add { left, right, .. } = instr.op() {
if self.is_small_constant(*left) || self.is_small_constant(*right) {
return true;
}
}
}
false
}
fn check_state_update_pattern(&self, block_idx: usize) -> bool {
let Some(block) = self.ssa.block(block_idx) else {
return false;
};
for instr in block.instructions() {
if let SsaOp::Xor { left, right, .. } = instr.op() {
if self.is_large_constant(*left) || self.is_large_constant(*right) {
return true;
}
}
}
false
}
fn is_small_constant(&self, var: SsaVarId) -> bool {
if let Some(val) = self.get_constant_value(var) {
val.abs() < 1000
} else {
false
}
}
fn is_large_constant(&self, var: SsaVarId) -> bool {
if let Some(val) = self.get_constant_value(var) {
val.abs() > 10000
} else {
false
}
}
fn get_constant_value(&self, var: SsaVarId) -> Option<i64> {
let variable = self.ssa.variable(var)?;
let def_site = variable.def_site();
if def_site.is_phi() {
return None;
}
let block = self.ssa.block(def_site.block)?;
let instr = block.instruction(def_site.instruction?)?;
match instr.op() {
SsaOp::Const { value, .. } => value.as_i64(),
_ => None,
}
}
pub fn analyze_loop(&mut self, loop_info: &LoopInfo) -> LoopSemantics {
let mut semantics = LoopSemantics::new();
semantics.induction_vars = loop_info.find_induction_vars(self.ssa);
let iv_update_blocks: HashSet<_> = semantics
.induction_vars
.iter()
.map(|iv| iv.update_block.index())
.collect();
for &block_id in &loop_info.body {
let block_idx = block_id.index();
let block_sem = self.analyze_block(block_idx);
match block_sem.role {
BlockRole::Init => {
semantics.init_blocks.push(block_idx);
}
BlockRole::Condition => {
semantics.condition_blocks.push(block_idx);
}
BlockRole::Body => {
semantics.body_blocks.push(block_idx);
}
BlockRole::Latch => {
semantics.latch_blocks.push(block_idx);
}
BlockRole::Exit => {
semantics.exit_blocks.push(block_idx);
}
BlockRole::Dispatcher | BlockRole::StateUpdate => {
}
BlockRole::Entry | BlockRole::Unknown => {
if iv_update_blocks.contains(&block_idx) {
semantics.latch_blocks.push(block_idx);
} else if block_sem.has_side_effects {
semantics.body_blocks.push(block_idx);
}
}
}
}
if let Some(preheader) = loop_info.preheader {
let pre_sem = self.analyze_block(preheader.index());
if pre_sem.role == BlockRole::Init || !pre_sem.initialized_vars.is_empty() {
semantics.init_blocks.push(preheader.index());
}
}
if semantics.condition_blocks.is_empty() {
let header_sem = self.analyze_block(loop_info.header.index());
if header_sem.has_role(BlockRole::Condition) {
semantics.condition_blocks.push(loop_info.header.index());
}
}
for exit in &loop_info.exits {
if !semantics.exit_blocks.contains(&exit.exit_block.index()) {
semantics.exit_blocks.push(exit.exit_block.index());
}
}
semantics.execution_order = self.compute_loop_execution_order(loop_info, &semantics);
semantics.confidence = Self::compute_loop_confidence(&semantics);
semantics
}
fn compute_loop_execution_order(
&self,
loop_info: &LoopInfo,
semantics: &LoopSemantics,
) -> Vec<usize> {
let mut order = Vec::new();
for &init in &semantics.init_blocks {
if !order.contains(&init) {
order.push(init);
}
}
for &cond in &semantics.condition_blocks {
if !order.contains(&cond) {
order.push(cond);
}
}
let body_order = self.order_body_blocks(loop_info, &semantics.body_blocks);
for body in body_order {
if !order.contains(&body) {
order.push(body);
}
}
for &latch in &semantics.latch_blocks {
if !order.contains(&latch) {
order.push(latch);
}
}
order
}
fn order_body_blocks(&self, loop_info: &LoopInfo, body_blocks: &[usize]) -> Vec<usize> {
if body_blocks.is_empty() {
return Vec::new();
}
let body_set: HashSet<_> = body_blocks.iter().copied().collect();
let mut ordered = Vec::new();
let mut visited = HashSet::new();
if let Some(cond) = loop_info.find_condition_in_body(self.ssa) {
if let Some(block) = self.ssa.block(cond.index()) {
if let Some(SsaOp::Branch { true_target, .. }) = block.terminator_op() {
if body_set.contains(true_target) {
self.dfs_order(*true_target, &body_set, &mut visited, &mut ordered);
}
}
}
}
for &block in body_blocks {
if !visited.contains(&block) {
self.dfs_order(block, &body_set, &mut visited, &mut ordered);
}
}
ordered
}
fn dfs_order(
&self,
block: usize,
allowed: &HashSet<usize>,
visited: &mut HashSet<usize>,
order: &mut Vec<usize>,
) {
if !allowed.contains(&block) || visited.contains(&block) {
return;
}
visited.insert(block);
order.push(block);
if let Some(b) = self.ssa.block(block) {
if let Some(op) = b.terminator_op() {
for succ in op.successors() {
self.dfs_order(succ, allowed, visited, order);
}
}
}
}
fn compute_loop_confidence(semantics: &LoopSemantics) -> f64 {
let mut score = 0.0;
if !semantics.condition_blocks.is_empty() {
score += 0.3;
}
if !semantics.body_blocks.is_empty() {
score += 0.2;
}
if !semantics.latch_blocks.is_empty() {
score += 0.2;
}
if !semantics.induction_vars.is_empty() {
score += 0.2;
}
if !semantics.init_blocks.is_empty() {
score += 0.1;
}
score
}
pub fn analyze_dispatcher_cases(&mut self, case_blocks: &[usize]) -> HashMap<usize, BlockRole> {
let mut roles = HashMap::new();
for &block in case_blocks {
let sem = self.analyze_block(block);
roles.insert(block, sem.role);
}
roles
}
pub fn recover_loop_from_cases(&mut self, case_blocks: &[usize]) -> LoopSemantics {
let mut semantics = LoopSemantics::new();
let roles = self.analyze_dispatcher_cases(case_blocks);
for (&block, &role) in &roles {
match role {
BlockRole::Init => semantics.init_blocks.push(block),
BlockRole::Condition => semantics.condition_blocks.push(block),
BlockRole::Body => semantics.body_blocks.push(block),
BlockRole::Latch => semantics.latch_blocks.push(block),
BlockRole::Exit => semantics.exit_blocks.push(block),
_ => {}
}
}
semantics.execution_order = semantics.ordered_blocks();
semantics.confidence = Self::compute_loop_confidence(&semantics);
semantics
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::SsaFunctionBuilder;
#[test]
fn test_block_role_classification() {
assert!(BlockRole::Dispatcher.is_control_flow());
assert!(BlockRole::StateUpdate.is_control_flow());
assert!(!BlockRole::Body.is_control_flow());
assert!(BlockRole::Init.is_user_code());
assert!(BlockRole::Body.is_user_code());
assert!(!BlockRole::Dispatcher.is_user_code());
}
#[test]
fn test_exit_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 0).build_with(|f| {
f.block(0, |b| {
b.ret();
});
});
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Exit);
assert!(sem.confidence > 0.9);
}
#[test]
fn test_condition_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 1).build_with(|f| {
let local = f.local(0);
f.block(0, |b| {
let zero = b.const_i32(0);
let cmp = b.clt(local, zero);
b.branch(cmp, 1, 2);
});
f.block(1, |b| b.ret());
f.block(2, |b| b.ret());
});
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Condition);
assert!(sem.has_comparison);
}
#[test]
fn test_init_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 0).build_with(|f| {
f.block(0, |b| {
let _ = b.const_i32(0);
let _ = b.const_i32(1);
b.jump(1);
});
f.block(1, |b| b.ret());
});
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Init);
assert_eq!(sem.initialized_vars.len(), 2);
}
#[test]
fn test_loop_semantics_validity() {
let mut sem = LoopSemantics::new();
assert!(!sem.is_valid());
sem.condition_blocks.push(1);
assert!(!sem.is_valid());
sem.body_blocks.push(2);
assert!(sem.is_valid()); assert!(sem.is_branching());
sem.latch_blocks.push(3);
assert!(sem.is_valid()); assert!(!sem.is_branching()); }
}