use std::collections::HashMap;
use crate::{
analysis::{cfg::InductionVar, LoopInfo, SsaFunction, SsaOp, SsaVarId},
utils::BitSet,
};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BlockRole {
Entry,
Init,
Condition,
Body,
Latch,
Exit,
Dispatcher,
StateUpdate,
Unknown,
}
impl BlockRole {
#[must_use]
pub fn is_control_flow(&self) -> bool {
matches!(self, Self::Dispatcher | Self::StateUpdate)
}
#[must_use]
pub fn is_user_code(&self) -> bool {
matches!(self, Self::Init | Self::Body | Self::Latch | Self::Exit)
}
}
#[derive(Debug, Clone)]
pub struct BlockSemantics {
pub block: usize,
pub role: BlockRole,
pub secondary_roles: Vec<BlockRole>,
pub initialized_vars: Vec<SsaVarId>,
pub updated_vars: Vec<SsaVarId>,
pub has_side_effects: bool,
pub has_comparison: bool,
pub confidence: f64,
}
impl BlockSemantics {
#[must_use]
pub fn new(block: usize) -> Self {
Self {
block,
role: BlockRole::Unknown,
secondary_roles: Vec::new(),
initialized_vars: Vec::new(),
updated_vars: Vec::new(),
has_side_effects: false,
has_comparison: false,
confidence: 0.0,
}
}
#[must_use]
pub fn has_role(&self, role: BlockRole) -> bool {
self.role == role || self.secondary_roles.contains(&role)
}
}
#[derive(Debug, Clone)]
pub struct LoopSemantics {
pub init_blocks: Vec<usize>,
pub condition_blocks: Vec<usize>,
pub body_blocks: Vec<usize>,
pub latch_blocks: Vec<usize>,
pub exit_blocks: Vec<usize>,
pub induction_vars: Vec<InductionVar>,
pub execution_order: Vec<usize>,
pub confidence: f64,
}
impl Default for LoopSemantics {
fn default() -> Self {
Self::new()
}
}
impl LoopSemantics {
#[must_use]
pub fn new() -> Self {
Self {
init_blocks: Vec::new(),
condition_blocks: Vec::new(),
body_blocks: Vec::new(),
latch_blocks: Vec::new(),
exit_blocks: Vec::new(),
induction_vars: Vec::new(),
execution_order: Vec::new(),
confidence: 0.0,
}
}
#[must_use]
pub fn ordered_blocks(&self) -> Vec<usize> {
[
self.init_blocks.as_slice(),
self.condition_blocks.as_slice(),
self.body_blocks.as_slice(),
self.latch_blocks.as_slice(),
]
.concat()
}
#[must_use]
pub fn is_valid(&self) -> bool {
!self.condition_blocks.is_empty()
&& (!self.body_blocks.is_empty() || !self.latch_blocks.is_empty())
}
#[must_use]
pub fn is_branching(&self) -> bool {
!self.condition_blocks.is_empty()
&& !self.body_blocks.is_empty()
&& self.latch_blocks.is_empty()
&& self.induction_vars.is_empty()
}
}
pub struct SemanticAnalyzer<'a> {
ssa: &'a SsaFunction,
block_cache: HashMap<usize, BlockSemantics>,
dispatcher_blocks: BitSet,
}
impl<'a> SemanticAnalyzer<'a> {
#[must_use]
pub fn new(ssa: &'a SsaFunction) -> Self {
Self {
ssa,
block_cache: HashMap::new(),
dispatcher_blocks: BitSet::new(ssa.block_count().max(1)),
}
}
pub fn mark_dispatcher(&mut self, block: usize) {
if block < self.dispatcher_blocks.len() {
self.dispatcher_blocks.insert(block);
}
}
pub fn analyze_block(&mut self, block_idx: usize) -> &BlockSemantics {
if !self.block_cache.contains_key(&block_idx) {
let semantics = self.compute_block_semantics(block_idx);
self.block_cache.insert(block_idx, semantics);
}
&self.block_cache[&block_idx]
}
fn compute_block_semantics(&self, block_idx: usize) -> BlockSemantics {
let mut semantics = BlockSemantics::new(block_idx);
let Some(block) = self.ssa.block(block_idx) else {
return semantics;
};
if block_idx < self.dispatcher_blocks.len() && self.dispatcher_blocks.contains(block_idx) {
semantics.role = BlockRole::Dispatcher;
semantics.confidence = 1.0;
return semantics;
}
let mut const_assignments = 0;
let mut add_sub_ops = 0;
let mut call_count = 0;
let mut store_count = 0;
let mut comparison_count = 0;
let mut has_return = false;
let mut has_branch = false;
let mut has_switch = false;
for instr in block.instructions() {
match instr.op() {
SsaOp::Const { dest, .. } => {
const_assignments += 1;
semantics.initialized_vars.push(*dest);
}
SsaOp::Add { dest, .. } | SsaOp::Sub { dest, .. } => {
add_sub_ops += 1;
semantics.updated_vars.push(*dest);
}
SsaOp::Call { .. } | SsaOp::CallVirt { .. } | SsaOp::NewObj { .. } => {
call_count += 1;
semantics.has_side_effects = true;
}
SsaOp::StoreField { .. }
| SsaOp::StoreStaticField { .. }
| SsaOp::StoreElement { .. }
| SsaOp::StoreIndirect { .. } => {
store_count += 1;
semantics.has_side_effects = true;
}
SsaOp::Clt { .. } | SsaOp::Cgt { .. } | SsaOp::Ceq { .. } => {
comparison_count += 1;
semantics.has_comparison = true;
}
SsaOp::Branch { .. } => {
has_branch = true;
}
SsaOp::Switch { .. } => {
has_switch = true;
}
SsaOp::Return { .. } | SsaOp::Throw { .. } | SsaOp::Rethrow => {
has_return = true;
}
_ => {}
}
}
let total_instrs = block.instructions().len();
if block_idx == 0 {
semantics.secondary_roles.push(BlockRole::Entry);
}
if has_return {
semantics.role = BlockRole::Exit;
semantics.confidence = 0.95;
return semantics;
}
if has_switch {
if let Some(SsaOp::Switch { targets, .. }) = block.terminator_op() {
if targets.len() >= 4 {
semantics.role = BlockRole::Dispatcher;
semantics.confidence = 0.8;
return semantics;
}
}
}
if has_branch && (comparison_count > 0 || semantics.has_comparison) {
semantics.role = BlockRole::Condition;
semantics.confidence = 0.85;
return semantics;
}
if has_branch && total_instrs <= 2 {
semantics.role = BlockRole::Condition;
semantics.confidence = 0.7;
return semantics;
}
if const_assignments > 0 && call_count == 0 && store_count == 0 && add_sub_ops == 0 {
semantics.role = BlockRole::Init;
semantics.confidence = 0.75;
return semantics;
}
if add_sub_ops > 0 && call_count == 0 {
let is_likely_latch = self.check_increment_pattern(block_idx);
if is_likely_latch {
semantics.role = BlockRole::Latch;
semantics.confidence = 0.8;
return semantics;
}
}
if semantics.has_side_effects {
semantics.role = BlockRole::Body;
semantics.confidence = 0.7;
return semantics;
}
if self.check_state_update_pattern(block_idx) {
semantics.role = BlockRole::StateUpdate;
semantics.confidence = 0.75;
return semantics;
}
semantics.role = BlockRole::Unknown;
semantics.confidence = 0.3;
semantics
}
fn check_increment_pattern(&self, block_idx: usize) -> bool {
let Some(block) = self.ssa.block(block_idx) else {
return false;
};
for instr in block.instructions() {
if let SsaOp::Add { left, right, .. } = instr.op() {
if self.is_small_constant(*left) || self.is_small_constant(*right) {
return true;
}
}
}
false
}
fn check_state_update_pattern(&self, block_idx: usize) -> bool {
let Some(block) = self.ssa.block(block_idx) else {
return false;
};
for instr in block.instructions() {
if let SsaOp::Xor { left, right, .. } = instr.op() {
if self.is_large_constant(*left) || self.is_large_constant(*right) {
return true;
}
}
}
false
}
fn is_small_constant(&self, var: SsaVarId) -> bool {
if let Some(val) = self.get_constant_value(var) {
val.abs() < 1000
} else {
false
}
}
fn is_large_constant(&self, var: SsaVarId) -> bool {
if let Some(val) = self.get_constant_value(var) {
val.abs() > 10000
} else {
false
}
}
fn get_constant_value(&self, var: SsaVarId) -> Option<i64> {
self.ssa.try_constant_value(var).and_then(|v| v.as_i64())
}
pub fn analyze_loop(&mut self, loop_info: &LoopInfo) -> LoopSemantics {
let mut semantics = LoopSemantics::new();
semantics.induction_vars = loop_info.find_induction_vars(self.ssa);
let block_count = self.ssa.block_count().max(1);
let mut iv_update_blocks = BitSet::new(block_count);
for iv in &semantics.induction_vars {
let idx = iv.update_block.index();
if idx < block_count {
iv_update_blocks.insert(idx);
}
}
for block_idx in loop_info.body.iter() {
let block_sem = self.analyze_block(block_idx);
match block_sem.role {
BlockRole::Init => {
semantics.init_blocks.push(block_idx);
}
BlockRole::Condition => {
semantics.condition_blocks.push(block_idx);
}
BlockRole::Body => {
semantics.body_blocks.push(block_idx);
}
BlockRole::Latch => {
semantics.latch_blocks.push(block_idx);
}
BlockRole::Exit => {
semantics.exit_blocks.push(block_idx);
}
BlockRole::Dispatcher | BlockRole::StateUpdate => {
}
BlockRole::Entry | BlockRole::Unknown => {
if block_idx < block_count && iv_update_blocks.contains(block_idx) {
semantics.latch_blocks.push(block_idx);
} else if block_sem.has_side_effects {
semantics.body_blocks.push(block_idx);
}
}
}
}
if let Some(preheader) = loop_info.preheader {
let pre_sem = self.analyze_block(preheader.index());
if pre_sem.role == BlockRole::Init || !pre_sem.initialized_vars.is_empty() {
semantics.init_blocks.push(preheader.index());
}
}
if semantics.condition_blocks.is_empty() {
let header_sem = self.analyze_block(loop_info.header.index());
if header_sem.has_role(BlockRole::Condition) {
semantics.condition_blocks.push(loop_info.header.index());
}
}
for exit in &loop_info.exits {
if !semantics.exit_blocks.contains(&exit.exit_block.index()) {
semantics.exit_blocks.push(exit.exit_block.index());
}
}
semantics.execution_order = self.compute_loop_execution_order(loop_info, &semantics);
semantics.confidence = Self::compute_loop_confidence(&semantics);
semantics
}
fn compute_loop_execution_order(
&self,
loop_info: &LoopInfo,
semantics: &LoopSemantics,
) -> Vec<usize> {
let mut order = Vec::new();
let block_count = self.ssa.block_count().max(1);
let mut seen = BitSet::new(block_count);
for &init in &semantics.init_blocks {
if init < block_count && !seen.contains(init) {
seen.insert(init);
order.push(init);
}
}
for &cond in &semantics.condition_blocks {
if cond < block_count && !seen.contains(cond) {
seen.insert(cond);
order.push(cond);
}
}
let body_order = self.order_body_blocks(loop_info, &semantics.body_blocks);
for body in body_order {
if body < block_count && !seen.contains(body) {
seen.insert(body);
order.push(body);
}
}
for &latch in &semantics.latch_blocks {
if latch < block_count && !seen.contains(latch) {
seen.insert(latch);
order.push(latch);
}
}
order
}
fn order_body_blocks(&self, loop_info: &LoopInfo, body_blocks: &[usize]) -> Vec<usize> {
if body_blocks.is_empty() {
return Vec::new();
}
let block_count = self.ssa.block_count().max(1);
let mut body_set = BitSet::new(block_count);
for &b in body_blocks {
if b < block_count {
body_set.insert(b);
}
}
let mut ordered = Vec::new();
let mut visited = BitSet::new(block_count);
if let Some(cond) = loop_info.find_condition_in_body(self.ssa) {
if let Some(block) = self.ssa.block(cond.index()) {
if let Some(SsaOp::Branch { true_target, .. }) = block.terminator_op() {
if *true_target < block_count && body_set.contains(*true_target) {
self.dfs_order(*true_target, &body_set, &mut visited, &mut ordered);
}
}
}
}
for &block in body_blocks {
if block < block_count && !visited.contains(block) {
self.dfs_order(block, &body_set, &mut visited, &mut ordered);
}
}
ordered
}
fn dfs_order(
&self,
block: usize,
allowed: &BitSet,
visited: &mut BitSet,
order: &mut Vec<usize>,
) {
if block >= allowed.len() || !allowed.contains(block) || visited.contains(block) {
return;
}
visited.insert(block);
order.push(block);
if let Some(b) = self.ssa.block(block) {
if let Some(op) = b.terminator_op() {
for succ in op.successors() {
self.dfs_order(succ, allowed, visited, order);
}
}
}
}
fn compute_loop_confidence(semantics: &LoopSemantics) -> f64 {
let mut score = 0.0;
if !semantics.condition_blocks.is_empty() {
score += 0.3;
}
if !semantics.body_blocks.is_empty() {
score += 0.2;
}
if !semantics.latch_blocks.is_empty() {
score += 0.2;
}
if !semantics.induction_vars.is_empty() {
score += 0.2;
}
if !semantics.init_blocks.is_empty() {
score += 0.1;
}
score
}
pub fn analyze_dispatcher_cases(&mut self, case_blocks: &[usize]) -> HashMap<usize, BlockRole> {
let mut roles = HashMap::new();
for &block in case_blocks {
let sem = self.analyze_block(block);
roles.insert(block, sem.role);
}
roles
}
pub fn recover_loop_from_cases(&mut self, case_blocks: &[usize]) -> LoopSemantics {
let mut semantics = LoopSemantics::new();
let roles = self.analyze_dispatcher_cases(case_blocks);
for (&block, &role) in &roles {
match role {
BlockRole::Init => semantics.init_blocks.push(block),
BlockRole::Condition => semantics.condition_blocks.push(block),
BlockRole::Body => semantics.body_blocks.push(block),
BlockRole::Latch => semantics.latch_blocks.push(block),
BlockRole::Exit => semantics.exit_blocks.push(block),
_ => {}
}
}
semantics.execution_order = semantics.ordered_blocks();
semantics.confidence = Self::compute_loop_confidence(&semantics);
semantics
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::analysis::{SsaFunctionBuilder, SsaType};
#[test]
fn test_block_role_classification() {
assert!(BlockRole::Dispatcher.is_control_flow());
assert!(BlockRole::StateUpdate.is_control_flow());
assert!(!BlockRole::Body.is_control_flow());
assert!(BlockRole::Init.is_user_code());
assert!(BlockRole::Body.is_user_code());
assert!(!BlockRole::Dispatcher.is_user_code());
}
#[test]
fn test_exit_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 0)
.build_with(|f| {
f.block(0, |b| {
b.ret();
});
})
.unwrap();
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Exit);
assert!(sem.confidence > 0.9);
}
#[test]
fn test_condition_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 1)
.build_with(|f| {
let local = f.local(0, SsaType::I32);
f.block(0, |b| {
let zero = b.const_i32(0);
let cmp = b.clt(local, zero);
b.branch(cmp, 1, 2);
});
f.block(1, |b| b.ret());
f.block(2, |b| b.ret());
})
.unwrap();
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Condition);
assert!(sem.has_comparison);
}
#[test]
fn test_init_block_detection() {
let ssa = SsaFunctionBuilder::new(0, 0)
.build_with(|f| {
f.block(0, |b| {
let _ = b.const_i32(0);
let _ = b.const_i32(1);
b.jump(1);
});
f.block(1, |b| b.ret());
})
.unwrap();
let mut analyzer = SemanticAnalyzer::new(&ssa);
let sem = analyzer.analyze_block(0);
assert_eq!(sem.role, BlockRole::Init);
assert_eq!(sem.initialized_vars.len(), 2);
}
#[test]
fn test_loop_semantics_validity() {
let mut sem = LoopSemantics::new();
assert!(!sem.is_valid());
sem.condition_blocks.push(1);
assert!(!sem.is_valid());
sem.body_blocks.push(2);
assert!(sem.is_valid()); assert!(sem.is_branching());
sem.latch_blocks.push(3);
assert!(sem.is_valid()); assert!(!sem.is_branching()); }
}