use super::lattice::Lattice;
use crate::cfg::{Cfg, EdgeKind, NodeInfo};
use petgraph::graph::NodeIndex;
use petgraph::visit::EdgeRef;
use std::collections::{HashMap, HashSet, VecDeque};
pub const MAX_TRACKED_VARS: usize = 64;
pub const MAX_WORKLIST_ITERATIONS: usize = 100_000;
pub trait Transfer<S: Lattice> {
type Event: Clone;
fn apply(
&self,
node: NodeIndex,
info: &NodeInfo,
edge: Option<EdgeKind>,
state: S,
) -> (S, Vec<Self::Event>);
fn iteration_budget(&self) -> usize {
MAX_WORKLIST_ITERATIONS
}
fn on_budget_exceeded(&self) -> bool {
false
}
}
pub struct DataflowResult<S, E> {
pub states: HashMap<NodeIndex, S>,
pub events: Vec<E>,
#[allow(dead_code)]
pub converged: bool,
}
pub fn run_forward<S: Lattice, T: Transfer<S>>(
cfg: &Cfg,
entry: NodeIndex,
transfer: &T,
initial: S,
) -> DataflowResult<S, T::Event> {
let mut states: HashMap<NodeIndex, S> = HashMap::new();
let budget = transfer.iteration_budget();
states.insert(entry, initial);
let _phase1_span = tracing::debug_span!("state_engine_phase1").entered();
let mut worklist: VecDeque<NodeIndex> = VecDeque::new();
let mut in_worklist: HashSet<NodeIndex> = HashSet::new();
worklist.push_back(entry);
in_worklist.insert(entry);
let mut iterations: usize = 0;
let mut converged = true;
while let Some(node) = worklist.pop_front() {
in_worklist.remove(&node);
iterations += 1;
if iterations > budget {
let should_continue = transfer.on_budget_exceeded();
if !should_continue {
converged = false;
break;
}
converged = false;
}
let node_state = match states.get(&node) {
Some(s) => s.clone(),
None => continue,
};
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
if edges.is_empty() {
continue;
}
for &(edge_kind, target) in &edges {
if matches!(edge_kind, EdgeKind::Seq)
&& edges
.iter()
.any(|&(k, t)| t == target && matches!(k, EdgeKind::True | EdgeKind::False))
{
continue;
}
let info = &cfg[node];
let (out_state, _events) =
transfer.apply(node, info, Some(edge_kind), node_state.clone());
let target_state = states.get(&target);
let new_target = match target_state {
Some(existing) => existing.join(&out_state),
None => out_state,
};
let changed = target_state.is_none_or(|existing| *existing != new_target);
if changed {
states.insert(target, new_target);
if in_worklist.insert(target) {
worklist.push_back(target);
}
}
}
}
tracing::debug!(iterations, converged, "state_engine_phase1 complete");
drop(_phase1_span);
let _phase2_span = tracing::debug_span!("state_engine_phase2").entered();
let mut events: Vec<T::Event> = Vec::new();
let mut seen_edges: std::collections::HashSet<(NodeIndex, NodeIndex)> =
std::collections::HashSet::new();
for node in states.keys().copied().collect::<Vec<_>>() {
let node_state = match states.get(&node) {
Some(s) => s.clone(),
None => continue,
};
let edges: Vec<_> = cfg.edges(node).map(|e| (*e.weight(), e.target())).collect();
if edges.is_empty() {
let info = &cfg[node];
let (_out_state, new_events) = transfer.apply(node, info, None, node_state);
events.extend(new_events);
continue;
}
for &(edge_kind, target) in &edges {
if matches!(edge_kind, EdgeKind::Seq)
&& edges
.iter()
.any(|&(k, t)| t == target && matches!(k, EdgeKind::True | EdgeKind::False))
{
continue;
}
if !seen_edges.insert((node, target)) {
continue;
}
let info = &cfg[node];
let (_out_state, new_events) =
transfer.apply(node, info, Some(edge_kind), node_state.clone());
events.extend(new_events);
}
}
DataflowResult {
states,
events,
converged,
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cfg::{CallMeta, EdgeKind, NodeInfo, StmtKind, TaintMeta};
use crate::cfg_analysis::rules;
use crate::state::domain::ResourceLifecycle;
use crate::state::symbol::SymbolInterner;
use crate::state::transfer::DefaultTransfer;
use crate::symbol::Lang;
use petgraph::Graph;
fn make_node(kind: StmtKind) -> NodeInfo {
NodeInfo {
kind,
..Default::default()
}
}
#[test]
fn linear_cfg_converges() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let open_node = cfg.add_node(NodeInfo {
kind: StmtKind::Call,
taint: TaintMeta {
defines: Some("f".into()),
..Default::default()
},
call: CallMeta {
callee: Some("fopen".into()),
..Default::default()
},
..Default::default()
});
let close_node = cfg.add_node(NodeInfo {
kind: StmtKind::Call,
taint: TaintMeta {
uses: vec!["f".into()],
..Default::default()
},
call: CallMeta {
callee: Some("fclose".into()),
..Default::default()
},
..Default::default()
});
let exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, open_node, EdgeKind::Seq);
cfg.add_edge(open_node, close_node, EdgeKind::Seq);
cfg.add_edge(close_node, exit, EdgeKind::Seq);
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
assert!(result.events.is_empty());
assert!(result.converged);
let sym_f = interner.get("f").unwrap();
let exit_state = result.states.get(&exit).unwrap();
assert_eq!(exit_state.resource.get(sym_f), ResourceLifecycle::CLOSED);
}
#[test]
fn diamond_cfg_joins_states() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let open_node = cfg.add_node(NodeInfo {
kind: StmtKind::Call,
taint: TaintMeta {
defines: Some("f".into()),
..Default::default()
},
call: CallMeta {
callee: Some("fopen".into()),
..Default::default()
},
..Default::default()
});
let if_node = cfg.add_node(make_node(StmtKind::If));
let close_node = cfg.add_node(NodeInfo {
kind: StmtKind::Call,
taint: TaintMeta {
uses: vec!["f".into()],
..Default::default()
},
call: CallMeta {
callee: Some("fclose".into()),
..Default::default()
},
..Default::default()
});
let no_close = cfg.add_node(make_node(StmtKind::Seq));
let exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, open_node, EdgeKind::Seq);
cfg.add_edge(open_node, if_node, EdgeKind::Seq);
cfg.add_edge(if_node, close_node, EdgeKind::True);
cfg.add_edge(if_node, no_close, EdgeKind::False);
cfg.add_edge(close_node, exit, EdgeKind::Seq);
cfg.add_edge(no_close, exit, EdgeKind::Seq);
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
let sym_f = interner.get("f").unwrap();
let exit_state = result.states.get(&exit).unwrap();
assert_eq!(
exit_state.resource.get(sym_f),
ResourceLifecycle::OPEN | ResourceLifecycle::CLOSED
);
}
#[derive(Clone, Debug, PartialEq, Eq)]
struct UnitState;
impl Lattice for UnitState {
fn bot() -> Self {
UnitState
}
fn join(&self, _other: &Self) -> Self {
UnitState
}
fn leq(&self, _other: &Self) -> bool {
true
}
}
struct BailTransfer;
impl Transfer<UnitState> for BailTransfer {
type Event = ();
fn apply(
&self,
_node: NodeIndex,
_info: &NodeInfo,
_edge: Option<EdgeKind>,
state: UnitState,
) -> (UnitState, Vec<()>) {
(state, vec![])
}
fn iteration_budget(&self) -> usize {
2 }
fn on_budget_exceeded(&self) -> bool {
false }
}
struct ContinueTransfer;
impl Transfer<UnitState> for ContinueTransfer {
type Event = ();
fn apply(
&self,
_node: NodeIndex,
_info: &NodeInfo,
_edge: Option<EdgeKind>,
state: UnitState,
) -> (UnitState, Vec<()>) {
(state, vec![])
}
fn iteration_budget(&self) -> usize {
2
}
fn on_budget_exceeded(&self) -> bool {
true }
}
fn make_chain_cfg() -> (Cfg, NodeIndex) {
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let a = cfg.add_node(make_node(StmtKind::Seq));
let b = cfg.add_node(make_node(StmtKind::Seq));
let c = cfg.add_node(make_node(StmtKind::Seq));
let exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, a, EdgeKind::Seq);
cfg.add_edge(a, b, EdgeKind::Seq);
cfg.add_edge(b, c, EdgeKind::Seq);
cfg.add_edge(c, exit, EdgeKind::Seq);
(cfg, entry)
}
#[test]
fn budget_exceeded_bail_stops_immediately_and_marks_non_converged() {
let (cfg, entry) = make_chain_cfg();
let result = run_forward(&cfg, entry, &BailTransfer, UnitState);
assert!(!result.converged, "bail transfer must mark converged=false");
}
#[test]
fn budget_exceeded_continue_marks_non_converged() {
let (cfg, entry) = make_chain_cfg();
let result = run_forward(&cfg, entry, &ContinueTransfer, UnitState);
assert!(
!result.converged,
"continue-past-budget must still mark converged=false"
);
}
#[test]
fn within_budget_marks_converged() {
struct GenerousTransfer;
impl Transfer<UnitState> for GenerousTransfer {
type Event = ();
fn apply(
&self,
_node: NodeIndex,
_info: &NodeInfo,
_edge: Option<EdgeKind>,
state: UnitState,
) -> (UnitState, Vec<()>) {
(state, vec![])
}
fn iteration_budget(&self) -> usize {
100_000
}
}
let (cfg, entry) = make_chain_cfg();
let result = run_forward(&cfg, entry, &GenerousTransfer, UnitState);
assert!(result.converged, "within-budget analysis should converge");
}
#[test]
fn worklist_membership_dedup_with_nodeindex() {
use petgraph::graph::NodeIndex;
use std::collections::{HashSet, VecDeque};
let mut wl: VecDeque<NodeIndex> = VecDeque::new();
let mut in_wl: HashSet<NodeIndex> = HashSet::new();
let n0 = NodeIndex::new(0);
let n1 = NodeIndex::new(1);
let n2 = NodeIndex::new(2);
assert!(in_wl.insert(n0));
wl.push_back(n0);
assert!(in_wl.insert(n1));
wl.push_back(n1);
assert!(!in_wl.insert(n0));
assert_eq!(wl.len(), 2);
let popped = wl.pop_front().unwrap();
in_wl.remove(&popped);
assert_eq!(popped, n0);
assert!(!in_wl.contains(&n0));
assert!(in_wl.contains(&n1));
assert!(in_wl.insert(n0));
wl.push_back(n0);
assert!(in_wl.insert(n2));
wl.push_back(n2);
assert_eq!(wl.len(), 3);
assert_eq!(in_wl.len(), 3);
}
#[test]
fn unreachable_nodes_get_no_state() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let reachable = cfg.add_node(make_node(StmtKind::Seq));
let exit = cfg.add_node(make_node(StmtKind::Exit));
let orphan = cfg.add_node(make_node(StmtKind::Seq));
let orphan_exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, reachable, EdgeKind::Seq);
cfg.add_edge(reachable, exit, EdgeKind::Seq);
cfg.add_edge(orphan, orphan_exit, EdgeKind::Seq);
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
assert!(result.converged);
assert!(
result.states.contains_key(&entry),
"entry must have a state"
);
assert!(
result.states.contains_key(&reachable),
"reachable node must have a state"
);
assert!(
!result.states.contains_key(&orphan),
"orphan island must NOT receive any state"
);
assert!(
!result.states.contains_key(&orphan_exit),
"orphan exit must NOT receive any state"
);
}
#[test]
fn single_node_graph_terminates_immediately() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
assert!(result.converged);
assert!(
result.states.contains_key(&entry),
"single-node graph still seeds the entry state"
);
}
#[test]
fn self_loop_terminates() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let a = cfg.add_node(make_node(StmtKind::Seq));
let exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, a, EdgeKind::Seq);
cfg.add_edge(a, a, EdgeKind::Back); cfg.add_edge(a, exit, EdgeKind::Seq);
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
assert!(result.converged, "self-loop must converge");
assert!(result.states.contains_key(&exit));
}
#[test]
fn irreducible_cfg_terminates() {
use crate::state::domain::ProductState;
let mut cfg: Cfg = Graph::new();
let entry = cfg.add_node(make_node(StmtKind::Entry));
let a = cfg.add_node(make_node(StmtKind::Seq));
let b = cfg.add_node(make_node(StmtKind::Seq));
let loop_body = cfg.add_node(make_node(StmtKind::Loop));
let exit = cfg.add_node(make_node(StmtKind::Exit));
cfg.add_edge(entry, a, EdgeKind::Seq);
cfg.add_edge(entry, b, EdgeKind::Seq);
cfg.add_edge(a, loop_body, EdgeKind::Seq);
cfg.add_edge(b, loop_body, EdgeKind::Seq);
cfg.add_edge(loop_body, loop_body, EdgeKind::Back);
cfg.add_edge(loop_body, exit, EdgeKind::Seq);
let interner = SymbolInterner::from_cfg(&cfg);
let transfer = DefaultTransfer {
lang: Lang::C,
resource_pairs: rules::resource_pairs(Lang::C),
interner: &interner,
resource_method_summaries: &[],
ptr_proxy_hints: None,
};
let result = run_forward(&cfg, entry, &transfer, ProductState::initial());
assert!(
result.converged,
"irreducible CFG must still converge under run_forward"
);
for n in [entry, a, b, loop_body, exit] {
assert!(result.states.contains_key(&n), "node {n:?} must be visited");
}
}
}