use std::collections::{HashMap, HashSet};
use crate::cfg::types::{BlockId, CFGInfo, EdgeType};
use crate::dfg::types::DFGInfo;
use crate::pdg::types::{BranchType, ControlDependence, PDGInfo};
pub struct PDGBuilder {
cfg: CFGInfo,
dfg: DFGInfo,
control_deps: Vec<ControlDependence>,
block_line_ranges: HashMap<BlockId, (usize, usize)>,
}
impl PDGBuilder {
pub fn new(cfg: CFGInfo, dfg: DFGInfo) -> Self {
let block_line_ranges: HashMap<BlockId, (usize, usize)> = cfg
.blocks
.iter()
.map(|(id, block)| (*id, (block.start_line, block.end_line)))
.collect();
Self {
cfg,
dfg,
control_deps: Vec::new(),
block_line_ranges,
}
}
pub fn build(mut self) -> PDGInfo {
self.compute_control_dependencies();
PDGInfo::new(self.cfg, self.dfg, self.control_deps)
}
fn compute_control_dependencies(&mut self) {
let mut transitive_deps: Vec<(BlockId, BlockId, usize, BranchType)> = Vec::new();
for edge in &self.cfg.edges {
let branch_type = match edge.edge_type {
EdgeType::True => BranchType::True,
EdgeType::False | EdgeType::Else => BranchType::False,
EdgeType::BackEdge | EdgeType::Loop | EdgeType::Iterate => BranchType::Loop,
EdgeType::Exception | EdgeType::ExceptionGroup => BranchType::Exception,
EdgeType::Enter | EdgeType::Case | EdgeType::PatternMatch | EdgeType::Match => {
BranchType::True }
EdgeType::OkSome | EdgeType::ErrNone => BranchType::True, EdgeType::Pass | EdgeType::Fail => BranchType::True, _ => continue, };
let condition_line = self.get_condition_line(edge.from);
let (target_start, target_end) = match self.block_line_ranges.get(&edge.to) {
Some(&range) => range,
None => continue,
};
for line in target_start..=target_end {
if line != condition_line {
self.control_deps.push(ControlDependence {
condition_line,
dependent_line: line,
branch_type,
});
}
}
if matches!(branch_type, BranchType::True | BranchType::False) {
transitive_deps.push((edge.from, edge.to, condition_line, branch_type));
}
}
for (source_block, start_block, condition_line, branch_type) in transitive_deps {
self.add_transitive_control_deps(source_block, start_block, condition_line, branch_type);
}
self.deduplicate_control_deps();
}
fn add_transitive_control_deps(
&mut self,
source_block: BlockId,
start_block: BlockId,
condition_line: usize,
branch_type: BranchType,
) {
let mut visited: HashSet<BlockId> = HashSet::new();
let mut to_visit: Vec<BlockId> = vec![start_block];
let mut depth = 0;
const MAX_DEPTH: usize = 50;
while let Some(block_id) = to_visit.pop() {
if visited.contains(&block_id) || depth > MAX_DEPTH {
continue;
}
if block_id == source_block {
continue;
}
visited.insert(block_id);
depth += 1;
for &succ in self.cfg.successors(block_id) {
if let Some(edge) = self.cfg.edges.iter().find(|e| e.from == block_id && e.to == succ) {
if edge.edge_type == EdgeType::BackEdge {
continue;
}
}
to_visit.push(succ);
}
}
for block_id in visited {
if block_id == start_block {
continue;
}
let (start_line, end_line) = match self.block_line_ranges.get(&block_id) {
Some(&range) => range,
None => continue,
};
for line in start_line..=end_line {
if line != condition_line {
self.control_deps.push(ControlDependence {
condition_line,
dependent_line: line,
branch_type,
});
}
}
}
}
fn get_condition_line(&self, block_id: BlockId) -> usize {
self.cfg
.blocks
.get(&block_id)
.map(|b| b.start_line)
.unwrap_or(1)
}
fn deduplicate_control_deps(&mut self) {
let mut seen: HashSet<(usize, usize)> = HashSet::new();
self.control_deps.retain(|dep| {
let key = (dep.condition_line, dep.dependent_line);
seen.insert(key)
});
}
}
pub fn build_pdg(file: &str, function: &str) -> crate::error::Result<PDGInfo> {
build_pdg_with_language(file, function, None)
}
pub fn build_pdg_with_language(
file: &str,
function: &str,
language: Option<&str>,
) -> crate::error::Result<PDGInfo> {
let cfg = crate::cfg::extract_with_language(file, function, language)?;
let dfg = crate::dfg::extract_with_language(file, function, language)?;
Ok(PDGBuilder::new(cfg, dfg).build())
}
pub fn build_pdg_from_graphs(cfg: CFGInfo, dfg: DFGInfo) -> PDGInfo {
PDGBuilder::new(cfg, dfg).build()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cfg::types::{BlockType, CFGBlock, CFGEdge};
use crate::dfg::types::{DataflowEdge, DataflowKind};
fn create_test_cfg() -> CFGInfo {
let mut blocks = HashMap::new();
blocks.insert(
BlockId(0),
CFGBlock {
id: BlockId(0),
label: "entry".to_string(),
block_type: BlockType::Entry,
statements: vec!["def test(x):".to_string()],
func_calls: vec![],
start_line: 1,
end_line: 1,
},
);
blocks.insert(
BlockId(1),
CFGBlock {
id: BlockId(1),
label: "if x > 0".to_string(),
block_type: BlockType::Branch,
statements: vec!["if x > 0:".to_string()],
func_calls: vec![],
start_line: 2,
end_line: 2,
},
);
blocks.insert(
BlockId(2),
CFGBlock {
id: BlockId(2),
label: "true branch".to_string(),
block_type: BlockType::Body,
statements: vec!["result = x".to_string()],
func_calls: vec![],
start_line: 3,
end_line: 3,
},
);
blocks.insert(
BlockId(3),
CFGBlock {
id: BlockId(3),
label: "false branch".to_string(),
block_type: BlockType::Body,
statements: vec!["result = 0".to_string()],
func_calls: vec![],
start_line: 5,
end_line: 5,
},
);
blocks.insert(
BlockId(4),
CFGBlock {
id: BlockId(4),
label: "exit".to_string(),
block_type: BlockType::Return,
statements: vec!["return result".to_string()],
func_calls: vec![],
start_line: 6,
end_line: 6,
},
);
let edges = vec![
CFGEdge::unconditional(BlockId(0), BlockId(1)),
CFGEdge::new(BlockId(1), BlockId(2), EdgeType::True),
CFGEdge::new(BlockId(1), BlockId(3), EdgeType::False),
CFGEdge::unconditional(BlockId(2), BlockId(4)),
CFGEdge::unconditional(BlockId(3), BlockId(4)),
];
CFGInfo {
function_name: "test".to_string(),
blocks,
edges,
entry: BlockId(0),
exits: vec![BlockId(4)],
decision_points: 1,
comprehension_decision_points: 0,
nested_cfgs: HashMap::new(),
is_async: false,
await_points: 0,
blocking_calls: Vec::new(),
..Default::default()
}
}
fn create_test_dfg() -> DFGInfo {
DFGInfo::new(
"test".to_string(),
vec![
DataflowEdge {
variable: "x".to_string(),
from_line: 1,
to_line: 3,
kind: DataflowKind::Use,
},
DataflowEdge {
variable: "result".to_string(),
from_line: 3,
to_line: 6,
kind: DataflowKind::Definition,
},
DataflowEdge {
variable: "result".to_string(),
from_line: 5,
to_line: 6,
kind: DataflowKind::Definition,
},
],
[
("x".to_string(), vec![1]),
("result".to_string(), vec![3, 5]),
]
.into_iter()
.collect(),
[("x".to_string(), vec![3]), ("result".to_string(), vec![6])]
.into_iter()
.collect(),
)
}
#[test]
fn test_pdg_builder_creates_control_deps() {
let cfg = create_test_cfg();
let dfg = create_test_dfg();
let pdg = PDGBuilder::new(cfg, dfg).build();
assert!(
pdg.is_control_dependent(3, 2),
"Line 3 should be control-dependent on line 2 (true branch)"
);
assert!(
pdg.is_control_dependent(5, 2),
"Line 5 should be control-dependent on line 2 (false branch)"
);
}
#[test]
fn test_pdg_has_both_control_and_data_edges() {
let cfg = create_test_cfg();
let dfg = create_test_dfg();
let pdg = PDGBuilder::new(cfg, dfg).build();
assert!(pdg.control_dep_count() > 0, "PDG should have control dependencies");
assert!(pdg.data_edge_count() > 0, "PDG should have data flow edges");
}
#[test]
fn test_control_deps_for_line() {
let cfg = create_test_cfg();
let dfg = create_test_dfg();
let pdg = PDGBuilder::new(cfg, dfg).build();
let deps = pdg.control_deps_for(3);
assert!(
deps.contains(&2),
"Line 3's control deps should include line 2"
);
}
#[test]
fn test_lines_controlled_by() {
let cfg = create_test_cfg();
let dfg = create_test_dfg();
let pdg = PDGBuilder::new(cfg, dfg).build();
let controlled = pdg.lines_controlled_by(2);
assert!(controlled.contains(&3), "Line 2 should control line 3");
assert!(controlled.contains(&5), "Line 2 should control line 5");
}
}