use super::csr::CsrAdjacency;
use super::scc::SccData;
use crate::graph::unified::edge::EdgeKind;
use crate::graph::unified::node::NodeId;
use anyhow::Result;
use std::collections::{HashMap, HashSet, VecDeque};
#[derive(Debug, Clone)]
pub struct LabelBudgetConfig {
pub budget_per_kind: usize,
pub on_exceeded: BudgetExceededPolicy,
pub density_gate_threshold: usize,
pub skip_labels: bool,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BudgetExceededPolicy {
Fail,
Degrade,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum ReachabilityStrategy {
IntervalLabels,
DagBfs,
}
impl Default for LabelBudgetConfig {
fn default() -> Self {
Self {
budget_per_kind: 15_000_000,
on_exceeded: BudgetExceededPolicy::Degrade,
density_gate_threshold: 64,
skip_labels: false,
}
}
}
#[repr(C)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub struct Interval {
pub start: u32,
pub end: u32,
}
impl Interval {
#[must_use]
pub fn new(start: u32, end: u32) -> Self {
Self { start, end }
}
#[must_use]
pub fn contains(&self, value: u32) -> bool {
value >= self.start && value < self.end
}
#[must_use]
pub fn intersects(&self, other: &Interval) -> bool {
self.start < other.end && other.start < self.end
}
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CondensationDag {
pub edge_kind: EdgeKind,
pub scc_count: u32,
pub edge_count: u32,
pub row_offsets: Vec<u32>,
pub col_indices: Vec<u32>,
pub topo_order: Vec<u32>,
pub label_out_offsets: Vec<u32>,
pub label_out_data: Vec<Interval>,
pub label_in_offsets: Vec<u32>,
pub label_in_data: Vec<Interval>,
pub strategy: ReachabilityStrategy,
}
impl CondensationDag {
#[allow(clippy::cast_possible_truncation)] pub fn build(scc: &SccData, adjacency: &CsrAdjacency) -> Result<Self> {
Self::build_with_budget(scc, adjacency, &LabelBudgetConfig::default())
}
#[allow(clippy::cast_possible_truncation)] pub fn build_with_budget(
scc: &SccData,
adjacency: &CsrAdjacency,
budget_config: &LabelBudgetConfig,
) -> Result<Self> {
let scc_count = scc.scc_count as usize;
let scc_edges = extract_cross_scc_edges(scc, adjacency);
let (row_offsets, col_indices) = build_csr_from_edges(&scc_edges, scc_count);
let edge_count = col_indices.len() as u32;
let topo_order = compute_topological_order(scc_count, &row_offsets, &col_indices)?;
let cond_edge_count = col_indices.len();
let density_gated = budget_config.density_gate_threshold > 0
&& scc_count > 0
&& match budget_config.density_gate_threshold.checked_mul(scc_count) {
Some(product) => cond_edge_count > product,
None => false, };
let mut partial_dag = Self {
edge_kind: scc.edge_kind.clone(),
scc_count: scc_count as u32,
edge_count,
row_offsets,
col_indices,
topo_order,
label_out_offsets: Vec::new(),
label_out_data: Vec::new(),
label_in_offsets: Vec::new(),
label_in_data: Vec::new(),
strategy: ReachabilityStrategy::DagBfs,
};
if budget_config.skip_labels {
log::info!(
"Labels skipped for {:?}: skip_labels requested, using BFS fallback",
scc.edge_kind,
);
} else if density_gated {
log::info!(
"Density gate triggered for {:?}: {} cond edges > {} * {} SCCs, skipping labels",
scc.edge_kind,
cond_edge_count,
budget_config.density_gate_threshold,
scc_count
);
} else {
let budget = if budget_config.budget_per_kind == 0 {
usize::MAX
} else {
budget_config.budget_per_kind
};
match super::reachability::compute_2hop_labels(&partial_dag, budget) {
Ok((label_out_offsets, label_out_data, label_in_offsets, label_in_data)) => {
partial_dag.label_out_offsets = label_out_offsets;
partial_dag.label_out_data = label_out_data;
partial_dag.label_in_offsets = label_in_offsets;
partial_dag.label_in_data = label_in_data;
partial_dag.strategy = ReachabilityStrategy::IntervalLabels;
}
Err(e) => match budget_config.on_exceeded {
BudgetExceededPolicy::Fail => return Err(e),
BudgetExceededPolicy::Degrade => {
log::warn!(
"Label budget exceeded for {:?}, degrading to BFS fallback: {e}",
scc.edge_kind
);
}
},
}
}
Ok(partial_dag)
}
#[must_use]
pub fn successors(&self, scc_id: u32) -> &[u32] {
let scc_idx = scc_id as usize;
if scc_idx >= self.row_offsets.len() - 1 {
return &[];
}
let start = self.row_offsets[scc_idx] as usize;
let end = self.row_offsets[scc_idx + 1] as usize;
&self.col_indices[start..end]
}
pub fn fixup_after_load(&mut self) {
if self.strategy == ReachabilityStrategy::IntervalLabels
&& self.label_out_data.is_empty()
&& self.label_in_data.is_empty()
{
self.strategy = ReachabilityStrategy::DagBfs;
}
}
#[must_use]
pub fn can_reach(&self, from_scc: u32, to_scc: u32) -> bool {
if from_scc == to_scc {
return true;
}
match self.strategy {
ReachabilityStrategy::IntervalLabels => self.can_reach_via_labels(from_scc, to_scc),
ReachabilityStrategy::DagBfs => self.can_reach_via_bfs(from_scc, to_scc),
}
}
fn can_reach_via_labels(&self, from_scc: u32, to_scc: u32) -> bool {
let from_idx = from_scc as usize;
if from_idx >= self.label_out_offsets.len().saturating_sub(1) {
return false;
}
let out_start = self.label_out_offsets[from_idx] as usize;
let out_end = self.label_out_offsets[from_idx + 1] as usize;
let label_out = &self.label_out_data[out_start..out_end];
let to_idx = to_scc as usize;
if to_idx >= self.label_in_offsets.len().saturating_sub(1) {
return false;
}
let in_start = self.label_in_offsets[to_idx] as usize;
let in_end = self.label_in_offsets[to_idx + 1] as usize;
let label_in = &self.label_in_data[in_start..in_end];
for out_interval in label_out {
for in_interval in label_in {
if out_interval.intersects(in_interval) {
return true;
}
}
}
false
}
fn can_reach_via_bfs(&self, from_scc: u32, to_scc: u32) -> bool {
let mut visited = HashSet::new();
let mut queue = VecDeque::new();
queue.push_back(from_scc);
visited.insert(from_scc);
while let Some(current) = queue.pop_front() {
for &neighbor in self.successors(current) {
if neighbor == to_scc {
return true;
}
if visited.insert(neighbor) {
queue.push_back(neighbor);
}
}
}
false
}
#[must_use]
pub fn find_scc_path(&self, from_scc: u32, to_scc: u32) -> Option<Vec<u32>> {
if from_scc == to_scc {
return Some(vec![from_scc]);
}
if self.strategy == ReachabilityStrategy::IntervalLabels
&& !self.can_reach(from_scc, to_scc)
{
return None;
}
let mut queue = VecDeque::new();
let mut visited = HashSet::new();
let mut parent: HashMap<u32, u32> = HashMap::new();
queue.push_back(from_scc);
visited.insert(from_scc);
while let Some(current) = queue.pop_front() {
if current == to_scc {
return Some(reconstruct_scc_path(&parent, from_scc, to_scc));
}
for &neighbor in self.successors(current) {
if visited.contains(&neighbor) || !self.should_explore_neighbor(neighbor, to_scc) {
continue;
}
visited.insert(neighbor);
parent.insert(neighbor, current);
queue.push_back(neighbor);
}
}
None
}
fn should_explore_neighbor(&self, neighbor: u32, to_scc: u32) -> bool {
match self.strategy {
ReachabilityStrategy::IntervalLabels => self.can_reach_via_labels(neighbor, to_scc),
ReachabilityStrategy::DagBfs => true,
}
}
}
fn reconstruct_scc_path(parent: &HashMap<u32, u32>, from_scc: u32, to_scc: u32) -> Vec<u32> {
let mut path = vec![to_scc];
let mut node = to_scc;
while node != from_scc {
node = parent[&node];
path.push(node);
}
path.reverse();
path
}
#[allow(clippy::cast_possible_truncation)]
fn extract_cross_scc_edges(scc: &SccData, adjacency: &CsrAdjacency) -> HashSet<(u32, u32)> {
let mut scc_edges: HashSet<(u32, u32)> = HashSet::new();
for node in 0..adjacency.node_count {
let src_scc = scc
.scc_of(NodeId::new(node, 0))
.expect("Node within adjacency should have valid SCC");
let neighbors = adjacency.neighbors_filtered(NodeId::new(node, 0), &scc.edge_kind);
for &target in &neighbors {
let tgt_scc = scc
.scc_of(NodeId::new(target, 0))
.expect("Target within adjacency should have valid SCC");
if src_scc != tgt_scc {
scc_edges.insert((src_scc, tgt_scc));
}
}
}
scc_edges
}
#[allow(clippy::cast_possible_truncation)]
fn build_csr_from_edges(scc_edges: &HashSet<(u32, u32)>, scc_count: usize) -> (Vec<u32>, Vec<u32>) {
let mut scc_adjacency: HashMap<u32, Vec<u32>> = HashMap::new();
for &(src, tgt) in scc_edges {
scc_adjacency.entry(src).or_default().push(tgt);
}
for successors in scc_adjacency.values_mut() {
successors.sort_unstable();
}
let mut row_offsets = Vec::with_capacity(scc_count + 1);
let mut col_indices = Vec::new();
row_offsets.push(0);
for scc_id in 0..scc_count as u32 {
if let Some(successors) = scc_adjacency.get(&scc_id) {
col_indices.extend_from_slice(successors);
}
row_offsets.push(col_indices.len() as u32);
}
(row_offsets, col_indices)
}
#[allow(clippy::cast_possible_truncation)]
fn compute_topological_order(
scc_count: usize,
row_offsets: &[u32],
col_indices: &[u32],
) -> Result<Vec<u32>> {
let mut in_degree = vec![0u32; scc_count];
for &target in col_indices {
in_degree[target as usize] += 1;
}
let mut queue: VecDeque<u32> = VecDeque::new();
for (scc_id, °) in in_degree.iter().enumerate() {
if deg == 0 {
queue.push_back(scc_id as u32);
}
}
let mut topo_order = Vec::with_capacity(scc_count);
while let Some(scc_id) = queue.pop_front() {
topo_order.push(scc_id);
let start = row_offsets[scc_id as usize] as usize;
let end = row_offsets[scc_id as usize + 1] as usize;
for &successor in &col_indices[start..end] {
in_degree[successor as usize] -= 1;
if in_degree[successor as usize] == 0 {
queue.push_back(successor);
}
}
}
if topo_order.len() != scc_count {
anyhow::bail!(
"Topological sort failed: expected {} SCCs, got {}. Graph has cycles!",
scc_count,
topo_order.len()
);
}
Ok(topo_order)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::graph::unified::analysis::csr::CsrAdjacency;
use crate::graph::unified::analysis::scc::SccData;
use crate::graph::unified::compaction::{CompactionSnapshot, MergedEdge};
use crate::graph::unified::edge::EdgeKind;
use crate::graph::unified::file::FileId;
use crate::graph::unified::node::NodeId;
fn test_snapshot() -> CompactionSnapshot {
let file = FileId::new(0);
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
CompactionSnapshot {
csr_edges: vec![
MergedEdge::new(NodeId::new(0, 0), NodeId::new(1, 0), kind.clone(), 1, file),
MergedEdge::new(NodeId::new(1, 0), NodeId::new(2, 0), kind.clone(), 2, file),
MergedEdge::new(NodeId::new(2, 0), NodeId::new(3, 0), kind.clone(), 3, file),
],
delta_edges: Vec::new(),
node_count: 4,
csr_version: 0,
}
}
#[test]
fn test_label_budget_default() {
let config = LabelBudgetConfig::default();
assert_eq!(config.budget_per_kind, 15_000_000);
assert!(matches!(config.on_exceeded, BudgetExceededPolicy::Degrade));
}
#[test]
fn test_label_budget_fail_policy() {
let config = LabelBudgetConfig {
budget_per_kind: 1,
on_exceeded: BudgetExceededPolicy::Fail,
..LabelBudgetConfig::default()
};
let snapshot = test_snapshot();
let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let result = CondensationDag::build_with_budget(&scc, &csr, &config);
assert!(result.is_err(), "Should fail with budget=1 and Fail policy");
}
#[test]
fn test_label_budget_degrade_policy() {
let config = LabelBudgetConfig {
budget_per_kind: 1,
on_exceeded: BudgetExceededPolicy::Degrade,
..LabelBudgetConfig::default()
};
let snapshot = test_snapshot();
let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let result = CondensationDag::build_with_budget(&scc, &csr, &config);
assert!(
result.is_ok(),
"Should degrade gracefully with Degrade policy"
);
let dag = result.unwrap();
assert!(dag.label_out_data.is_empty());
assert!(dag.label_in_data.is_empty());
assert_eq!(dag.strategy, ReachabilityStrategy::DagBfs);
}
#[test]
fn test_dag_bfs_can_reach() {
let config = LabelBudgetConfig {
budget_per_kind: 1,
on_exceeded: BudgetExceededPolicy::Degrade,
..LabelBudgetConfig::default()
};
let snapshot = test_snapshot(); let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let dag = CondensationDag::build_with_budget(&scc, &csr, &config).unwrap();
assert_eq!(dag.strategy, ReachabilityStrategy::DagBfs);
assert!(dag.can_reach(0, 0));
let scc_0 = scc.scc_of(NodeId::new(0, 0)).unwrap();
let scc_3 = scc.scc_of(NodeId::new(3, 0)).unwrap();
assert!(dag.can_reach(scc_0, scc_3));
assert!(!dag.can_reach(scc_3, scc_0));
}
#[test]
fn test_dag_bfs_find_scc_path() {
let config = LabelBudgetConfig {
budget_per_kind: 1,
on_exceeded: BudgetExceededPolicy::Degrade,
..LabelBudgetConfig::default()
};
let snapshot = test_snapshot(); let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let dag = CondensationDag::build_with_budget(&scc, &csr, &config).unwrap();
assert_eq!(dag.strategy, ReachabilityStrategy::DagBfs);
let scc_0 = scc.scc_of(NodeId::new(0, 0)).unwrap();
let scc_3 = scc.scc_of(NodeId::new(3, 0)).unwrap();
let path = dag.find_scc_path(scc_0, scc_3);
assert!(path.is_some());
let path = path.unwrap();
assert_eq!(*path.first().unwrap(), scc_0);
assert_eq!(*path.last().unwrap(), scc_3);
assert!(dag.find_scc_path(scc_3, scc_0).is_none());
}
#[test]
fn test_interval_labels_and_bfs_produce_same_results() {
let snapshot = test_snapshot(); let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let dag_labels = CondensationDag::build(&scc, &csr).unwrap();
assert_eq!(dag_labels.strategy, ReachabilityStrategy::IntervalLabels);
let config = LabelBudgetConfig {
budget_per_kind: 1,
on_exceeded: BudgetExceededPolicy::Degrade,
..LabelBudgetConfig::default()
};
let dag_bfs = CondensationDag::build_with_budget(&scc, &csr, &config).unwrap();
assert_eq!(dag_bfs.strategy, ReachabilityStrategy::DagBfs);
for from in 0..dag_labels.scc_count {
for to in 0..dag_labels.scc_count {
assert_eq!(
dag_labels.can_reach(from, to),
dag_bfs.can_reach(from, to),
"Mismatch for can_reach({from}, {to})"
);
}
}
}
#[test]
fn test_fixup_after_load_corrects_empty_labels() {
let mut dag = CondensationDag {
edge_kind: EdgeKind::Calls {
argument_count: 0,
is_async: false,
},
scc_count: 2,
edge_count: 1,
row_offsets: vec![0, 1, 1],
col_indices: vec![1],
topo_order: vec![0, 1],
label_out_offsets: Vec::new(),
label_out_data: Vec::new(),
label_in_offsets: Vec::new(),
label_in_data: Vec::new(),
strategy: ReachabilityStrategy::IntervalLabels, };
assert_eq!(dag.strategy, ReachabilityStrategy::IntervalLabels);
dag.fixup_after_load();
assert_eq!(dag.strategy, ReachabilityStrategy::DagBfs);
assert!(dag.can_reach(0, 1));
assert!(!dag.can_reach(1, 0));
}
#[test]
fn test_fixup_after_load_preserves_valid_labels() {
let snapshot = test_snapshot();
let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let mut dag = CondensationDag::build(&scc, &csr).unwrap();
assert_eq!(dag.strategy, ReachabilityStrategy::IntervalLabels);
assert!(!dag.label_out_data.is_empty());
dag.fixup_after_load();
assert_eq!(dag.strategy, ReachabilityStrategy::IntervalLabels);
}
fn dense_snapshot() -> CompactionSnapshot {
let file = FileId::new(0);
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
CompactionSnapshot {
csr_edges: vec![
MergedEdge::new(NodeId::new(0, 0), NodeId::new(1, 0), kind.clone(), 1, file),
MergedEdge::new(NodeId::new(0, 0), NodeId::new(2, 0), kind.clone(), 2, file),
MergedEdge::new(NodeId::new(0, 0), NodeId::new(3, 0), kind.clone(), 3, file),
MergedEdge::new(NodeId::new(1, 0), NodeId::new(2, 0), kind.clone(), 4, file),
MergedEdge::new(NodeId::new(1, 0), NodeId::new(3, 0), kind.clone(), 5, file),
MergedEdge::new(NodeId::new(2, 0), NodeId::new(3, 0), kind.clone(), 6, file),
],
delta_edges: Vec::new(),
node_count: 4,
csr_version: 0,
}
}
#[test]
fn test_density_gate_skips_labels() {
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let snapshot = dense_snapshot();
let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let config_gated = LabelBudgetConfig {
budget_per_kind: 15_000_000,
on_exceeded: BudgetExceededPolicy::Degrade,
density_gate_threshold: 1, skip_labels: false,
};
let dag = CondensationDag::build_with_budget(&scc, &csr, &config_gated).unwrap();
assert_eq!(dag.strategy, ReachabilityStrategy::DagBfs);
assert!(dag.label_out_data.is_empty());
assert!(dag.label_in_data.is_empty());
let scc_0 = scc.scc_of(NodeId::new(0, 0)).unwrap();
let scc_3 = scc.scc_of(NodeId::new(3, 0)).unwrap();
assert!(dag.can_reach(scc_0, scc_3));
assert!(!dag.can_reach(scc_3, scc_0));
let config_disabled = LabelBudgetConfig {
density_gate_threshold: 0,
..config_gated.clone()
};
let dag2 = CondensationDag::build_with_budget(&scc, &csr, &config_disabled).unwrap();
assert_eq!(dag2.strategy, ReachabilityStrategy::IntervalLabels);
assert!(!dag2.label_out_data.is_empty());
}
#[test]
fn test_budget_zero_means_unlimited() {
let config = LabelBudgetConfig {
budget_per_kind: 0, on_exceeded: BudgetExceededPolicy::Fail,
density_gate_threshold: 0,
skip_labels: false,
};
let snapshot = test_snapshot();
let csr = CsrAdjacency::build_from_snapshot(&snapshot).unwrap();
let kind = EdgeKind::Calls {
argument_count: 0,
is_async: false,
};
let scc = SccData::compute_tarjan(&csr, &kind).unwrap();
let dag = CondensationDag::build_with_budget(&scc, &csr, &config).unwrap();
assert_eq!(dag.strategy, ReachabilityStrategy::IntervalLabels);
assert!(!dag.label_out_data.is_empty());
}
}