haz-dag 0.1.0

DAG construction and traversal for haz tasks.
Documentation
//! [`detect_cycles`]: find every strongly-connected component
//! of size two or more in the union graph of a [`TaskGraph`].
//!
//! `DAG-014` rejects simple cycles of length two or more in the
//! union of hard (`DAG-008`), soft (`DAG-009`), and producer-
//! matching (`DAG-013`) edges. Self-loops (length-one cycles)
//! are explicitly permitted: a code formatter that consumes
//! and produces the same files contributes a producer-matching
//! self-edge that does not constitute a configuration error.
//!
//! Tarjan's algorithm gives every strongly-connected component
//! of the directed union graph. An SCC of size two or more
//! contains at least one length-two-or-more cycle, so reporting
//! the SCC is equivalent to reporting the cycle for the purpose
//! of `DAG-014`. The SCC is the right diagnostic unit: the
//! user has to break the entanglement, and a single shortest
//! path through it would be less informative than the full set
//! of mutually-dependent tasks.

use std::collections::{BTreeMap, BTreeSet};

use haz_domain::task_id::TaskId;

use crate::graph::TaskGraph;

/// Return every cycle in `graph` per `DAG-014`.
///
/// Each cycle is reported as the set of `TaskId`s of the
/// strongly-connected component that contains it. Size-one
/// components (length-one cycles per `DAG-013`) are omitted.
///
/// The outer `Vec` is ordered by each component's
/// lexicographically-first `TaskId`, so the output is stable
/// across runs.
#[must_use]
pub fn detect_cycles(graph: &TaskGraph) -> Vec<BTreeSet<TaskId>> {
    let nodes: Vec<&TaskId> = graph.nodes.iter().collect();
    if nodes.is_empty() {
        return Vec::new();
    }

    let id_to_idx: BTreeMap<&TaskId, usize> =
        nodes.iter().enumerate().map(|(i, n)| (*n, i)).collect();

    let mut adj: Vec<Vec<usize>> = vec![Vec::new(); nodes.len()];
    for edge in &graph.edges {
        // Edges in graph are guaranteed (by construction) to
        // reference nodes in graph.nodes; tolerate stray edges
        // defensively rather than panicking.
        if let (Some(&from), Some(&to)) = (id_to_idx.get(&edge.from), id_to_idx.get(&edge.to)) {
            adj[from].push(to);
        }
    }

    let mut state = TarjanState::new(nodes.len(), adj);
    for v in 0..nodes.len() {
        if state.indices[v].is_none() {
            state.strongconnect(v);
        }
    }

    let mut sccs: Vec<BTreeSet<TaskId>> = state
        .sccs
        .into_iter()
        .filter(|s| s.len() >= 2)
        .map(|scc| scc.into_iter().map(|i| nodes[i].clone()).collect())
        .collect();
    // Sort by first member for stable output.
    sccs.sort_by(|a, b| {
        let a_first = a.iter().next();
        let b_first = b.iter().next();
        a_first.cmp(&b_first)
    });
    sccs
}

struct TarjanState {
    adj: Vec<Vec<usize>>,
    index_counter: usize,
    stack: Vec<usize>,
    on_stack: Vec<bool>,
    indices: Vec<Option<usize>>,
    lowlinks: Vec<usize>,
    sccs: Vec<Vec<usize>>,
}

impl TarjanState {
    fn new(n: usize, adj: Vec<Vec<usize>>) -> Self {
        Self {
            adj,
            index_counter: 0,
            stack: Vec::new(),
            on_stack: vec![false; n],
            indices: vec![None; n],
            lowlinks: vec![0; n],
            sccs: Vec::new(),
        }
    }

    fn strongconnect(&mut self, v: usize) {
        self.indices[v] = Some(self.index_counter);
        self.lowlinks[v] = self.index_counter;
        self.index_counter += 1;
        self.stack.push(v);
        self.on_stack[v] = true;

        // Iterate a snapshot of v's successors to keep the
        // borrow checker satisfied while we mutate self below.
        let successors = self.adj[v].clone();
        for w in successors {
            if self.indices[w].is_none() {
                self.strongconnect(w);
                self.lowlinks[v] = self.lowlinks[v].min(self.lowlinks[w]);
            } else if self.on_stack[w] {
                let iw = self.indices[w].expect("set above");
                self.lowlinks[v] = self.lowlinks[v].min(iw);
            }
        }

        if Some(self.lowlinks[v]) == self.indices[v] {
            let mut scc = Vec::new();
            loop {
                let w = self.stack.pop().expect("non-empty during SCC pop");
                self.on_stack[w] = false;
                scc.push(w);
                if w == v {
                    break;
                }
            }
            self.sccs.push(scc);
        }
    }
}

#[cfg(test)]
mod tests {
    use std::collections::BTreeSet;
    use std::str::FromStr;

    use haz_domain::name::{ProjectName, TaskName};
    use haz_domain::task_id::TaskId;

    use crate::cycles::detect_cycles;
    use crate::edge::{Edge, EdgeKind};
    use crate::graph::TaskGraph;

    fn id(project: &str, task: &str) -> TaskId {
        TaskId {
            project: ProjectName::from_str(project).unwrap(),
            task: TaskName::from_str(task).unwrap(),
        }
    }

    fn edge(from: TaskId, to: TaskId, kind: EdgeKind) -> Edge {
        Edge { from, to, kind }
    }

    fn graph(nodes: Vec<TaskId>, edges: Vec<Edge>) -> TaskGraph {
        TaskGraph {
            nodes: nodes.into_iter().collect(),
            edges: edges.into_iter().collect(),
        }
    }

    #[test]
    fn empty_graph_has_no_cycles() {
        let g = graph(vec![], vec![]);
        assert!(detect_cycles(&g).is_empty());
    }

    #[test]
    fn single_isolated_node_no_cycle() {
        let a = id("p", "a");
        let g = graph(vec![a], vec![]);
        assert!(detect_cycles(&g).is_empty());
    }

    #[test]
    fn acyclic_chain_no_cycle() {
        let a = id("p", "a");
        let b = id("p", "b");
        let c = id("p", "c");
        let g = graph(
            vec![a.clone(), b.clone(), c.clone()],
            vec![
                edge(a, b.clone(), EdgeKind::Hard),
                edge(b, c, EdgeKind::Hard),
            ],
        );
        assert!(detect_cycles(&g).is_empty());
    }

    #[test]
    fn length_two_cycle_detected() {
        let a = id("p", "a");
        let b = id("p", "b");
        let g = graph(
            vec![a.clone(), b.clone()],
            vec![
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), a.clone(), EdgeKind::Hard),
            ],
        );
        let cycles = detect_cycles(&g);
        assert_eq!(cycles, vec![BTreeSet::from([a, b])]);
    }

    #[test]
    fn length_three_cycle_detected() {
        let a = id("p", "a");
        let b = id("p", "b");
        let c = id("p", "c");
        let g = graph(
            vec![a.clone(), b.clone(), c.clone()],
            vec![
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), c.clone(), EdgeKind::Hard),
                edge(c.clone(), a.clone(), EdgeKind::Hard),
            ],
        );
        let cycles = detect_cycles(&g);
        assert_eq!(cycles, vec![BTreeSet::from([a, b, c])]);
    }

    #[test]
    fn two_disjoint_cycles_both_detected() {
        let a = id("p", "a");
        let b = id("p", "b");
        let x = id("q", "x");
        let y = id("q", "y");
        let gph = graph(
            vec![a.clone(), b.clone(), x.clone(), y.clone()],
            vec![
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), a.clone(), EdgeKind::Hard),
                edge(x.clone(), y.clone(), EdgeKind::Hard),
                edge(y.clone(), x.clone(), EdgeKind::Hard),
            ],
        );
        let cycles = detect_cycles(&gph);
        assert_eq!(cycles, vec![BTreeSet::from([a, b]), BTreeSet::from([x, y])]);
    }

    #[test]
    fn cycle_through_mixed_edge_kinds_is_detected() {
        // A -> B is a hard edge; B -> A is a producer-matching
        // edge. The union still cycles.
        let a = id("p", "a");
        let b = id("p", "b");
        let g = graph(
            vec![a.clone(), b.clone()],
            vec![
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), a.clone(), EdgeKind::ProducerMatching),
            ],
        );
        let cycles = detect_cycles(&g);
        assert_eq!(cycles, vec![BTreeSet::from([a, b])]);
    }

    #[test]
    fn cycle_through_soft_and_producer_is_detected() {
        let a = id("p", "a");
        let b = id("p", "b");
        let g = graph(
            vec![a.clone(), b.clone()],
            vec![
                edge(a.clone(), b.clone(), EdgeKind::Soft),
                edge(b.clone(), a.clone(), EdgeKind::ProducerMatching),
            ],
        );
        let cycles = detect_cycles(&g);
        assert_eq!(cycles, vec![BTreeSet::from([a, b])]);
    }

    #[test]
    fn lone_self_loop_is_not_a_cycle() {
        // DAG-013 formatter case: producer-matching self-edge.
        // The SCC has size 1, so DAG-014 does not reject it.
        let a = id("p", "format");
        let g = graph(
            vec![a.clone()],
            vec![edge(a.clone(), a, EdgeKind::ProducerMatching)],
        );
        assert!(detect_cycles(&g).is_empty());
    }

    #[test]
    fn self_loop_combined_with_length_two_cycle_is_reported() {
        // A has a self-loop (producer-matching) AND participates
        // in a length-2 cycle A -> B -> A via hard edges. The
        // SCC has size 2, so we report it.
        let a = id("p", "a");
        let b = id("p", "b");
        let g = graph(
            vec![a.clone(), b.clone()],
            vec![
                edge(a.clone(), a.clone(), EdgeKind::ProducerMatching),
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), a.clone(), EdgeKind::Hard),
            ],
        );
        let cycles = detect_cycles(&g);
        assert_eq!(cycles, vec![BTreeSet::from([a, b])]);
    }

    #[test]
    fn output_ordering_is_stable_by_first_member() {
        // Construct cycles whose insertion order in graph.edges
        // is intentionally different from sorted-first-member
        // order, to verify the post-sort.
        let a = id("a_proj", "x");
        let b = id("a_proj", "y");
        let c = id("z_proj", "x");
        let d = id("z_proj", "y");
        let gph = graph(
            vec![a.clone(), b.clone(), c.clone(), d.clone()],
            vec![
                edge(c.clone(), d.clone(), EdgeKind::Hard),
                edge(d.clone(), c.clone(), EdgeKind::Hard),
                edge(a.clone(), b.clone(), EdgeKind::Hard),
                edge(b.clone(), a.clone(), EdgeKind::Hard),
            ],
        );
        let cycles = detect_cycles(&gph);
        assert_eq!(cycles.len(), 2);
        assert!(cycles[0].iter().next().unwrap().project.as_ref() == "a_proj");
        assert!(cycles[1].iter().next().unwrap().project.as_ref() == "z_proj");
    }
}