haz-dag 0.1.0

DAG construction and traversal for haz tasks.
Documentation
//! [`detect_literal_output_collisions`]: the `DAG-015`
//! literal-output uniqueness check.
//!
//! Per `DAG-015`, at most one task in a workspace may declare a
//! given literal output path. The check operates on every task's
//! declared `outputs`, filtering to literal patterns (globs are
//! deferred to runtime per `DAG-016`) and bucketing by the
//! workspace-absolute string form of the path. Any bucket with
//! two or more tasks is a collision.

use std::collections::{BTreeMap, BTreeSet};

use haz_domain::path::PathPattern;
use haz_domain::task_id::TaskId;
use haz_domain::workspace::Workspace;

/// One literal output path declared as an output by more than
/// one task.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LiteralOutputCollision {
    /// Workspace-absolute string form of the colliding literal
    /// path.
    pub path: String,
    /// Tasks that declared this path. Always `|tasks| >= 2`.
    pub tasks: BTreeSet<TaskId>,
}

/// Return every literal-output collision in `workspace` per
/// `DAG-015`.
///
/// The output `Vec` is ordered by the colliding `path`
/// (`BTreeMap` iteration), so it is stable across runs.
#[must_use]
pub fn detect_literal_output_collisions(workspace: &Workspace) -> Vec<LiteralOutputCollision> {
    let mut by_path: BTreeMap<String, BTreeSet<TaskId>> = BTreeMap::new();

    for (project_name, project) in &workspace.projects {
        for (task_name, task) in &project.tasks {
            let task_id = TaskId {
                project: project_name.clone(),
                task: task_name.clone(),
            };
            for output in &task.outputs {
                if !matches!(output.pattern(), PathPattern::Literal(_)) {
                    // DAG-016 defers glob-output overlap to
                    // execution; skip globs here.
                    continue;
                }
                let anchored =
                    crate::producer::anchor_to_workspace_absolute(output.pattern(), &project.root);
                by_path.entry(anchored).or_default().insert(task_id.clone());
            }
        }
    }

    by_path
        .into_iter()
        .filter(|(_, tasks)| tasks.len() >= 2)
        .map(|(path, tasks)| LiteralOutputCollision { path, tasks })
        .collect()
}

#[cfg(test)]
mod tests {
    use std::collections::{BTreeMap, BTreeSet};
    use std::path::PathBuf;
    use std::str::FromStr;

    use haz_domain::action::TaskAction;
    use haz_domain::env::EnvSettings;
    use haz_domain::name::{ProjectName, TaskName};
    use haz_domain::path::{
        CanonicalPath, HazPath, InputSpec, OutputSpec, ProjectRoot, WorkspaceRootPath,
    };
    use haz_domain::project::Project;
    use haz_domain::settings::WorkspaceSettings;
    use haz_domain::task::Task;
    use haz_domain::task_id::TaskId;
    use haz_domain::workspace::Workspace;
    use nonempty::NonEmpty;

    use crate::outputs::detect_literal_output_collisions;

    fn project_name(s: &str) -> ProjectName {
        ProjectName::from_str(s).unwrap()
    }
    fn task_name(s: &str) -> TaskName {
        TaskName::from_str(s).unwrap()
    }
    fn nested_root(path: &str) -> ProjectRoot {
        ProjectRoot::Nested(CanonicalPath::from_absolute(&HazPath::parse(path).unwrap()).unwrap())
    }
    fn output(s: &str) -> OutputSpec {
        OutputSpec::parse(s).unwrap()
    }
    fn input(s: &str) -> InputSpec {
        InputSpec::parse(s).unwrap()
    }

    fn task_with(name: &str, inputs: Vec<InputSpec>, outputs: Vec<OutputSpec>) -> Task {
        Task {
            name: task_name(name),
            action: TaskAction::Command(NonEmpty::from_vec(vec!["true".to_owned()]).unwrap()),
            inputs,
            outputs,
            deps: vec![],
            weak_deps: vec![],
            mutex: None,
            env: EnvSettings::default(),
        }
    }

    fn project_with(name: &str, root: ProjectRoot, tasks: Vec<Task>) -> Project {
        Project {
            name: project_name(name),
            root,
            tags: BTreeSet::new(),
            tasks: tasks.into_iter().map(|t| (t.name.clone(), t)).collect(),
        }
    }

    fn workspace_with(projects: Vec<Project>) -> Workspace {
        let mut map = BTreeMap::new();
        for project in projects {
            map.insert(project.name.clone(), project);
        }
        Workspace {
            root: WorkspaceRootPath::try_new(PathBuf::from("/abs/ws")).unwrap(),
            projects: map,
            overlays: BTreeMap::new(),
            settings: WorkspaceSettings::default(),
        }
    }

    fn id(project: &str, task: &str) -> TaskId {
        TaskId {
            project: project_name(project),
            task: task_name(task),
        }
    }

    #[test]
    fn empty_workspace_has_no_collisions() {
        let workspace = workspace_with(vec![]);
        assert!(detect_literal_output_collisions(&workspace).is_empty());
    }

    #[test]
    fn distinct_outputs_have_no_collision() {
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![
                task_with("a", vec![], vec![output("dist/a.js")]),
                task_with("b", vec![], vec![output("dist/b.js")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        assert!(detect_literal_output_collisions(&workspace).is_empty());
    }

    #[test]
    fn two_tasks_same_project_same_literal_output_collide() {
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![
                task_with("a", vec![], vec![output("dist/main.js")]),
                task_with("b", vec![], vec![output("dist/main.js")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        let collisions = detect_literal_output_collisions(&workspace);
        assert_eq!(collisions.len(), 1);
        assert_eq!(collisions[0].path, "/p/dist/main.js");
        assert_eq!(
            collisions[0].tasks,
            BTreeSet::from([id("p", "a"), id("p", "b")])
        );
    }

    #[test]
    fn cross_project_outputs_resolving_to_same_path_collide() {
        // p's task "a" writes /shared/x.bin via workspace-absolute path.
        // q's task "b" writes /shared/x.bin via project-relative path
        // when q's project root is /shared.
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![task_with("a", vec![], vec![output("/shared/x.bin")])],
        );
        let q = project_with(
            "q",
            nested_root("/shared"),
            vec![task_with("b", vec![], vec![output("x.bin")])],
        );
        let workspace = workspace_with(vec![p, q]);

        let collisions = detect_literal_output_collisions(&workspace);
        assert_eq!(collisions.len(), 1);
        assert_eq!(collisions[0].path, "/shared/x.bin");
        assert_eq!(
            collisions[0].tasks,
            BTreeSet::from([id("p", "a"), id("q", "b")])
        );
    }

    #[test]
    fn three_tasks_same_output_yield_one_collision_with_three_tasks() {
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![
                task_with("a", vec![], vec![output("dist/x")]),
                task_with("b", vec![], vec![output("dist/x")]),
                task_with("c", vec![], vec![output("dist/x")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        let collisions = detect_literal_output_collisions(&workspace);
        assert_eq!(collisions.len(), 1);
        assert_eq!(
            collisions[0].tasks,
            BTreeSet::from([id("p", "a"), id("p", "b"), id("p", "c")])
        );
    }

    #[test]
    fn dag_016_glob_output_does_not_trigger_static_collision_against_literal() {
        // DAG-016: glob-output overlap is deferred to execution.
        // A literal output and a glob that could match it are NOT
        // a static collision. (Runtime detection is EXEC-020.)
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![
                task_with("a", vec![], vec![output("dist/main.js")]),
                task_with("b", vec![], vec![output("dist/*.js")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        assert!(detect_literal_output_collisions(&workspace).is_empty());
    }

    #[test]
    fn dag_016_two_glob_outputs_to_same_pattern_do_not_collide_statically() {
        // DAG-016: even identical glob patterns are not flagged
        // statically; runtime materialisation is the source of
        // truth (EXEC-020).
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![
                task_with("a", vec![], vec![output("dist/*.js")]),
                task_with("b", vec![], vec![output("dist/*.js")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        assert!(detect_literal_output_collisions(&workspace).is_empty());
    }

    #[test]
    fn implicit_root_anchors_relative_pattern_at_workspace_root() {
        // Two tasks in the implicit-mode project (root = WorkspaceRoot).
        let p = project_with(
            "root",
            ProjectRoot::WorkspaceRoot,
            vec![
                task_with("a", vec![], vec![output("build/x")]),
                task_with("b", vec![], vec![output("/build/x")]),
            ],
        );
        let workspace = workspace_with(vec![p]);
        let collisions = detect_literal_output_collisions(&workspace);
        assert_eq!(collisions.len(), 1);
        assert_eq!(collisions[0].path, "/build/x");
    }

    #[test]
    fn task_with_inputs_only_contributes_nothing() {
        let p = project_with(
            "p",
            nested_root("/p"),
            vec![task_with("only_inputs", vec![input("src/main.rs")], vec![])],
        );
        let workspace = workspace_with(vec![p]);
        assert!(detect_literal_output_collisions(&workspace).is_empty());
    }
}