use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use snafu::{ResultExt, Snafu};
use haz_cache::{
CacheKey, CacheKeyBuilder, CacheKeyInputs, EnvContribution, Hasher, InputFile,
PredecessorStreams,
};
use haz_dag::edge::EdgeKind;
use haz_dag::graph::TaskGraph;
use haz_domain::env::{EnvSettings, EnvVarName};
use haz_domain::path::{HazPath, InputSpec, PathPattern, ProjectRoot};
use haz_domain::project::Project;
use haz_domain::settings::cache::HashAlgo;
use haz_domain::task_id::TaskId;
use haz_domain::workspace::Workspace;
use haz_vfs::{EntryKind, Filesystem, FsError};
use crate::pattern_walk::{
GlobMatchAction, GlobWalk, glob_walk_origin, host_path_from_segments,
literal_workspace_segments, workspace_absolute_string_from_segments,
};
/// Stream hashes recorded for one predecessor task.
///
/// `build_cache_key` looks one of these up for every hard-edge predecessor of
/// the target task and forwards both hashes into the cache-key inputs. The
/// values are supplied by the caller; nothing in this file computes them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PredecessorStreamHashes {
/// 32-byte hash associated with the predecessor's stdout stream.
pub stdout_hash: [u8; 32],
/// 32-byte hash associated with the predecessor's stderr stream.
pub stderr_hash: [u8; 32],
}
/// Failures that can occur while assembling a task's cache key.
///
/// The snafu context selectors generated for these variants are `pub(crate)`.
#[derive(Debug, Snafu)]
#[snafu(visibility(pub(crate)))]
pub enum BuildKeyError {
/// Either the task's project is not in the workspace, or that project has no
/// task with the requested name; both cases map to this variant.
#[snafu(display("task not in workspace: {task}"))]
TaskNotInWorkspace {
/// The identifier that could not be resolved.
task: TaskId,
},
/// A hard edge points at the target task, but the caller supplied no stream
/// hashes for the edge's source task.
#[snafu(display("missing recorded stream hashes for hard-edge predecessor: {predecessor}"))]
PredecessorStreamsMissing {
/// The hard-edge predecessor with no recorded hashes.
predecessor: TaskId,
},
/// A filesystem error occurred while resolving an input pattern: either the
/// metadata lookup for a literal input failed, or the glob walk reported an
/// error.
#[snafu(display(
"failed to resolve input patterns under: {}: {source}",
root.display()
))]
InputPatternResolutionFailed {
/// Host path being resolved; for a literal input this is the file's own
/// host path.
root: PathBuf,
/// Underlying filesystem error.
source: FsError,
},
/// Reading the bytes of a matched input file failed.
#[snafu(display(
"failed to read input file at: {}: {source}",
path.display()
))]
InputContentReadFailed {
/// Host path of the file that could not be read.
path: PathBuf,
/// Underlying filesystem error.
source: FsError,
},
/// A literal input resolved to an entry whose kind is not `EntryKind::File`.
#[snafu(display("input path is not a regular file: {}", path.display()))]
InputNotARegularFile {
/// Host path of the offending entry.
path: PathBuf,
},
}
/// Computes the cache key for `task`.
///
/// The key is built from four contributions gathered here:
///
/// * the task's action,
/// * the task's resolved input files (workspace-absolute path + content hash),
/// * the recorded stream hashes of every hard-edge predecessor in `graph`,
/// * the task's environment: the values of its `from_host` names as found in
///   `host_env` (absent names resolve to `None`) and its overrides.
///
/// # Errors
///
/// * [`BuildKeyError::TaskNotInWorkspace`] if the project or the task is not
///   present in `workspace`.
/// * [`BuildKeyError::PredecessorStreamsMissing`] if a hard-edge predecessor
///   has no entry in `predecessor_streams`.
/// * Any error produced by [`resolve_input_files`].
///
/// Predecessor streams are checked before input files are resolved, so a
/// missing predecessor entry is reported first when both would fail.
pub fn build_cache_key<F: Filesystem>(
    fs: &F,
    workspace: &Workspace,
    graph: &TaskGraph,
    task: &TaskId,
    host_env: &BTreeMap<EnvVarName, String>,
    predecessor_streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
    algo: HashAlgo,
) -> Result<CacheKey, BuildKeyError> {
    // A missing project and a missing task are reported identically.
    let not_in_workspace = || BuildKeyError::TaskNotInWorkspace { task: task.clone() };
    let project = workspace
        .projects
        .get(&task.project)
        .ok_or_else(not_in_workspace)?;
    let definition = project
        .tasks
        .get(&task.task)
        .ok_or_else(not_in_workspace)?;

    let host_values = resolve_from_host_values(&definition.env, host_env);
    let predecessor_hashes = collect_hard_predecessor_streams(graph, task, predecessor_streams)?;
    let resolved_files = resolve_input_files(fs, workspace, project, &definition.inputs, algo)?;

    // The key builder consumes borrowed views, so the owned results above must
    // stay alive until `finish` returns.
    let mut input_files: Vec<InputFile<'_>> = Vec::with_capacity(resolved_files.len());
    for file in &resolved_files {
        input_files.push(InputFile {
            workspace_absolute_path: &file.workspace_absolute_path,
            content_hash: file.content_hash,
        });
    }

    let mut hard_predecessors: Vec<PredecessorStreams<'_>> =
        Vec::with_capacity(predecessor_hashes.len());
    for (id, hashes) in &predecessor_hashes {
        hard_predecessors.push(PredecessorStreams {
            project: &id.project,
            task: &id.task,
            stdout_hash: hashes.stdout_hash,
            stderr_hash: hashes.stderr_hash,
        });
    }

    let env = EnvContribution {
        from_host: &host_values,
        overrides: &definition.env.overrides,
    };
    let key_inputs = CacheKeyInputs {
        action: &definition.action,
        input_files: &input_files,
        hard_predecessors: &hard_predecessors,
        env: &env,
    };
    Ok(CacheKeyBuilder::new(algo).finish(&key_inputs))
}
/// Gathers the recorded stream hashes of every hard-edge predecessor of
/// `target`.
///
/// Only edges whose kind is `EdgeKind::Hard` and whose `to` endpoint equals
/// `target` are considered; results follow the iteration order of
/// `graph.edges`. The returned task ids borrow from `graph`.
///
/// # Errors
///
/// Returns [`BuildKeyError::PredecessorStreamsMissing`] for the first matching
/// edge whose source task has no entry in `streams`.
fn collect_hard_predecessor_streams<'g>(
    graph: &'g TaskGraph,
    target: &TaskId,
    streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
) -> Result<Vec<(&'g TaskId, PredecessorStreamHashes)>, BuildKeyError> {
    let mut collected = Vec::new();
    for edge in graph.edges.iter() {
        // Skip anything that is not a hard edge pointing at the target.
        if edge.kind != EdgeKind::Hard || &edge.to != target {
            continue;
        }
        match streams.get(&edge.from) {
            Some(hashes) => collected.push((&edge.from, *hashes)),
            None => {
                return Err(BuildKeyError::PredecessorStreamsMissing {
                    predecessor: edge.from.clone(),
                });
            }
        }
    }
    Ok(collected)
}
/// Looks up every name listed in `env.from_host` in `host_env`.
///
/// Each declared name appears in the result exactly once: `Some(value)` when
/// the host defines it (an empty string is still `Some`), `None` when it does
/// not. Host entries that are not declared are left out, and `env.overrides`
/// is not consulted.
fn resolve_from_host_values(
    env: &EnvSettings,
    host_env: &BTreeMap<EnvVarName, String>,
) -> BTreeMap<EnvVarName, Option<String>> {
    let mut resolved = BTreeMap::new();
    for declared in env.from_host.iter() {
        let value = host_env.get(declared).cloned();
        resolved.insert(declared.clone(), value);
    }
    resolved
}
/// One resolved input file, owning its path string.
///
/// Produced by `resolve_input_files`; `build_cache_key` borrows from these to
/// build the `InputFile` views handed to the key builder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedInputFile {
/// Workspace-absolute path of the file (for example `/proj/file.txt`).
pub workspace_absolute_path: String,
/// 32-byte hash of the file's bytes under the configured `HashAlgo`.
pub content_hash: [u8; 32],
}
/// Resolves a task's input specs to concrete files and hashes each one.
///
/// Specs are handled in slice order and results are appended to a single
/// vector. A literal spec resolves to exactly one file. A glob spec is
/// compiled and walked starting from the origin computed by
/// `glob_walk_origin`, contributing whatever the walk reports as matches
/// (possibly nothing). This function itself neither sorts nor de-duplicates
/// the collected entries.
///
/// # Errors
///
/// Returns the first [`BuildKeyError`] raised while resolving a literal or
/// walking a glob; later specs are not processed.
pub fn resolve_input_files<F: Filesystem>(
    fs: &F,
    workspace: &Workspace,
    project: &Project,
    inputs: &[InputSpec],
    algo: HashAlgo,
) -> Result<Vec<OwnedInputFile>, BuildKeyError> {
    let action = InputAction { algo };
    let workspace_host = workspace.root.as_path();
    let mut resolved = Vec::new();

    for spec in inputs {
        // Literals are handled on the spot; only globs fall through to the walk.
        let glob_pattern = match spec.pattern() {
            PathPattern::Literal(literal) => {
                resolve_literal(
                    fs,
                    workspace_host,
                    &project.root,
                    literal,
                    &action,
                    &mut resolved,
                )?;
                continue;
            }
            PathPattern::Glob(glob_pattern) => glob_pattern,
        };

        let compiled = glob_pattern.compile();
        let matcher = compiled.compile_matcher();
        let (walk_root, workspace_prefix, candidate_prefix) =
            glob_walk_origin(workspace_host, &project.root, glob_pattern.anchor());
        let walker = GlobWalk {
            fs,
            matcher: &matcher,
            candidate_prefix,
            workspace_prefix,
            action: &action,
        };
        let mut relative_segments: Vec<String> = Vec::new();
        walker.walk(&walk_root, &mut relative_segments, &mut resolved)?;
    }

    Ok(resolved)
}
/// Resolves one literal input path, hashes it, and appends it to `out`.
///
/// The literal is turned into workspace segments relative to `project_root`,
/// mapped to a host path under `workspace_host`, and must refer to an entry of
/// kind `EntryKind::File`.
///
/// # Errors
///
/// * [`BuildKeyError::InputPatternResolutionFailed`] if the metadata lookup
///   fails (for example, the file does not exist); `root` is the literal's
///   host path.
/// * [`BuildKeyError::InputNotARegularFile`] if the entry is not a file.
/// * Any error returned by the hashing action's `on_match`.
fn resolve_literal<F: Filesystem>(
    fs: &F,
    workspace_host: &Path,
    project_root: &ProjectRoot,
    haz_path: &HazPath,
    action: &InputAction,
    out: &mut Vec<OwnedInputFile>,
) -> Result<(), BuildKeyError> {
    let ws_segments = literal_workspace_segments(haz_path, project_root);
    let host = host_path_from_segments(workspace_host, &ws_segments);
    // Snafu selector fields are converted with `Into` only when the error is
    // actually built, so passing a borrow avoids cloning the path on success.
    let meta = fs
        .metadata(&host)
        .context(InputPatternResolutionFailedSnafu {
            root: host.as_path(),
        })?;
    if meta.kind != EntryKind::File {
        return Err(BuildKeyError::InputNotARegularFile { path: host });
    }
    let workspace_absolute_path = workspace_absolute_string_from_segments(&ws_segments);
    action.on_match(fs, &host, workspace_absolute_path, out)
}
/// Match action used for task inputs: hashes each matched file's contents.
///
/// Used directly for literal inputs and handed to `GlobWalk` for glob inputs.
struct InputAction {
/// Hash algorithm applied to every matched file's bytes.
algo: HashAlgo,
}
/// Hashing behaviour for matched input files.
impl<F: Filesystem> GlobMatchAction<F> for InputAction {
    type Output = OwnedInputFile;
    type Error = BuildKeyError;

    /// Wraps a filesystem error raised by the walk as
    /// [`BuildKeyError::InputPatternResolutionFailed`].
    fn map_walk_error(&self, root: PathBuf, source: FsError) -> BuildKeyError {
        BuildKeyError::InputPatternResolutionFailed { root, source }
    }

    /// Reads the whole file at `host_path`, hashes its bytes with the
    /// configured algorithm, and pushes the result onto `out`.
    ///
    /// # Errors
    ///
    /// Returns [`BuildKeyError::InputContentReadFailed`] if the read fails.
    fn on_match(
        &self,
        fs: &F,
        host_path: &Path,
        workspace_absolute_path: String,
        out: &mut Vec<OwnedInputFile>,
    ) -> Result<(), BuildKeyError> {
        // The selector converts `&Path` into `PathBuf` only on the error path,
        // so no path allocation happens for files that read successfully.
        let bytes = fs
            .read(host_path)
            .context(InputContentReadFailedSnafu { path: host_path })?;
        let mut hasher = Hasher::new(self.algo);
        hasher.update(&bytes);
        out.push(OwnedInputFile {
            workspace_absolute_path,
            content_hash: hasher.finalize(),
        });
        Ok(())
    }
}
#[cfg(test)]
mod tests {
use std::collections::{BTreeMap, BTreeSet};
use std::str::FromStr;
use haz_dag::edge::{Edge, EdgeKind};
use haz_dag::graph::TaskGraph;
use haz_domain::env::{EnvSettings, EnvVarName};
use haz_domain::name::{ProjectName, TaskName};
use haz_domain::task_id::TaskId;
use super::{
BuildKeyError, PredecessorStreamHashes, collect_hard_predecessor_streams,
resolve_from_host_values,
};
/// Test helper: builds an `EnvVarName` from `s`, panicking if it is rejected.
fn name(s: &str) -> EnvVarName {
EnvVarName::try_new(s).expect("valid env var name")
}
/// Test helper: builds `EnvSettings` from plain string slices.
///
/// Panics (via `name`) if any variable name is invalid.
fn settings(from_host: &[&str], overrides: &[(&str, &str)]) -> EnvSettings {
    let mut host_names = BTreeSet::new();
    for raw in from_host {
        host_names.insert(name(raw));
    }
    let mut override_map = BTreeMap::new();
    for (key, value) in overrides {
        override_map.insert(name(key), (*value).to_owned());
    }
    EnvSettings {
        from_host: host_names,
        overrides: override_map,
    }
}
/// Test helper: builds a host-environment map from `(name, value)` pairs.
fn host(entries: &[(&str, &str)]) -> BTreeMap<EnvVarName, String> {
    let mut env = BTreeMap::new();
    for (key, value) in entries {
        env.insert(name(key), (*value).to_owned());
    }
    env
}
/// With no `from_host` names declared, nothing is resolved.
#[test]
fn empty_from_host_yields_empty_map() {
let env = settings(&[], &[]);
let result = resolve_from_host_values(&env, &host(&[]));
assert!(result.is_empty());
}
/// A declared name that the host defines resolves to `Some(value)`.
#[test]
fn name_present_in_host_resolves_to_some() {
let env = settings(&["PATH"], &[]);
let result = resolve_from_host_values(&env, &host(&[("PATH", "/usr/bin")]));
assert_eq!(
result.get(&name("PATH")).cloned(),
Some(Some("/usr/bin".to_owned()))
);
}
/// A declared name the host does not define is still present in the result,
/// mapped to `None`.
#[test]
fn name_absent_from_host_resolves_to_none() {
let env = settings(&["NEVER_SET"], &[]);
let result = resolve_from_host_values(&env, &host(&[("OTHER", "v")]));
assert_eq!(result.get(&name("NEVER_SET")).cloned(), Some(None));
}
/// An empty host value (`Some("")`) must stay distinguishable from an unset
/// variable (`None`).
#[test]
fn empty_string_in_host_is_distinct_from_absent() {
let env = settings(&["X"], &[]);
let result_empty = resolve_from_host_values(&env, &host(&[("X", "")]));
let result_absent = resolve_from_host_values(&env, &host(&[]));
assert_eq!(
result_empty.get(&name("X")).cloned(),
Some(Some(String::new()))
);
assert_eq!(result_absent.get(&name("X")).cloned(), Some(None));
assert_ne!(
result_empty.get(&name("X")).unwrap(),
result_absent.get(&name("X")).unwrap(),
"empty value MUST NOT collapse with absence"
);
}
/// Keys of the resolved map iterate in sorted order regardless of the order
/// in which the names were declared.
#[test]
fn from_host_iteration_is_lexicographically_sorted() {
let env = settings(&["ZULU", "ALPHA", "BRAVO"], &[]);
let h = host(&[("ALPHA", "a"), ("BRAVO", "b"), ("ZULU", "z")]);
let result = resolve_from_host_values(&env, &h);
let names: Vec<&str> = result
.keys()
.map(|n| AsRef::<str>::as_ref(n.as_ref()))
.collect();
assert_eq!(names, vec!["ALPHA", "BRAVO", "ZULU"]);
}
/// `resolve_from_host_values` reports the host's value even when an override
/// exists for the same name; overrides are not applied by this helper.
#[test]
fn overrides_are_not_consulted_by_this_helper() {
let env = settings(&["X"], &[("X", "override-val")]);
let result = resolve_from_host_values(&env, &host(&[("X", "host-val")]));
assert_eq!(
result.get(&name("X")).cloned(),
Some(Some("host-val".to_owned()))
);
}
/// Host variables that are not declared in `from_host` are excluded from the
/// result.
#[test]
fn unrelated_host_entries_do_not_appear() {
let env = settings(&["WANTED"], &[]);
let h = host(&[("WANTED", "yes"), ("UNRELATED", "no")]);
let result = resolve_from_host_values(&env, &h);
assert_eq!(result.len(), 1);
assert!(!result.contains_key(&name("UNRELATED")));
}
/// Test helper: builds a `TaskId` from raw project and task names, panicking
/// if either fails to parse.
fn task_id(project: &str, task: &str) -> TaskId {
TaskId {
project: ProjectName::from_str(project).expect("project name"),
task: TaskName::from_str(task).expect("task name"),
}
}
/// Test helper: builds an `Edge` from `from` to `to` of the given kind.
fn edge(from: TaskId, to: TaskId, kind: EdgeKind) -> Edge {
Edge { from, to, kind }
}
/// Test helper: builds stream hashes whose 32 bytes are all `stdout` and all
/// `stderr` respectively, giving easily recognisable values.
fn streams(stdout: u8, stderr: u8) -> PredecessorStreamHashes {
PredecessorStreamHashes {
stdout_hash: [stdout; 32],
stderr_hash: [stderr; 32],
}
}
/// Test helper: builds a `TaskGraph` from slices of nodes and edges.
fn graph(nodes: &[TaskId], edges: &[Edge]) -> TaskGraph {
    let mut node_set = BTreeSet::new();
    node_set.extend(nodes.iter().cloned());
    let mut edge_set = BTreeSet::new();
    edge_set.extend(edges.iter().cloned());
    TaskGraph {
        nodes: node_set,
        edges: edge_set,
    }
}
/// A task with no incoming edges has no hard predecessors.
#[test]
fn no_predecessors_yields_empty_vec() {
let target = task_id("p", "alone");
let g = graph(std::slice::from_ref(&target), &[]);
let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert!(result.is_empty());
}
/// One hard edge into the target yields that predecessor together with its
/// recorded hashes.
#[test]
fn single_hard_predecessor_is_returned() {
let pred = task_id("p", "pre");
let target = task_id("p", "main");
let g = graph(
&[pred.clone(), target.clone()],
&[edge(pred.clone(), target.clone(), EdgeKind::Hard)],
);
let mut recorded = BTreeMap::new();
recorded.insert(pred.clone(), streams(0xAA, 0xBB));
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].0, &pred);
assert_eq!(result[0].1, streams(0xAA, 0xBB));
}
/// Soft and producer-matching edges into the target are ignored. No hashes
/// are recorded for them here, so the call would fail if they were consulted.
#[test]
fn soft_and_producer_matching_edges_do_not_count() {
let target = task_id("p", "main");
let soft_pred = task_id("p", "soft");
let pm_pred = task_id("p", "pm");
let g = graph(
&[target.clone(), soft_pred.clone(), pm_pred.clone()],
&[
edge(soft_pred, target.clone(), EdgeKind::Soft),
edge(pm_pred, target.clone(), EdgeKind::ProducerMatching),
],
);
let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert!(
result.is_empty(),
"only Hard edges contribute to CACHE-007; got {} predecessors",
result.len(),
);
}
/// A hard edge that starts at the target (target -> downstream) does not make
/// the downstream task a predecessor.
#[test]
fn outgoing_hard_edges_are_not_predecessors() {
let target = task_id("p", "main");
let dep_of_target = task_id("p", "downstream");
let g = graph(
&[target.clone(), dep_of_target.clone()],
&[edge(target.clone(), dep_of_target, EdgeKind::Hard)],
);
let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert!(result.is_empty());
}
/// Every hard-edge predecessor is returned; the assertion is order-insensitive.
#[test]
fn multiple_hard_predecessors_are_all_returned() {
let target = task_id("p", "main");
let a = task_id("alpha", "build");
let b = task_id("beta", "build");
let g = graph(
&[target.clone(), a.clone(), b.clone()],
&[
edge(a.clone(), target.clone(), EdgeKind::Hard),
edge(b.clone(), target.clone(), EdgeKind::Hard),
],
);
let mut recorded = BTreeMap::new();
recorded.insert(a.clone(), streams(0x01, 0x02));
recorded.insert(b.clone(), streams(0x03, 0x04));
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert_eq!(result.len(), 2);
let names: BTreeSet<TaskId> = result.iter().map(|(id, _)| (*id).clone()).collect();
assert!(names.contains(&a));
assert!(names.contains(&b));
}
/// A hard predecessor without recorded hashes produces
/// `PredecessorStreamsMissing` naming that predecessor.
#[test]
fn missing_stream_hashes_yield_predecessor_streams_missing() {
let pred = task_id("p", "pre");
let target = task_id("p", "main");
let g = graph(
&[pred.clone(), target.clone()],
&[edge(pred.clone(), target.clone(), EdgeKind::Hard)],
);
let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
match collect_hard_predecessor_streams(&g, &target, &recorded) {
Err(BuildKeyError::PredecessorStreamsMissing { predecessor }) => {
assert_eq!(predecessor, pred);
}
Err(other) => panic!("expected PredecessorStreamsMissing, got {other:?}"),
Ok(v) => panic!("expected error, got Ok with {} entries", v.len()),
}
}
/// When hard and soft edges both point at the target, only the hard edge's
/// source is returned.
#[test]
fn mixed_edge_kinds_into_target_select_only_hard() {
let target = task_id("p", "main");
let hard_pred = task_id("p", "hard");
let soft_pred = task_id("p", "soft");
let g = graph(
&[target.clone(), hard_pred.clone(), soft_pred.clone()],
&[
edge(hard_pred.clone(), target.clone(), EdgeKind::Hard),
edge(soft_pred, target.clone(), EdgeKind::Soft),
],
);
let mut recorded = BTreeMap::new();
recorded.insert(hard_pred.clone(), streams(0x10, 0x20));
let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].0, &hard_pred);
}
mod input_resolution {
use std::collections::{BTreeMap, BTreeSet};
use std::path::PathBuf;
use haz_cache::Hasher;
use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
use haz_domain::project::Project;
use haz_domain::settings::WorkspaceSettings;
use haz_domain::settings::cache::HashAlgo;
use haz_domain::workspace::Workspace;
use haz_vfs::{FsError, MemFilesystem};
use super::super::BuildKeyError;
use super::super::{OwnedInputFile, resolve_input_files};
/// Host path used as the workspace root in these tests.
const WORKSPACE_HOST: &str = "/ws";
/// Host path of the nested project's directory (workspace path `/proj`).
const PROJECT_HOST: &str = "/ws/proj";
/// Test fixture: a project named `proj` rooted at workspace path `/proj`,
/// with no tags and no tasks.
fn nested_project() -> Project {
Project {
name: haz_domain::name::ProjectName::try_new("proj").unwrap(),
root: ProjectRoot::Nested(
CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
),
tags: BTreeSet::new(),
tasks: BTreeMap::new(),
}
}
/// Test fixture: a project named `root` whose root is the workspace root
/// itself, with no tags and no tasks.
fn implicit_project() -> Project {
Project {
name: haz_domain::name::ProjectName::try_new("root").unwrap(),
root: ProjectRoot::WorkspaceRoot,
tags: BTreeSet::new(),
tasks: BTreeMap::new(),
}
}
/// Test fixture: a workspace rooted at `WORKSPACE_HOST` containing only a
/// clone of `project`, with no overlays and default settings.
fn workspace_with(project: &Project) -> Workspace {
    let projects = BTreeMap::from([(project.name.clone(), project.clone())]);
    Workspace {
        root: WorkspaceRootPath::try_new(PathBuf::from(WORKSPACE_HOST)).unwrap(),
        projects,
        overlays: BTreeMap::new(),
        settings: WorkspaceSettings::default(),
    }
}
/// Test helper: hashes `bytes` with `algo` in a single update, giving the
/// value the tests expect for a file's content hash.
fn hash_of(algo: HashAlgo, bytes: &[u8]) -> [u8; 32] {
let mut h = Hasher::new(algo);
h.update(bytes);
h.finalize()
}
/// Test helper: collects the workspace-absolute paths of `files` into a set so
/// assertions do not depend on result order.
fn paths_of(files: &[OwnedInputFile]) -> BTreeSet<String> {
    let mut paths = BTreeSet::new();
    for file in files {
        paths.insert(file.workspace_absolute_path.clone());
    }
    paths
}
/// A project-relative literal resolves to one entry whose path is
/// workspace-absolute and whose hash matches the file's bytes.
#[test]
fn literal_hit_returns_one_input_with_correct_hash() {
let mut fs = MemFilesystem::new();
fs.add_dir(PROJECT_HOST).unwrap();
fs.add_file(format!("{PROJECT_HOST}/file.txt"), b"hello".to_vec())
.unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("file.txt").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].workspace_absolute_path, "/proj/file.txt");
assert_eq!(result[0].content_hash, hash_of(HashAlgo::Blake3, b"hello"));
}
/// A literal starting with `/` is resolved against the workspace root rather
/// than the project directory.
#[test]
fn literal_workspace_absolute_resolves_under_workspace_root() {
let mut fs = MemFilesystem::new();
fs.add_dir("/ws/other").unwrap();
fs.add_file("/ws/other/data.bin", b"\xDE\xAD\xBE\xEF".to_vec())
.unwrap();
fs.add_dir(PROJECT_HOST).unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("/other/data.bin").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].workspace_absolute_path, "/other/data.bin");
assert_eq!(
result[0].content_hash,
hash_of(HashAlgo::Blake3, b"\xDE\xAD\xBE\xEF")
);
}
/// A literal naming a missing file fails with `InputPatternResolutionFailed`
/// whose `root` is the file's host path and whose source is `NotFound`.
#[test]
fn literal_missing_file_surfaces_pattern_resolution_failure() {
let mut fs = MemFilesystem::new();
fs.add_dir(PROJECT_HOST).unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("absent.txt").unwrap()];
match resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3) {
Err(BuildKeyError::InputPatternResolutionFailed { root, source }) => {
assert_eq!(root, PathBuf::from("/ws/proj/absent.txt"));
assert!(
matches!(source, FsError::NotFound { .. }),
"expected NotFound source, got {source:?}",
);
}
other => panic!("expected InputPatternResolutionFailed, got {other:?}"),
}
}
/// A literal that names a directory is rejected with `InputNotARegularFile`.
#[test]
fn literal_pointing_at_directory_surfaces_input_not_a_regular_file() {
let mut fs = MemFilesystem::new();
fs.add_dir(format!("{PROJECT_HOST}/subdir")).unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("subdir").unwrap()];
match resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3) {
Err(BuildKeyError::InputNotARegularFile { path }) => {
assert_eq!(path, PathBuf::from("/ws/proj/subdir"));
}
other => panic!("expected InputNotARegularFile, got {other:?}"),
}
}
/// A glob contributes every matching file (and only those), each hashed from
/// its own contents.
#[test]
fn glob_multi_match_collects_every_matching_file() {
let mut fs = MemFilesystem::new();
fs.add_dir(PROJECT_HOST).unwrap();
fs.add_file(format!("{PROJECT_HOST}/a.rs"), b"a".to_vec())
.unwrap();
fs.add_file(format!("{PROJECT_HOST}/b.rs"), b"b".to_vec())
.unwrap();
fs.add_file(format!("{PROJECT_HOST}/keep.txt"), b"ignored".to_vec())
.unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("*.rs").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
paths_of(&result),
BTreeSet::from(["/proj/a.rs".to_owned(), "/proj/b.rs".to_owned()]),
);
for file in &result {
let bytes: &[u8] = if file.workspace_absolute_path == "/proj/a.rs" {
b"a"
} else {
b"b"
};
assert_eq!(file.content_hash, hash_of(HashAlgo::Blake3, bytes));
}
}
/// A glob that matches nothing is not an error; it simply contributes no files.
#[test]
fn glob_no_match_returns_empty_contribution() {
let mut fs = MemFilesystem::new();
fs.add_dir(PROJECT_HOST).unwrap();
fs.add_file(format!("{PROJECT_HOST}/only.txt"), b"x".to_vec())
.unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("*.rs").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert!(result.is_empty());
}
/// `src/**/*.rs` matches files directly under `src` and in nested directories,
/// but not a `.rs` file outside `src`.
#[test]
fn glob_nested_double_star_recurses_into_subdirectories() {
let mut fs = MemFilesystem::new();
fs.add_dir(format!("{PROJECT_HOST}/src")).unwrap();
fs.add_dir(format!("{PROJECT_HOST}/src/inner")).unwrap();
fs.add_file(
format!("{PROJECT_HOST}/src/top.rs"),
b"top contents".to_vec(),
)
.unwrap();
fs.add_file(
format!("{PROJECT_HOST}/src/inner/deep.rs"),
b"deep contents".to_vec(),
)
.unwrap();
fs.add_file(format!("{PROJECT_HOST}/other.rs"), b"other".to_vec())
.unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("src/**/*.rs").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
paths_of(&result),
BTreeSet::from([
"/proj/src/top.rs".to_owned(),
"/proj/src/inner/deep.rs".to_owned(),
]),
);
}
/// A symlink matched by a glob is recorded under the link's own path, with a
/// content hash equal to that of the target file's bytes.
#[test]
fn glob_symlink_to_file_records_link_path_with_target_bytes_hash() {
let mut fs = MemFilesystem::new();
fs.add_dir(PROJECT_HOST).unwrap();
fs.add_file(format!("{PROJECT_HOST}/real.txt"), b"real bytes".to_vec())
.unwrap();
fs.add_symlink(
format!("{PROJECT_HOST}/link.txt"),
format!("{PROJECT_HOST}/real.txt"),
)
.unwrap();
let project = nested_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("*.txt").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(
result.len(),
2,
"both the real file and the symlink to it are distinct contributions",
);
assert_eq!(
paths_of(&result),
BTreeSet::from(["/proj/real.txt".to_owned(), "/proj/link.txt".to_owned(),]),
);
let expected_hash = hash_of(HashAlgo::Blake3, b"real bytes");
for file in &result {
assert_eq!(file.content_hash, expected_hash);
}
}
/// For a project rooted at the workspace root, a project-relative literal
/// maps directly to `/<name>` in workspace-absolute form.
#[test]
fn implicit_mode_project_relative_literal_is_workspace_absolute() {
let mut fs = MemFilesystem::new();
fs.add_dir(WORKSPACE_HOST).unwrap();
fs.add_file(format!("{WORKSPACE_HOST}/at_root.txt"), b"r".to_vec())
.unwrap();
let project = implicit_project();
let workspace = workspace_with(&project);
let inputs = vec![InputSpec::parse("at_root.txt").unwrap()];
let result =
resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].workspace_absolute_path, "/at_root.txt");
assert_eq!(result[0].content_hash, hash_of(HashAlgo::Blake3, b"r"));
}
}
mod e2e {
use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};
use nonempty::NonEmpty;
use haz_cache::CacheKey;
use haz_dag::edge::{Edge, EdgeKind};
use haz_dag::graph::TaskGraph;
use haz_domain::action::TaskAction;
use haz_domain::env::{EnvSettings, EnvVarName};
use haz_domain::mutex::{Mutex, MutexMode, MutexScope};
use haz_domain::name::{MutexName, ProjectName, TaskName};
use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
use haz_domain::project::Project;
use haz_domain::settings::WorkspaceSettings;
use haz_domain::settings::cache::HashAlgo;
use haz_domain::task::Task;
use haz_domain::task_id::TaskId;
use haz_domain::workspace::Workspace;
use haz_vfs::{MemFilesystem, WritableFilesystem};
use super::super::{BuildKeyError, PredecessorStreamHashes, build_cache_key};
/// Everything `build_cache_key` needs, bundled so a test can perturb one
/// piece and recompute the key.
struct E2eState {
/// In-memory filesystem holding the task's input files.
fs: MemFilesystem,
/// Workspace containing the target project and task.
workspace: Workspace,
/// Task graph supplying predecessor edges.
graph: TaskGraph,
/// Task whose key is computed.
task_id: TaskId,
/// Host environment offered to `from_host` resolution.
host_env: BTreeMap<EnvVarName, String>,
/// Recorded stream hashes, keyed by predecessor task.
predecessor_streams: BTreeMap<TaskId, PredecessorStreamHashes>,
}
impl E2eState {
/// Computes the Blake3 cache key for the current state, panicking if key
/// construction fails.
fn key(&self) -> CacheKey {
build_cache_key(
&self.fs,
&self.workspace,
&self.graph,
&self.task_id,
&self.host_env,
&self.predecessor_streams,
HashAlgo::Blake3,
)
.expect("baseline state must yield a valid key")
}
}
/// Test helper: builds an `EnvVarName`, panicking on invalid input.
fn env_var(s: &str) -> EnvVarName {
EnvVarName::try_new(s).unwrap()
}
/// Test helper: builds a `ProjectName`, panicking on invalid input.
fn project_name(s: &str) -> ProjectName {
ProjectName::try_new(s).unwrap()
}
/// Test helper: builds a `TaskName`, panicking on invalid input.
fn task_name(s: &str) -> TaskName {
TaskName::try_new(s).unwrap()
}
/// Builds the reference state every end-to-end test starts from.
///
/// * Filesystem: `/ws/proj/file.txt` containing `hello`.
/// * Task `proj:build`: runs the command `true`, takes `file.txt` as its only
///   input, reads `PATH` from the host, has no overrides and no mutex.
/// * Graph: one hard edge from `lib:compile` to `proj:build`.
/// * Recorded streams for `lib:compile`: stdout all `0xAA`, stderr all `0xBB`.
/// * Host environment: `PATH=/usr/bin`.
fn baseline() -> E2eState {
let mut fs = MemFilesystem::new();
fs.add_dir("/ws/proj").unwrap();
fs.add_file("/ws/proj/file.txt", b"hello".to_vec()).unwrap();
let task = Task {
name: task_name("build"),
action: TaskAction::Command(NonEmpty::from_vec(vec!["true".to_owned()]).unwrap()),
inputs: vec![InputSpec::parse("file.txt").unwrap()],
outputs: vec![],
deps: vec![],
weak_deps: vec![],
mutex: None,
env: EnvSettings {
from_host: BTreeSet::from([env_var("PATH")]),
overrides: BTreeMap::new(),
},
};
let project = Project {
name: project_name("proj"),
root: ProjectRoot::Nested(
CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
),
tags: BTreeSet::new(),
tasks: BTreeMap::from([(task.name.clone(), task)]),
};
let task_id = TaskId {
project: project_name("proj"),
task: task_name("build"),
};
// The predecessor appears only in the graph and the recorded streams; its
// project is not added to the workspace.
let pred = TaskId {
project: project_name("lib"),
task: task_name("compile"),
};
let workspace = Workspace {
root: WorkspaceRootPath::try_new(PathBuf::from("/ws")).unwrap(),
projects: BTreeMap::from([(project.name.clone(), project)]),
overlays: BTreeMap::new(),
settings: WorkspaceSettings::default(),
};
let graph = TaskGraph {
nodes: BTreeSet::from([task_id.clone(), pred.clone()]),
edges: BTreeSet::from([Edge {
from: pred.clone(),
to: task_id.clone(),
kind: EdgeKind::Hard,
}]),
};
let predecessor_streams = BTreeMap::from([(
pred,
PredecessorStreamHashes {
stdout_hash: [0xAA; 32],
stderr_hash: [0xBB; 32],
},
)]);
let host_env = BTreeMap::from([(env_var("PATH"), "/usr/bin".to_owned())]);
E2eState {
fs,
workspace,
graph,
task_id,
host_env,
predecessor_streams,
}
}
/// Test helper: applies `f` to the target task's definition inside
/// `state.workspace`.
///
/// Panics if the project or task named by `state.task_id` is missing.
fn mutate_task(state: &mut E2eState, f: impl FnOnce(&mut Task)) {
    let target = state
        .workspace
        .projects
        .get_mut(&state.task_id.project)
        .expect("baseline contains the target project")
        .tasks
        .get_mut(&state.task_id.task)
        .expect("baseline contains the target task");
    f(target);
}
/// Two independently built baseline states produce the same key.
#[test]
fn identical_inputs_yield_identical_keys() {
assert_eq!(baseline().key(), baseline().key());
}
/// Changing the task's command (`true` -> `false`) changes the key.
#[test]
fn task_action_change_changes_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
mutate_task(&mut perturbed, |t| {
t.action =
TaskAction::Command(NonEmpty::from_vec(vec!["false".to_owned()]).unwrap());
});
assert_ne!(baseline_key, perturbed.key());
}
/// Rewriting the input file's contents changes the key.
#[test]
fn dag_018_input_file_content_change_changes_key() {
let baseline_key = baseline().key();
// `perturbed` is not `mut`: the write goes through a shared borrow of `fs`.
let perturbed = baseline();
perturbed
.fs
.write_file(Path::new("/ws/proj/file.txt"), b"world")
.unwrap();
assert_ne!(baseline_key, perturbed.key());
}
/// Adding a soft edge into the target leaves the key unchanged. No stream
/// hashes are recorded for the new predecessor, so the key computation would
/// fail if soft edges were consulted.
#[test]
fn dag_018_adding_a_soft_edge_predecessor_does_not_change_the_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
let soft_pred = TaskId {
project: project_name("soft_producer"),
task: task_name("emit"),
};
perturbed.graph.nodes.insert(soft_pred.clone());
perturbed.graph.edges.insert(Edge {
from: soft_pred,
to: perturbed.task_id.clone(),
kind: EdgeKind::Soft,
});
assert_eq!(
baseline_key,
perturbed.key(),
"DAG-018: soft-edge predecessors MUST NOT contribute to the key",
);
}
/// Adding a producer-matching edge into the target leaves the key unchanged.
#[test]
fn dag_018_adding_a_producer_matching_edge_does_not_change_the_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
let pm_pred = TaskId {
project: project_name("pm_producer"),
task: task_name("emit"),
};
perturbed.graph.nodes.insert(pm_pred.clone());
perturbed.graph.edges.insert(Edge {
from: pm_pred,
to: perturbed.task_id.clone(),
kind: EdgeKind::ProducerMatching,
});
assert_eq!(
baseline_key,
perturbed.key(),
"DAG-018: producer-matching edges MUST NOT contribute to the key",
);
}
/// Changing only the hard predecessor's stdout hash (stderr unchanged) changes
/// the key.
#[test]
fn dag_017_predecessor_stream_hash_change_changes_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
let pred = TaskId {
project: project_name("lib"),
task: task_name("compile"),
};
perturbed.predecessor_streams.insert(
pred,
PredecessorStreamHashes {
stdout_hash: [0xCC; 32],
stderr_hash: [0xBB; 32],
},
);
assert_ne!(baseline_key, perturbed.key());
}
/// Changing the host value of a declared `from_host` variable changes the key.
#[test]
fn env_value_change_changes_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
perturbed
.host_env
.insert(env_var("PATH"), "/usr/local/bin".to_owned());
assert_ne!(baseline_key, perturbed.key());
}
/// A host variable the task does not declare in `from_host` has no effect on
/// the key.
#[test]
fn unrelated_host_env_does_not_affect_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
perturbed
.host_env
.insert(env_var("HOME"), "/home/user".to_owned());
assert_eq!(baseline_key, perturbed.key());
}
/// Adding an override changes the key, and changing that override's value
/// changes it again.
#[test]
fn override_change_changes_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
mutate_task(&mut perturbed, |t| {
t.env
.overrides
.insert(env_var("HAZ_ENV"), "alpha".to_owned());
});
let with_alpha = perturbed.key();
assert_ne!(baseline_key, with_alpha);
mutate_task(&mut perturbed, |t| {
t.env
.overrides
.insert(env_var("HAZ_ENV"), "beta".to_owned());
});
assert_ne!(with_alpha, perturbed.key());
}
/// Attaching a mutex to the task does not change the key.
#[test]
fn mutex_008_adding_a_mutex_does_not_change_the_key() {
let baseline_key = baseline().key();
let mut perturbed = baseline();
mutate_task(&mut perturbed, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Exclusive,
});
});
assert_eq!(baseline_key, perturbed.key());
}
/// Two states differing only in mutex scope (workspace vs project) share a key.
#[test]
fn mutex_008_changing_mutex_scope_does_not_change_the_key() {
let mut workspace_scoped = baseline();
mutate_task(&mut workspace_scoped, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Exclusive,
});
});
let mut project_scoped = baseline();
mutate_task(&mut project_scoped, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Project,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Exclusive,
});
});
assert_eq!(workspace_scoped.key(), project_scoped.key());
}
/// Two states differing only in mutex name (`db` vs `gpu`) share a key.
#[test]
fn mutex_008_changing_mutex_name_does_not_change_the_key() {
let mut named_db = baseline();
mutate_task(&mut named_db, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Exclusive,
});
});
let mut named_gpu = baseline();
mutate_task(&mut named_gpu, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("gpu").unwrap(),
mode: MutexMode::Exclusive,
});
});
assert_eq!(named_db.key(), named_gpu.key());
}
/// Two states differing only in mutex mode (exclusive vs shared) share a key.
#[test]
fn mutex_008_changing_mutex_mode_does_not_change_the_key() {
let mut exclusive = baseline();
mutate_task(&mut exclusive, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Exclusive,
});
});
let mut shared = baseline();
mutate_task(&mut shared, |t| {
t.mutex = Some(Mutex {
scope: MutexScope::Workspace,
name: MutexName::try_new("db").unwrap(),
mode: MutexMode::Shared,
});
});
assert_eq!(exclusive.key(), shared.key());
}
/// Asking for a task whose project is not in the workspace yields
/// `TaskNotInWorkspace` carrying the requested id.
#[test]
fn task_not_in_workspace_surfaces_error() {
let mut state = baseline();
state.task_id = TaskId {
project: project_name("absent"),
task: task_name("build"),
};
match build_cache_key(
&state.fs,
&state.workspace,
&state.graph,
&state.task_id,
&state.host_env,
&state.predecessor_streams,
HashAlgo::Blake3,
) {
Err(BuildKeyError::TaskNotInWorkspace { task }) => {
assert_eq!(task, state.task_id);
}
other => panic!("expected TaskNotInWorkspace, got {other:?}"),
}
}
}
}