1use std::collections::BTreeMap;
17use std::path::{Path, PathBuf};
18
19use snafu::{ResultExt, Snafu};
20
21use haz_cache::{
22 CacheKey, CacheKeyBuilder, CacheKeyInputs, EnvContribution, Hasher, InputFile,
23 PredecessorStreams,
24};
25use haz_dag::edge::EdgeKind;
26use haz_dag::graph::TaskGraph;
27use haz_domain::env::{EnvSettings, EnvVarName};
28use haz_domain::path::{HazPath, InputSpec, PathPattern, ProjectRoot};
29use haz_domain::project::Project;
30use haz_domain::settings::cache::HashAlgo;
31use haz_domain::task_id::TaskId;
32use haz_domain::workspace::Workspace;
33use haz_vfs::{EntryKind, Filesystem, FsError};
34
35use crate::pattern_walk::{
36 GlobMatchAction, GlobWalk, glob_walk_origin, host_path_from_segments,
37 literal_workspace_segments, workspace_absolute_string_from_segments,
38};
39
/// Pair of 32-byte hashes recorded for one predecessor task.
///
/// `build_cache_key` copies both fields verbatim into the `PredecessorStreams`
/// entries it hands to the cache-key builder.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct PredecessorStreamHashes {
    /// Hash recorded for the predecessor's stdout stream.
    pub stdout_hash: [u8; 32],
    /// Hash recorded for the predecessor's stderr stream.
    pub stderr_hash: [u8; 32],
}
56
57#[derive(Debug, Snafu)]
59#[snafu(visibility(pub(crate)))]
60pub enum BuildKeyError {
61 #[snafu(display("task not in workspace: {task}"))]
65 TaskNotInWorkspace {
66 task: TaskId,
68 },
69
70 #[snafu(display("missing recorded stream hashes for hard-edge predecessor: {predecessor}"))]
76 PredecessorStreamsMissing {
77 predecessor: TaskId,
79 },
80
81 #[snafu(display(
85 "failed to resolve input patterns under: {}: {source}",
86 root.display()
87 ))]
88 InputPatternResolutionFailed {
89 root: PathBuf,
93 source: FsError,
95 },
96
97 #[snafu(display(
99 "failed to read input file at: {}: {source}",
100 path.display()
101 ))]
102 InputContentReadFailed {
103 path: PathBuf,
105 source: FsError,
107 },
108
109 #[snafu(display("input path is not a regular file: {}", path.display()))]
115 InputNotARegularFile {
116 path: PathBuf,
118 },
119}
120
121pub fn build_cache_key<F: Filesystem>(
155 fs: &F,
156 workspace: &Workspace,
157 graph: &TaskGraph,
158 task: &TaskId,
159 host_env: &BTreeMap<EnvVarName, String>,
160 predecessor_streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
161 algo: HashAlgo,
162) -> Result<CacheKey, BuildKeyError> {
163 let project = workspace
164 .projects
165 .get(&task.project)
166 .ok_or_else(|| BuildKeyError::TaskNotInWorkspace { task: task.clone() })?;
167 let task_def = project
168 .tasks
169 .get(&task.task)
170 .ok_or_else(|| BuildKeyError::TaskNotInWorkspace { task: task.clone() })?;
171
172 let from_host_resolved = resolve_from_host_values(&task_def.env, host_env);
173 let predecessors_owned = collect_hard_predecessor_streams(graph, task, predecessor_streams)?;
174 let input_files_owned = resolve_input_files(fs, workspace, project, &task_def.inputs, algo)?;
175
176 let input_files: Vec<InputFile<'_>> = input_files_owned
177 .iter()
178 .map(|f| InputFile {
179 workspace_absolute_path: &f.workspace_absolute_path,
180 content_hash: f.content_hash,
181 })
182 .collect();
183
184 let hard_predecessors: Vec<PredecessorStreams<'_>> = predecessors_owned
185 .iter()
186 .map(|(id, hashes)| PredecessorStreams {
187 project: &id.project,
188 task: &id.task,
189 stdout_hash: hashes.stdout_hash,
190 stderr_hash: hashes.stderr_hash,
191 })
192 .collect();
193
194 let env = EnvContribution {
195 from_host: &from_host_resolved,
196 overrides: &task_def.env.overrides,
197 };
198
199 let inputs = CacheKeyInputs {
200 action: &task_def.action,
201 input_files: &input_files,
202 hard_predecessors: &hard_predecessors,
203 env: &env,
204 };
205
206 Ok(CacheKeyBuilder::new(algo).finish(&inputs))
207}
208
209fn collect_hard_predecessor_streams<'g>(
222 graph: &'g TaskGraph,
223 target: &TaskId,
224 streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
225) -> Result<Vec<(&'g TaskId, PredecessorStreamHashes)>, BuildKeyError> {
226 graph
227 .edges
228 .iter()
229 .filter(|e| e.kind == EdgeKind::Hard && &e.to == target)
230 .map(|e| {
231 streams
232 .get(&e.from)
233 .copied()
234 .map(|hashes| (&e.from, hashes))
235 .ok_or_else(|| BuildKeyError::PredecessorStreamsMissing {
236 predecessor: e.from.clone(),
237 })
238 })
239 .collect()
240}
241
242fn resolve_from_host_values(
254 env: &EnvSettings,
255 host_env: &BTreeMap<EnvVarName, String>,
256) -> BTreeMap<EnvVarName, Option<String>> {
257 env.from_host
258 .iter()
259 .map(|name| (name.clone(), host_env.get(name).cloned()))
260 .collect()
261}
262
/// One resolved input file: its workspace-absolute path string and the hash
/// of its contents.
///
/// This is the owned form; `build_cache_key` borrows from it to build the
/// `InputFile` views passed to the key builder.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct OwnedInputFile {
    /// Workspace-absolute path of the file, as a string (e.g. `/proj/file.txt`).
    pub workspace_absolute_path: String,
    /// 32-byte hash of the file's bytes.
    pub content_hash: [u8; 32],
}
280
281pub fn resolve_input_files<F: Filesystem>(
304 fs: &F,
305 workspace: &Workspace,
306 project: &Project,
307 inputs: &[InputSpec],
308 algo: HashAlgo,
309) -> Result<Vec<OwnedInputFile>, BuildKeyError> {
310 let workspace_host = workspace.root.as_path();
311 let action = InputAction { algo };
312 let mut out = Vec::new();
313
314 for spec in inputs {
315 match spec.pattern() {
316 PathPattern::Literal(haz_path) => {
317 resolve_literal(
318 fs,
319 workspace_host,
320 &project.root,
321 haz_path,
322 &action,
323 &mut out,
324 )?;
325 }
326 PathPattern::Glob(glob_pattern) => {
327 let glob = glob_pattern.compile();
328 let matcher = glob.compile_matcher();
329 let (walk_host, workspace_prefix, candidate_prefix) =
330 glob_walk_origin(workspace_host, &project.root, glob_pattern.anchor());
331 let walker = GlobWalk {
332 fs,
333 matcher: &matcher,
334 candidate_prefix,
335 workspace_prefix,
336 action: &action,
337 };
338 let mut walk_rel: Vec<String> = Vec::new();
339 walker.walk(&walk_host, &mut walk_rel, &mut out)?;
340 }
341 }
342 }
343
344 Ok(out)
345}
346
347fn resolve_literal<F: Filesystem>(
348 fs: &F,
349 workspace_host: &Path,
350 project_root: &ProjectRoot,
351 haz_path: &HazPath,
352 action: &InputAction,
353 out: &mut Vec<OwnedInputFile>,
354) -> Result<(), BuildKeyError> {
355 let ws_segments = literal_workspace_segments(haz_path, project_root);
356 let host = host_path_from_segments(workspace_host, &ws_segments);
357
358 let meta = fs
359 .metadata(&host)
360 .context(InputPatternResolutionFailedSnafu { root: host.clone() })?;
361 if meta.kind != EntryKind::File {
362 return Err(BuildKeyError::InputNotARegularFile { path: host });
363 }
364
365 let workspace_absolute_path = workspace_absolute_string_from_segments(&ws_segments);
366 action.on_match(fs, &host, workspace_absolute_path, out)
367}
368
369struct InputAction {
374 algo: HashAlgo,
375}
376
377impl<F: Filesystem> GlobMatchAction<F> for InputAction {
378 type Output = OwnedInputFile;
379 type Error = BuildKeyError;
380
381 fn map_walk_error(&self, root: PathBuf, source: FsError) -> BuildKeyError {
382 BuildKeyError::InputPatternResolutionFailed { root, source }
383 }
384
385 fn on_match(
386 &self,
387 fs: &F,
388 host_path: &Path,
389 workspace_absolute_path: String,
390 out: &mut Vec<OwnedInputFile>,
391 ) -> Result<(), BuildKeyError> {
392 let bytes = fs.read(host_path).context(InputContentReadFailedSnafu {
393 path: host_path.to_path_buf(),
394 })?;
395 let mut hasher = Hasher::new(self.algo);
396 hasher.update(&bytes);
397 out.push(OwnedInputFile {
398 workspace_absolute_path,
399 content_hash: hasher.finalize(),
400 });
401 Ok(())
402 }
403}
404
405#[cfg(test)]
406mod tests {
407 use std::collections::{BTreeMap, BTreeSet};
408 use std::str::FromStr;
409
410 use haz_dag::edge::{Edge, EdgeKind};
411 use haz_dag::graph::TaskGraph;
412 use haz_domain::env::{EnvSettings, EnvVarName};
413 use haz_domain::name::{ProjectName, TaskName};
414 use haz_domain::task_id::TaskId;
415
416 use super::{
417 BuildKeyError, PredecessorStreamHashes, collect_hard_predecessor_streams,
418 resolve_from_host_values,
419 };
420
421 fn name(s: &str) -> EnvVarName {
422 EnvVarName::try_new(s).expect("valid env var name")
423 }
424
425 fn settings(from_host: &[&str], overrides: &[(&str, &str)]) -> EnvSettings {
426 EnvSettings {
427 from_host: from_host.iter().map(|s| name(s)).collect::<BTreeSet<_>>(),
428 overrides: overrides
429 .iter()
430 .map(|(k, v)| (name(k), (*v).to_owned()))
431 .collect::<BTreeMap<_, _>>(),
432 }
433 }
434
435 fn host(entries: &[(&str, &str)]) -> BTreeMap<EnvVarName, String> {
436 entries
437 .iter()
438 .map(|(k, v)| (name(k), (*v).to_owned()))
439 .collect()
440 }
441
/// With no `from_host` names, nothing is resolved.
#[test]
fn empty_from_host_yields_empty_map() {
    let no_forwarding = settings(&[], &[]);
    let empty_host = host(&[]);
    let resolved = resolve_from_host_values(&no_forwarding, &empty_host);
    assert!(resolved.is_empty());
}
448
/// A listed name that the host defines resolves to `Some(value)`.
#[test]
fn name_present_in_host_resolves_to_some() {
    let forwarding = settings(&["PATH"], &[]);
    let host_env = host(&[("PATH", "/usr/bin")]);
    let resolved = resolve_from_host_values(&forwarding, &host_env);

    let expected = Some("/usr/bin".to_owned());
    assert_eq!(resolved.get(&name("PATH")).cloned(), Some(expected));
}
458
/// A listed name the host lacks is still present in the result, mapped to `None`.
#[test]
fn name_absent_from_host_resolves_to_none() {
    let forwarding = settings(&["NEVER_SET"], &[]);
    let host_env = host(&[("OTHER", "v")]);
    let resolved = resolve_from_host_values(&forwarding, &host_env);

    let entry = resolved.get(&name("NEVER_SET")).cloned();
    assert_eq!(entry, Some(None));
}
465
/// An empty host value (`Some("")`) must stay distinguishable from an unset
/// variable (`None`).
#[test]
fn empty_string_in_host_is_distinct_from_absent() {
    let forwarding = settings(&["X"], &[]);
    let key = name("X");

    let with_empty = resolve_from_host_values(&forwarding, &host(&[("X", "")]));
    let without = resolve_from_host_values(&forwarding, &host(&[]));

    assert_eq!(with_empty.get(&key).cloned(), Some(Some(String::new())));
    assert_eq!(without.get(&key).cloned(), Some(None));
    assert_ne!(
        with_empty.get(&key).unwrap(),
        without.get(&key).unwrap(),
        "empty value MUST NOT collapse with absence"
    );
}
482
/// Keys of the resolved map come out sorted regardless of the order in which
/// the names were listed.
#[test]
fn from_host_iteration_is_lexicographically_sorted() {
    let forwarding = settings(&["ZULU", "ALPHA", "BRAVO"], &[]);
    let host_env = host(&[("ALPHA", "a"), ("BRAVO", "b"), ("ZULU", "z")]);
    let resolved = resolve_from_host_values(&forwarding, &host_env);

    let mut observed: Vec<&str> = Vec::new();
    for key in resolved.keys() {
        observed.push(AsRef::<str>::as_ref(key.as_ref()));
    }
    assert_eq!(observed, vec!["ALPHA", "BRAVO", "ZULU"]);
}
498
/// The helper reports the host value even when an override exists for the
/// same name.
#[test]
fn overrides_are_not_consulted_by_this_helper() {
    let with_override = settings(&["X"], &[("X", "override-val")]);
    let host_env = host(&[("X", "host-val")]);
    let resolved = resolve_from_host_values(&with_override, &host_env);

    let expected = Some("host-val".to_owned());
    assert_eq!(resolved.get(&name("X")).cloned(), Some(expected));
}
512
/// Host variables that are not listed in `from_host` are left out of the result.
#[test]
fn unrelated_host_entries_do_not_appear() {
    let forwarding = settings(&["WANTED"], &[]);
    let host_env = host(&[("WANTED", "yes"), ("UNRELATED", "no")]);
    let resolved = resolve_from_host_values(&forwarding, &host_env);

    assert_eq!(resolved.len(), 1);
    let leaked = resolved.contains_key(&name("UNRELATED"));
    assert!(!leaked);
}
521
522 fn task_id(project: &str, task: &str) -> TaskId {
525 TaskId {
526 project: ProjectName::from_str(project).expect("project name"),
527 task: TaskName::from_str(task).expect("task name"),
528 }
529 }
530
531 fn edge(from: TaskId, to: TaskId, kind: EdgeKind) -> Edge {
532 Edge { from, to, kind }
533 }
534
535 fn streams(stdout: u8, stderr: u8) -> PredecessorStreamHashes {
536 PredecessorStreamHashes {
537 stdout_hash: [stdout; 32],
538 stderr_hash: [stderr; 32],
539 }
540 }
541
542 fn graph(nodes: &[TaskId], edges: &[Edge]) -> TaskGraph {
543 TaskGraph {
544 nodes: nodes.iter().cloned().collect::<BTreeSet<_>>(),
545 edges: edges.iter().cloned().collect::<BTreeSet<_>>(),
546 }
547 }
548
/// A graph with no edges yields no predecessors.
#[test]
fn no_predecessors_yields_empty_vec() {
    let lone = task_id("p", "alone");
    let no_streams: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
    let g = graph(std::slice::from_ref(&lone), &[]);

    let found = collect_hard_predecessor_streams(&g, &lone, &no_streams).unwrap();
    assert!(found.is_empty());
}
557
/// One hard edge into the target yields exactly that predecessor with its
/// recorded hashes.
#[test]
fn single_hard_predecessor_is_returned() {
    let upstream = task_id("p", "pre");
    let main = task_id("p", "main");
    let hard = edge(upstream.clone(), main.clone(), EdgeKind::Hard);
    let g = graph(&[upstream.clone(), main.clone()], &[hard]);

    let mut recorded = BTreeMap::new();
    recorded.insert(upstream.clone(), streams(0xAA, 0xBB));

    let found = collect_hard_predecessor_streams(&g, &main, &recorded).unwrap();
    assert_eq!(found.len(), 1);
    let (found_id, found_hashes) = found[0];
    assert_eq!(found_id, &upstream);
    assert_eq!(found_hashes, streams(0xAA, 0xBB));
}
574
/// `Soft` and `ProducerMatching` edges into the target are ignored, so no
/// stream entries are needed for their sources.
#[test]
fn soft_and_producer_matching_edges_do_not_count() {
    let main = task_id("p", "main");
    let via_soft = task_id("p", "soft");
    let via_pm = task_id("p", "pm");

    let nodes = [main.clone(), via_soft.clone(), via_pm.clone()];
    let edges = [
        edge(via_soft, main.clone(), EdgeKind::Soft),
        edge(via_pm, main.clone(), EdgeKind::ProducerMatching),
    ];
    let g = graph(&nodes, &edges);
    let no_streams: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();

    let found = collect_hard_predecessor_streams(&g, &main, &no_streams).unwrap();
    assert!(
        found.is_empty(),
        "only Hard edges contribute to CACHE-007; got {} predecessors",
        found.len(),
    );
}
595
/// A hard edge that starts at the target (rather than ending at it) does not
/// make the other endpoint a predecessor.
#[test]
fn outgoing_hard_edges_are_not_predecessors() {
    let main = task_id("p", "main");
    let downstream = task_id("p", "downstream");
    let nodes = [main.clone(), downstream.clone()];
    let outgoing = edge(main.clone(), downstream, EdgeKind::Hard);
    let g = graph(&nodes, &[outgoing]);
    let no_streams: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();

    let found = collect_hard_predecessor_streams(&g, &main, &no_streams).unwrap();
    assert!(found.is_empty());
}
608
/// Every hard-edge predecessor of the target is reported.
#[test]
fn multiple_hard_predecessors_are_all_returned() {
    let main = task_id("p", "main");
    let alpha = task_id("alpha", "build");
    let beta = task_id("beta", "build");

    let nodes = [main.clone(), alpha.clone(), beta.clone()];
    let edges = [
        edge(alpha.clone(), main.clone(), EdgeKind::Hard),
        edge(beta.clone(), main.clone(), EdgeKind::Hard),
    ];
    let g = graph(&nodes, &edges);

    let mut recorded = BTreeMap::new();
    recorded.insert(alpha.clone(), streams(0x01, 0x02));
    recorded.insert(beta.clone(), streams(0x03, 0x04));

    let found = collect_hard_predecessor_streams(&g, &main, &recorded).unwrap();
    assert_eq!(found.len(), 2);

    let mut ids: BTreeSet<TaskId> = BTreeSet::new();
    for (id, _) in &found {
        ids.insert((*id).clone());
    }
    assert!(ids.contains(&alpha));
    assert!(ids.contains(&beta));
}
631
/// A hard-edge predecessor without recorded hashes produces
/// `PredecessorStreamsMissing` naming that predecessor.
#[test]
fn missing_stream_hashes_yield_predecessor_streams_missing() {
    let upstream = task_id("p", "pre");
    let main = task_id("p", "main");
    let hard = edge(upstream.clone(), main.clone(), EdgeKind::Hard);
    let g = graph(&[upstream.clone(), main.clone()], &[hard]);
    let no_streams: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();

    let outcome = collect_hard_predecessor_streams(&g, &main, &no_streams);
    match outcome {
        Ok(v) => panic!("expected error, got Ok with {} entries", v.len()),
        Err(BuildKeyError::PredecessorStreamsMissing { predecessor }) => {
            assert_eq!(predecessor, upstream);
        }
        Err(other) => panic!("expected PredecessorStreamsMissing, got {other:?}"),
    }
}
650
/// With both a hard and a soft edge into the target, only the hard edge's
/// source is reported.
#[test]
fn mixed_edge_kinds_into_target_select_only_hard() {
    let main = task_id("p", "main");
    let via_hard = task_id("p", "hard");
    let via_soft = task_id("p", "soft");

    let nodes = [main.clone(), via_hard.clone(), via_soft.clone()];
    let edges = [
        edge(via_hard.clone(), main.clone(), EdgeKind::Hard),
        edge(via_soft, main.clone(), EdgeKind::Soft),
    ];
    let g = graph(&nodes, &edges);

    let mut recorded = BTreeMap::new();
    recorded.insert(via_hard.clone(), streams(0x10, 0x20));

    let found = collect_hard_predecessor_streams(&g, &main, &recorded).unwrap();
    assert_eq!(found.len(), 1);
    let (only_id, _) = found[0];
    assert_eq!(only_id, &via_hard);
}
670
671 mod input_resolution {
674 use std::collections::{BTreeMap, BTreeSet};
675 use std::path::PathBuf;
676
677 use haz_cache::Hasher;
678 use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
679 use haz_domain::project::Project;
680 use haz_domain::settings::WorkspaceSettings;
681 use haz_domain::settings::cache::HashAlgo;
682 use haz_domain::workspace::Workspace;
683 use haz_vfs::FsError;
684 use haz_vfs_testing::MemFilesystem;
685
686 use super::super::BuildKeyError;
687 use super::super::{OwnedInputFile, resolve_input_files};
688
689 const WORKSPACE_HOST: &str = "/ws";
690 const PROJECT_HOST: &str = "/ws/proj";
691
692 fn nested_project() -> Project {
693 Project {
694 name: haz_domain::name::ProjectName::try_new("proj").unwrap(),
695 root: ProjectRoot::Nested(
696 CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
697 ),
698 tags: BTreeSet::new(),
699 tasks: BTreeMap::new(),
700 }
701 }
702
703 fn implicit_project() -> Project {
704 Project {
705 name: haz_domain::name::ProjectName::try_new("root").unwrap(),
706 root: ProjectRoot::WorkspaceRoot,
707 tags: BTreeSet::new(),
708 tasks: BTreeMap::new(),
709 }
710 }
711
712 fn workspace_with(project: &Project) -> Workspace {
713 let mut projects = BTreeMap::new();
714 projects.insert(project.name.clone(), project.clone());
715 Workspace {
716 root: WorkspaceRootPath::try_new(PathBuf::from(WORKSPACE_HOST)).unwrap(),
717 projects,
718 overlays: BTreeMap::new(),
719 settings: WorkspaceSettings::default(),
720 }
721 }
722
723 fn hash_of(algo: HashAlgo, bytes: &[u8]) -> [u8; 32] {
724 let mut h = Hasher::new(algo);
725 h.update(bytes);
726 h.finalize()
727 }
728
729 fn paths_of(files: &[OwnedInputFile]) -> BTreeSet<String> {
730 files
731 .iter()
732 .map(|f| f.workspace_absolute_path.clone())
733 .collect()
734 }
735
/// A project-relative literal that exists resolves to one entry with the
/// workspace-absolute path and the hash of the file's bytes.
#[test]
fn literal_hit_returns_one_input_with_correct_hash() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("file.txt").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(PROJECT_HOST).unwrap();
    fs.add_file(format!("{PROJECT_HOST}/file.txt"), b"hello".to_vec())
        .unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(resolved.len(), 1);
    let only = &resolved[0];
    assert_eq!(only.workspace_absolute_path, "/proj/file.txt");
    assert_eq!(only.content_hash, hash_of(HashAlgo::Blake3, b"hello"));
}
753
/// A literal written as a workspace-absolute path (`/other/data.bin`) is
/// looked up under the workspace host root, not under the project directory.
#[test]
fn literal_workspace_absolute_resolves_under_workspace_root() {
    let payload: &[u8] = b"\xDE\xAD\xBE\xEF";

    let mut fs = MemFilesystem::new();
    fs.add_dir("/ws/other").unwrap();
    fs.add_file("/ws/other/data.bin", payload.to_vec()).unwrap();
    fs.add_dir(PROJECT_HOST).unwrap();

    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("/other/data.bin").unwrap()];

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(resolved.len(), 1);
    let only = &resolved[0];
    assert_eq!(only.workspace_absolute_path, "/other/data.bin");
    assert_eq!(only.content_hash, hash_of(HashAlgo::Blake3, payload));
}
780
/// A literal that does not exist fails with `InputPatternResolutionFailed`
/// carrying the literal's host path and a `NotFound` source.
#[test]
fn literal_missing_file_surfaces_pattern_resolution_failure() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("absent.txt").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(PROJECT_HOST).unwrap();

    let outcome = resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3);
    match outcome {
        Err(BuildKeyError::InputPatternResolutionFailed { root, source }) => {
            assert_eq!(root, PathBuf::from("/ws/proj/absent.txt"));
            assert!(
                matches!(source, FsError::NotFound { .. }),
                "expected NotFound source, got {source:?}",
            );
        }
        other => panic!("expected InputPatternResolutionFailed, got {other:?}"),
    }
}
801
/// A literal that names a directory fails with `InputNotARegularFile`
/// carrying the directory's host path.
#[test]
fn literal_pointing_at_directory_surfaces_input_not_a_regular_file() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("subdir").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(format!("{PROJECT_HOST}/subdir")).unwrap();

    let outcome = resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3);
    match outcome {
        Err(BuildKeyError::InputNotARegularFile { path }) => {
            assert_eq!(path, PathBuf::from("/ws/proj/subdir"));
        }
        other => panic!("expected InputNotARegularFile, got {other:?}"),
    }
}
818
/// `*.rs` picks up both `.rs` files in the project directory, skips the
/// `.txt` file, and hashes each match with its own contents.
#[test]
fn glob_multi_match_collects_every_matching_file() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("*.rs").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(PROJECT_HOST).unwrap();
    fs.add_file(format!("{PROJECT_HOST}/a.rs"), b"a".to_vec())
        .unwrap();
    fs.add_file(format!("{PROJECT_HOST}/b.rs"), b"b".to_vec())
        .unwrap();
    fs.add_file(format!("{PROJECT_HOST}/keep.txt"), b"ignored".to_vec())
        .unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(resolved.len(), 2);

    let expected_paths = BTreeSet::from(["/proj/a.rs".to_owned(), "/proj/b.rs".to_owned()]);
    assert_eq!(paths_of(&resolved), expected_paths);

    for file in &resolved {
        let contents: &[u8] = match file.workspace_absolute_path.as_str() {
            "/proj/a.rs" => b"a",
            _ => b"b",
        };
        assert_eq!(file.content_hash, hash_of(HashAlgo::Blake3, contents));
    }
}
851
/// A glob that matches nothing succeeds with an empty result.
#[test]
fn glob_no_match_returns_empty_contribution() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("*.rs").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(PROJECT_HOST).unwrap();
    fs.add_file(format!("{PROJECT_HOST}/only.txt"), b"x".to_vec())
        .unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert!(resolved.is_empty());
}
867
/// `src/**/*.rs` matches `.rs` files directly under `src` and in nested
/// directories, but not a `.rs` file outside `src`.
#[test]
fn glob_nested_double_star_recurses_into_subdirectories() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("src/**/*.rs").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(format!("{PROJECT_HOST}/src")).unwrap();
    fs.add_dir(format!("{PROJECT_HOST}/src/inner")).unwrap();
    let top = format!("{PROJECT_HOST}/src/top.rs");
    fs.add_file(top, b"top contents".to_vec()).unwrap();
    let deep = format!("{PROJECT_HOST}/src/inner/deep.rs");
    fs.add_file(deep, b"deep contents".to_vec()).unwrap();
    fs.add_file(format!("{PROJECT_HOST}/other.rs"), b"other".to_vec())
        .unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(resolved.len(), 2);

    let expected_paths = BTreeSet::from([
        "/proj/src/top.rs".to_owned(),
        "/proj/src/inner/deep.rs".to_owned(),
    ]);
    assert_eq!(paths_of(&resolved), expected_paths);
}
902
/// A symlink to a file that matches the glob is reported under the link's own
/// path, hashed with the target's bytes, alongside the real file.
#[test]
fn glob_symlink_to_file_records_link_path_with_target_bytes_hash() {
    let project = nested_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("*.txt").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(PROJECT_HOST).unwrap();
    fs.add_file(format!("{PROJECT_HOST}/real.txt"), b"real bytes".to_vec())
        .unwrap();
    let link = format!("{PROJECT_HOST}/link.txt");
    let link_target = format!("{PROJECT_HOST}/real.txt");
    fs.add_symlink(link, link_target).unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(
        resolved.len(),
        2,
        "both the real file and the symlink to it are distinct contributions",
    );

    let expected_paths =
        BTreeSet::from(["/proj/real.txt".to_owned(), "/proj/link.txt".to_owned()]);
    assert_eq!(paths_of(&resolved), expected_paths);

    let target_hash = hash_of(HashAlgo::Blake3, b"real bytes");
    for file in &resolved {
        assert_eq!(file.content_hash, target_hash);
    }
}
937
/// For a project rooted at the workspace root, a project-relative literal
/// resolves directly under the workspace root (`/at_root.txt`).
#[test]
fn implicit_mode_project_relative_literal_is_workspace_absolute() {
    let project = implicit_project();
    let workspace = workspace_with(&project);
    let specs = vec![InputSpec::parse("at_root.txt").unwrap()];

    let mut fs = MemFilesystem::new();
    fs.add_dir(WORKSPACE_HOST).unwrap();
    fs.add_file(format!("{WORKSPACE_HOST}/at_root.txt"), b"r".to_vec())
        .unwrap();

    let resolved =
        resolve_input_files(&fs, &workspace, &project, &specs, HashAlgo::Blake3).unwrap();
    assert_eq!(resolved.len(), 1);
    let only = &resolved[0];
    assert_eq!(only.workspace_absolute_path, "/at_root.txt");
    assert_eq!(only.content_hash, hash_of(HashAlgo::Blake3, b"r"));
}
958 }
959
960 mod e2e {
963 use std::collections::{BTreeMap, BTreeSet};
964 use std::path::{Path, PathBuf};
965
966 use nonempty::NonEmpty;
967
968 use haz_cache::CacheKey;
969 use haz_dag::edge::{Edge, EdgeKind};
970 use haz_dag::graph::TaskGraph;
971 use haz_domain::action::TaskAction;
972 use haz_domain::env::{EnvSettings, EnvVarName};
973 use haz_domain::mutex::{Mutex, MutexMode, MutexScope};
974 use haz_domain::name::{MutexName, ProjectName, TaskName};
975 use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
976 use haz_domain::project::Project;
977 use haz_domain::settings::WorkspaceSettings;
978 use haz_domain::settings::cache::HashAlgo;
979 use haz_domain::task::Task;
980 use haz_domain::task_id::TaskId;
981 use haz_domain::workspace::Workspace;
982 use haz_vfs::WritableFilesystem;
983 use haz_vfs_testing::MemFilesystem;
984
985 use super::super::{BuildKeyError, PredecessorStreamHashes, build_cache_key};
986
987 struct E2eState {
988 fs: MemFilesystem,
989 workspace: Workspace,
990 graph: TaskGraph,
991 task_id: TaskId,
992 host_env: BTreeMap<EnvVarName, String>,
993 predecessor_streams: BTreeMap<TaskId, PredecessorStreamHashes>,
994 }
995
996 impl E2eState {
997 fn key(&self) -> CacheKey {
998 build_cache_key(
999 &self.fs,
1000 &self.workspace,
1001 &self.graph,
1002 &self.task_id,
1003 &self.host_env,
1004 &self.predecessor_streams,
1005 HashAlgo::Blake3,
1006 )
1007 .expect("baseline state must yield a valid key")
1008 }
1009 }
1010
1011 fn env_var(s: &str) -> EnvVarName {
1012 EnvVarName::try_new(s).unwrap()
1013 }
1014
1015 fn project_name(s: &str) -> ProjectName {
1016 ProjectName::try_new(s).unwrap()
1017 }
1018
1019 fn task_name(s: &str) -> TaskName {
1020 TaskName::try_new(s).unwrap()
1021 }
1022
1023 fn baseline() -> E2eState {
1030 let mut fs = MemFilesystem::new();
1031 fs.add_dir("/ws/proj").unwrap();
1032 fs.add_file("/ws/proj/file.txt", b"hello".to_vec()).unwrap();
1033
1034 let task = Task {
1035 name: task_name("build"),
1036 action: TaskAction::Command(NonEmpty::from_vec(vec!["true".to_owned()]).unwrap()),
1037 inputs: vec![InputSpec::parse("file.txt").unwrap()],
1038 outputs: vec![],
1039 deps: vec![],
1040 weak_deps: vec![],
1041 mutex: None,
1042 env: EnvSettings {
1043 from_host: BTreeSet::from([env_var("PATH")]),
1044 overrides: BTreeMap::new(),
1045 },
1046 };
1047
1048 let project = Project {
1049 name: project_name("proj"),
1050 root: ProjectRoot::Nested(
1051 CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
1052 ),
1053 tags: BTreeSet::new(),
1054 tasks: BTreeMap::from([(task.name.clone(), task)]),
1055 };
1056
1057 let task_id = TaskId {
1058 project: project_name("proj"),
1059 task: task_name("build"),
1060 };
1061 let pred = TaskId {
1062 project: project_name("lib"),
1063 task: task_name("compile"),
1064 };
1065
1066 let workspace = Workspace {
1067 root: WorkspaceRootPath::try_new(PathBuf::from("/ws")).unwrap(),
1068 projects: BTreeMap::from([(project.name.clone(), project)]),
1069 overlays: BTreeMap::new(),
1070 settings: WorkspaceSettings::default(),
1071 };
1072
1073 let graph = TaskGraph {
1074 nodes: BTreeSet::from([task_id.clone(), pred.clone()]),
1075 edges: BTreeSet::from([Edge {
1076 from: pred.clone(),
1077 to: task_id.clone(),
1078 kind: EdgeKind::Hard,
1079 }]),
1080 };
1081
1082 let predecessor_streams = BTreeMap::from([(
1083 pred,
1084 PredecessorStreamHashes {
1085 stdout_hash: [0xAA; 32],
1086 stderr_hash: [0xBB; 32],
1087 },
1088 )]);
1089
1090 let host_env = BTreeMap::from([(env_var("PATH"), "/usr/bin".to_owned())]);
1091
1092 E2eState {
1093 fs,
1094 workspace,
1095 graph,
1096 task_id,
1097 host_env,
1098 predecessor_streams,
1099 }
1100 }
1101
1102 fn mutate_task(state: &mut E2eState, f: impl FnOnce(&mut Task)) {
1103 let proj = state
1104 .workspace
1105 .projects
1106 .get_mut(&state.task_id.project)
1107 .expect("baseline contains the target project");
1108 let t = proj
1109 .tasks
1110 .get_mut(&state.task_id.task)
1111 .expect("baseline contains the target task");
1112 f(t);
1113 }
1114
/// Two independently built baseline states produce the same key.
#[test]
fn identical_inputs_yield_identical_keys() {
    let first = baseline().key();
    let second = baseline().key();
    assert_eq!(first, second);
}
1121
/// Changing the task's command changes the key.
#[test]
fn task_action_change_changes_key() {
    let reference = baseline().key();

    let replacement = TaskAction::Command(NonEmpty::from_vec(vec!["false".to_owned()]).unwrap());
    let mut changed = baseline();
    mutate_task(&mut changed, |t| t.action = replacement);

    assert_ne!(reference, changed.key());
}
1132
/// Rewriting the contents of the task's input file changes the key.
#[test]
fn dag_018_input_file_content_change_changes_key() {
    let reference = baseline().key();

    let changed = baseline();
    let input = Path::new("/ws/proj/file.txt");
    changed.fs.write_file(input, b"world").unwrap();

    assert_ne!(reference, changed.key());
}
1146
/// Adding a `Soft` edge into the target (with no recorded streams for its
/// source) leaves the key unchanged.
#[test]
fn dag_018_adding_a_soft_edge_predecessor_does_not_change_the_key() {
    let reference = baseline().key();

    let mut changed = baseline();
    let soft_source = TaskId {
        project: project_name("soft_producer"),
        task: task_name("emit"),
    };
    let soft_edge = Edge {
        from: soft_source.clone(),
        to: changed.task_id.clone(),
        kind: EdgeKind::Soft,
    };
    changed.graph.nodes.insert(soft_source);
    changed.graph.edges.insert(soft_edge);

    assert_eq!(
        reference,
        changed.key(),
        "DAG-018: soft-edge predecessors MUST NOT contribute to the key",
    );
}
1173
/// Adding a `ProducerMatching` edge into the target (with no recorded streams
/// for its source) leaves the key unchanged.
#[test]
fn dag_018_adding_a_producer_matching_edge_does_not_change_the_key() {
    let reference = baseline().key();

    let mut changed = baseline();
    let pm_source = TaskId {
        project: project_name("pm_producer"),
        task: task_name("emit"),
    };
    let pm_edge = Edge {
        from: pm_source.clone(),
        to: changed.task_id.clone(),
        kind: EdgeKind::ProducerMatching,
    };
    changed.graph.nodes.insert(pm_source);
    changed.graph.edges.insert(pm_edge);

    assert_eq!(
        reference,
        changed.key(),
        "DAG-018: producer-matching edges MUST NOT contribute to the key",
    );
}
1197
/// Changing the recorded stdout hash of the hard-edge predecessor changes
/// the key.
#[test]
fn dag_017_predecessor_stream_hash_change_changes_key() {
    let reference = baseline().key();

    let upstream = TaskId {
        project: project_name("lib"),
        task: task_name("compile"),
    };
    let different_stdout = PredecessorStreamHashes {
        stdout_hash: [0xCC; 32],
        stderr_hash: [0xBB; 32],
    };
    let mut changed = baseline();
    changed.predecessor_streams.insert(upstream, different_stdout);

    assert_ne!(reference, changed.key());
}
1215
/// Changing the host value of a forwarded variable (`PATH`) changes the key.
#[test]
fn env_value_change_changes_key() {
    let reference = baseline().key();

    let mut changed = baseline();
    let new_path = "/usr/local/bin".to_owned();
    changed.host_env.insert(env_var("PATH"), new_path);

    assert_ne!(reference, changed.key());
}
1225
/// A host variable the task does not forward (`HOME`) has no effect on the key.
#[test]
fn unrelated_host_env_does_not_affect_key() {
    let reference = baseline().key();

    let mut changed = baseline();
    let home = "/home/user".to_owned();
    changed.host_env.insert(env_var("HOME"), home);

    assert_eq!(reference, changed.key());
}
1237
/// Adding an override changes the key, and changing that override's value
/// changes it again.
#[test]
fn override_change_changes_key() {
    let reference = baseline().key();
    let mut changed = baseline();

    let set_override = |state: &mut E2eState, value: &str| {
        let value = value.to_owned();
        mutate_task(state, |t| {
            t.env.overrides.insert(env_var("HAZ_ENV"), value);
        });
    };

    set_override(&mut changed, "alpha");
    let with_alpha = changed.key();
    assert_ne!(reference, with_alpha);

    set_override(&mut changed, "beta");
    assert_ne!(with_alpha, changed.key());
}
1259
/// Attaching a mutex to a task that had none leaves the key unchanged.
#[test]
fn mutex_008_adding_a_mutex_does_not_change_the_key() {
    let reference = baseline().key();

    let added = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Exclusive,
    };
    let mut changed = baseline();
    mutate_task(&mut changed, |t| t.mutex = Some(added));

    assert_eq!(reference, changed.key());
}
1278
/// Two states that differ only in the mutex scope (`Workspace` vs `Project`)
/// produce the same key.
#[test]
fn mutex_008_changing_mutex_scope_does_not_change_the_key() {
    let workspace_mutex = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Exclusive,
    };
    let project_mutex = Mutex {
        scope: MutexScope::Project,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Exclusive,
    };

    let mut workspace_scoped = baseline();
    mutate_task(&mut workspace_scoped, |t| t.mutex = Some(workspace_mutex));
    let mut project_scoped = baseline();
    mutate_task(&mut project_scoped, |t| t.mutex = Some(project_mutex));

    assert_eq!(workspace_scoped.key(), project_scoped.key());
}
1301
/// Two states that differ only in the mutex name (`db` vs `gpu`) produce the
/// same key.
#[test]
fn mutex_008_changing_mutex_name_does_not_change_the_key() {
    let db_mutex = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Exclusive,
    };
    let gpu_mutex = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("gpu").unwrap(),
        mode: MutexMode::Exclusive,
    };

    let mut named_db = baseline();
    mutate_task(&mut named_db, |t| t.mutex = Some(db_mutex));
    let mut named_gpu = baseline();
    mutate_task(&mut named_gpu, |t| t.mutex = Some(gpu_mutex));

    assert_eq!(named_db.key(), named_gpu.key());
}
1323
/// Two states that differ only in the mutex mode (`Exclusive` vs `Shared`)
/// produce the same key.
#[test]
fn mutex_008_changing_mutex_mode_does_not_change_the_key() {
    let exclusive_mutex = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Exclusive,
    };
    let shared_mutex = Mutex {
        scope: MutexScope::Workspace,
        name: MutexName::try_new("db").unwrap(),
        mode: MutexMode::Shared,
    };

    let mut exclusive = baseline();
    mutate_task(&mut exclusive, |t| t.mutex = Some(exclusive_mutex));
    let mut shared = baseline();
    mutate_task(&mut shared, |t| t.mutex = Some(shared_mutex));

    assert_eq!(exclusive.key(), shared.key());
}
1345
/// Asking for a task whose project is not in the workspace fails with
/// `TaskNotInWorkspace` carrying the requested id.
#[test]
fn task_not_in_workspace_surfaces_error() {
    let missing = TaskId {
        project: project_name("absent"),
        task: task_name("build"),
    };
    let mut state = baseline();
    state.task_id = missing.clone();

    let outcome = build_cache_key(
        &state.fs,
        &state.workspace,
        &state.graph,
        &state.task_id,
        &state.host_env,
        &state.predecessor_streams,
        HashAlgo::Blake3,
    );
    match outcome {
        Err(BuildKeyError::TaskNotInWorkspace { task }) => assert_eq!(task, missing),
        other => panic!("expected TaskNotInWorkspace, got {other:?}"),
    }
}
1368 }
1369}