1use std::collections::BTreeMap;
17use std::path::{Path, PathBuf};
18
19use snafu::{ResultExt, Snafu};
20
21use haz_cache::{
22 CacheKey, CacheKeyBuilder, CacheKeyInputs, EnvContribution, Hasher, InputFile,
23 PredecessorStreams,
24};
25use haz_dag::edge::EdgeKind;
26use haz_dag::graph::TaskGraph;
27use haz_domain::env::{EnvSettings, EnvVarName};
28use haz_domain::path::{HazPath, InputSpec, PathPattern, ProjectRoot};
29use haz_domain::project::Project;
30use haz_domain::settings::cache::HashAlgo;
31use haz_domain::task_id::TaskId;
32use haz_domain::workspace::Workspace;
33use haz_vfs::{EntryKind, Filesystem, FsError};
34
35use crate::pattern_walk::{
36 GlobMatchAction, GlobWalk, glob_walk_origin, host_path_from_segments,
37 literal_workspace_segments, workspace_absolute_string_from_segments,
38};
39
/// Recorded digests of a predecessor task's two output streams.
///
/// Both digests are fixed-width 32-byte arrays. [`build_cache_key`] looks one
/// of these up for every hard-edge predecessor of the task being keyed.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PredecessorStreamHashes {
    /// Digest recorded for the predecessor's stdout stream.
    pub stdout_hash: [u8; 32],
    /// Digest recorded for the predecessor's stderr stream.
    pub stderr_hash: [u8; 32],
}
56
57#[derive(Debug, Snafu)]
59#[snafu(visibility(pub(crate)))]
60pub enum BuildKeyError {
61 #[snafu(display("task not in workspace: {task}"))]
65 TaskNotInWorkspace {
66 task: TaskId,
68 },
69
70 #[snafu(display("missing recorded stream hashes for hard-edge predecessor: {predecessor}"))]
76 PredecessorStreamsMissing {
77 predecessor: TaskId,
79 },
80
81 #[snafu(display(
85 "failed to resolve input patterns under: {}: {source}",
86 root.display()
87 ))]
88 InputPatternResolutionFailed {
89 root: PathBuf,
93 source: FsError,
95 },
96
97 #[snafu(display(
99 "failed to read input file at: {}: {source}",
100 path.display()
101 ))]
102 InputContentReadFailed {
103 path: PathBuf,
105 source: FsError,
107 },
108
109 #[snafu(display("input path is not a regular file: {}", path.display()))]
115 InputNotARegularFile {
116 path: PathBuf,
118 },
119}
120
121pub fn build_cache_key<F: Filesystem>(
155 fs: &F,
156 workspace: &Workspace,
157 graph: &TaskGraph,
158 task: &TaskId,
159 host_env: &BTreeMap<EnvVarName, String>,
160 predecessor_streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
161 algo: HashAlgo,
162) -> Result<CacheKey, BuildKeyError> {
163 let project = workspace
164 .projects
165 .get(&task.project)
166 .ok_or_else(|| BuildKeyError::TaskNotInWorkspace { task: task.clone() })?;
167 let task_def = project
168 .tasks
169 .get(&task.task)
170 .ok_or_else(|| BuildKeyError::TaskNotInWorkspace { task: task.clone() })?;
171
172 let from_host_resolved = resolve_from_host_values(&task_def.env, host_env);
173 let predecessors_owned = collect_hard_predecessor_streams(graph, task, predecessor_streams)?;
174 let input_files_owned = resolve_input_files(fs, workspace, project, &task_def.inputs, algo)?;
175
176 let input_files: Vec<InputFile<'_>> = input_files_owned
177 .iter()
178 .map(|f| InputFile {
179 workspace_absolute_path: &f.workspace_absolute_path,
180 content_hash: f.content_hash,
181 })
182 .collect();
183
184 let hard_predecessors: Vec<PredecessorStreams<'_>> = predecessors_owned
185 .iter()
186 .map(|(id, hashes)| PredecessorStreams {
187 project: &id.project,
188 task: &id.task,
189 stdout_hash: hashes.stdout_hash,
190 stderr_hash: hashes.stderr_hash,
191 })
192 .collect();
193
194 let env = EnvContribution {
195 from_host: &from_host_resolved,
196 overrides: &task_def.env.overrides,
197 };
198
199 let inputs = CacheKeyInputs {
200 action: &task_def.action,
201 input_files: &input_files,
202 hard_predecessors: &hard_predecessors,
203 env: &env,
204 };
205
206 Ok(CacheKeyBuilder::new(algo).finish(&inputs))
207}
208
209fn collect_hard_predecessor_streams<'g>(
222 graph: &'g TaskGraph,
223 target: &TaskId,
224 streams: &BTreeMap<TaskId, PredecessorStreamHashes>,
225) -> Result<Vec<(&'g TaskId, PredecessorStreamHashes)>, BuildKeyError> {
226 graph
227 .edges
228 .iter()
229 .filter(|e| e.kind == EdgeKind::Hard && &e.to == target)
230 .map(|e| {
231 streams
232 .get(&e.from)
233 .copied()
234 .map(|hashes| (&e.from, hashes))
235 .ok_or_else(|| BuildKeyError::PredecessorStreamsMissing {
236 predecessor: e.from.clone(),
237 })
238 })
239 .collect()
240}
241
242fn resolve_from_host_values(
254 env: &EnvSettings,
255 host_env: &BTreeMap<EnvVarName, String>,
256) -> BTreeMap<EnvVarName, Option<String>> {
257 env.from_host
258 .iter()
259 .map(|name| (name.clone(), host_env.get(name).cloned()))
260 .collect()
261}
262
/// One resolved input file: its workspace-absolute path and content digest.
///
/// This is the owned form produced by [`resolve_input_files`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OwnedInputFile {
    /// Path of the file expressed relative to the workspace root (e.g. `/proj/file.txt`).
    pub workspace_absolute_path: String,
    /// 32-byte digest of the file's bytes.
    pub content_hash: [u8; 32],
}
280
281pub fn resolve_input_files<F: Filesystem>(
304 fs: &F,
305 workspace: &Workspace,
306 project: &Project,
307 inputs: &[InputSpec],
308 algo: HashAlgo,
309) -> Result<Vec<OwnedInputFile>, BuildKeyError> {
310 let workspace_host = workspace.root.as_path();
311 let action = InputAction { algo };
312 let mut out = Vec::new();
313
314 for spec in inputs {
315 match spec.pattern() {
316 PathPattern::Literal(haz_path) => {
317 resolve_literal(
318 fs,
319 workspace_host,
320 &project.root,
321 haz_path,
322 &action,
323 &mut out,
324 )?;
325 }
326 PathPattern::Glob(glob_pattern) => {
327 let glob = glob_pattern.compile();
328 let matcher = glob.compile_matcher();
329 let (walk_host, workspace_prefix, candidate_prefix) =
330 glob_walk_origin(workspace_host, &project.root, glob_pattern.anchor());
331 let walker = GlobWalk {
332 fs,
333 matcher: &matcher,
334 candidate_prefix,
335 workspace_prefix,
336 action: &action,
337 };
338 let mut walk_rel: Vec<String> = Vec::new();
339 walker.walk(&walk_host, &mut walk_rel, &mut out)?;
340 }
341 }
342 }
343
344 Ok(out)
345}
346
347fn resolve_literal<F: Filesystem>(
348 fs: &F,
349 workspace_host: &Path,
350 project_root: &ProjectRoot,
351 haz_path: &HazPath,
352 action: &InputAction,
353 out: &mut Vec<OwnedInputFile>,
354) -> Result<(), BuildKeyError> {
355 let ws_segments = literal_workspace_segments(haz_path, project_root);
356 let host = host_path_from_segments(workspace_host, &ws_segments);
357
358 let meta = fs
359 .metadata(&host)
360 .context(InputPatternResolutionFailedSnafu { root: host.clone() })?;
361 if meta.kind != EntryKind::File {
362 return Err(BuildKeyError::InputNotARegularFile { path: host });
363 }
364
365 let workspace_absolute_path = workspace_absolute_string_from_segments(&ws_segments);
366 action.on_match(fs, &host, workspace_absolute_path, out)
367}
368
369struct InputAction {
374 algo: HashAlgo,
375}
376
377impl<F: Filesystem> GlobMatchAction<F> for InputAction {
378 type Output = OwnedInputFile;
379 type Error = BuildKeyError;
380
381 fn map_walk_error(&self, root: PathBuf, source: FsError) -> BuildKeyError {
382 BuildKeyError::InputPatternResolutionFailed { root, source }
383 }
384
385 fn on_match(
386 &self,
387 fs: &F,
388 host_path: &Path,
389 workspace_absolute_path: String,
390 out: &mut Vec<OwnedInputFile>,
391 ) -> Result<(), BuildKeyError> {
392 let bytes = fs.read(host_path).context(InputContentReadFailedSnafu {
393 path: host_path.to_path_buf(),
394 })?;
395 let mut hasher = Hasher::new(self.algo);
396 hasher.update(&bytes);
397 out.push(OwnedInputFile {
398 workspace_absolute_path,
399 content_hash: hasher.finalize(),
400 });
401 Ok(())
402 }
403}
404
405#[cfg(test)]
406mod tests {
407 use std::collections::{BTreeMap, BTreeSet};
408 use std::str::FromStr;
409
410 use haz_dag::edge::{Edge, EdgeKind};
411 use haz_dag::graph::TaskGraph;
412 use haz_domain::env::{EnvSettings, EnvVarName};
413 use haz_domain::name::{ProjectName, TaskName};
414 use haz_domain::task_id::TaskId;
415
416 use super::{
417 BuildKeyError, PredecessorStreamHashes, collect_hard_predecessor_streams,
418 resolve_from_host_values,
419 };
420
421 fn name(s: &str) -> EnvVarName {
422 EnvVarName::try_new(s).expect("valid env var name")
423 }
424
425 fn settings(from_host: &[&str], overrides: &[(&str, &str)]) -> EnvSettings {
426 EnvSettings {
427 from_host: from_host.iter().map(|s| name(s)).collect::<BTreeSet<_>>(),
428 overrides: overrides
429 .iter()
430 .map(|(k, v)| (name(k), (*v).to_owned()))
431 .collect::<BTreeMap<_, _>>(),
432 }
433 }
434
435 fn host(entries: &[(&str, &str)]) -> BTreeMap<EnvVarName, String> {
436 entries
437 .iter()
438 .map(|(k, v)| (name(k), (*v).to_owned()))
439 .collect()
440 }
441
442 #[test]
443 fn empty_from_host_yields_empty_map() {
444 let env = settings(&[], &[]);
445 let result = resolve_from_host_values(&env, &host(&[]));
446 assert!(result.is_empty());
447 }
448
449 #[test]
450 fn name_present_in_host_resolves_to_some() {
451 let env = settings(&["PATH"], &[]);
452 let result = resolve_from_host_values(&env, &host(&[("PATH", "/usr/bin")]));
453 assert_eq!(
454 result.get(&name("PATH")).cloned(),
455 Some(Some("/usr/bin".to_owned()))
456 );
457 }
458
459 #[test]
460 fn name_absent_from_host_resolves_to_none() {
461 let env = settings(&["NEVER_SET"], &[]);
462 let result = resolve_from_host_values(&env, &host(&[("OTHER", "v")]));
463 assert_eq!(result.get(&name("NEVER_SET")).cloned(), Some(None));
464 }
465
466 #[test]
467 fn empty_string_in_host_is_distinct_from_absent() {
468 let env = settings(&["X"], &[]);
469 let result_empty = resolve_from_host_values(&env, &host(&[("X", "")]));
470 let result_absent = resolve_from_host_values(&env, &host(&[]));
471 assert_eq!(
472 result_empty.get(&name("X")).cloned(),
473 Some(Some(String::new()))
474 );
475 assert_eq!(result_absent.get(&name("X")).cloned(), Some(None));
476 assert_ne!(
477 result_empty.get(&name("X")).unwrap(),
478 result_absent.get(&name("X")).unwrap(),
479 "empty value MUST NOT collapse with absence"
480 );
481 }
482
483 #[test]
484 fn from_host_iteration_is_lexicographically_sorted() {
485 let env = settings(&["ZULU", "ALPHA", "BRAVO"], &[]);
490 let h = host(&[("ALPHA", "a"), ("BRAVO", "b"), ("ZULU", "z")]);
491 let result = resolve_from_host_values(&env, &h);
492 let names: Vec<&str> = result
493 .keys()
494 .map(|n| AsRef::<str>::as_ref(n.as_ref()))
495 .collect();
496 assert_eq!(names, vec!["ALPHA", "BRAVO", "ZULU"]);
497 }
498
499 #[test]
500 fn overrides_are_not_consulted_by_this_helper() {
501 let env = settings(&["X"], &[("X", "override-val")]);
506 let result = resolve_from_host_values(&env, &host(&[("X", "host-val")]));
507 assert_eq!(
508 result.get(&name("X")).cloned(),
509 Some(Some("host-val".to_owned()))
510 );
511 }
512
513 #[test]
514 fn unrelated_host_entries_do_not_appear() {
515 let env = settings(&["WANTED"], &[]);
516 let h = host(&[("WANTED", "yes"), ("UNRELATED", "no")]);
517 let result = resolve_from_host_values(&env, &h);
518 assert_eq!(result.len(), 1);
519 assert!(!result.contains_key(&name("UNRELATED")));
520 }
521
522 fn task_id(project: &str, task: &str) -> TaskId {
525 TaskId {
526 project: ProjectName::from_str(project).expect("project name"),
527 task: TaskName::from_str(task).expect("task name"),
528 }
529 }
530
531 fn edge(from: TaskId, to: TaskId, kind: EdgeKind) -> Edge {
532 Edge { from, to, kind }
533 }
534
535 fn streams(stdout: u8, stderr: u8) -> PredecessorStreamHashes {
536 PredecessorStreamHashes {
537 stdout_hash: [stdout; 32],
538 stderr_hash: [stderr; 32],
539 }
540 }
541
542 fn graph(nodes: &[TaskId], edges: &[Edge]) -> TaskGraph {
543 TaskGraph {
544 nodes: nodes.iter().cloned().collect::<BTreeSet<_>>(),
545 edges: edges.iter().cloned().collect::<BTreeSet<_>>(),
546 }
547 }
548
549 #[test]
550 fn no_predecessors_yields_empty_vec() {
551 let target = task_id("p", "alone");
552 let g = graph(std::slice::from_ref(&target), &[]);
553 let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
554 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
555 assert!(result.is_empty());
556 }
557
558 #[test]
559 fn single_hard_predecessor_is_returned() {
560 let pred = task_id("p", "pre");
561 let target = task_id("p", "main");
562 let g = graph(
563 &[pred.clone(), target.clone()],
564 &[edge(pred.clone(), target.clone(), EdgeKind::Hard)],
565 );
566 let mut recorded = BTreeMap::new();
567 recorded.insert(pred.clone(), streams(0xAA, 0xBB));
568
569 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
570 assert_eq!(result.len(), 1);
571 assert_eq!(result[0].0, &pred);
572 assert_eq!(result[0].1, streams(0xAA, 0xBB));
573 }
574
575 #[test]
576 fn soft_and_producer_matching_edges_do_not_count() {
577 let target = task_id("p", "main");
578 let soft_pred = task_id("p", "soft");
579 let pm_pred = task_id("p", "pm");
580 let g = graph(
581 &[target.clone(), soft_pred.clone(), pm_pred.clone()],
582 &[
583 edge(soft_pred, target.clone(), EdgeKind::Soft),
584 edge(pm_pred, target.clone(), EdgeKind::ProducerMatching),
585 ],
586 );
587 let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
588 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
589 assert!(
590 result.is_empty(),
591 "only Hard edges contribute to CACHE-007; got {} predecessors",
592 result.len(),
593 );
594 }
595
596 #[test]
597 fn outgoing_hard_edges_are_not_predecessors() {
598 let target = task_id("p", "main");
599 let dep_of_target = task_id("p", "downstream");
600 let g = graph(
601 &[target.clone(), dep_of_target.clone()],
602 &[edge(target.clone(), dep_of_target, EdgeKind::Hard)],
603 );
604 let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
605 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
606 assert!(result.is_empty());
607 }
608
609 #[test]
610 fn multiple_hard_predecessors_are_all_returned() {
611 let target = task_id("p", "main");
612 let a = task_id("alpha", "build");
613 let b = task_id("beta", "build");
614 let g = graph(
615 &[target.clone(), a.clone(), b.clone()],
616 &[
617 edge(a.clone(), target.clone(), EdgeKind::Hard),
618 edge(b.clone(), target.clone(), EdgeKind::Hard),
619 ],
620 );
621 let mut recorded = BTreeMap::new();
622 recorded.insert(a.clone(), streams(0x01, 0x02));
623 recorded.insert(b.clone(), streams(0x03, 0x04));
624
625 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
626 assert_eq!(result.len(), 2);
627 let names: BTreeSet<TaskId> = result.iter().map(|(id, _)| (*id).clone()).collect();
628 assert!(names.contains(&a));
629 assert!(names.contains(&b));
630 }
631
632 #[test]
633 fn missing_stream_hashes_yield_predecessor_streams_missing() {
634 let pred = task_id("p", "pre");
635 let target = task_id("p", "main");
636 let g = graph(
637 &[pred.clone(), target.clone()],
638 &[edge(pred.clone(), target.clone(), EdgeKind::Hard)],
639 );
640 let recorded: BTreeMap<TaskId, PredecessorStreamHashes> = BTreeMap::new();
641
642 match collect_hard_predecessor_streams(&g, &target, &recorded) {
643 Err(BuildKeyError::PredecessorStreamsMissing { predecessor }) => {
644 assert_eq!(predecessor, pred);
645 }
646 Err(other) => panic!("expected PredecessorStreamsMissing, got {other:?}"),
647 Ok(v) => panic!("expected error, got Ok with {} entries", v.len()),
648 }
649 }
650
651 #[test]
652 fn mixed_edge_kinds_into_target_select_only_hard() {
653 let target = task_id("p", "main");
654 let hard_pred = task_id("p", "hard");
655 let soft_pred = task_id("p", "soft");
656 let g = graph(
657 &[target.clone(), hard_pred.clone(), soft_pred.clone()],
658 &[
659 edge(hard_pred.clone(), target.clone(), EdgeKind::Hard),
660 edge(soft_pred, target.clone(), EdgeKind::Soft),
661 ],
662 );
663 let mut recorded = BTreeMap::new();
664 recorded.insert(hard_pred.clone(), streams(0x10, 0x20));
665
666 let result = collect_hard_predecessor_streams(&g, &target, &recorded).unwrap();
667 assert_eq!(result.len(), 1);
668 assert_eq!(result[0].0, &hard_pred);
669 }
670
671 mod input_resolution {
674 use std::collections::{BTreeMap, BTreeSet};
675 use std::path::PathBuf;
676
677 use haz_cache::Hasher;
678 use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
679 use haz_domain::project::Project;
680 use haz_domain::settings::WorkspaceSettings;
681 use haz_domain::settings::cache::HashAlgo;
682 use haz_domain::workspace::Workspace;
683 use haz_vfs::{FsError, MemFilesystem};
684
685 use super::super::BuildKeyError;
686 use super::super::{OwnedInputFile, resolve_input_files};
687
688 const WORKSPACE_HOST: &str = "/ws";
689 const PROJECT_HOST: &str = "/ws/proj";
690
691 fn nested_project() -> Project {
692 Project {
693 name: haz_domain::name::ProjectName::try_new("proj").unwrap(),
694 root: ProjectRoot::Nested(
695 CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
696 ),
697 tags: BTreeSet::new(),
698 tasks: BTreeMap::new(),
699 }
700 }
701
702 fn implicit_project() -> Project {
703 Project {
704 name: haz_domain::name::ProjectName::try_new("root").unwrap(),
705 root: ProjectRoot::WorkspaceRoot,
706 tags: BTreeSet::new(),
707 tasks: BTreeMap::new(),
708 }
709 }
710
711 fn workspace_with(project: &Project) -> Workspace {
712 let mut projects = BTreeMap::new();
713 projects.insert(project.name.clone(), project.clone());
714 Workspace {
715 root: WorkspaceRootPath::try_new(PathBuf::from(WORKSPACE_HOST)).unwrap(),
716 projects,
717 overlays: BTreeMap::new(),
718 settings: WorkspaceSettings::default(),
719 }
720 }
721
722 fn hash_of(algo: HashAlgo, bytes: &[u8]) -> [u8; 32] {
723 let mut h = Hasher::new(algo);
724 h.update(bytes);
725 h.finalize()
726 }
727
728 fn paths_of(files: &[OwnedInputFile]) -> BTreeSet<String> {
729 files
730 .iter()
731 .map(|f| f.workspace_absolute_path.clone())
732 .collect()
733 }
734
735 #[test]
736 fn literal_hit_returns_one_input_with_correct_hash() {
737 let mut fs = MemFilesystem::new();
738 fs.add_dir(PROJECT_HOST).unwrap();
739 fs.add_file(format!("{PROJECT_HOST}/file.txt"), b"hello".to_vec())
740 .unwrap();
741
742 let project = nested_project();
743 let workspace = workspace_with(&project);
744 let inputs = vec![InputSpec::parse("file.txt").unwrap()];
745
746 let result =
747 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
748 assert_eq!(result.len(), 1);
749 assert_eq!(result[0].workspace_absolute_path, "/proj/file.txt");
750 assert_eq!(result[0].content_hash, hash_of(HashAlgo::Blake3, b"hello"));
751 }
752
753 #[test]
754 fn literal_workspace_absolute_resolves_under_workspace_root() {
755 let mut fs = MemFilesystem::new();
759 fs.add_dir("/ws/other").unwrap();
760 fs.add_file("/ws/other/data.bin", b"\xDE\xAD\xBE\xEF".to_vec())
761 .unwrap();
762 fs.add_dir(PROJECT_HOST).unwrap();
765
766 let project = nested_project();
767 let workspace = workspace_with(&project);
768 let inputs = vec![InputSpec::parse("/other/data.bin").unwrap()];
769
770 let result =
771 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
772 assert_eq!(result.len(), 1);
773 assert_eq!(result[0].workspace_absolute_path, "/other/data.bin");
774 assert_eq!(
775 result[0].content_hash,
776 hash_of(HashAlgo::Blake3, b"\xDE\xAD\xBE\xEF")
777 );
778 }
779
780 #[test]
781 fn literal_missing_file_surfaces_pattern_resolution_failure() {
782 let mut fs = MemFilesystem::new();
783 fs.add_dir(PROJECT_HOST).unwrap();
784
785 let project = nested_project();
786 let workspace = workspace_with(&project);
787 let inputs = vec![InputSpec::parse("absent.txt").unwrap()];
788
789 match resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3) {
790 Err(BuildKeyError::InputPatternResolutionFailed { root, source }) => {
791 assert_eq!(root, PathBuf::from("/ws/proj/absent.txt"));
792 assert!(
793 matches!(source, FsError::NotFound { .. }),
794 "expected NotFound source, got {source:?}",
795 );
796 }
797 other => panic!("expected InputPatternResolutionFailed, got {other:?}"),
798 }
799 }
800
801 #[test]
802 fn literal_pointing_at_directory_surfaces_input_not_a_regular_file() {
803 let mut fs = MemFilesystem::new();
804 fs.add_dir(format!("{PROJECT_HOST}/subdir")).unwrap();
805
806 let project = nested_project();
807 let workspace = workspace_with(&project);
808 let inputs = vec![InputSpec::parse("subdir").unwrap()];
809
810 match resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3) {
811 Err(BuildKeyError::InputNotARegularFile { path }) => {
812 assert_eq!(path, PathBuf::from("/ws/proj/subdir"));
813 }
814 other => panic!("expected InputNotARegularFile, got {other:?}"),
815 }
816 }
817
818 #[test]
819 fn glob_multi_match_collects_every_matching_file() {
820 let mut fs = MemFilesystem::new();
821 fs.add_dir(PROJECT_HOST).unwrap();
822 fs.add_file(format!("{PROJECT_HOST}/a.rs"), b"a".to_vec())
823 .unwrap();
824 fs.add_file(format!("{PROJECT_HOST}/b.rs"), b"b".to_vec())
825 .unwrap();
826 fs.add_file(format!("{PROJECT_HOST}/keep.txt"), b"ignored".to_vec())
828 .unwrap();
829
830 let project = nested_project();
831 let workspace = workspace_with(&project);
832 let inputs = vec![InputSpec::parse("*.rs").unwrap()];
833
834 let result =
835 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
836 assert_eq!(result.len(), 2);
837 assert_eq!(
838 paths_of(&result),
839 BTreeSet::from(["/proj/a.rs".to_owned(), "/proj/b.rs".to_owned()]),
840 );
841 for file in &result {
842 let bytes: &[u8] = if file.workspace_absolute_path == "/proj/a.rs" {
843 b"a"
844 } else {
845 b"b"
846 };
847 assert_eq!(file.content_hash, hash_of(HashAlgo::Blake3, bytes));
848 }
849 }
850
851 #[test]
852 fn glob_no_match_returns_empty_contribution() {
853 let mut fs = MemFilesystem::new();
854 fs.add_dir(PROJECT_HOST).unwrap();
855 fs.add_file(format!("{PROJECT_HOST}/only.txt"), b"x".to_vec())
856 .unwrap();
857
858 let project = nested_project();
859 let workspace = workspace_with(&project);
860 let inputs = vec![InputSpec::parse("*.rs").unwrap()];
861
862 let result =
863 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
864 assert!(result.is_empty());
865 }
866
867 #[test]
868 fn glob_nested_double_star_recurses_into_subdirectories() {
869 let mut fs = MemFilesystem::new();
870 fs.add_dir(format!("{PROJECT_HOST}/src")).unwrap();
871 fs.add_dir(format!("{PROJECT_HOST}/src/inner")).unwrap();
872 fs.add_file(
873 format!("{PROJECT_HOST}/src/top.rs"),
874 b"top contents".to_vec(),
875 )
876 .unwrap();
877 fs.add_file(
878 format!("{PROJECT_HOST}/src/inner/deep.rs"),
879 b"deep contents".to_vec(),
880 )
881 .unwrap();
882 fs.add_file(format!("{PROJECT_HOST}/other.rs"), b"other".to_vec())
884 .unwrap();
885
886 let project = nested_project();
887 let workspace = workspace_with(&project);
888 let inputs = vec![InputSpec::parse("src/**/*.rs").unwrap()];
889
890 let result =
891 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
892 assert_eq!(result.len(), 2);
893 assert_eq!(
894 paths_of(&result),
895 BTreeSet::from([
896 "/proj/src/top.rs".to_owned(),
897 "/proj/src/inner/deep.rs".to_owned(),
898 ]),
899 );
900 }
901
902 #[test]
903 fn glob_symlink_to_file_records_link_path_with_target_bytes_hash() {
904 let mut fs = MemFilesystem::new();
905 fs.add_dir(PROJECT_HOST).unwrap();
906 fs.add_file(format!("{PROJECT_HOST}/real.txt"), b"real bytes".to_vec())
907 .unwrap();
908 fs.add_symlink(
909 format!("{PROJECT_HOST}/link.txt"),
910 format!("{PROJECT_HOST}/real.txt"),
911 )
912 .unwrap();
913
914 let project = nested_project();
915 let workspace = workspace_with(&project);
916 let inputs = vec![InputSpec::parse("*.txt").unwrap()];
917
918 let result =
919 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
920 assert_eq!(
921 result.len(),
922 2,
923 "both the real file and the symlink to it are distinct contributions",
924 );
925 assert_eq!(
926 paths_of(&result),
927 BTreeSet::from(["/proj/real.txt".to_owned(), "/proj/link.txt".to_owned(),]),
928 );
929 let expected_hash = hash_of(HashAlgo::Blake3, b"real bytes");
932 for file in &result {
933 assert_eq!(file.content_hash, expected_hash);
934 }
935 }
936
937 #[test]
938 fn implicit_mode_project_relative_literal_is_workspace_absolute() {
939 let mut fs = MemFilesystem::new();
943 fs.add_dir(WORKSPACE_HOST).unwrap();
944 fs.add_file(format!("{WORKSPACE_HOST}/at_root.txt"), b"r".to_vec())
945 .unwrap();
946
947 let project = implicit_project();
948 let workspace = workspace_with(&project);
949 let inputs = vec![InputSpec::parse("at_root.txt").unwrap()];
950
951 let result =
952 resolve_input_files(&fs, &workspace, &project, &inputs, HashAlgo::Blake3).unwrap();
953 assert_eq!(result.len(), 1);
954 assert_eq!(result[0].workspace_absolute_path, "/at_root.txt");
955 assert_eq!(result[0].content_hash, hash_of(HashAlgo::Blake3, b"r"));
956 }
957 }
958
959 mod e2e {
962 use std::collections::{BTreeMap, BTreeSet};
963 use std::path::{Path, PathBuf};
964
965 use nonempty::NonEmpty;
966
967 use haz_cache::CacheKey;
968 use haz_dag::edge::{Edge, EdgeKind};
969 use haz_dag::graph::TaskGraph;
970 use haz_domain::action::TaskAction;
971 use haz_domain::env::{EnvSettings, EnvVarName};
972 use haz_domain::mutex::{Mutex, MutexMode, MutexScope};
973 use haz_domain::name::{MutexName, ProjectName, TaskName};
974 use haz_domain::path::{CanonicalPath, HazPath, InputSpec, ProjectRoot, WorkspaceRootPath};
975 use haz_domain::project::Project;
976 use haz_domain::settings::WorkspaceSettings;
977 use haz_domain::settings::cache::HashAlgo;
978 use haz_domain::task::Task;
979 use haz_domain::task_id::TaskId;
980 use haz_domain::workspace::Workspace;
981 use haz_vfs::{MemFilesystem, WritableFilesystem};
982
983 use super::super::{BuildKeyError, PredecessorStreamHashes, build_cache_key};
984
985 struct E2eState {
986 fs: MemFilesystem,
987 workspace: Workspace,
988 graph: TaskGraph,
989 task_id: TaskId,
990 host_env: BTreeMap<EnvVarName, String>,
991 predecessor_streams: BTreeMap<TaskId, PredecessorStreamHashes>,
992 }
993
994 impl E2eState {
995 fn key(&self) -> CacheKey {
996 build_cache_key(
997 &self.fs,
998 &self.workspace,
999 &self.graph,
1000 &self.task_id,
1001 &self.host_env,
1002 &self.predecessor_streams,
1003 HashAlgo::Blake3,
1004 )
1005 .expect("baseline state must yield a valid key")
1006 }
1007 }
1008
1009 fn env_var(s: &str) -> EnvVarName {
1010 EnvVarName::try_new(s).unwrap()
1011 }
1012
1013 fn project_name(s: &str) -> ProjectName {
1014 ProjectName::try_new(s).unwrap()
1015 }
1016
1017 fn task_name(s: &str) -> TaskName {
1018 TaskName::try_new(s).unwrap()
1019 }
1020
1021 fn baseline() -> E2eState {
1028 let mut fs = MemFilesystem::new();
1029 fs.add_dir("/ws/proj").unwrap();
1030 fs.add_file("/ws/proj/file.txt", b"hello".to_vec()).unwrap();
1031
1032 let task = Task {
1033 name: task_name("build"),
1034 action: TaskAction::Command(NonEmpty::from_vec(vec!["true".to_owned()]).unwrap()),
1035 inputs: vec![InputSpec::parse("file.txt").unwrap()],
1036 outputs: vec![],
1037 deps: vec![],
1038 weak_deps: vec![],
1039 mutex: None,
1040 env: EnvSettings {
1041 from_host: BTreeSet::from([env_var("PATH")]),
1042 overrides: BTreeMap::new(),
1043 },
1044 };
1045
1046 let project = Project {
1047 name: project_name("proj"),
1048 root: ProjectRoot::Nested(
1049 CanonicalPath::from_absolute(&HazPath::parse("/proj").unwrap()).unwrap(),
1050 ),
1051 tags: BTreeSet::new(),
1052 tasks: BTreeMap::from([(task.name.clone(), task)]),
1053 };
1054
1055 let task_id = TaskId {
1056 project: project_name("proj"),
1057 task: task_name("build"),
1058 };
1059 let pred = TaskId {
1060 project: project_name("lib"),
1061 task: task_name("compile"),
1062 };
1063
1064 let workspace = Workspace {
1065 root: WorkspaceRootPath::try_new(PathBuf::from("/ws")).unwrap(),
1066 projects: BTreeMap::from([(project.name.clone(), project)]),
1067 overlays: BTreeMap::new(),
1068 settings: WorkspaceSettings::default(),
1069 };
1070
1071 let graph = TaskGraph {
1072 nodes: BTreeSet::from([task_id.clone(), pred.clone()]),
1073 edges: BTreeSet::from([Edge {
1074 from: pred.clone(),
1075 to: task_id.clone(),
1076 kind: EdgeKind::Hard,
1077 }]),
1078 };
1079
1080 let predecessor_streams = BTreeMap::from([(
1081 pred,
1082 PredecessorStreamHashes {
1083 stdout_hash: [0xAA; 32],
1084 stderr_hash: [0xBB; 32],
1085 },
1086 )]);
1087
1088 let host_env = BTreeMap::from([(env_var("PATH"), "/usr/bin".to_owned())]);
1089
1090 E2eState {
1091 fs,
1092 workspace,
1093 graph,
1094 task_id,
1095 host_env,
1096 predecessor_streams,
1097 }
1098 }
1099
1100 fn mutate_task(state: &mut E2eState, f: impl FnOnce(&mut Task)) {
1101 let proj = state
1102 .workspace
1103 .projects
1104 .get_mut(&state.task_id.project)
1105 .expect("baseline contains the target project");
1106 let t = proj
1107 .tasks
1108 .get_mut(&state.task_id.task)
1109 .expect("baseline contains the target task");
1110 f(t);
1111 }
1112
1113 #[test]
1114 fn identical_inputs_yield_identical_keys() {
1115 assert_eq!(baseline().key(), baseline().key());
1118 }
1119
1120 #[test]
1121 fn task_action_change_changes_key() {
1122 let baseline_key = baseline().key();
1123 let mut perturbed = baseline();
1124 mutate_task(&mut perturbed, |t| {
1125 t.action =
1126 TaskAction::Command(NonEmpty::from_vec(vec!["false".to_owned()]).unwrap());
1127 });
1128 assert_ne!(baseline_key, perturbed.key());
1129 }
1130
1131 #[test]
1132 fn dag_018_input_file_content_change_changes_key() {
1133 let baseline_key = baseline().key();
1137 let perturbed = baseline();
1138 perturbed
1139 .fs
1140 .write_file(Path::new("/ws/proj/file.txt"), b"world")
1141 .unwrap();
1142 assert_ne!(baseline_key, perturbed.key());
1143 }
1144
1145 #[test]
1146 fn dag_018_adding_a_soft_edge_predecessor_does_not_change_the_key() {
1147 let baseline_key = baseline().key();
1154 let mut perturbed = baseline();
1155 let soft_pred = TaskId {
1156 project: project_name("soft_producer"),
1157 task: task_name("emit"),
1158 };
1159 perturbed.graph.nodes.insert(soft_pred.clone());
1160 perturbed.graph.edges.insert(Edge {
1161 from: soft_pred,
1162 to: perturbed.task_id.clone(),
1163 kind: EdgeKind::Soft,
1164 });
1165 assert_eq!(
1166 baseline_key,
1167 perturbed.key(),
1168 "DAG-018: soft-edge predecessors MUST NOT contribute to the key",
1169 );
1170 }
1171
1172 #[test]
1173 fn dag_018_adding_a_producer_matching_edge_does_not_change_the_key() {
1174 let baseline_key = baseline().key();
1178 let mut perturbed = baseline();
1179 let pm_pred = TaskId {
1180 project: project_name("pm_producer"),
1181 task: task_name("emit"),
1182 };
1183 perturbed.graph.nodes.insert(pm_pred.clone());
1184 perturbed.graph.edges.insert(Edge {
1185 from: pm_pred,
1186 to: perturbed.task_id.clone(),
1187 kind: EdgeKind::ProducerMatching,
1188 });
1189 assert_eq!(
1190 baseline_key,
1191 perturbed.key(),
1192 "DAG-018: producer-matching edges MUST NOT contribute to the key",
1193 );
1194 }
1195
1196 #[test]
1197 fn dag_017_predecessor_stream_hash_change_changes_key() {
1198 let baseline_key = baseline().key();
1199 let mut perturbed = baseline();
1200 let pred = TaskId {
1201 project: project_name("lib"),
1202 task: task_name("compile"),
1203 };
1204 perturbed.predecessor_streams.insert(
1205 pred,
1206 PredecessorStreamHashes {
1207 stdout_hash: [0xCC; 32],
1208 stderr_hash: [0xBB; 32],
1209 },
1210 );
1211 assert_ne!(baseline_key, perturbed.key());
1212 }
1213
1214 #[test]
1215 fn env_value_change_changes_key() {
1216 let baseline_key = baseline().key();
1217 let mut perturbed = baseline();
1218 perturbed
1219 .host_env
1220 .insert(env_var("PATH"), "/usr/local/bin".to_owned());
1221 assert_ne!(baseline_key, perturbed.key());
1222 }
1223
1224 #[test]
1225 fn unrelated_host_env_does_not_affect_key() {
1226 let baseline_key = baseline().key();
1229 let mut perturbed = baseline();
1230 perturbed
1231 .host_env
1232 .insert(env_var("HOME"), "/home/user".to_owned());
1233 assert_eq!(baseline_key, perturbed.key());
1234 }
1235
1236 #[test]
1237 fn override_change_changes_key() {
1238 let baseline_key = baseline().key();
1241 let mut perturbed = baseline();
1242 mutate_task(&mut perturbed, |t| {
1243 t.env
1244 .overrides
1245 .insert(env_var("HAZ_ENV"), "alpha".to_owned());
1246 });
1247 let with_alpha = perturbed.key();
1248 assert_ne!(baseline_key, with_alpha);
1249
1250 mutate_task(&mut perturbed, |t| {
1251 t.env
1252 .overrides
1253 .insert(env_var("HAZ_ENV"), "beta".to_owned());
1254 });
1255 assert_ne!(with_alpha, perturbed.key());
1256 }
1257
1258 #[test]
1259 fn mutex_008_adding_a_mutex_does_not_change_the_key() {
1260 let baseline_key = baseline().key();
1266 let mut perturbed = baseline();
1267 mutate_task(&mut perturbed, |t| {
1268 t.mutex = Some(Mutex {
1269 scope: MutexScope::Workspace,
1270 name: MutexName::try_new("db").unwrap(),
1271 mode: MutexMode::Exclusive,
1272 });
1273 });
1274 assert_eq!(baseline_key, perturbed.key());
1275 }
1276
1277 #[test]
1278 fn mutex_008_changing_mutex_scope_does_not_change_the_key() {
1279 let mut workspace_scoped = baseline();
1282 mutate_task(&mut workspace_scoped, |t| {
1283 t.mutex = Some(Mutex {
1284 scope: MutexScope::Workspace,
1285 name: MutexName::try_new("db").unwrap(),
1286 mode: MutexMode::Exclusive,
1287 });
1288 });
1289 let mut project_scoped = baseline();
1290 mutate_task(&mut project_scoped, |t| {
1291 t.mutex = Some(Mutex {
1292 scope: MutexScope::Project,
1293 name: MutexName::try_new("db").unwrap(),
1294 mode: MutexMode::Exclusive,
1295 });
1296 });
1297 assert_eq!(workspace_scoped.key(), project_scoped.key());
1298 }
1299
1300 #[test]
1301 fn mutex_008_changing_mutex_name_does_not_change_the_key() {
1302 let mut named_db = baseline();
1304 mutate_task(&mut named_db, |t| {
1305 t.mutex = Some(Mutex {
1306 scope: MutexScope::Workspace,
1307 name: MutexName::try_new("db").unwrap(),
1308 mode: MutexMode::Exclusive,
1309 });
1310 });
1311 let mut named_gpu = baseline();
1312 mutate_task(&mut named_gpu, |t| {
1313 t.mutex = Some(Mutex {
1314 scope: MutexScope::Workspace,
1315 name: MutexName::try_new("gpu").unwrap(),
1316 mode: MutexMode::Exclusive,
1317 });
1318 });
1319 assert_eq!(named_db.key(), named_gpu.key());
1320 }
1321
1322 #[test]
1323 fn mutex_008_changing_mutex_mode_does_not_change_the_key() {
1324 let mut exclusive = baseline();
1326 mutate_task(&mut exclusive, |t| {
1327 t.mutex = Some(Mutex {
1328 scope: MutexScope::Workspace,
1329 name: MutexName::try_new("db").unwrap(),
1330 mode: MutexMode::Exclusive,
1331 });
1332 });
1333 let mut shared = baseline();
1334 mutate_task(&mut shared, |t| {
1335 t.mutex = Some(Mutex {
1336 scope: MutexScope::Workspace,
1337 name: MutexName::try_new("db").unwrap(),
1338 mode: MutexMode::Shared,
1339 });
1340 });
1341 assert_eq!(exclusive.key(), shared.key());
1342 }
1343
1344 #[test]
1345 fn task_not_in_workspace_surfaces_error() {
1346 let mut state = baseline();
1347 state.task_id = TaskId {
1348 project: project_name("absent"),
1349 task: task_name("build"),
1350 };
1351 match build_cache_key(
1352 &state.fs,
1353 &state.workspace,
1354 &state.graph,
1355 &state.task_id,
1356 &state.host_env,
1357 &state.predecessor_streams,
1358 HashAlgo::Blake3,
1359 ) {
1360 Err(BuildKeyError::TaskNotInWorkspace { task }) => {
1361 assert_eq!(task, state.task_id);
1362 }
1363 other => panic!("expected TaskNotInWorkspace, got {other:?}"),
1364 }
1365 }
1366 }
1367}