// anodizer_core/util.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2use std::fs;
3use std::path::Path;
4use std::time::{Duration, SystemTime};
5
6use anyhow::{Context as _, Result};
7
8/// Compile a regex, panicking with a diagnostic if the pattern is invalid.
9/// Intended for `LazyLock::new(…)` initializers where the pattern is a
10/// hardcoded literal (or built from `format!` over known-safe fragments).
11/// A compile failure means a programmer bug surfaced at first use, not a
12/// runtime-path user-input error. Exists because the anti-pattern hook
13/// forbids bare panicking error helpers in lib code, and `Regex::new` on
14/// a trusted literal is inherently infallible.
15pub fn static_regex(pattern: &str) -> regex::Regex {
16    regex::Regex::new(pattern)
17        .unwrap_or_else(|e| panic!("invalid static regex literal `{}`: {}", pattern, e))
18}
19
20// ---------------------------------------------------------------------------
21// Topological sort (Kahn's algorithm)
22// ---------------------------------------------------------------------------
23
/// Order items so that every dependency precedes the items depending on it
/// (Kahn's algorithm).
///
/// `items` is a slice of `(name, depends_on)` pairs; the result is the list
/// of names in dependency order.
///
/// - A dependency that does not name any item in `items` is ignored.
/// - Output is deterministic: the initially ready nodes are emitted in
///   alphabetical order, and the nodes unlocked by one emitted node are
///   queued alphabetically.
/// - If a cycle prevents some nodes from ever becoming ready, the sortable
///   nodes come first and the rest are appended in their input order.
pub fn topological_sort(items: &[(impl AsRef<str>, impl AsRef<[String]>)]) -> Vec<String> {
    let known: HashSet<&str> = items.iter().map(|(name, _)| name.as_ref()).collect();

    // pending[n]    = number of in-set dependencies of `n` not yet emitted
    // dependents[d] = nodes that list `d` as a dependency, in input order
    let mut pending: HashMap<&str, usize> = HashMap::new();
    let mut dependents: HashMap<&str, Vec<&str>> = HashMap::new();
    for (name, deps) in items {
        let name = name.as_ref();
        let mut count = 0usize;
        for dep in deps.as_ref().iter().map(String::as_str) {
            if known.contains(dep) {
                count += 1;
                dependents.entry(dep).or_default().push(name);
            }
        }
        pending.insert(name, count);
    }

    // Seed the work queue with everything that has no in-set dependency.
    // Sorting removes the HashMap's arbitrary iteration order.
    let mut ready: Vec<&str> = pending
        .iter()
        .filter_map(|(&name, &count)| (count == 0).then_some(name))
        .collect();
    ready.sort_unstable();
    let mut queue: VecDeque<&str> = ready.into();

    let mut ordered: Vec<String> = Vec::with_capacity(items.len());
    while let Some(current) = queue.pop_front() {
        ordered.push(current.to_owned());

        let Some(children) = dependents.get(current) else {
            continue;
        };

        // Release one dependency edge per child; children that reach zero
        // become ready and are queued alphabetically.
        let mut unlocked: Vec<&str> = Vec::new();
        for &child in children {
            if let Some(count) = pending.get_mut(child) {
                *count -= 1;
                if *count == 0 {
                    unlocked.push(child);
                }
            }
        }
        unlocked.sort_unstable();
        queue.extend(unlocked);
    }

    // Anything not emitted is part of (or downstream of) a cycle: append it
    // in input order.
    if ordered.len() < items.len() {
        let emitted: HashSet<String> = ordered.iter().cloned().collect();
        let stragglers = items
            .iter()
            .map(|(name, _)| name.as_ref())
            .filter(|name| !emitted.contains(*name))
            .map(str::to_owned);
        ordered.extend(stragglers);
    }

    ordered
}
100
101// ---------------------------------------------------------------------------
102// find_binary
103// ---------------------------------------------------------------------------
104
/// Check whether a binary can be found on the system.
///
/// - If `name` contains a path separator (`/` or `\`), it is treated as an
///   absolute or relative path and the result is simply whether that path
///   exists.
/// - Otherwise every directory listed in the `PATH` environment variable is
///   searched for a regular file called `name`. On Windows each extension
///   from `PATHEXT` (falling back to `.COM;.EXE;.BAT;.CMD`) is also tried,
///   so asking for `upx` finds `upx.exe`.
///
/// The lookup is pure Rust and does not shell out to `which` or
/// `command -v`. Note that a bare-name hit only requires a regular file;
/// the executable permission bit is not inspected.
///
/// `PATH` is read with `var_os`, so a `PATH` containing bytes that are not
/// valid Unicode is still searched instead of being treated as unset.
///
/// Returns `false` when `PATH` is unset or no candidate is found.
pub fn find_binary(name: &str) -> bool {
    if name.contains('/') || name.contains('\\') {
        return Path::new(name).exists();
    }

    // On Windows, PATHEXT lists extensions to try (e.g., .COM;.EXE;.BAT;.CMD).
    let extensions: Vec<String> = if cfg!(windows) {
        std::env::var("PATHEXT")
            .unwrap_or_else(|_| ".COM;.EXE;.BAT;.CMD".to_string())
            .split(';')
            .filter(|ext| !ext.is_empty())
            .map(str::to_owned)
            .collect()
    } else {
        Vec::new()
    };

    // `var_os` (not `var`): one non-UTF-8 byte anywhere in PATH must not
    // disable the whole search.
    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };

    std::env::split_paths(&path_var).any(|dir| {
        dir.join(name).is_file()
            || extensions
                .iter()
                .any(|ext| dir.join(format!("{name}{ext}")).is_file())
    })
}
147
148// ---------------------------------------------------------------------------
149// apply_mod_timestamp
150// ---------------------------------------------------------------------------
151
152// ---------------------------------------------------------------------------
153// mod_timestamp helpers
154// ---------------------------------------------------------------------------
155
156/// Parse a `mod_timestamp` string into a `SystemTime`.
157///
158/// Accepts:
159///   - Unix epoch seconds as an integer (e.g. `"1704067200"`)
160///   - RFC 3339 / ISO 8601 datetime (e.g. `"2024-01-01T00:00:00Z"`)
161pub fn parse_mod_timestamp(raw: &str) -> Result<SystemTime> {
162    // Try Unix epoch integer first (most common in CI)
163    if let Ok(epoch_secs) = raw.parse::<u64>() {
164        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
165    }
166    // Try RFC 3339 / ISO 8601 via chrono
167    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(raw) {
168        let epoch_secs = dt.timestamp() as u64;
169        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
170    }
171    // Try chrono's more lenient parsing for formats like "2024-01-01T00:00:00"
172    if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") {
173        let epoch_secs = dt.and_utc().timestamp() as u64;
174        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
175    }
176    anyhow::bail!(
177        "mod_timestamp value '{raw}' is not a valid timestamp. \
178         Accepted formats: Unix epoch seconds (e.g. \"1704067200\") or \
179         RFC 3339 datetime (e.g. \"2024-01-01T00:00:00Z\")"
180    )
181}
182
183/// Apply `mod_timestamp` to all regular files in a directory.
184///
185/// Parses the timestamp via `parse_mod_timestamp`, then sets the mtime on
186/// every regular file in `dir`.
187pub fn apply_mod_timestamp(dir: &Path, raw: &str, log: &crate::log::StageLogger) -> Result<()> {
188    let mtime = parse_mod_timestamp(raw)?;
189
190    for entry in fs::read_dir(dir).with_context(|| format!("read staging dir {}", dir.display()))? {
191        let entry = entry?;
192        let ft = entry.file_type()?;
193        if ft.is_file() {
194            set_file_mtime(&entry.path(), mtime)?;
195        }
196    }
197
198    log.status(&format!("applied mod_timestamp={raw} to staging files"));
199    Ok(())
200}
201
202/// Set the modification time on a single file.
203pub fn set_file_mtime(path: &Path, mtime: SystemTime) -> Result<()> {
204    let file = std::fs::OpenOptions::new()
205        .write(true)
206        .open(path)
207        .with_context(|| format!("open {} for mtime update", path.display()))?;
208    file.set_times(
209        std::fs::FileTimes::new()
210            .set_accessed(mtime)
211            .set_modified(mtime),
212    )
213    .with_context(|| format!("set mtime on {}", path.display()))?;
214    Ok(())
215}
216
217/// Set the modification time on a single file from a Unix epoch (seconds).
218///
219/// Thin wrapper over `set_file_mtime` that accepts `SOURCE_DATE_EPOCH`-style
220/// `i64` seconds (signed to permit pre-1970 values per the spec).
221pub fn set_file_mtime_epoch(path: &Path, epoch_secs: i64) -> Result<()> {
222    let mtime = if epoch_secs >= 0 {
223        SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs as u64)
224    } else {
225        SystemTime::UNIX_EPOCH - Duration::from_secs((-epoch_secs) as u64)
226    };
227    set_file_mtime(path, mtime)
228}
229
230// ---------------------------------------------------------------------------
231// collect_replace_archives
232// ---------------------------------------------------------------------------
233
234/// Collect archive artifact paths for a given crate + target, for removal by `replace` options.
235pub fn collect_replace_archives(
236    artifacts: &crate::artifact::ArtifactRegistry,
237    crate_name: &str,
238    target: Option<&str>,
239) -> Vec<std::path::PathBuf> {
240    artifacts
241        .by_kind_and_crate(crate::artifact::ArtifactKind::Archive, crate_name)
242        .iter()
243        .filter(|a| a.target.as_deref() == target)
244        .map(|a| a.path.clone())
245        .collect()
246}
247
248/// Gated variant of [`collect_replace_archives`]: returns the matching
249/// archive paths only when `replace` is `Some(true)`. Used by packaging
250/// stages (dmg, msi, flatpak, snapcraft, nsis, pkg, appbundle) to
251/// replace a source archive with the packaged output when the user
252/// opts in via `replace: true` on the config. Returns an empty vec
253/// when `replace` is unset or `false`.
254pub fn collect_if_replace(
255    replace: Option<bool>,
256    artifacts: &crate::artifact::ArtifactRegistry,
257    crate_name: &str,
258    target: Option<&str>,
259) -> Vec<std::path::PathBuf> {
260    if replace.unwrap_or(false) {
261        collect_replace_archives(artifacts, crate_name, target)
262    } else {
263        Vec::new()
264    }
265}
266
/// Return a copy of `s` with every backslash replaced by a forward slash.
///
/// Used to normalize path strings across platforms when the downstream
/// consumer (artifact-manifest JSON, MSYS subprocess env var) is sensitive
/// to separator drift between Linux/macOS and Windows hosts. All other
/// characters are passed through untouched.
pub fn normalize_path_separators(s: &str) -> String {
    s.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
274
/// Apply a "minimal trusted" environment to a `Command` after `env_clear()`.
///
/// Stage subprocess invocations (sbom, source-archive, …) clear the env to
/// stop accidental token leakage but still need a small set of platform-
/// neutral keys so that `git`, `tar`, `syft`, etc. behave normally — HOME
/// for tool config, USER for git author fallback, USERPROFILE/LOCALAPPDATA
/// for the Windows equivalents, TMPDIR/TMP/TEMP so temp-file allocation
/// doesn't land in a forbidden directory, and PATH so the tool itself can
/// find its dependencies. Keeping this list in core means any new entry
/// (e.g. SSL_CERT_DIR for syft pulling enrich data) is added once.
///
/// Each allow-listed variable that is set in the current process is copied
/// onto `command` verbatim; unset variables are skipped. Values are read
/// with `var_os`, so a value that is not valid Unicode (for example a PATH
/// entry with a stray byte) is still forwarded instead of being dropped.
/// This function does not clear the environment itself.
pub fn apply_minimal_env(command: &mut std::process::Command) {
    const PASSTHROUGH: &[&str] = &[
        "HOME",
        "USER",
        "USERPROFILE",
        "TMPDIR",
        "TMP",
        "TEMP",
        "PATH",
        "LOCALAPPDATA",
    ];
    for &key in PASSTHROUGH {
        if let Some(val) = std::env::var_os(key) {
            command.env(key, val);
        }
    }
}
302
#[cfg(test)]
mod tests {
    use super::*;

    // -----------------------------------------------------------------------
    // shared helpers
    // -----------------------------------------------------------------------

    /// Build one `(name, depends_on)` input pair for `topological_sort`.
    fn item(name: &str, deps: &[&str]) -> (String, Vec<String>) {
        (
            name.to_string(),
            deps.iter().map(|d| d.to_string()).collect(),
        )
    }

    /// Absolute distance between two timestamps, whichever is later.
    fn time_gap(a: SystemTime, b: SystemTime) -> Duration {
        a.duration_since(b).unwrap_or_else(|err| err.duration())
    }

    /// Seconds since the Unix epoch for a timestamp at or after the epoch.
    fn epoch_secs(t: SystemTime) -> u64 {
        t.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs()
    }

    /// Create a fresh, empty scratch directory. The process id is part of
    /// the name so concurrent test runs on one machine do not collide.
    fn scratch_dir(label: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "anodizer_test_{label}_{}",
            std::process::id()
        ));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    // 2024-01-01T00:00:00Z
    const NEW_YEAR_2024: u64 = 1704067200;

    // -----------------------------------------------------------------------
    // topological_sort tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_topo_sort_simple_chain() {
        let items = vec![item("c", &["b"]), item("b", &["a"]), item("a", &[])];
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_topo_sort_no_deps() {
        let items = vec![item("b", &[]), item("a", &[])];
        // Deterministic: alphabetical
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b"]);
    }

    #[test]
    fn test_topo_sort_ignores_external_deps() {
        let items = vec![item("b", &["a", "external"]), item("a", &[])];
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b"]);
    }

    #[test]
    fn test_topo_sort_diamond() {
        let items = vec![
            item("d", &["b", "c"]),
            item("b", &["a"]),
            item("c", &["a"]),
            item("a", &[]),
        ];
        let sorted = topological_sort(&items);
        // a first, d last; b and c are unlocked together and therefore
        // emitted alphabetically.
        assert_eq!(sorted, vec!["a", "b", "c", "d"]);
    }

    #[test]
    fn test_topo_sort_cycle_appends_remaining() {
        let items = vec![item("a", &["b"]), item("b", &["a"]), item("c", &[])];
        let sorted = topological_sort(&items);
        // c has no deps and comes first; a and b form a cycle and are
        // appended in their original input order.
        assert_eq!(sorted, vec!["c", "a", "b"]);
    }

    #[test]
    fn test_topo_sort_empty() {
        let items: Vec<(String, Vec<String>)> = vec![];
        let sorted = topological_sort(&items);
        assert!(sorted.is_empty());
    }

    // -----------------------------------------------------------------------
    // find_binary tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_find_binary_absolute_path_exists() {
        if cfg!(windows) {
            // cmd.exe exists on all Windows systems
            assert!(find_binary("C:\\Windows\\System32\\cmd.exe"));
        } else {
            // /usr/bin/env exists on virtually all Unix systems
            assert!(find_binary("/usr/bin/env"));
        }
    }

    #[test]
    fn test_find_binary_absolute_path_does_not_exist() {
        if cfg!(windows) {
            assert!(!find_binary("C:\\nonexistent\\binary\\path.exe"));
        } else {
            assert!(!find_binary("/nonexistent/binary/path"));
        }
    }

    #[test]
    fn test_find_binary_bare_name_on_path() {
        if cfg!(windows) {
            // "cmd.exe" matches by exact file name on PATH ...
            assert!(find_binary("cmd.exe"));
            // ... and the extension-less form is resolved through PATHEXT.
            assert!(find_binary("cmd"));
        } else {
            // "env" should be findable on PATH on any Unix system
            assert!(find_binary("env"));
        }
    }

    #[test]
    fn test_find_binary_bare_name_not_on_path() {
        assert!(!find_binary("nonexistent-binary-xyz-12345"));
    }

    // -----------------------------------------------------------------------
    // parse_mod_timestamp tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_mod_timestamp_epoch_integer() {
        let t = parse_mod_timestamp("1704067200").unwrap();
        assert_eq!(epoch_secs(t), NEW_YEAR_2024);
    }

    #[test]
    fn test_parse_mod_timestamp_rfc3339() {
        let t = parse_mod_timestamp("2024-01-01T00:00:00Z").unwrap();
        assert_eq!(epoch_secs(t), NEW_YEAR_2024);
    }

    #[test]
    fn test_parse_mod_timestamp_rfc3339_with_offset() {
        let t = parse_mod_timestamp("2024-01-01T01:00:00+01:00").unwrap();
        // 2024-01-01T01:00:00+01:00 is the same instant as 2024-01-01T00:00:00Z
        assert_eq!(epoch_secs(t), NEW_YEAR_2024);
    }

    #[test]
    fn test_parse_mod_timestamp_naive_datetime() {
        let t = parse_mod_timestamp("2024-01-01T00:00:00").unwrap();
        assert_eq!(epoch_secs(t), NEW_YEAR_2024);
    }

    #[test]
    fn test_parse_mod_timestamp_invalid() {
        let err = parse_mod_timestamp("not-a-timestamp").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("not a valid timestamp"),
            "unexpected error: {msg}"
        );
        // Mirrors GoReleaser commit 50a034d: the parse error must include
        // the offending mtime value so misconfigurations are diagnosable.
        assert!(
            msg.contains("not-a-timestamp"),
            "error must include the bad value, got: {msg}"
        );
    }

    #[test]
    fn test_parse_mod_timestamp_zero() {
        let t = parse_mod_timestamp("0").unwrap();
        assert_eq!(t, SystemTime::UNIX_EPOCH);
    }

    // -----------------------------------------------------------------------
    // set_file_mtime tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_set_file_mtime_sets_both_atime_and_mtime() {
        let dir = scratch_dir("set_file_mtime");
        let file_path = dir.join("test.txt");
        std::fs::write(&file_path, "hello").unwrap();

        let target = SystemTime::UNIX_EPOCH + Duration::from_secs(NEW_YEAR_2024);
        set_file_mtime(&file_path, target).unwrap();

        let meta = std::fs::metadata(&file_path).unwrap();

        // Allow 1-second tolerance for filesystem granularity
        let mtime_gap = time_gap(meta.modified().unwrap(), target);
        assert!(
            mtime_gap.as_secs() <= 1,
            "mtime should be within 1s of target, diff={:?}",
            mtime_gap
        );

        // The access time is set to the same instant.
        let atime_gap = time_gap(meta.accessed().unwrap(), target);
        assert!(
            atime_gap.as_secs() <= 1,
            "atime should be within 1s of target, diff={:?}",
            atime_gap
        );

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_set_file_mtime_nonexistent_file() {
        let result = set_file_mtime(Path::new("/nonexistent/file.txt"), SystemTime::UNIX_EPOCH);
        assert!(result.is_err());
    }

    // -----------------------------------------------------------------------
    // apply_mod_timestamp tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_apply_mod_timestamp_sets_mtime_on_regular_files() {
        let dir = scratch_dir("apply_mod_timestamp");

        // Create two regular files and a subdirectory (should be skipped)
        std::fs::write(dir.join("a.txt"), "aaa").unwrap();
        std::fs::write(dir.join("b.txt"), "bbb").unwrap();
        std::fs::create_dir(dir.join("subdir")).unwrap();

        let log = crate::log::StageLogger::new("test", crate::log::Verbosity::Quiet);
        apply_mod_timestamp(&dir, "1704067200", &log).unwrap();

        let target = SystemTime::UNIX_EPOCH + Duration::from_secs(NEW_YEAR_2024);
        for name in &["a.txt", "b.txt"] {
            let meta = std::fs::metadata(dir.join(name)).unwrap();
            let gap = time_gap(meta.modified().unwrap(), target);
            assert!(
                gap.as_secs() <= 1,
                "{name}: mtime should be within 1s of target, diff={:?}",
                gap
            );
        }

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_apply_mod_timestamp_invalid_timestamp_errors() {
        let dir = scratch_dir("apply_mod_timestamp_invalid");

        let log = crate::log::StageLogger::new("test", crate::log::Verbosity::Quiet);
        let result = apply_mod_timestamp(&dir, "not-valid", &log);
        assert!(result.is_err());

        let _ = std::fs::remove_dir_all(&dir);
    }
}