Skip to main content

anodizer_core/
util.rs

1use std::collections::{HashMap, HashSet, VecDeque};
2use std::fs;
3use std::path::Path;
4use std::time::{Duration, SystemTime};
5
6use anyhow::{Context as _, Result};
7
8/// Compile a regex, panicking with a diagnostic if the pattern is invalid.
9/// Intended for `LazyLock::new(…)` initializers where the pattern is a
10/// hardcoded literal (or built from `format!` over known-safe fragments).
11/// A compile failure means a programmer bug surfaced at first use, not a
12/// runtime-path user-input error. Exists because the anti-pattern hook
13/// forbids bare panicking error helpers in lib code, and `Regex::new` on
14/// a trusted literal is inherently infallible.
15pub fn static_regex(pattern: &str) -> regex::Regex {
16    regex::Regex::new(pattern)
17        .unwrap_or_else(|e| panic!("invalid static regex literal `{}`: {}", pattern, e))
18}
19
20// ---------------------------------------------------------------------------
21// Topological sort (Kahn's algorithm)
22// ---------------------------------------------------------------------------
23
/// Topologically sort items by their dependency lists (Kahn's algorithm).
///
/// Input: slice of `(name, depends_on)` pairs.
/// Output: names in dependency order (dependencies before dependents).
///
/// - Dependencies that are not in the input set are silently ignored.
/// - Deterministic: the initial zero-in-degree nodes are sorted
///   alphabetically, and so is each batch of nodes released by a processed
///   node.
/// - Duplicate names are treated as one node whose dependencies are the
///   union of all its entries; the name appears once in the output.
/// - On cycles: sorted nodes are returned followed by the remaining nodes in
///   their original order.
pub fn topological_sort(items: &[(impl AsRef<str>, impl AsRef<[String]>)]) -> Vec<String> {
    let names: HashSet<&str> = items.iter().map(|(n, _)| n.as_ref()).collect();

    // Build in-degrees and edges (dep → dependents) in the same pass so the
    // two always agree: every recorded edge contributes exactly one unit of
    // in-degree, which guarantees the decrement below can never underflow,
    // even when a name or a dependency is listed more than once.
    let mut in_degree: HashMap<&str, usize> = HashMap::with_capacity(names.len());
    let mut edges: HashMap<&str, Vec<&str>> = HashMap::new();
    for (name, deps) in items {
        let name = name.as_ref();
        let degree = in_degree.entry(name).or_insert(0);
        for dep in deps.as_ref() {
            let dep = dep.as_str();
            if names.contains(dep) {
                *degree += 1;
                edges.entry(dep).or_default().push(name);
            }
        }
    }

    // Seed the queue with every node that has no in-set dependencies; sort
    // because HashMap iteration order is unspecified.
    let mut seeds: Vec<&str> = in_degree
        .iter()
        .filter(|(_, degree)| **degree == 0)
        .map(|(&name, _)| name)
        .collect();
    seeds.sort_unstable();
    let mut queue: VecDeque<&str> = VecDeque::from(seeds);

    let mut emitted: HashSet<&str> = HashSet::with_capacity(names.len());
    let mut result = Vec::with_capacity(names.len());
    while let Some(node) = queue.pop_front() {
        emitted.insert(node);
        result.push(node.to_string());

        if let Some(dependents) = edges.get(node) {
            let mut ready: Vec<&str> = Vec::new();
            for &dependent in dependents {
                if let Some(degree) = in_degree.get_mut(dependent) {
                    *degree -= 1;
                    if *degree == 0 {
                        ready.push(dependent);
                    }
                }
            }
            ready.sort_unstable();
            queue.extend(ready);
        }
    }

    // Cycle case: whatever was never released is appended in original order.
    // `insert` returning `false` filters out both already-sorted names and
    // repeated occurrences of the same name.
    if result.len() < names.len() {
        for (name, _) in items {
            let name = name.as_ref();
            if emitted.insert(name) {
                result.push(name.to_string());
            }
        }
    }

    result
}
100
101// ---------------------------------------------------------------------------
102// find_binary
103// ---------------------------------------------------------------------------
104
/// Check whether a binary can be found on the system.
///
/// - Names containing a path separator (`/` or `\`) are treated as absolute
///   or relative paths and reported as found when the path exists.
/// - Bare names are looked up in every directory of the `PATH` environment
///   variable. On Windows each extension listed in `PATHEXT` (falling back
///   to `.COM;.EXE;.BAT;.CMD`) is also tried, so asking for `upx` matches
///   `upx.exe`.
///
/// This is a pure-Rust implementation that avoids shelling out to `which`
/// or `command -v`, making it portable across platforms. Only the existence
/// of a regular file is checked; executable permission bits are not
/// inspected.
///
/// `PATH` is read with `var_os`, so a `PATH` containing non-Unicode entries
/// is still searched instead of being treated as unset.
///
/// Returns `true` when a matching file is found, `false` otherwise
/// (including when `PATH` is not set).
pub fn find_binary(name: &str) -> bool {
    if name.contains('/') || name.contains('\\') {
        return Path::new(name).exists();
    }

    // On Windows, PATHEXT lists extensions to try (e.g., .COM;.EXE;.BAT;.CMD).
    // When the caller asks for "upx", we also check for "upx.exe", etc.
    let extensions: Vec<String> = if cfg!(windows) {
        std::env::var("PATHEXT")
            .unwrap_or_else(|_| ".COM;.EXE;.BAT;.CMD".to_string())
            .split(';')
            .filter(|ext| !ext.is_empty())
            .map(str::to_owned)
            .collect()
    } else {
        Vec::new()
    };

    let Some(path_var) = std::env::var_os("PATH") else {
        return false;
    };

    std::env::split_paths(&path_var).any(|dir| {
        dir.join(name).is_file()
            || extensions
                .iter()
                .any(|ext| dir.join(format!("{name}{ext}")).is_file())
    })
}
147
// ---------------------------------------------------------------------------
// mod_timestamp helpers (parse_mod_timestamp, apply_mod_timestamp, set_file_mtime)
// ---------------------------------------------------------------------------
155
156/// Parse a `mod_timestamp` string into a `SystemTime`.
157///
158/// Accepts:
159///   - Unix epoch seconds as an integer (e.g. `"1704067200"`)
160///   - RFC 3339 / ISO 8601 datetime (e.g. `"2024-01-01T00:00:00Z"`)
161pub fn parse_mod_timestamp(raw: &str) -> Result<SystemTime> {
162    // Try Unix epoch integer first (most common in CI)
163    if let Ok(epoch_secs) = raw.parse::<u64>() {
164        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
165    }
166    // Try RFC 3339 / ISO 8601 via chrono
167    if let Ok(dt) = chrono::DateTime::parse_from_rfc3339(raw) {
168        let epoch_secs = dt.timestamp() as u64;
169        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
170    }
171    // Try chrono's more lenient parsing for formats like "2024-01-01T00:00:00"
172    if let Ok(dt) = chrono::NaiveDateTime::parse_from_str(raw, "%Y-%m-%dT%H:%M:%S") {
173        let epoch_secs = dt.and_utc().timestamp() as u64;
174        return Ok(SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs));
175    }
176    anyhow::bail!(
177        "mod_timestamp value '{raw}' is not a valid timestamp. \
178         Accepted formats: Unix epoch seconds (e.g. \"1704067200\") or \
179         RFC 3339 datetime (e.g. \"2024-01-01T00:00:00Z\")"
180    )
181}
182
183/// Apply `mod_timestamp` to all regular files in a directory.
184///
185/// Parses the timestamp via `parse_mod_timestamp`, then sets the mtime on
186/// every regular file in `dir`.
187pub fn apply_mod_timestamp(dir: &Path, raw: &str, log: &crate::log::StageLogger) -> Result<()> {
188    let mtime = parse_mod_timestamp(raw)?;
189
190    for entry in fs::read_dir(dir).with_context(|| format!("read staging dir {}", dir.display()))? {
191        let entry = entry?;
192        let ft = entry.file_type()?;
193        if ft.is_file() {
194            set_file_mtime(&entry.path(), mtime)?;
195        }
196    }
197
198    log.status(&format!("applied mod_timestamp={raw} to staging files"));
199    Ok(())
200}
201
202/// Set the modification time on a single file.
203pub fn set_file_mtime(path: &Path, mtime: SystemTime) -> Result<()> {
204    let file = std::fs::OpenOptions::new()
205        .write(true)
206        .open(path)
207        .with_context(|| format!("open {} for mtime update", path.display()))?;
208    file.set_times(
209        std::fs::FileTimes::new()
210            .set_accessed(mtime)
211            .set_modified(mtime),
212    )
213    .with_context(|| format!("set mtime on {}", path.display()))?;
214    Ok(())
215}
216
217/// Set the modification time on a single file from a Unix epoch (seconds).
218///
219/// Thin wrapper over `set_file_mtime` that accepts `SOURCE_DATE_EPOCH`-style
220/// `i64` seconds (signed to permit pre-1970 values per the spec).
221pub fn set_file_mtime_epoch(path: &Path, epoch_secs: i64) -> Result<()> {
222    let mtime = if epoch_secs >= 0 {
223        SystemTime::UNIX_EPOCH + Duration::from_secs(epoch_secs as u64)
224    } else {
225        SystemTime::UNIX_EPOCH - Duration::from_secs((-epoch_secs) as u64)
226    };
227    set_file_mtime(path, mtime)
228}
229
230// ---------------------------------------------------------------------------
231// collect_replace_archives
232// ---------------------------------------------------------------------------
233
234/// Collect archive artifact paths for a given crate + target, for removal by `replace` options.
235pub fn collect_replace_archives(
236    artifacts: &crate::artifact::ArtifactRegistry,
237    crate_name: &str,
238    target: Option<&str>,
239) -> Vec<std::path::PathBuf> {
240    artifacts
241        .by_kind_and_crate(crate::artifact::ArtifactKind::Archive, crate_name)
242        .iter()
243        .filter(|a| a.target.as_deref() == target)
244        .map(|a| a.path.clone())
245        .collect()
246}
247
248/// Gated variant of [`collect_replace_archives`]: returns the matching
249/// archive paths only when `replace` is `Some(true)`. Used by packaging
250/// stages (dmg, msi, flatpak, snapcraft, nsis, pkg, appbundle) to
251/// replace a source archive with the packaged output when the user
252/// opts in via `replace: true` on the config. Returns an empty vec
253/// when `replace` is unset or `false`.
254pub fn collect_if_replace(
255    replace: Option<bool>,
256    artifacts: &crate::artifact::ArtifactRegistry,
257    crate_name: &str,
258    target: Option<&str>,
259) -> Vec<std::path::PathBuf> {
260    if replace.unwrap_or(false) {
261        collect_replace_archives(artifacts, crate_name, target)
262    } else {
263        Vec::new()
264    }
265}
266
/// Apply a "minimal trusted" environment to a `Command` after `env_clear()`.
///
/// Stage subprocess invocations (sbom, source-archive, …) clear the env to
/// stop accidental token leakage but still need a small set of platform-
/// neutral keys so that `git`, `tar`, `syft`, etc. behave normally — HOME
/// for tool config, USER for git author fallback, USERPROFILE/LOCALAPPDATA
/// for the Windows equivalents, TMPDIR/TMP/TEMP so temp-file allocation
/// doesn't land in a forbidden directory, and PATH so the tool itself can
/// find its dependencies. Keeping this list in core means any new entry
/// (e.g. SSL_CERT_DIR for syft pulling enrich data) is added once.
///
/// Each listed key that is set in the current process is copied onto
/// `command` verbatim; keys that are unset are skipped. Values are read
/// with `var_os`, so a value that is not valid Unicode (for instance a
/// `PATH` with a non-UTF-8 directory) is still forwarded rather than being
/// silently dropped.
pub fn apply_minimal_env(command: &mut std::process::Command) {
    const PASSTHROUGH: &[&str] = &[
        "HOME",
        "USER",
        "USERPROFILE",
        "TMPDIR",
        "TMP",
        "TEMP",
        "PATH",
        "LOCALAPPDATA",
    ];
    for key in PASSTHROUGH {
        if let Some(val) = std::env::var_os(key) {
            command.env(key, val);
        }
    }
}
294
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute distance between two instants, regardless of which is later.
    fn time_gap(a: SystemTime, b: SystemTime) -> Duration {
        a.duration_since(b).unwrap_or_else(|err| err.duration())
    }

    /// Create an empty scratch directory under the system temp dir.
    ///
    /// The process id is part of the name so that concurrent test processes
    /// cannot delete each other's fixtures.
    fn scratch_dir(tag: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(format!(
            "anodizer_test_{tag}_{}",
            std::process::id()
        ));
        let _ = std::fs::remove_dir_all(&dir);
        std::fs::create_dir_all(&dir).unwrap();
        dir
    }

    // -----------------------------------------------------------------------
    // topological_sort tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_topo_sort_simple_chain() {
        let items = vec![
            ("c".to_string(), vec!["b".to_string()]),
            ("b".to_string(), vec!["a".to_string()]),
            ("a".to_string(), vec![]),
        ];
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b", "c"]);
    }

    #[test]
    fn test_topo_sort_no_deps() {
        let items = vec![("b".to_string(), vec![]), ("a".to_string(), vec![])];
        // Deterministic: alphabetical
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b"]);
    }

    #[test]
    fn test_topo_sort_ignores_external_deps() {
        let items = vec![
            (
                "b".to_string(),
                vec!["a".to_string(), "external".to_string()],
            ),
            ("a".to_string(), vec![]),
        ];
        let sorted = topological_sort(&items);
        assert_eq!(sorted, vec!["a", "b"]);
    }

    #[test]
    fn test_topo_sort_diamond() {
        let items = vec![
            ("d".to_string(), vec!["b".to_string(), "c".to_string()]),
            ("b".to_string(), vec!["a".to_string()]),
            ("c".to_string(), vec!["a".to_string()]),
            ("a".to_string(), vec![]),
        ];
        let sorted = topological_sort(&items);
        // a first, d last, and b/c released together by a, hence in
        // alphabetical order: the whole sequence is deterministic.
        assert_eq!(sorted, vec!["a", "b", "c", "d"]);
    }

    #[test]
    fn test_topo_sort_cycle_appends_remaining() {
        let items = vec![
            ("a".to_string(), vec!["b".to_string()]),
            ("b".to_string(), vec!["a".to_string()]),
            ("c".to_string(), vec![]),
        ];
        let sorted = topological_sort(&items);
        // c has no deps, so it is sorted first; a and b form a cycle and are
        // appended afterwards in their original input order.
        assert_eq!(sorted, vec!["c", "a", "b"]);
    }

    #[test]
    fn test_topo_sort_empty() {
        let items: Vec<(String, Vec<String>)> = vec![];
        let sorted = topological_sort(&items);
        assert!(sorted.is_empty());
    }

    // -----------------------------------------------------------------------
    // find_binary tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_find_binary_absolute_path_exists() {
        if cfg!(windows) {
            // cmd.exe exists on all Windows systems
            assert!(find_binary("C:\\Windows\\System32\\cmd.exe"));
        } else {
            // /usr/bin/env exists on virtually all Unix systems
            assert!(find_binary("/usr/bin/env"));
        }
    }

    #[test]
    fn test_find_binary_absolute_path_does_not_exist() {
        if cfg!(windows) {
            assert!(!find_binary("C:\\nonexistent\\binary\\path.exe"));
        } else {
            assert!(!find_binary("/nonexistent/binary/path"));
        }
    }

    #[test]
    fn test_find_binary_bare_name_on_path() {
        if cfg!(windows) {
            // "cmd.exe" should be findable on PATH on any Windows system.
            // The exact file name is used here so the test does not depend
            // on the PATHEXT extensions that find_binary also tries.
            assert!(find_binary("cmd.exe"));
        } else {
            // "env" should be findable on PATH on any Unix system
            assert!(find_binary("env"));
        }
    }

    #[test]
    fn test_find_binary_bare_name_not_on_path() {
        assert!(!find_binary("nonexistent-binary-xyz-12345"));
    }

    // -----------------------------------------------------------------------
    // parse_mod_timestamp tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_parse_mod_timestamp_epoch_integer() {
        let t = parse_mod_timestamp("1704067200").unwrap();
        let epoch = t.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
        assert_eq!(epoch, 1704067200);
    }

    #[test]
    fn test_parse_mod_timestamp_rfc3339() {
        let t = parse_mod_timestamp("2024-01-01T00:00:00Z").unwrap();
        let epoch = t.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
        assert_eq!(epoch, 1704067200);
    }

    #[test]
    fn test_parse_mod_timestamp_rfc3339_with_offset() {
        let t = parse_mod_timestamp("2024-01-01T01:00:00+01:00").unwrap();
        let epoch = t.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
        // 2024-01-01T01:00:00+01:00 is the same instant as 2024-01-01T00:00:00Z
        assert_eq!(epoch, 1704067200);
    }

    #[test]
    fn test_parse_mod_timestamp_naive_datetime() {
        let t = parse_mod_timestamp("2024-01-01T00:00:00").unwrap();
        let epoch = t.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
        assert_eq!(epoch, 1704067200);
    }

    #[test]
    fn test_parse_mod_timestamp_invalid() {
        let err = parse_mod_timestamp("not-a-timestamp").unwrap_err();
        let msg = err.to_string();
        assert!(
            msg.contains("not a valid timestamp"),
            "unexpected error: {msg}"
        );
        // Mirrors GoReleaser commit 50a034d: the parse error must include
        // the offending mtime value so misconfigurations are diagnosable.
        assert!(
            msg.contains("not-a-timestamp"),
            "error must include the bad value, got: {msg}"
        );
    }

    #[test]
    fn test_parse_mod_timestamp_zero() {
        let t = parse_mod_timestamp("0").unwrap();
        assert_eq!(t, SystemTime::UNIX_EPOCH);
    }

    // -----------------------------------------------------------------------
    // set_file_mtime tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_set_file_mtime_sets_both_atime_and_mtime() {
        let dir = scratch_dir("set_file_mtime");

        let file_path = dir.join("test.txt");
        std::fs::write(&file_path, "hello").unwrap();

        // Set mtime to a known epoch: 2024-01-01T00:00:00Z = 1704067200
        let target = SystemTime::UNIX_EPOCH + Duration::from_secs(1704067200);
        set_file_mtime(&file_path, target).unwrap();

        let meta = std::fs::metadata(&file_path).unwrap();

        // Allow 1-second tolerance for filesystem granularity
        let diff = time_gap(meta.modified().unwrap(), target);
        assert!(
            diff.as_secs() <= 1,
            "mtime should be within 1s of target, diff={:?}",
            diff
        );

        // set_file_mtime writes the access time as well, so verify it too.
        let diff_a = time_gap(meta.accessed().unwrap(), target);
        assert!(
            diff_a.as_secs() <= 1,
            "atime should be within 1s of target, diff={:?}",
            diff_a
        );

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_set_file_mtime_nonexistent_file() {
        let result = set_file_mtime(Path::new("/nonexistent/file.txt"), SystemTime::UNIX_EPOCH);
        assert!(result.is_err());
    }

    // -----------------------------------------------------------------------
    // apply_mod_timestamp tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_apply_mod_timestamp_sets_mtime_on_regular_files() {
        let dir = scratch_dir("apply_mod_timestamp");

        // Create two regular files and a subdirectory (should be skipped)
        std::fs::write(dir.join("a.txt"), "aaa").unwrap();
        std::fs::write(dir.join("b.txt"), "bbb").unwrap();
        std::fs::create_dir(dir.join("subdir")).unwrap();

        let log = crate::log::StageLogger::new("test", crate::log::Verbosity::Quiet);
        apply_mod_timestamp(&dir, "1704067200", &log).unwrap();

        let target = SystemTime::UNIX_EPOCH + Duration::from_secs(1704067200);
        for name in &["a.txt", "b.txt"] {
            let meta = std::fs::metadata(dir.join(name)).unwrap();
            let diff = time_gap(meta.modified().unwrap(), target);
            assert!(
                diff.as_secs() <= 1,
                "{name}: mtime should be within 1s of target, diff={:?}",
                diff
            );
        }

        let _ = std::fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_apply_mod_timestamp_invalid_timestamp_errors() {
        let dir = scratch_dir("apply_mod_timestamp_invalid");

        let log = crate::log::StageLogger::new("test", crate::log::Verbosity::Quiet);
        let result = apply_mod_timestamp(&dir, "not-valid", &log);
        assert!(result.is_err());

        let _ = std::fs::remove_dir_all(&dir);
    }
}