Skip to main content

sbom_tools/cli/
cache.rs

1//! CLI handler for the `cache` command.
2//!
3//! Manages the on-disk enrichment cache so that air-gapped (`--offline`) runs
4//! are fully served from local data:
5//!
6//! - `cache status` — list cached sources, entry counts, ages, total size.
7//! - `cache warm <sbom>` — pre-fetch enrichment data for an SBOM's components.
8//! - `cache clear` — remove all cached entries.
9//! - `cache export <path>` / `cache import <path>` — copy the whole cache tree
10//!   for sneakernet transfer between an online and an air-gapped machine.
11//!
12//! Export/import use a plain recursive directory copy (no archive format, no new
13//! dependency): the cache is already a tree of small JSON files, so a directory
14//! copy is the simplest portable bundle and keeps the tool dependency-free under
15//! cargo-deny. The destination is a self-contained `sbom-tools` cache tree the
16//! `--offline` reader consumes directly.
17
18use std::fs;
19use std::path::{Path, PathBuf};
20use std::time::Duration;
21
22use anyhow::{Context, Result};
23
24use crate::enrichment::source::root_cache_dir;
25use crate::pipeline::exit_codes;
26
// NOTE(review): this enum derives `clap::Subcommand`; clap's derive normally
// turns the `///` comments on variants and fields into the CLI help text, so
// treat those comments as user-facing strings when editing them.
/// Cache management action.
#[derive(Debug, Clone, clap::Subcommand)]
pub enum CacheAction {
    /// List cached sources with entry counts, ages, and total size
    Status,
    /// Pre-fetch enrichment data for an SBOM so a later --offline run is served
    Warm {
        /// SBOM file to warm the cache for
        sbom: PathBuf,
        /// Warm every source (OSV, EOL, KEV, EPSS, staleness, HuggingFace), not just OSV
        // Boolean switch; when false only the OSV source is warmed (see `cache_warm`).
        #[arg(long)]
        all_sources: bool,
    },
    /// Remove all cached enrichment entries
    Clear,
    /// Copy the whole cache tree to a directory for sneakernet transfer
    Export {
        /// Destination directory (created if absent)
        path: PathBuf,
    },
    /// Import a previously exported cache tree into the local cache
    Import {
        /// Source directory produced by `cache export`
        path: PathBuf,
    },
}
53
54/// Run the `cache` command.
55pub fn run_cache(action: CacheAction, quiet: bool) -> Result<i32> {
56    match action {
57        CacheAction::Status => cache_status(quiet),
58        CacheAction::Warm { sbom, all_sources } => cache_warm(&sbom, all_sources, quiet),
59        CacheAction::Clear => cache_clear(quiet),
60        CacheAction::Export { path } => cache_export(&path, quiet),
61        CacheAction::Import { path } => cache_import(&path, quiet),
62    }
63}
64
/// The enrichment source namespaces that live under the root cache directory.
///
/// `cache status` and `cache clear` walk exactly this list, so it must name
/// every namespace any client writes under (see each
/// `namespaced_cache_dir(...)` call). EPSS and HuggingFace were previously
/// omitted, so their caches were never purged or reported.
// `cache export` / `cache import` copy the whole cache tree and do not consult
// this list.
const SOURCE_NAMESPACES: &[&str] = &["osv", "eol", "kev", "epss", "staleness", "huggingface"];
72
/// Aggregate counts for one source's cache directory.
///
/// Produced by `source_status` and rendered as one table row by `cache_status`.
struct SourceStatus {
    /// Namespace name the row is reported under (e.g. `osv`).
    name: String,
    /// Number of directory entries with a `.json` extension.
    entries: usize,
    /// Sum of the sizes (in bytes) of the counted entries whose metadata was readable.
    total_size: u64,
    /// Largest time-since-modification among the entries; `None` if no entry
    /// had a usable modification time.
    oldest: Option<Duration>,
    /// Smallest time-since-modification among the entries; `None` if no entry
    /// had a usable modification time.
    newest: Option<Duration>,
}
81
82fn source_status(name: &str, dir: &Path) -> SourceStatus {
83    let mut entries = 0usize;
84    let mut total_size = 0u64;
85    let mut oldest: Option<Duration> = None;
86    let mut newest: Option<Duration> = None;
87
88    if let Ok(read_dir) = fs::read_dir(dir) {
89        for entry in read_dir.flatten() {
90            let path = entry.path();
91            if path.extension().is_none_or(|e| e != "json") {
92                continue;
93            }
94            entries += 1;
95            if let Ok(meta) = entry.metadata() {
96                total_size += meta.len();
97                if let Ok(modified) = meta.modified()
98                    && let Ok(age) = modified.elapsed()
99                {
100                    oldest = Some(oldest.map_or(age, |o| o.max(age)));
101                    newest = Some(newest.map_or(age, |n| n.min(age)));
102                }
103            }
104        }
105    }
106
107    SourceStatus {
108        name: name.to_string(),
109        entries,
110        total_size,
111        oldest,
112        newest,
113    }
114}
115
116fn cache_status(quiet: bool) -> Result<i32> {
117    let root = root_cache_dir();
118    if !root.exists() {
119        if !quiet {
120            println!("No cache directory yet ({}).", root.display());
121        }
122        return Ok(exit_codes::SUCCESS);
123    }
124
125    let mut total_entries = 0usize;
126    let mut total_size = 0u64;
127    let mut rows: Vec<SourceStatus> = Vec::new();
128    for ns in SOURCE_NAMESPACES {
129        let dir = root.join(ns);
130        if dir.exists() {
131            let status = source_status(ns, &dir);
132            total_entries += status.entries;
133            total_size += status.total_size;
134            rows.push(status);
135        }
136    }
137
138    if quiet {
139        return Ok(exit_codes::SUCCESS);
140    }
141
142    println!("Cache directory: {}", root.display());
143    if rows.is_empty() {
144        println!("  (no cached enrichment data)");
145        return Ok(exit_codes::SUCCESS);
146    }
147
148    println!(
149        "{:<12} {:>8} {:>12} {:>12} {:>12}",
150        "SOURCE", "ENTRIES", "SIZE", "OLDEST", "NEWEST"
151    );
152    for row in &rows {
153        println!(
154            "{:<12} {:>8} {:>12} {:>12} {:>12}",
155            row.name,
156            row.entries,
157            human_size(row.total_size),
158            row.oldest.map_or_else(|| "-".to_string(), human_age),
159            row.newest.map_or_else(|| "-".to_string(), human_age),
160        );
161    }
162    println!(
163        "{:<12} {:>8} {:>12}",
164        "TOTAL",
165        total_entries,
166        human_size(total_size)
167    );
168
169    Ok(exit_codes::SUCCESS)
170}
171
172/// Warm the cache by enriching the SBOM with all (or just OSV) sources, forcing
173/// fresh fetches so the on-disk cache is fully populated for a later offline run.
174fn cache_warm(sbom_path: &Path, all_sources: bool, quiet: bool) -> Result<i32> {
175    use crate::config::EnrichmentConfig;
176
177    // Warming requires the network, so it must not run in offline mode.
178    if crate::enrichment::source::is_offline() {
179        anyhow::bail!("cannot warm the cache in offline mode: run `cache warm` while online");
180    }
181
182    let mut parsed = crate::pipeline::parse_sbom_with_context(sbom_path, quiet)?;
183
184    let mut config = EnrichmentConfig::osv();
185    config.enable_eol = all_sources;
186    config.enable_kev = all_sources;
187    config.enable_epss = all_sources;
188    config.enable_staleness = all_sources;
189    config.enable_huggingface = all_sources;
190    // Force fresh fetches so every queryable component lands in the cache.
191    config.bypass_cache = true;
192    config.offline = false;
193
194    let stats = crate::pipeline::enrich_sbom_full(parsed.sbom_mut(), &config, quiet);
195
196    if !quiet {
197        for warning in &stats.warnings {
198            eprintln!("Warning: {warning}");
199        }
200        let n = parsed.sbom().component_count();
201        println!(
202            "Warmed cache for {n} component(s) from {} ({}).",
203            sbom_path.display(),
204            if all_sources {
205                "OSV, EOL, KEV, EPSS, staleness, HuggingFace"
206            } else {
207                "OSV"
208            }
209        );
210    }
211
212    Ok(exit_codes::SUCCESS)
213}
214
215fn cache_clear(quiet: bool) -> Result<i32> {
216    let root = root_cache_dir();
217    if !root.exists() {
218        if !quiet {
219            println!("Nothing to clear ({} does not exist).", root.display());
220        }
221        return Ok(exit_codes::SUCCESS);
222    }
223
224    let mut removed = 0usize;
225    for ns in SOURCE_NAMESPACES {
226        let dir = root.join(ns);
227        if let Ok(read_dir) = fs::read_dir(&dir) {
228            for entry in read_dir.flatten() {
229                let path = entry.path();
230                if path.extension().is_some_and(|e| e == "json") && fs::remove_file(&path).is_ok() {
231                    removed += 1;
232                }
233            }
234        }
235    }
236
237    if !quiet {
238        println!("Cleared {removed} cached entr{}.", plural(removed));
239    }
240    Ok(exit_codes::SUCCESS)
241}
242
243fn cache_export(dest: &Path, quiet: bool) -> Result<i32> {
244    let root = root_cache_dir();
245    if !root.exists() {
246        anyhow::bail!("no cache to export ({} does not exist)", root.display());
247    }
248
249    fs::create_dir_all(dest)
250        .with_context(|| format!("creating export directory {}", dest.display()))?;
251    let copied = copy_dir_recursive(&root, dest)?;
252
253    if !quiet {
254        println!(
255            "Exported {copied} cache file(s) to {} (copy this to the air-gapped host, then `cache import`).",
256            dest.display()
257        );
258    }
259    Ok(exit_codes::SUCCESS)
260}
261
262fn cache_import(src: &Path, quiet: bool) -> Result<i32> {
263    if !src.exists() {
264        anyhow::bail!("import source {} does not exist", src.display());
265    }
266
267    let root = root_cache_dir();
268    fs::create_dir_all(&root)
269        .with_context(|| format!("creating cache directory {}", root.display()))?;
270    let copied = copy_dir_recursive(src, &root)?;
271
272    if !quiet {
273        println!(
274            "Imported {copied} cache file(s) into {}. Run with --offline to use them.",
275            root.display()
276        );
277    }
278    Ok(exit_codes::SUCCESS)
279}
280
281/// Recursively copy every file from `src` into `dest`, mirroring the directory
282/// structure. Returns the number of files copied.
283fn copy_dir_recursive(src: &Path, dest: &Path) -> Result<usize> {
284    let mut copied = 0usize;
285    for entry in
286        fs::read_dir(src).with_context(|| format!("reading directory {}", src.display()))?
287    {
288        let entry = entry?;
289        let file_type = entry.file_type()?;
290        let from = entry.path();
291        let to = dest.join(entry.file_name());
292        if file_type.is_dir() {
293            fs::create_dir_all(&to).with_context(|| format!("creating {}", to.display()))?;
294            copied += copy_dir_recursive(&from, &to)?;
295        } else if file_type.is_file() {
296            if let Some(parent) = to.parent() {
297                fs::create_dir_all(parent).ok();
298            }
299            fs::copy(&from, &to)
300                .with_context(|| format!("copying {} -> {}", from.display(), to.display()))?;
301            copied += 1;
302        }
303    }
304    Ok(copied)
305}
306
/// Human-readable byte size using 1024-based units (e.g. `12.3 KB`).
///
/// Values below 1 KB are printed as an exact byte count (`512 B`); larger
/// values are scaled to the biggest unit that fits (KB, MB or GB) and shown
/// with one decimal place.
fn human_size(bytes: u64) -> String {
    const KB: f64 = 1024.0;
    const MB: f64 = KB * 1024.0;
    const GB: f64 = MB * 1024.0;
    // Largest unit first so the first match is the most compact rendering.
    const UNITS: [(&str, f64); 3] = [("GB", GB), ("MB", MB), ("KB", KB)];

    // The float conversion can lose precision for huge values, which is
    // irrelevant at one decimal place.
    let b = bytes as f64;
    for &(label, scale) in UNITS.iter() {
        if b >= scale {
            return format!("{:.1} {label}", b / scale);
        }
    }
    format!("{bytes} B")
}
320
/// Human-readable age in the single largest whole unit (e.g. `3d`, `5h`,
/// `12m`); anything under a minute is rendered as `<1m`.
fn human_age(age: Duration) -> String {
    const MINUTE: u64 = 60;
    const HOUR: u64 = 60 * MINUTE;
    const DAY: u64 = 24 * HOUR;

    // Whole units only: the remainder is truncated, not rounded.
    match age.as_secs() {
        secs if secs >= DAY => format!("{}d", secs / DAY),
        secs if secs >= HOUR => format!("{}h", secs / HOUR),
        secs if secs >= MINUTE => format!("{}m", secs / MINUTE),
        _ => String::from("<1m"),
    }
}
334
/// Suffix that completes the stem "entr" for a count of `n`: `"y"` for exactly
/// one, `"ies"` for anything else (including zero).
const fn plural(n: usize) -> &'static str {
    match n {
        1 => "y",
        _ => "ies",
    }
}
338
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn human_size_formats() {
        assert_eq!(human_size(512), "512 B");
        assert_eq!(human_size(2048), "2.0 KB");
        assert_eq!(human_size(3 * 1024 * 1024), "3.0 MB");
    }

    /// Unit switches happen exactly at the 1024-based thresholds.
    #[test]
    fn human_size_unit_boundaries() {
        assert_eq!(human_size(0), "0 B");
        assert_eq!(human_size(1023), "1023 B");
        assert_eq!(human_size(1024), "1.0 KB");
        assert_eq!(human_size(1024 * 1024), "1.0 MB");
    }

    #[test]
    fn human_age_formats() {
        assert_eq!(human_age(Duration::from_secs(30)), "<1m");
        assert_eq!(human_age(Duration::from_secs(120)), "2m");
        assert_eq!(human_age(Duration::from_secs(7200)), "2h");
        assert_eq!(human_age(Duration::from_secs(2 * 86_400)), "2d");
    }

    /// Ages are truncated to whole units and switch unit exactly on the
    /// minute/hour/day thresholds.
    #[test]
    fn human_age_unit_boundaries() {
        assert_eq!(human_age(Duration::from_secs(0)), "<1m");
        assert_eq!(human_age(Duration::from_secs(59)), "<1m");
        assert_eq!(human_age(Duration::from_secs(60)), "1m");
        assert_eq!(human_age(Duration::from_secs(3_599)), "59m");
        assert_eq!(human_age(Duration::from_secs(3_600)), "1h");
        assert_eq!(human_age(Duration::from_secs(86_399)), "23h");
        assert_eq!(human_age(Duration::from_secs(86_400)), "1d");
    }

    #[test]
    fn plural_suffix() {
        assert_eq!(plural(0), "ies");
        assert_eq!(plural(1), "y");
        assert_eq!(plural(2), "ies");
    }

    #[test]
    fn copy_dir_recursive_roundtrip() {
        let src = tempfile::tempdir().unwrap();
        let dst = tempfile::tempdir().unwrap();
        fs::create_dir_all(src.path().join("osv")).unwrap();
        fs::write(src.path().join("osv").join("a.json"), "{}").unwrap();
        fs::write(src.path().join("osv").join("b.json"), "{}").unwrap();

        let copied = copy_dir_recursive(src.path(), dst.path()).unwrap();
        assert_eq!(copied, 2);
        assert!(dst.path().join("osv").join("a.json").exists());
        assert!(dst.path().join("osv").join("b.json").exists());
    }

    /// Nested directories are mirrored, empty directories are recreated, file
    /// contents survive, and only files count towards the returned total.
    #[test]
    fn copy_dir_recursive_nested_and_empty_dirs() {
        let src = tempfile::tempdir().unwrap();
        let dst = tempfile::tempdir().unwrap();
        fs::create_dir_all(src.path().join("osv").join("deep")).unwrap();
        fs::create_dir_all(src.path().join("kev")).unwrap();
        fs::write(src.path().join("top.json"), "[]").unwrap();
        fs::write(src.path().join("osv").join("a.json"), "{}").unwrap();
        fs::write(
            src.path().join("osv").join("deep").join("c.json"),
            "{\"k\":1}",
        )
        .unwrap();

        let copied = copy_dir_recursive(src.path(), dst.path()).unwrap();
        assert_eq!(copied, 3);
        assert!(dst.path().join("kev").is_dir());
        assert!(dst.path().join("top.json").is_file());
        assert_eq!(
            fs::read_to_string(dst.path().join("osv").join("deep").join("c.json")).unwrap(),
            "{\"k\":1}"
        );
    }

    /// Regression: `cache status`/`cache clear` enumerate a fixed namespace
    /// list. EPSS and HuggingFace clients write under their own namespaces, so
    /// those must appear in the list or their caches are silently never purged
    /// or reported.
    #[test]
    fn source_namespaces_cover_epss_and_huggingface() {
        assert!(
            SOURCE_NAMESPACES.contains(&"epss"),
            "epss namespace must be covered by cache status/clear"
        );
        assert!(
            SOURCE_NAMESPACES.contains(&"huggingface"),
            "huggingface namespace must be covered by cache status/clear"
        );

        // The default cache dirs of the EPSS/HF clients must end in exactly the
        // namespace strings the const enumerates, tying this test to the real
        // write locations.
        let epss_dir = crate::enrichment::epss::EpssClientConfig::default().cache_dir;
        assert!(
            epss_dir.ends_with("epss"),
            "EPSS client writes under the 'epss' namespace"
        );
        let hf_dir = crate::enrichment::huggingface::HuggingFaceConfig::default().cache_dir;
        assert!(
            hf_dir.ends_with("huggingface"),
            "HuggingFace client writes under the 'huggingface' namespace"
        );
    }

    /// Regression: clearing the cache root removes EPSS and HuggingFace JSON
    /// entries, not just the original four namespaces.
    ///
    /// `cache_clear` always operates on the real cache root, so this test
    /// replays its per-namespace removal loop against a temporary root rather
    /// than calling it; keep the two loops in sync.
    #[test]
    fn clear_logic_removes_all_namespaces() {
        let root = tempfile::tempdir().unwrap();
        let mut expected_removed = 0usize;
        for ns in SOURCE_NAMESPACES {
            let dir = root.path().join(ns);
            fs::create_dir_all(&dir).unwrap();
            fs::write(dir.join("entry.json"), "{}").unwrap();
            expected_removed += 1;
        }

        // Mirror cache_clear's per-namespace removal against the temp root.
        let mut removed = 0usize;
        for ns in SOURCE_NAMESPACES {
            let dir = root.path().join(ns);
            if let Ok(read_dir) = fs::read_dir(&dir) {
                for entry in read_dir.flatten() {
                    let path = entry.path();
                    if path.extension().is_some_and(|e| e == "json")
                        && fs::remove_file(&path).is_ok()
                    {
                        removed += 1;
                    }
                }
            }
        }

        assert_eq!(removed, expected_removed);
        assert!(
            !root.path().join("epss").join("entry.json").exists(),
            "epss entry must be cleared"
        );
        assert!(
            !root.path().join("huggingface").join("entry.json").exists(),
            "huggingface entry must be cleared"
        );
    }
}