// provenant/license_detection/license_cache.rs

1use std::fmt::Write as _;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use rancor::{Panic, ResultExt};
7use sha2::{Digest, Sha256};
8
9use crate::cache::{CacheConfig, write_bytes_atomically};
10use crate::license_detection::index::CachedLicenseIndex;
11use crate::license_detection::models::{LoadedLicense, LoadedRule};
12
/// Name of the directory, directly under the cache root, that contains one subdirectory per
/// license cache namespace.
const CACHE_ROOT_DIR_NAME: &str = "license-index";
/// Extension given to serialized license index cache files; pruning only ever touches files
/// that carry this extension.
const CACHE_FILE_EXTENSION: &str = "rkyv";
15
/// Namespace under which a license index cache file is stored.
///
/// Every variant maps to its own subdirectory, so cache files from different namespaces never
/// share a directory.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LicenseCacheNamespace {
    /// Cache files kept in the `embedded` subdirectory.
    Embedded,
    /// Cache files kept in the `custom` subdirectory.
    CustomRules,
}

impl LicenseCacheNamespace {
    /// Returns the subdirectory name used for this namespace.
    fn directory_name(self) -> &'static str {
        // Exhaustive on purpose: a new variant must pick its directory explicitly.
        match self {
            Self::CustomRules => "custom",
            Self::Embedded => "embedded",
        }
    }
}
30
/// Settings that control where the license index cache lives and whether it is used.
#[derive(Debug, Clone)]
pub struct LicenseCacheConfig {
    /// Base directory of the cache; cache files are placed in a per-namespace subdirectory
    /// beneath it.
    pub root_dir: PathBuf,
    /// Not read anywhere in this module.
    // NOTE(review): presumably callers use this to force a rebuild of the index — confirm.
    pub reindex: bool,
    /// When `false`, loading, saving, deleting and size queries all return immediately without
    /// touching the file system.
    pub enabled: bool,
}
36
37impl LicenseCacheConfig {
38    pub fn new(root_dir: PathBuf, reindex: bool, enabled: bool) -> Self {
39        Self {
40            root_dir,
41            reindex,
42            enabled,
43        }
44    }
45
46    pub fn default_root_dir() -> PathBuf {
47        CacheConfig::default_root_dir_without_scan_root()
48    }
49
50    fn namespace_dir(&self, namespace: LicenseCacheNamespace) -> PathBuf {
51        self.root_dir
52            .join(CACHE_ROOT_DIR_NAME)
53            .join(namespace.directory_name())
54    }
55
56    fn cache_file_path(&self, namespace: LicenseCacheNamespace, fingerprint: &[u8; 32]) -> PathBuf {
57        self.namespace_dir(namespace).join(format!(
58            "{}.{}",
59            fingerprint_hex(fingerprint),
60            CACHE_FILE_EXTENSION
61        ))
62    }
63}
64
/// Renders a 32-byte fingerprint as 64 lowercase hexadecimal characters.
fn fingerprint_hex(fingerprint: &[u8; 32]) -> String {
    let buffer = String::with_capacity(fingerprint.len() * 2);
    fingerprint.iter().fold(buffer, |mut hex, byte| {
        // The formatting result is discarded, exactly as before.
        let _ = write!(hex, "{byte:02x}");
        hex
    })
}
72
73fn prune_namespace_dir(namespace_dir: &Path, active_path: &Path) -> Result<()> {
74    if !namespace_dir.exists() {
75        return Ok(());
76    }
77
78    for entry in fs::read_dir(namespace_dir)
79        .with_context(|| format!("Failed to read license cache namespace {namespace_dir:?}"))?
80    {
81        let path = entry?.path();
82        if path == active_path
83            || path.extension().and_then(|ext| ext.to_str()) != Some(CACHE_FILE_EXTENSION)
84        {
85            continue;
86        }
87        fs::remove_file(&path)
88            .with_context(|| format!("Failed to prune stale license cache file {path:?}"))?;
89    }
90
91    Ok(())
92}
93
94pub fn compute_rules_fingerprint(rules: &[LoadedRule], licenses: &[LoadedLicense]) -> [u8; 32] {
95    let mut hasher = Sha256::new();
96
97    let mut sorted_rules: Vec<_> = rules.iter().collect();
98    sorted_rules.sort_by_key(|r| &r.identifier);
99    for rule in &sorted_rules {
100        hasher.update(rule.identifier.as_bytes());
101        hasher.update(rule.license_expression.as_bytes());
102        hasher.update(rule.text.as_bytes());
103    }
104
105    let mut sorted_licenses: Vec<_> = licenses.iter().collect();
106    sorted_licenses.sort_by_key(|l| &l.key);
107    for license in &sorted_licenses {
108        hasher.update(license.key.as_bytes());
109        hasher.update(license.text.as_bytes());
110    }
111
112    hasher.finalize().into()
113}
114
115pub fn compute_artifact_fingerprint(artifact_bytes: &[u8]) -> [u8; 32] {
116    Sha256::digest(artifact_bytes).into()
117}
118
119pub fn load_cached_index(
120    config: &LicenseCacheConfig,
121    namespace: LicenseCacheNamespace,
122    fingerprint: &[u8; 32],
123) -> Result<Option<CachedLicenseIndex>> {
124    if !config.enabled {
125        return Ok(None);
126    }
127
128    let cache_path = config.cache_file_path(namespace, fingerprint);
129
130    if !cache_path.exists() {
131        return Ok(None);
132    }
133
134    let bytes = match fs::read(&cache_path) {
135        Ok(bytes) => bytes,
136        Err(_) => return Ok(None),
137    };
138
139    if bytes.len() < 32 {
140        return Ok(None);
141    }
142
143    let stored_fingerprint: [u8; 32] = bytes[..32].try_into().unwrap();
144    if stored_fingerprint != *fingerprint {
145        return Ok(None);
146    }
147
148    let archived =
149        match rkyv::access::<rkyv::Archived<CachedLicenseIndex>, rkyv::rancor::Error>(&bytes[32..])
150        {
151            Ok(archived) => archived,
152            Err(_) => return Ok(None),
153        };
154
155    let cached: CachedLicenseIndex =
156        rkyv::deserialize::<CachedLicenseIndex, Panic>(archived).always_ok();
157
158    Ok(Some(cached))
159}
160
161pub fn save_cached_index(
162    config: &LicenseCacheConfig,
163    namespace: LicenseCacheNamespace,
164    cached: &CachedLicenseIndex,
165    fingerprint: &[u8; 32],
166) -> Result<()> {
167    if !config.enabled {
168        return Ok(());
169    }
170
171    let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(cached)
172        .map_err(|e| anyhow::anyhow!("Failed to serialize license index cache: {}", e))?;
173
174    let mut payload = Vec::with_capacity(fingerprint.len() + rkyv_bytes.len());
175    payload.extend_from_slice(fingerprint);
176    payload.extend_from_slice(&rkyv_bytes);
177
178    let namespace_dir = config.namespace_dir(namespace);
179    let cache_path = config.cache_file_path(namespace, fingerprint);
180
181    crate::cache::locking::with_exclusive_cache_lock(&config.root_dir, || {
182        fs::create_dir_all(&namespace_dir)
183            .with_context(|| "Failed to create license index cache directory")?;
184        prune_namespace_dir(&namespace_dir, &cache_path)?;
185        write_bytes_atomically(&cache_path, &payload)
186            .with_context(|| "Failed to persist license index cache file")
187    })?;
188
189    Ok(())
190}
191
192pub fn delete_cache(
193    config: &LicenseCacheConfig,
194    namespace: LicenseCacheNamespace,
195    fingerprint: &[u8; 32],
196) -> Result<()> {
197    if !config.enabled {
198        return Ok(());
199    }
200
201    let cache_path = config.cache_file_path(namespace, fingerprint);
202    crate::cache::locking::with_exclusive_cache_lock(&config.root_dir, || -> Result<()> {
203        if cache_path.exists() {
204            fs::remove_file(&cache_path).context("Failed to delete license index cache file")?;
205        }
206        Ok(())
207    })?;
208
209    Ok(())
210}
211
212pub fn cache_file_size(
213    config: &LicenseCacheConfig,
214    namespace: LicenseCacheNamespace,
215    fingerprint: &[u8; 32],
216) -> Option<u64> {
217    if !config.enabled {
218        return None;
219    }
220
221    fs::metadata(config.cache_file_path(namespace, fingerprint))
222        .ok()
223        .map(|m| m.len())
224}
225
/// Unit tests for cache path layout, pruning on save, and the disabled-cache short circuit.
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;
    use crate::license_detection::index::dictionary::TokenDictionary;

    /// Builds an index whose fields are all defaults, apart from a marker SPDX list version.
    fn sample_cached_index() -> CachedLicenseIndex {
        CachedLicenseIndex {
            dictionary: TokenDictionary::default(),
            len_legalese: 0,
            rid_by_hash: Default::default(),
            rules_by_rid_bytes: Default::default(),
            tids_by_rid: Default::default(),
            rules_automaton_bytes: Default::default(),
            unknown_automaton_bytes: Default::default(),
            sets_by_rid: Default::default(),
            rule_metadata_by_identifier: Default::default(),
            msets_by_rid: Default::default(),
            high_sets_by_rid: Default::default(),
            high_postings_by_rid: Default::default(),
            false_positive_rids: Default::default(),
            approx_matchable_rids: Default::default(),
            licenses_by_key_bytes: Default::default(),
            pattern_id_to_rid: Default::default(),
            rid_by_spdx_key: Default::default(),
            unknown_spdx_rid: None,
            rids_by_high_tid: Default::default(),
            spdx_license_list_version: Some("test".to_string()),
        }
    }

    #[test]
    fn test_cache_file_path_uses_namespace_and_fingerprint() {
        let config = LicenseCacheConfig::new(PathBuf::from("/tmp/cache-root"), false, true);
        let fingerprint = [0xAB; 32];
        let hex = "ab".repeat(32);

        let embedded_path = config.cache_file_path(LicenseCacheNamespace::Embedded, &fingerprint);
        let expected_embedded = PathBuf::from(format!(
            "/tmp/cache-root/license-index/embedded/{}.rkyv",
            hex
        ));
        assert_eq!(embedded_path, expected_embedded);

        let custom_path = config.cache_file_path(LicenseCacheNamespace::CustomRules, &fingerprint);
        let expected_custom = PathBuf::from(format!(
            "/tmp/cache-root/license-index/custom/{}.rkyv",
            hex
        ));
        assert_eq!(custom_path, expected_custom);
    }

    #[test]
    fn test_save_cached_index_prunes_stale_namespace_entries() {
        let scratch = TempDir::new().expect("create temp dir");
        let config = LicenseCacheConfig::new(scratch.path().to_path_buf(), false, true);
        let fingerprint = [0x11; 32];

        // Seed the namespace with a leftover cache file that the save must remove.
        let namespace_dir = config.namespace_dir(LicenseCacheNamespace::Embedded);
        fs::create_dir_all(&namespace_dir).expect("create namespace dir");
        let stale_file = namespace_dir.join("stale.rkyv");
        fs::write(stale_file, b"old").expect("write stale cache file");

        let index = sample_cached_index();
        save_cached_index(
            &config,
            LicenseCacheNamespace::Embedded,
            &index,
            &fingerprint,
        )
        .expect("save cache");

        let remaining: Vec<PathBuf> = fs::read_dir(&namespace_dir)
            .expect("read namespace dir")
            .map(|entry| entry.expect("dir entry").path())
            .collect();
        let expected = config.cache_file_path(LicenseCacheNamespace::Embedded, &fingerprint);

        assert_eq!(remaining.len(), 1);
        assert_eq!(remaining[0], expected);
    }

    #[test]
    fn test_disabled_cache_skips_persistence() {
        let scratch = TempDir::new().expect("create temp dir");
        let config = LicenseCacheConfig::new(scratch.path().to_path_buf(), false, false);
        let fingerprint = [0x22; 32];
        let index = sample_cached_index();

        save_cached_index(
            &config,
            LicenseCacheNamespace::Embedded,
            &index,
            &fingerprint,
        )
        .expect("disabled save should succeed");

        // Nothing may have been written while the cache is disabled.
        let cache_path = config.cache_file_path(LicenseCacheNamespace::Embedded, &fingerprint);
        assert!(!cache_path.exists());

        let loaded = load_cached_index(&config, LicenseCacheNamespace::Embedded, &fingerprint)
            .expect("disabled load should succeed");
        assert!(loaded.is_none());
    }
}