provenant/license_detection/
license_cache.rs1use std::fmt::Write as _;
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use rancor::{Panic, ResultExt};
7use sha2::{Digest, Sha256};
8
9use crate::cache::{CacheConfig, write_bytes_atomically};
10use crate::license_detection::index::LicenseIndex;
11use crate::license_detection::models::{LoadedLicense, LoadedRule};
12
/// Directory, directly under the configured cache root, that holds every
/// license-index cache namespace.
const CACHE_ROOT_DIR_NAME: &str = "license-index";
/// Extension of cache files. Pruning only ever considers files carrying this
/// extension.
const CACHE_FILE_EXTENSION: &str = "rkyv";
15
/// Selects which sub-directory of the license-index cache an index is stored in.
///
/// Each namespace maps to its own directory, so pruning one namespace never
/// touches cache files that belong to the other.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LicenseCacheNamespace {
    /// Stored under `embedded/`.
    // NOTE(review): presumably the index built from the rules shipped with the
    // binary — confirm against callers.
    Embedded,
    /// Stored under `custom/`.
    // NOTE(review): presumably the index built from user-supplied rules —
    // confirm against callers.
    CustomRules,
}

impl LicenseCacheNamespace {
    /// Returns the on-disk directory name used for this namespace.
    fn directory_name(self) -> &'static str {
        const EMBEDDED_DIR: &str = "embedded";
        const CUSTOM_RULES_DIR: &str = "custom";

        match self {
            Self::Embedded => EMBEDDED_DIR,
            Self::CustomRules => CUSTOM_RULES_DIR,
        }
    }
}
30
/// Settings that control where the license index cache lives and whether it is
/// used at all.
#[derive(Debug, Clone)]
pub struct LicenseCacheConfig {
    /// Base cache directory. Cache files are placed below
    /// `<root_dir>/license-index/<namespace>/`, and the exclusive cache lock is
    /// taken on this directory.
    pub root_dir: PathBuf,
    /// Stored verbatim; not consulted by any function in this module.
    // NOTE(review): presumably callers use this to force a rebuild of the
    // index instead of loading it from the cache — confirm.
    pub reindex: bool,
    /// When `false`, loading reports a miss and saving, deleting and size
    /// queries do nothing.
    pub enabled: bool,
}
36
37impl LicenseCacheConfig {
38 pub fn new(root_dir: PathBuf, reindex: bool, enabled: bool) -> Self {
39 Self {
40 root_dir,
41 reindex,
42 enabled,
43 }
44 }
45
46 pub fn default_root_dir() -> PathBuf {
47 CacheConfig::default_root_dir_without_scan_root()
48 }
49
50 fn namespace_dir(&self, namespace: LicenseCacheNamespace) -> PathBuf {
51 self.root_dir
52 .join(CACHE_ROOT_DIR_NAME)
53 .join(namespace.directory_name())
54 }
55
56 fn cache_file_path(&self, namespace: LicenseCacheNamespace, fingerprint: &[u8; 32]) -> PathBuf {
57 self.namespace_dir(namespace).join(format!(
58 "{}.{}",
59 fingerprint_hex(fingerprint),
60 CACHE_FILE_EXTENSION
61 ))
62 }
63}
64
/// Encodes a 32-byte fingerprint as 64 lowercase, zero-padded hex characters.
fn fingerprint_hex(fingerprint: &[u8; 32]) -> String {
    fingerprint.iter().fold(
        String::with_capacity(2 * fingerprint.len()),
        |mut encoded, octet| {
            // The result of the write is deliberately ignored, as before.
            let _ = write!(encoded, "{octet:02x}");
            encoded
        },
    )
}
72
73fn prune_namespace_dir(namespace_dir: &Path, active_path: &Path) -> Result<()> {
74 if !namespace_dir.exists() {
75 return Ok(());
76 }
77
78 for entry in fs::read_dir(namespace_dir)
79 .with_context(|| format!("Failed to read license cache namespace {namespace_dir:?}"))?
80 {
81 let path = entry?.path();
82 if path == active_path
83 || path.extension().and_then(|ext| ext.to_str()) != Some(CACHE_FILE_EXTENSION)
84 {
85 continue;
86 }
87 fs::remove_file(&path)
88 .with_context(|| format!("Failed to prune stale license cache file {path:?}"))?;
89 }
90
91 Ok(())
92}
93
94pub fn compute_rules_fingerprint(rules: &[LoadedRule], licenses: &[LoadedLicense]) -> [u8; 32] {
95 let mut hasher = Sha256::new();
96
97 let mut sorted_rules: Vec<_> = rules.iter().collect();
98 sorted_rules.sort_by_key(|r| &r.identifier);
99 for rule in &sorted_rules {
100 hasher.update(rule.identifier.as_bytes());
101 hasher.update(rule.license_expression.as_bytes());
102 hasher.update(rule.text.as_bytes());
103 }
104
105 let mut sorted_licenses: Vec<_> = licenses.iter().collect();
106 sorted_licenses.sort_by_key(|l| &l.key);
107 for license in &sorted_licenses {
108 hasher.update(license.key.as_bytes());
109 hasher.update(license.text.as_bytes());
110 }
111
112 hasher.finalize().into()
113}
114
115pub fn compute_artifact_fingerprint(artifact_bytes: &[u8]) -> [u8; 32] {
116 Sha256::digest(artifact_bytes).into()
117}
118
119pub fn load_cached_index(
120 config: &LicenseCacheConfig,
121 namespace: LicenseCacheNamespace,
122 fingerprint: &[u8; 32],
123) -> Result<Option<LicenseIndex>> {
124 if !config.enabled {
125 return Ok(None);
126 }
127
128 let cache_path = config.cache_file_path(namespace, fingerprint);
129
130 if !cache_path.exists() {
131 return Ok(None);
132 }
133
134 let bytes = match fs::read(&cache_path) {
135 Ok(bytes) => bytes,
136 Err(_) => return Ok(None),
137 };
138
139 if bytes.len() < 32 {
140 return Ok(None);
141 }
142
143 let stored_fingerprint: [u8; 32] = bytes[..32].try_into().unwrap();
144 if stored_fingerprint != *fingerprint {
145 return Ok(None);
146 }
147
148 let archived =
149 match rkyv::access::<rkyv::Archived<LicenseIndex>, rkyv::rancor::Error>(&bytes[32..]) {
150 Ok(archived) => archived,
151 Err(_) => return Ok(None),
152 };
153
154 let cached: LicenseIndex = rkyv::deserialize::<LicenseIndex, Panic>(archived).always_ok();
155
156 Ok(Some(cached))
157}
158
159pub fn save_cached_index(
160 config: &LicenseCacheConfig,
161 namespace: LicenseCacheNamespace,
162 cached: &LicenseIndex,
163 fingerprint: &[u8; 32],
164) -> Result<()> {
165 if !config.enabled {
166 return Ok(());
167 }
168
169 let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(cached)
170 .map_err(|e| anyhow::anyhow!("Failed to serialize license index cache: {}", e))?;
171
172 let mut payload = Vec::with_capacity(fingerprint.len() + rkyv_bytes.len());
173 payload.extend_from_slice(fingerprint);
174 payload.extend_from_slice(&rkyv_bytes);
175
176 let namespace_dir = config.namespace_dir(namespace);
177 let cache_path = config.cache_file_path(namespace, fingerprint);
178
179 crate::cache::locking::with_exclusive_cache_lock(&config.root_dir, || {
180 fs::create_dir_all(&namespace_dir)
181 .with_context(|| "Failed to create license index cache directory")?;
182 prune_namespace_dir(&namespace_dir, &cache_path)?;
183 write_bytes_atomically(&cache_path, &payload)
184 .with_context(|| "Failed to persist license index cache file")
185 })?;
186
187 Ok(())
188}
189
190pub fn delete_cache(
191 config: &LicenseCacheConfig,
192 namespace: LicenseCacheNamespace,
193 fingerprint: &[u8; 32],
194) -> Result<()> {
195 if !config.enabled {
196 return Ok(());
197 }
198
199 let cache_path = config.cache_file_path(namespace, fingerprint);
200 crate::cache::locking::with_exclusive_cache_lock(&config.root_dir, || -> Result<()> {
201 if cache_path.exists() {
202 fs::remove_file(&cache_path).context("Failed to delete license index cache file")?;
203 }
204 Ok(())
205 })?;
206
207 Ok(())
208}
209
210pub fn cache_file_size(
211 config: &LicenseCacheConfig,
212 namespace: LicenseCacheNamespace,
213 fingerprint: &[u8; 32],
214) -> Option<u64> {
215 if !config.enabled {
216 return None;
217 }
218
219 fs::metadata(config.cache_file_path(namespace, fingerprint))
220 .ok()
221 .map(|m| m.len())
222}
223
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;
    use crate::license_detection::automaton::Automaton;
    use crate::license_detection::index::dictionary::TokenDictionary;

    /// Builds a minimal index (empty/default everywhere, SPDX list version
    /// `"test"`) that is cheap to serialize.
    fn sample_cached_index() -> LicenseIndex {
        LicenseIndex {
            dictionary: TokenDictionary::default(),
            len_legalese: 0,
            rid_by_hash: Default::default(),
            rules_by_rid: Default::default(),
            tids_by_rid: Default::default(),
            rules_automaton: Automaton::empty(),
            unknown_automaton: Automaton::empty(),
            sets_by_rid: Default::default(),
            rule_metadata_by_identifier: Default::default(),
            msets_by_rid: Default::default(),
            high_sets_by_rid: Default::default(),
            high_postings_by_rid: Default::default(),
            false_positive_rids: Default::default(),
            approx_matchable_rids: Default::default(),
            licenses_by_key: Default::default(),
            pattern_id_to_rid: Default::default(),
            rid_by_spdx_key: Default::default(),
            unknown_spdx_rid: None,
            rids_by_high_tid: Default::default(),
            spdx_license_list_version: Some("test".to_string()),
        }
    }

    /// The cache path is `<root>/license-index/<namespace dir>/<hex>.rkyv`.
    #[test]
    fn test_cache_file_path_uses_namespace_and_fingerprint() {
        let config = LicenseCacheConfig::new(PathBuf::from("/tmp/cache-root"), false, true);
        let fingerprint = [0xAB; 32];
        let hex = "ab".repeat(32);

        let cases = [
            (LicenseCacheNamespace::Embedded, "embedded"),
            (LicenseCacheNamespace::CustomRules, "custom"),
        ];

        for (namespace, dir_name) in cases {
            assert_eq!(
                config.cache_file_path(namespace, &fingerprint),
                PathBuf::from(format!("/tmp/cache-root/license-index/{dir_name}/{hex}.rkyv"))
            );
        }
    }

    /// Saving a new index removes older `.rkyv` files from the same namespace.
    #[test]
    fn test_save_cached_index_prunes_stale_namespace_entries() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let config = LicenseCacheConfig::new(temp_dir.path().to_path_buf(), false, true);
        let namespace = LicenseCacheNamespace::Embedded;
        let fingerprint = [0x11; 32];

        let namespace_dir = config.namespace_dir(namespace);
        fs::create_dir_all(&namespace_dir).expect("create namespace dir");
        fs::write(namespace_dir.join("stale.rkyv"), b"old").expect("write stale cache file");

        save_cached_index(&config, namespace, &sample_cached_index(), &fingerprint)
            .expect("save cache");

        let remaining: Vec<PathBuf> = fs::read_dir(&namespace_dir)
            .expect("read namespace dir")
            .map(|entry| entry.expect("dir entry").path())
            .collect();

        // Exactly one file is left, and it is the freshly written one.
        assert_eq!(
            remaining,
            vec![config.cache_file_path(namespace, &fingerprint)]
        );
    }

    /// With the cache disabled, saving writes nothing and loading is a miss.
    #[test]
    fn test_disabled_cache_skips_persistence() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let config = LicenseCacheConfig::new(temp_dir.path().to_path_buf(), false, false);
        let namespace = LicenseCacheNamespace::Embedded;
        let fingerprint = [0x22; 32];

        save_cached_index(&config, namespace, &sample_cached_index(), &fingerprint)
            .expect("disabled save should succeed");

        let cache_path = config.cache_file_path(namespace, &fingerprint);
        assert!(!cache_path.exists());

        let loaded = load_cached_index(&config, namespace, &fingerprint)
            .expect("disabled load should succeed");
        assert!(loaded.is_none());
    }
}
333}