Skip to main content

provenant/cache/
scan_cache.rs

1use std::path::Path;
2
3use chrono::Utc;
4use serde::{Deserialize, Serialize};
5
6use super::io::{CacheIoError, load_snapshot_payload, write_snapshot_payload};
7use super::metadata::{CacheInvalidationKey, CacheSnapshotMetadata};
8use super::paths::scan_result_cache_path;
9use crate::models::{
10    Author, Copyright, FileInfo, Holder, LicenseDetection, Match, OutputEmail, OutputURL,
11    PackageData,
12};
13
14const SCAN_CACHE_SCHEMA_VERSION: u32 = 2;
15const SCAN_CACHE_ENGINE_VERSION: &str = "scan-result-cache-v2";
16const SCAN_CACHE_RULES_FINGERPRINT: &str = env!("CARGO_PKG_VERSION");
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct CachedScanFindings {
20    pub package_data: Vec<PackageData>,
21    pub license_expression: Option<String>,
22    pub license_detections: Vec<LicenseDetection>,
23    pub license_clues: Vec<Match>,
24    pub percentage_of_license_text: Option<f64>,
25    pub copyrights: Vec<Copyright>,
26    pub holders: Vec<Holder>,
27    pub authors: Vec<Author>,
28    pub emails: Vec<OutputEmail>,
29    pub urls: Vec<OutputURL>,
30    pub programming_language: Option<String>,
31}
32
33impl CachedScanFindings {
34    pub fn from_file_info(file_info: &FileInfo) -> Self {
35        Self {
36            package_data: file_info.package_data.clone(),
37            license_expression: file_info.license_expression.clone(),
38            license_detections: file_info.license_detections.clone(),
39            license_clues: file_info.license_clues.clone(),
40            percentage_of_license_text: file_info.percentage_of_license_text,
41            copyrights: file_info.copyrights.clone(),
42            holders: file_info.holders.clone(),
43            authors: file_info.authors.clone(),
44            emails: file_info.emails.clone(),
45            urls: file_info.urls.clone(),
46            programming_language: file_info.programming_language.clone(),
47        }
48    }
49}
50
51pub fn read_cached_findings(
52    scan_results_dir: &Path,
53    sha256: &str,
54    options_fingerprint: &str,
55) -> Result<Option<CachedScanFindings>, CacheIoError> {
56    let Some(path) = scan_result_cache_path(scan_results_dir, sha256) else {
57        return Ok(None);
58    };
59
60    let key = CacheInvalidationKey {
61        cache_schema_version: SCAN_CACHE_SCHEMA_VERSION,
62        engine_version: SCAN_CACHE_ENGINE_VERSION,
63        rules_fingerprint: SCAN_CACHE_RULES_FINGERPRINT,
64        build_options_fingerprint: options_fingerprint,
65    };
66
67    let Some(payload) = load_snapshot_payload(&path, &key)? else {
68        return Ok(None);
69    };
70
71    match rmp_serde::decode::from_slice::<CachedScanFindings>(&payload) {
72        Ok(findings) => Ok(Some(findings)),
73        Err(_) => Ok(None),
74    }
75}
76
77pub fn write_cached_findings(
78    scan_results_dir: &Path,
79    sha256: &str,
80    options_fingerprint: &str,
81    findings: &CachedScanFindings,
82) -> Result<(), CacheIoError> {
83    let Some(path) = scan_result_cache_path(scan_results_dir, sha256) else {
84        return Ok(());
85    };
86
87    let metadata = CacheSnapshotMetadata {
88        cache_schema_version: SCAN_CACHE_SCHEMA_VERSION,
89        engine_version: SCAN_CACHE_ENGINE_VERSION.to_string(),
90        rules_fingerprint: SCAN_CACHE_RULES_FINGERPRINT.to_string(),
91        build_options_fingerprint: options_fingerprint.to_string(),
92        created_at: Utc::now().to_rfc3339(),
93    };
94
95    let payload = rmp_serde::to_vec(findings).map_err(CacheIoError::Encode)?;
96    write_snapshot_payload(&path, &metadata, &payload)
97}
98
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;

    /// Options fingerprint under which every test entry is written.
    const WRITE_FINGERPRINT: &str = "cache-options-v1";

    /// Fixed 64-character lowercase hex string used as the cache key.
    fn sample_sha256() -> &'static str {
        "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
    }

    /// Findings with every collection empty and every optional field unset;
    /// tests override only the fields they care about.
    fn blank_findings() -> CachedScanFindings {
        CachedScanFindings {
            package_data: Vec::new(),
            license_expression: None,
            license_detections: Vec::new(),
            license_clues: Vec::new(),
            percentage_of_license_text: None,
            copyrights: Vec::new(),
            holders: Vec::new(),
            authors: Vec::new(),
            emails: Vec::new(),
            urls: Vec::new(),
            programming_language: None,
        }
    }

    /// Writes `findings` for the sample digest using `WRITE_FINGERPRINT`.
    fn store(scan_results_dir: &Path, findings: &CachedScanFindings) {
        write_cached_findings(
            scan_results_dir,
            sample_sha256(),
            WRITE_FINGERPRINT,
            findings,
        )
        .expect("write cache entry");
    }

    /// Reads the entry for the sample digest with the given fingerprint.
    fn load(scan_results_dir: &Path, options_fingerprint: &str) -> Option<CachedScanFindings> {
        read_cached_findings(scan_results_dir, sample_sha256(), options_fingerprint)
            .expect("read cache entry")
    }

    #[test]
    fn test_write_and_read_cached_findings_roundtrip() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");
        let expected = CachedScanFindings {
            license_expression: Some("mit".to_string()),
            percentage_of_license_text: Some(100.0),
            programming_language: Some("Rust".to_string()),
            ..blank_findings()
        };

        store(&scan_results_dir, &expected);
        let actual = load(&scan_results_dir, WRITE_FINGERPRINT).expect("cache hit");

        assert_eq!(actual.license_expression, expected.license_expression);
        assert_eq!(actual.license_clues, expected.license_clues);
        assert_eq!(
            actual.percentage_of_license_text,
            expected.percentage_of_license_text
        );
        assert_eq!(actual.programming_language, expected.programming_language);
    }

    #[test]
    fn test_write_and_read_cached_findings_roundtrip_with_license_clues() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");

        let clue = Match {
            license_expression: "unknown-license-reference".to_string(),
            license_expression_spdx: "LicenseRef-scancode-unknown-license-reference"
                .to_string(),
            from_file: Some("NOTICE".to_string()),
            start_line: 1,
            end_line: 2,
            matcher: Some("2-aho".to_string()),
            score: 100.0,
            matched_length: Some(19),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: Some("license-clue_1.RULE".to_string()),
            rule_url: Some("https://example.com/license-clue_1.RULE".to_string()),
            matched_text: Some(
                "This product currently only contains code developed by authors".to_string(),
            ),
            referenced_filenames: None,
            matched_text_diagnostics: Some(
                "This product currently only contains code developed by [authors]".to_string(),
            ),
        };
        let expected = CachedScanFindings {
            license_clues: vec![clue],
            percentage_of_license_text: Some(42.0),
            ..blank_findings()
        };

        store(&scan_results_dir, &expected);
        let actual = load(&scan_results_dir, WRITE_FINGERPRINT).expect("cache hit");

        assert_eq!(actual.license_clues, expected.license_clues);
        assert_eq!(
            actual.percentage_of_license_text,
            expected.percentage_of_license_text
        );
    }

    #[test]
    fn test_read_cached_findings_misses_on_fingerprint_change() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");
        let stored = CachedScanFindings {
            license_expression: Some("apache-2.0".to_string()),
            programming_language: Some("Rust".to_string()),
            ..blank_findings()
        };

        store(&scan_results_dir, &stored);

        // Same digest, different options fingerprint: must be a miss.
        let actual = load(&scan_results_dir, "cache-options-v2");

        assert!(actual.is_none());
    }
}
237}