1use std::path::Path;
2
3use chrono::Utc;
4use serde::{Deserialize, Serialize};
5
6use super::io::{CacheIoError, load_snapshot_payload, write_snapshot_payload};
7use super::metadata::{CacheInvalidationKey, CacheSnapshotMetadata};
8use super::paths::scan_result_cache_path;
9use crate::models::{
10 Author, Copyright, FileInfo, Holder, LicenseDetection, Match, OutputEmail, OutputURL,
11 PackageData,
12};
13
14const SCAN_CACHE_SCHEMA_VERSION: u32 = 2;
15const SCAN_CACHE_ENGINE_VERSION: &str = "scan-result-cache-v2";
16const SCAN_CACHE_RULES_FINGERPRINT: &str = env!("CARGO_PKG_VERSION");
17
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct CachedScanFindings {
20 pub package_data: Vec<PackageData>,
21 pub license_expression: Option<String>,
22 pub license_detections: Vec<LicenseDetection>,
23 pub license_clues: Vec<Match>,
24 pub percentage_of_license_text: Option<f64>,
25 pub copyrights: Vec<Copyright>,
26 pub holders: Vec<Holder>,
27 pub authors: Vec<Author>,
28 pub emails: Vec<OutputEmail>,
29 pub urls: Vec<OutputURL>,
30 pub programming_language: Option<String>,
31}
32
33impl CachedScanFindings {
34 pub fn from_file_info(file_info: &FileInfo) -> Self {
35 Self {
36 package_data: file_info.package_data.clone(),
37 license_expression: file_info.license_expression.clone(),
38 license_detections: file_info.license_detections.clone(),
39 license_clues: file_info.license_clues.clone(),
40 percentage_of_license_text: file_info.percentage_of_license_text,
41 copyrights: file_info.copyrights.clone(),
42 holders: file_info.holders.clone(),
43 authors: file_info.authors.clone(),
44 emails: file_info.emails.clone(),
45 urls: file_info.urls.clone(),
46 programming_language: file_info.programming_language.clone(),
47 }
48 }
49}
50
51pub fn read_cached_findings(
52 scan_results_dir: &Path,
53 sha256: &str,
54 options_fingerprint: &str,
55) -> Result<Option<CachedScanFindings>, CacheIoError> {
56 let Some(path) = scan_result_cache_path(scan_results_dir, sha256) else {
57 return Ok(None);
58 };
59
60 let key = CacheInvalidationKey {
61 cache_schema_version: SCAN_CACHE_SCHEMA_VERSION,
62 engine_version: SCAN_CACHE_ENGINE_VERSION,
63 rules_fingerprint: SCAN_CACHE_RULES_FINGERPRINT,
64 build_options_fingerprint: options_fingerprint,
65 };
66
67 let Some(payload) = load_snapshot_payload(&path, &key)? else {
68 return Ok(None);
69 };
70
71 match rmp_serde::decode::from_slice::<CachedScanFindings>(&payload) {
72 Ok(findings) => Ok(Some(findings)),
73 Err(_) => Ok(None),
74 }
75}
76
77pub fn write_cached_findings(
78 scan_results_dir: &Path,
79 sha256: &str,
80 options_fingerprint: &str,
81 findings: &CachedScanFindings,
82) -> Result<(), CacheIoError> {
83 let Some(path) = scan_result_cache_path(scan_results_dir, sha256) else {
84 return Ok(());
85 };
86
87 let metadata = CacheSnapshotMetadata {
88 cache_schema_version: SCAN_CACHE_SCHEMA_VERSION,
89 engine_version: SCAN_CACHE_ENGINE_VERSION.to_string(),
90 rules_fingerprint: SCAN_CACHE_RULES_FINGERPRINT.to_string(),
91 build_options_fingerprint: options_fingerprint.to_string(),
92 created_at: Utc::now().to_rfc3339(),
93 };
94
95 let payload = rmp_serde::to_vec(findings).map_err(CacheIoError::Encode)?;
96 write_snapshot_payload(&path, &metadata, &payload)
97}
98
#[cfg(test)]
mod tests {
    use tempfile::TempDir;

    use super::*;

    /// A syntactically valid SHA-256 hex digest used as the cache key in every test.
    fn sample_sha256() -> &'static str {
        "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
    }

    /// Findings with every collection empty and every optional field unset;
    /// tests override only the fields they care about.
    fn blank_findings() -> CachedScanFindings {
        CachedScanFindings {
            package_data: Vec::new(),
            license_expression: None,
            license_detections: Vec::new(),
            license_clues: Vec::new(),
            percentage_of_license_text: None,
            copyrights: Vec::new(),
            holders: Vec::new(),
            authors: Vec::new(),
            emails: Vec::new(),
            urls: Vec::new(),
            programming_language: None,
        }
    }

    /// Writes `findings` for the sample digest under the "cache-options-v1" fingerprint.
    fn store(scan_results_dir: &Path, findings: &CachedScanFindings) {
        write_cached_findings(
            scan_results_dir,
            sample_sha256(),
            "cache-options-v1",
            findings,
        )
        .expect("write cache entry");
    }

    /// Reads the sample digest back with the given options fingerprint.
    fn fetch(scan_results_dir: &Path, options_fingerprint: &str) -> Option<CachedScanFindings> {
        read_cached_findings(scan_results_dir, sample_sha256(), options_fingerprint)
            .expect("read cache entry")
    }

    #[test]
    fn test_write_and_read_cached_findings_roundtrip() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");
        let findings = CachedScanFindings {
            license_expression: Some("mit".to_string()),
            percentage_of_license_text: Some(100.0),
            programming_language: Some("Rust".to_string()),
            ..blank_findings()
        };

        store(&scan_results_dir, &findings);

        let loaded = fetch(&scan_results_dir, "cache-options-v1").expect("cache hit");

        assert_eq!(loaded.license_expression, findings.license_expression);
        assert_eq!(loaded.license_clues, findings.license_clues);
        assert_eq!(
            loaded.percentage_of_license_text,
            findings.percentage_of_license_text
        );
        assert_eq!(loaded.programming_language, findings.programming_language);
    }

    #[test]
    fn test_write_and_read_cached_findings_roundtrip_with_license_clues() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");
        let clue = Match {
            license_expression: "unknown-license-reference".to_string(),
            license_expression_spdx: "LicenseRef-scancode-unknown-license-reference"
                .to_string(),
            from_file: Some("NOTICE".to_string()),
            start_line: 1,
            end_line: 2,
            matcher: Some("2-aho".to_string()),
            score: 100.0,
            matched_length: Some(19),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: Some("license-clue_1.RULE".to_string()),
            rule_url: Some("https://example.com/license-clue_1.RULE".to_string()),
            matched_text: Some(
                "This product currently only contains code developed by authors".to_string(),
            ),
            referenced_filenames: None,
            matched_text_diagnostics: Some(
                "This product currently only contains code developed by [authors]".to_string(),
            ),
        };
        let findings = CachedScanFindings {
            license_clues: vec![clue],
            percentage_of_license_text: Some(42.0),
            ..blank_findings()
        };

        store(&scan_results_dir, &findings);

        let loaded = fetch(&scan_results_dir, "cache-options-v1").expect("cache hit");

        assert_eq!(loaded.license_clues, findings.license_clues);
        assert_eq!(
            loaded.percentage_of_license_text,
            findings.percentage_of_license_text
        );
    }

    #[test]
    fn test_read_cached_findings_misses_on_fingerprint_change() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let scan_results_dir = temp_dir.path().join("scan-results");
        let findings = CachedScanFindings {
            license_expression: Some("apache-2.0".to_string()),
            programming_language: Some("Rust".to_string()),
            ..blank_findings()
        };

        store(&scan_results_dir, &findings);

        // Same digest, different options fingerprint: the entry must not be served.
        let loaded = fetch(&scan_results_dir, "cache-options-v2");

        assert!(loaded.is_none());
    }
}