1use serde::{Deserialize, Serialize, Serializer};
5use serde_json::Map;
6
7use super::author::OutputAuthor;
8use super::copyright::OutputCopyright;
9use super::email::OutputEmail;
10use super::holder::OutputHolder;
11use super::license_detection::OutputLicenseDetection;
12use super::license_match::OutputMatch;
13use super::license_policy_entry::OutputLicensePolicyEntry;
14use super::package_data::OutputPackageData;
15use super::serde_helpers::insert_json;
16use super::tallies::OutputTallies;
17use super::url::OutputURL;
18
19#[derive(Debug, Clone, Deserialize)]
20pub struct OutputFileInfo {
21 #[serde(default)]
22 pub name: String,
23 #[serde(default)]
24 pub base_name: String,
25 #[serde(default)]
26 pub extension: String,
27 pub path: String,
28 #[serde(rename = "type")]
29 pub file_type: crate::models::FileType,
30 pub mime_type: Option<String>,
31 pub file_type_label: Option<String>,
32 #[serde(default)]
33 pub size: u64,
34 pub date: Option<String>,
35 pub sha1: Option<String>,
36 pub md5: Option<String>,
37 pub sha256: Option<String>,
38 pub sha1_git: Option<String>,
39 pub programming_language: Option<String>,
40 #[serde(default)]
41 pub package_data: Vec<OutputPackageData>,
42 #[serde(rename = "detected_license_expression_spdx")]
43 pub license_expression: Option<String>,
44 #[serde(default)]
45 pub license_detections: Vec<OutputLicenseDetection>,
46 #[serde(default, skip_serializing_if = "Vec::is_empty")]
47 pub license_clues: Vec<OutputMatch>,
48 pub percentage_of_license_text: Option<f64>,
49 #[serde(default)]
50 pub copyrights: Vec<OutputCopyright>,
51 #[serde(default)]
52 pub holders: Vec<OutputHolder>,
53 #[serde(default)]
54 pub authors: Vec<OutputAuthor>,
55 #[serde(default, skip_serializing_if = "Vec::is_empty")]
56 pub emails: Vec<OutputEmail>,
57 #[serde(default)]
58 pub urls: Vec<OutputURL>,
59 #[serde(default)]
60 pub for_packages: Vec<String>,
61 #[serde(default)]
62 pub scan_errors: Vec<String>,
63 pub license_policy: Option<Vec<OutputLicensePolicyEntry>>,
64 pub is_generated: Option<bool>,
65 pub is_binary: Option<bool>,
66 pub is_text: Option<bool>,
67 pub is_archive: Option<bool>,
68 pub is_media: Option<bool>,
69 pub is_source: Option<bool>,
70 pub is_script: Option<bool>,
71 pub files_count: Option<usize>,
72 pub dirs_count: Option<usize>,
73 pub size_count: Option<u64>,
74 pub source_count: Option<usize>,
75 #[serde(default, skip_serializing_if = "is_false")]
76 pub is_legal: bool,
77 #[serde(default, skip_serializing_if = "is_false")]
78 pub is_manifest: bool,
79 #[serde(default, skip_serializing_if = "is_false")]
80 pub is_readme: bool,
81 #[serde(default, skip_serializing_if = "is_false")]
82 pub is_top_level: bool,
83 #[serde(default, skip_serializing_if = "is_false")]
84 pub is_key_file: bool,
85 #[serde(default, skip_serializing_if = "is_false")]
86 pub is_community: bool,
87 #[serde(default, skip_serializing_if = "Vec::is_empty")]
88 pub facets: Vec<String>,
89 pub tallies: Option<OutputTallies>,
90}
91
92impl OutputFileInfo {
93 pub(crate) fn should_serialize_info_surface(&self) -> bool {
94 self.date.is_some()
95 || self.sha1.is_some()
96 || self.md5.is_some()
97 || self.sha256.is_some()
98 || self.sha1_git.is_some()
99 || self.mime_type.is_some()
100 || self.file_type_label.is_some()
101 || self.programming_language.is_some()
102 || self.is_binary.is_some()
103 || self.is_text.is_some()
104 || self.is_archive.is_some()
105 || self.is_media.is_some()
106 || self.is_source.is_some()
107 || self.is_script.is_some()
108 || self.files_count.is_some()
109 || self.dirs_count.is_some()
110 || self.size_count.is_some()
111 }
112
113 pub(crate) fn should_serialize_license_surface(&self) -> bool {
114 self.license_expression.is_some()
115 || !self.license_detections.is_empty()
116 || !self.license_clues.is_empty()
117 || self.percentage_of_license_text.is_some()
118 }
119
120 pub(crate) fn detected_license_expression_spdx(&self) -> Option<String> {
121 {
122 let expressions: Option<Vec<String>> = self
123 .license_detections
124 .iter()
125 .map(|detection| {
126 (!detection.license_expression_spdx.is_empty())
127 .then(|| detection.license_expression_spdx.clone())
128 })
129 .collect();
130 expressions.and_then(|expressions| {
131 crate::utils::spdx::select_primary_license_expression_strict(expressions.clone())
132 .or_else(|| {
133 crate::utils::spdx::combine_license_expressions_preserving_structure_strict(
134 expressions,
135 )
136 })
137 })
138 }
139 .or_else(|| {
140 let expressions: Option<Vec<String>> = self
141 .package_data
142 .iter()
143 .flat_map(|package_data| package_data.license_detections.iter())
144 .map(|detection| {
145 (!detection.license_expression_spdx.is_empty())
146 .then(|| detection.license_expression_spdx.clone())
147 })
148 .collect();
149 expressions.and_then(|expressions| {
150 crate::utils::spdx::select_primary_license_expression_strict(expressions.clone())
151 .or_else(|| {
152 crate::utils::spdx::combine_license_expressions_preserving_structure_strict(
153 expressions,
154 )
155 })
156 })
157 })
158 .or_else(|| {
159 self.license_expression
160 .clone()
161 .filter(|expression| !expression.is_empty())
162 .and_then(|expression| {
163 crate::utils::spdx::combine_license_expressions_preserving_structure_strict([
164 expression,
165 ])
166 })
167 })
168 }
169}
170
171impl Serialize for OutputFileInfo {
172 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
173 where
174 S: Serializer,
175 {
176 let mut map = Map::new();
177 insert_json(&mut map, "path", &self.path)?;
178 insert_json(&mut map, "type", &self.file_type)?;
179 insert_json(&mut map, "name", &self.name)?;
180 insert_json(&mut map, "base_name", &self.base_name)?;
181 insert_json(&mut map, "extension", &self.extension)?;
182 insert_json(&mut map, "size", self.size)?;
183
184 if self.should_serialize_info_surface() {
185 insert_json(&mut map, "date", &self.date)?;
186 insert_json(&mut map, "sha1", self.sha1.as_ref())?;
187 insert_json(&mut map, "md5", self.md5.as_ref())?;
188 insert_json(&mut map, "sha256", self.sha256.as_ref())?;
189 insert_json(&mut map, "sha1_git", self.sha1_git.as_ref())?;
190 insert_json(&mut map, "mime_type", &self.mime_type)?;
191 insert_json(&mut map, "file_type", &self.file_type_label)?;
192 insert_json(&mut map, "programming_language", &self.programming_language)?;
193 insert_json(&mut map, "is_binary", self.is_binary)?;
194 insert_json(&mut map, "is_text", self.is_text)?;
195 insert_json(&mut map, "is_archive", self.is_archive)?;
196 insert_json(&mut map, "is_media", self.is_media)?;
197 insert_json(&mut map, "is_source", self.is_source)?;
198 insert_json(&mut map, "is_script", self.is_script)?;
199 insert_json(&mut map, "files_count", self.files_count)?;
200 insert_json(&mut map, "dirs_count", self.dirs_count)?;
201 insert_json(&mut map, "size_count", self.size_count)?;
202 }
203
204 insert_json(&mut map, "package_data", &self.package_data)?;
205 insert_json(
206 &mut map,
207 "detected_license_expression_spdx",
208 self.detected_license_expression_spdx(),
209 )?;
210 insert_json(&mut map, "license_detections", &self.license_detections)?;
211 if self.should_serialize_license_surface() {
212 insert_json(&mut map, "license_clues", &self.license_clues)?;
213 }
214 if self.percentage_of_license_text.is_some() {
215 insert_json(
216 &mut map,
217 "percentage_of_license_text",
218 self.percentage_of_license_text,
219 )?;
220 }
221 insert_json(&mut map, "copyrights", &self.copyrights)?;
222 insert_json(&mut map, "holders", &self.holders)?;
223 insert_json(&mut map, "authors", &self.authors)?;
224 if !self.emails.is_empty() {
225 insert_json(&mut map, "emails", &self.emails)?;
226 }
227 insert_json(&mut map, "urls", &self.urls)?;
228 insert_json(&mut map, "for_packages", &self.for_packages)?;
229 insert_json(&mut map, "scan_errors", &self.scan_errors)?;
230 if self.license_policy.is_some() {
231 insert_json(&mut map, "license_policy", &self.license_policy)?;
232 }
233 if self.is_generated.is_some() {
234 insert_json(&mut map, "is_generated", self.is_generated)?;
235 }
236 if self.source_count.is_some() {
237 insert_json(&mut map, "source_count", self.source_count)?;
238 }
239 if self.is_legal {
240 insert_json(&mut map, "is_legal", self.is_legal)?;
241 }
242 if self.is_manifest {
243 insert_json(&mut map, "is_manifest", self.is_manifest)?;
244 }
245 if self.is_readme {
246 insert_json(&mut map, "is_readme", self.is_readme)?;
247 }
248 if self.is_top_level {
249 insert_json(&mut map, "is_top_level", self.is_top_level)?;
250 }
251 if self.is_key_file {
252 insert_json(&mut map, "is_key_file", self.is_key_file)?;
253 }
254 if self.is_community {
255 insert_json(&mut map, "is_community", self.is_community)?;
256 }
257 if !self.facets.is_empty() {
258 insert_json(&mut map, "facets", &self.facets)?;
259 }
260 if self.tallies.is_some() {
261 insert_json(&mut map, "tallies", &self.tallies)?;
262 }
263
264 map.serialize(serializer)
265 }
266}
267
268impl From<&crate::models::FileInfo> for OutputFileInfo {
269 fn from(value: &crate::models::FileInfo) -> Self {
270 Self::from_with_compat_mode(value, crate::cli::CompatibilityMode::Native)
271 }
272}
273
274impl OutputFileInfo {
275 pub fn from_with_compat_mode(
276 value: &crate::models::FileInfo,
277 mode: crate::cli::CompatibilityMode,
278 ) -> Self {
279 Self {
280 name: value.name.clone(),
281 base_name: value.base_name.clone(),
282 extension: value.extension.clone(),
283 path: value.path.clone(),
284 file_type: value.file_type.clone(),
285 mime_type: value.mime_type.clone(),
286 file_type_label: value.file_type_label.clone(),
287 size: value.size,
288 date: value.date.clone(),
289 sha1: value.sha1.as_ref().map(|d| d.as_hex()),
290 md5: value.md5.as_ref().map(|d| d.as_hex()),
291 sha256: value.sha256.as_ref().map(|d| d.as_hex()),
292 sha1_git: value.sha1_git.as_ref().map(|d| d.as_hex()),
293 programming_language: value.programming_language.clone(),
294 package_data: value
295 .package_data
296 .iter()
297 .map(OutputPackageData::from)
298 .collect(),
299 license_expression: value.license_expression.clone(),
300 license_detections: value
301 .license_detections
302 .iter()
303 .map(OutputLicenseDetection::from)
304 .collect(),
305 license_clues: value.license_clues.iter().map(OutputMatch::from).collect(),
306 percentage_of_license_text: value.percentage_of_license_text,
307 copyrights: value
308 .copyrights
309 .iter()
310 .map(|copyright| OutputCopyright::from_with_compat_mode(copyright, mode))
311 .collect(),
312 holders: value.holders.iter().map(OutputHolder::from).collect(),
313 authors: value.authors.iter().map(OutputAuthor::from).collect(),
314 emails: value.emails.iter().map(OutputEmail::from).collect(),
315 urls: value.urls.iter().map(OutputURL::from).collect(),
316 for_packages: value
317 .for_packages
318 .iter()
319 .map(|uid| uid.to_string())
320 .collect(),
321 scan_errors: value.scan_errors.clone(),
322 license_policy: value
323 .license_policy
324 .as_ref()
325 .map(|v| v.iter().map(OutputLicensePolicyEntry::from).collect()),
326 is_generated: value.is_generated,
327 is_binary: value.is_binary,
328 is_text: value.is_text,
329 is_archive: value.is_archive,
330 is_media: value.is_media,
331 is_source: value.is_source,
332 is_script: value.is_script,
333 files_count: value.files_count,
334 dirs_count: value.dirs_count,
335 size_count: value.size_count,
336 source_count: value.source_count,
337 is_legal: value.is_legal,
338 is_manifest: value.is_manifest,
339 is_readme: value.is_readme,
340 is_top_level: value.is_top_level,
341 is_key_file: value.is_key_file,
342 is_community: value.is_community,
343 facets: value.facets.clone(),
344 tallies: value.tallies.as_ref().map(OutputTallies::from),
345 }
346 }
347}
348
349impl TryFrom<&OutputFileInfo> for crate::models::FileInfo {
350 type Error = String;
351 fn try_from(value: &OutputFileInfo) -> Result<Self, Self::Error> {
352 let mut package_data = Vec::with_capacity(value.package_data.len());
353 for p in &value.package_data {
354 package_data.push(crate::models::PackageData::try_from(p)?);
355 }
356 let mut license_detections = Vec::with_capacity(value.license_detections.len());
357 for d in &value.license_detections {
358 license_detections.push(crate::models::LicenseDetection::try_from(d)?);
359 }
360 let mut license_clues = Vec::with_capacity(value.license_clues.len());
361 for m in &value.license_clues {
362 license_clues.push(crate::models::Match::try_from(m)?);
363 }
364 let mut copyrights = Vec::with_capacity(value.copyrights.len());
365 for c in &value.copyrights {
366 copyrights.push(crate::models::Copyright::try_from(c)?);
367 }
368 let mut holders = Vec::with_capacity(value.holders.len());
369 for h in &value.holders {
370 holders.push(crate::models::Holder::try_from(h)?);
371 }
372 let mut authors = Vec::with_capacity(value.authors.len());
373 for a in &value.authors {
374 authors.push(crate::models::Author::try_from(a)?);
375 }
376 let mut emails = Vec::with_capacity(value.emails.len());
377 for e in &value.emails {
378 emails.push(crate::models::OutputEmail::try_from(e)?);
379 }
380 let mut urls = Vec::with_capacity(value.urls.len());
381 for u in &value.urls {
382 urls.push(crate::models::OutputURL::try_from(u)?);
383 }
384 let license_policy = value
385 .license_policy
386 .as_ref()
387 .map(|v| {
388 v.iter()
389 .map(crate::models::LicensePolicyEntry::try_from)
390 .collect::<Result<Vec<_>, _>>()
391 })
392 .transpose()?;
393 Ok(Self {
394 name: value.name.clone(),
395 base_name: value.base_name.clone(),
396 extension: value.extension.clone(),
397 path: value.path.clone(),
398 file_type: value.file_type.clone(),
399 mime_type: value.mime_type.clone(),
400 file_type_label: value.file_type_label.clone(),
401 size: value.size,
402 date: value.date.clone(),
403 sha1: value
404 .sha1
405 .as_ref()
406 .map(|s| crate::models::Sha1Digest::from_hex(s))
407 .transpose()
408 .map_err(|e| format!("invalid sha1: {}", e))?,
409 md5: value
410 .md5
411 .as_ref()
412 .map(|s| crate::models::Md5Digest::from_hex(s))
413 .transpose()
414 .map_err(|e| format!("invalid md5: {}", e))?,
415 sha256: value
416 .sha256
417 .as_ref()
418 .map(|s| crate::models::Sha256Digest::from_hex(s))
419 .transpose()
420 .map_err(|e| format!("invalid sha256: {}", e))?,
421 sha1_git: value
422 .sha1_git
423 .as_ref()
424 .map(|s| crate::models::GitSha1::from_hex(s))
425 .transpose()
426 .map_err(|e| format!("invalid sha1_git: {}", e))?,
427 programming_language: value.programming_language.clone(),
428 package_data,
429 license_expression: value.license_expression.clone(),
430 license_detections,
431 license_clues,
432 percentage_of_license_text: value.percentage_of_license_text,
433 copyrights,
434 holders,
435 authors,
436 emails,
437 urls,
438 for_packages: value
439 .for_packages
440 .iter()
441 .map(|s| crate::models::PackageUid::from_raw(s.clone()))
442 .collect(),
443 scan_errors: value.scan_errors.clone(),
444 scan_diagnostics: crate::models::diagnostics_from_legacy_scan_errors(
445 &value.scan_errors,
446 ),
447 license_policy,
448 is_generated: value.is_generated,
449 is_binary: value.is_binary,
450 is_text: value.is_text,
451 is_archive: value.is_archive,
452 is_media: value.is_media,
453 is_source: value.is_source,
454 is_script: value.is_script,
455 files_count: value.files_count,
456 dirs_count: value.dirs_count,
457 size_count: value.size_count,
458 source_count: value.source_count,
459 is_legal: value.is_legal,
460 is_manifest: value.is_manifest,
461 is_readme: value.is_readme,
462 is_top_level: value.is_top_level,
463 is_key_file: value.is_key_file,
464 is_community: value.is_community,
465 facets: value.facets.clone(),
466 tallies: value
467 .tallies
468 .as_ref()
469 .map(crate::models::Tallies::try_from)
470 .transpose()?,
471 })
472 }
473}
474
#[cfg(test)]
mod tests {
    use super::OutputFileInfo;
    use crate::models::FileType;
    use crate::output_schema::license_detection::OutputLicenseDetection;

    /// A file entry with every optional field unset and every list empty.
    fn base_output_file_info() -> OutputFileInfo {
        OutputFileInfo {
            // Identity
            path: "mod.rs".to_string(),
            name: "mod.rs".to_string(),
            base_name: "mod".to_string(),
            extension: ".rs".to_string(),
            file_type: FileType::File,
            size: 0,
            // Info group
            date: None,
            sha1: None,
            md5: None,
            sha256: None,
            sha1_git: None,
            mime_type: None,
            file_type_label: None,
            programming_language: None,
            is_binary: None,
            is_text: None,
            is_archive: None,
            is_media: None,
            is_source: None,
            is_script: None,
            files_count: None,
            dirs_count: None,
            size_count: None,
            // Package and license data
            package_data: Vec::new(),
            license_expression: None,
            license_detections: Vec::new(),
            license_clues: Vec::new(),
            percentage_of_license_text: None,
            // Other detections
            copyrights: Vec::new(),
            holders: Vec::new(),
            authors: Vec::new(),
            emails: Vec::new(),
            urls: Vec::new(),
            for_packages: Vec::new(),
            scan_errors: Vec::new(),
            // Optional extras
            license_policy: None,
            is_generated: None,
            source_count: None,
            is_legal: false,
            is_manifest: false,
            is_readme: false,
            is_top_level: false,
            is_key_file: false,
            is_community: false,
            facets: Vec::new(),
            tallies: None,
        }
    }

    /// A detection carrying only the two expression strings.
    fn detection(license_expression: &str, license_expression_spdx: &str) -> OutputLicenseDetection {
        OutputLicenseDetection {
            license_expression: license_expression.to_string(),
            license_expression_spdx: license_expression_spdx.to_string(),
            matches: Vec::new(),
            detection_log: Vec::new(),
            identifier: None,
        }
    }

    #[test]
    fn detected_license_expression_spdx_does_not_recombine_partial_detection_spdx() {
        // The second detection has no SPDX form, so the detections must be
        // ignored in favour of the stored expression.
        let file_info = OutputFileInfo {
            license_expression: Some("Apache-2.0 AND MIT".to_string()),
            license_detections: vec![detection("apache-2.0", "Apache-2.0"), detection("mit", "")],
            ..base_output_file_info()
        };

        let detected = file_info.detected_license_expression_spdx();

        assert_eq!(detected.as_deref(), Some("Apache-2.0 AND MIT"));
    }

    #[test]
    fn detected_license_expression_spdx_rejects_invalid_fallback_expression() {
        let file_info = OutputFileInfo {
            license_expression: Some("MIT\" or malformed".to_string()),
            ..base_output_file_info()
        };

        assert_eq!(file_info.detected_license_expression_spdx(), None);
    }
}