1use derive_builder::Builder;
5use packageurl::PackageUrl;
6use serde::{Deserialize, Serialize};
7use sha1::{Digest, Sha1};
8use std::collections::HashMap;
9use std::str::FromStr;
10
11use super::DatasourceId;
12use super::DependencyUid;
13use super::DiagnosticSeverity;
14use super::GitSha1;
15use super::LineNumber;
16use super::MatchScore;
17use super::Md5Digest;
18use super::PackageType;
19use super::PackageUid;
20use super::ScanDiagnostic;
21use super::Sha1Digest;
22use super::Sha256Digest;
23use super::Sha512Digest;
24use super::diagnostics_from_legacy_scan_errors;
25use crate::license_detection::tokenize::tokenize_without_stopwords;
26use crate::models::output::Tallies;
27use crate::utils::spdx::combine_license_expressions;
28
/// Serializable record holding the scan results collected for one path.
///
/// `derive_builder` generates `FileInfoBuilder` for this struct, but the
/// generated build function is skipped (`build_fn(skip)`); the hand-written
/// `FileInfoBuilder::build` is what turns a builder into a `FileInfo`.
/// Fields marked `#[serde(default)]` may be missing from deserialized input.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `file_type`; the Rust field named
    /// `file_type` above is the one that serializes as `type`.
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")]
    #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    /// Plain-text error messages. `FileInfo::new` derives
    /// `scan_diagnostics` from these via `diagnostics_from_legacy_scan_errors`.
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    /// Structured diagnostics; see `warning_diagnostics` / `error_diagnostics`
    /// for severity-filtered views.
    #[builder(default)]
    #[serde(default)]
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // The flags below are plain `bool`s: `FileInfo::new` initializes each of
    // them to `false`, and deserialization falls back to `false` as well.
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
164
165impl FileInfoBuilder {
166 pub fn build(&self) -> Result<FileInfo, String> {
168 let mut file_info = FileInfo::new(
169 self.name.clone().ok_or("Missing field: name")?,
170 self.base_name.clone().ok_or("Missing field: base_name")?,
171 self.extension.clone().ok_or("Missing field: extension")?,
172 self.path.clone().ok_or("Missing field: path")?,
173 self.file_type.clone().ok_or("Missing field: file_type")?,
174 self.mime_type.clone().flatten(),
175 self.file_type_label.clone().flatten(),
176 self.size.ok_or("Missing field: size")?,
177 self.date.clone().flatten(),
178 self.sha1.flatten(),
179 self.md5.flatten(),
180 self.sha256.flatten(),
181 self.programming_language.clone().flatten(),
182 self.package_data.clone().unwrap_or_default(),
183 self.license_expression.clone().flatten(),
184 self.license_detections.clone().unwrap_or_default(),
185 self.license_clues.clone().unwrap_or_default(),
186 self.copyrights.clone().unwrap_or_default(),
187 self.holders.clone().unwrap_or_default(),
188 self.authors.clone().unwrap_or_default(),
189 self.emails.clone().unwrap_or_default(),
190 self.urls.clone().unwrap_or_default(),
191 self.for_packages.clone().unwrap_or_default(),
192 self.scan_errors.clone().unwrap_or_default(),
193 );
194 file_info.scan_diagnostics = if let Some(diagnostics) = &self.scan_diagnostics {
195 diagnostics.clone()
196 } else {
197 diagnostics_from_legacy_scan_errors(&file_info.scan_errors)
198 };
199 file_info.scan_errors = file_info
200 .scan_diagnostics
201 .iter()
202 .map(|diagnostic| diagnostic.message.clone())
203 .collect();
204 file_info.license_policy = self.license_policy.clone().flatten();
205 file_info.sha1_git = self.sha1_git.flatten();
206 file_info.is_binary = self.is_binary.flatten();
207 file_info.is_text = self.is_text.flatten();
208 file_info.is_archive = self.is_archive.flatten();
209 file_info.is_media = self.is_media.flatten();
210 file_info.is_script = self.is_script.flatten();
211 file_info.files_count = self.files_count.flatten();
212 file_info.dirs_count = self.dirs_count.flatten();
213 file_info.size_count = self.size_count.flatten();
214 Ok(file_info)
215 }
216}
217
218impl FileInfo {
219 #[allow(clippy::too_many_arguments)]
220 pub fn new(
222 name: String,
223 base_name: String,
224 extension: String,
225 path: String,
226 file_type: FileType,
227 mime_type: Option<String>,
228 file_type_label: Option<String>,
229 size: u64,
230 date: Option<String>,
231 sha1: Option<Sha1Digest>,
232 md5: Option<Md5Digest>,
233 sha256: Option<Sha256Digest>,
234 programming_language: Option<String>,
235 package_data: Vec<PackageData>,
236 mut license_expression: Option<String>,
237 mut license_detections: Vec<LicenseDetection>,
238 license_clues: Vec<Match>,
239 copyrights: Vec<Copyright>,
240 holders: Vec<Holder>,
241 authors: Vec<Author>,
242 emails: Vec<OutputEmail>,
243 urls: Vec<OutputURL>,
244 for_packages: Vec<PackageUid>,
245 scan_errors: Vec<String>,
246 ) -> Self {
247 let mut package_data = package_data;
248 for package in &mut package_data {
249 enrich_package_data_license_provenance(package, &path);
250 }
251
252 license_expression = license_expression.or_else(|| {
254 let expressions = package_data
255 .iter()
256 .filter_map(|pkg| pkg.get_license_expression());
257 combine_license_expressions(expressions)
258 });
259
260 if license_detections.is_empty() {
262 for pkg in &package_data {
263 license_detections.extend(pkg.license_detections.clone());
264 }
265 }
266
267 if license_expression.is_none() && !license_detections.is_empty() {
269 let expressions = license_detections
270 .iter()
271 .map(|detection| detection.license_expression.clone());
272 license_expression =
273 crate::utils::spdx::combine_license_expressions_preserving_structure(expressions);
274 }
275
276 let mut file_info = FileInfo {
277 name,
278 base_name,
279 extension,
280 path,
281 file_type,
282 mime_type,
283 file_type_label,
284 size,
285 date,
286 sha1,
287 md5,
288 sha256,
289 sha1_git: None,
290 programming_language,
291 package_data,
292 license_expression,
293 license_detections,
294 license_clues,
295 percentage_of_license_text: None,
296 copyrights,
297 holders,
298 authors,
299 emails,
300 urls,
301 for_packages,
302 scan_diagnostics: diagnostics_from_legacy_scan_errors(&scan_errors),
303 scan_errors,
304 license_policy: None,
305 is_generated: None,
306 is_binary: None,
307 is_text: None,
308 is_archive: None,
309 is_media: None,
310 is_source: None,
311 is_script: None,
312 files_count: None,
313 dirs_count: None,
314 size_count: None,
315 source_count: None,
316 is_legal: false,
317 is_manifest: false,
318 is_readme: false,
319 is_top_level: false,
320 is_key_file: false,
321 is_community: false,
322 facets: vec![],
323 tallies: None,
324 };
325
326 file_info.backfill_license_provenance();
327 file_info
328 }
329
330 pub fn backfill_license_provenance(&mut self) {
331 for detection in &mut self.license_detections {
332 enrich_license_detection_provenance(detection, &self.path);
333 }
334
335 for package in &mut self.package_data {
336 enrich_package_data_license_provenance(package, &self.path);
337 }
338 }
339}
340
341impl FileInfo {
342 pub fn warning_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
343 self.scan_diagnostics
344 .iter()
345 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Warning)
346 }
347
348 pub fn error_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
349 self.scan_diagnostics
350 .iter()
351 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
352 }
353}
354
355fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
356 for detection in &mut package_data.license_detections {
357 enrich_license_detection_provenance(detection, path);
358 }
359 for detection in &mut package_data.other_license_detections {
360 enrich_license_detection_provenance(detection, path);
361 }
362}
363
364pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
365 for detection_match in &mut detection.matches {
366 if detection_match.from_file.is_none() {
367 detection_match.from_file = Some(path.to_string());
368 }
369
370 if detection_match.rule_identifier.is_none() {
371 detection_match.rule_identifier = detection_match.matcher.clone();
372 }
373 }
374
375 if detection.identifier.is_none() {
376 detection.identifier = Some(compute_public_detection_identifier(detection));
377 }
378}
379
380fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
381 let expression = python_safe_name(&detection.license_expression);
382 let mut hasher = Sha1::new();
383 hasher.update(format_public_detection_content(detection).as_bytes());
384 let hex_str = hex::encode(hasher.finalize());
385 let uuid_hex = &hex_str[..32];
386 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
387 .map(|uuid| uuid.to_string())
388 .unwrap_or_else(|_| uuid_hex.to_string());
389
390 format!("{}-{}", expression, content_uuid)
391}
392
393fn format_public_detection_content(detection: &LicenseDetection) -> String {
394 let mut result = String::from("(");
395
396 for (index, detection_match) in detection.matches.iter().enumerate() {
397 if index > 0 {
398 result.push_str(", ");
399 }
400 result.push_str(&format!(
401 "({}, {}, {})",
402 python_str_repr(
403 detection_match
404 .rule_identifier
405 .as_deref()
406 .or(detection_match.matcher.as_deref())
407 .unwrap_or("parser-declared-license")
408 ),
409 detection_match.score.value() as f32,
410 python_token_tuple_repr(&tokenize_without_stopwords(
411 detection_match.matched_text.as_deref().unwrap_or_default(),
412 )),
413 ));
414 }
415
416 if detection.matches.len() == 1 {
417 result.push(',');
418 }
419 result.push(')');
420 result
421}
422
/// Reduces `value` to its alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters (including `_` itself) becomes
/// one `_`, and leading/trailing underscores are dropped, so an input with
/// no alphanumeric characters yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|run| !run.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
444
/// Formats `value` the way Python's `repr()` formats a `str`.
///
/// Quoting follows Python: single quotes by default, double quotes when the
/// text contains a `'` but no `"`. Backslashes and the chosen quote
/// character are backslash-escaped. Control characters are escaped as well
/// (`\n`, `\r`, `\t`, otherwise `\xNN`), matching Python instead of being
/// emitted raw.
///
/// Other characters Python treats as non-printable (for example some
/// Unicode format or separator characters) are still passed through
/// unchanged.
fn python_str_repr(value: &str) -> String {
    let quote = if value.contains('\'') && !value.contains('"') {
        '"'
    } else {
        '\''
    };

    let mut repr = String::with_capacity(value.len() + 2);
    repr.push(quote);
    for character in value.chars() {
        match character {
            '\\' => repr.push_str("\\\\"),
            '\n' => repr.push_str("\\n"),
            '\r' => repr.push_str("\\r"),
            '\t' => repr.push_str("\\t"),
            escaped if escaped == quote => {
                repr.push('\\');
                repr.push(escaped);
            }
            // Remaining C0/C1 controls all fit in two hex digits.
            control if control.is_control() => {
                repr.push_str(&format!("\\x{:02x}", control as u32));
            }
            plain => repr.push(plain),
        }
    }
    repr.push(quote);
    repr
}
452
453fn python_token_tuple_repr(tokens: &[String]) -> String {
454 if tokens.is_empty() {
455 return String::from("()");
456 }
457
458 let mut result = String::from("(");
459 for (index, token) in tokens.iter().enumerate() {
460 if index > 0 {
461 result.push_str(", ");
462 }
463 result.push_str(&python_str_repr(token));
464 }
465
466 if tokens.len() == 1 {
467 result.push(',');
468 }
469 result.push(')');
470 result
471}
472
/// Package metadata record.
///
/// Every field is optional or has an empty default, and the struct derives
/// `Default`. `Package::from_package_data` and
/// `ResolvedPackage::from_package_data` build their types from values of
/// this struct.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Source of `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
535
536impl PackageData {
537 pub fn get_license_expression(&self) -> Option<String> {
540 if self.license_detections.is_empty() {
541 return None;
542 }
543
544 let expressions = self
545 .license_detections
546 .iter()
547 .map(|detection| detection.license_expression.clone());
548 combine_license_expressions(expressions)
549 }
550}
551
/// A license detection: an expression plus the matches that back it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// When `None`, `enrich_license_detection_provenance` computes a value
    /// from the expression and the matches.
    pub identifier: Option<String>,
}
564
/// A single license match inside a [`LicenseDetection`] (also used for
/// `FileInfo::license_clues`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// When `None`, provenance enrichment sets it to the path being enriched.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// When `None`, provenance enrichment copies `matcher` into it.
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    /// Tokenized when the identifier of the enclosing detection is computed.
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
587
/// A copyright string together with the `start_line`/`end_line` pair
/// recorded for it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
594
/// A holder string together with the `start_line`/`end_line` pair recorded
/// for it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
601
/// An author string together with the `start_line`/`end_line` pair recorded
/// for it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
608
/// A dependency entry of a [`PackageData`] or [`ResolvedPackage`].
///
/// All fields are optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Boxed; note that `ResolvedPackage` itself holds a `Vec<Dependency>`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
626
/// Package record attached to a [`Dependency`] through `resolved_package`.
///
/// It carries the same fields as [`PackageData`], except that
/// `package_type`, `namespace`, `name` and `version` are required here
/// rather than optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
684
685impl ResolvedPackage {
686 pub fn new(
687 package_type: PackageType,
688 namespace: String,
689 name: String,
690 version: String,
691 ) -> Self {
692 Self {
693 package_type,
694 namespace,
695 name,
696 version,
697 qualifiers: None,
698 subpath: None,
699 primary_language: None,
700 description: None,
701 release_date: None,
702 parties: vec![],
703 keywords: vec![],
704 homepage_url: None,
705 download_url: None,
706 size: None,
707 sha1: None,
708 md5: None,
709 sha256: None,
710 sha512: None,
711 bug_tracking_url: None,
712 code_view_url: None,
713 vcs_url: None,
714 copyright: None,
715 holder: None,
716 declared_license_expression: None,
717 declared_license_expression_spdx: None,
718 license_detections: vec![],
719 other_license_expression: None,
720 other_license_expression_spdx: None,
721 other_license_detections: vec![],
722 extracted_license_statement: None,
723 notice_text: None,
724 source_packages: vec![],
725 file_references: vec![],
726 is_private: false,
727 is_virtual: false,
728 extra_data: None,
729 dependencies: vec![],
730 repository_homepage_url: None,
731 repository_download_url: None,
732 api_data_url: None,
733 datasource_id: None,
734 purl: None,
735 }
736 }
737
738 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
739 Self {
740 package_type: package_data.package_type.unwrap_or(fallback_type),
741 namespace: package_data.namespace.clone().unwrap_or_default(),
742 name: package_data.name.clone().unwrap_or_default(),
743 version: package_data.version.clone().unwrap_or_default(),
744 qualifiers: package_data.qualifiers.clone(),
745 subpath: package_data.subpath.clone(),
746 primary_language: package_data.primary_language.clone(),
747 description: package_data.description.clone(),
748 release_date: package_data.release_date.clone(),
749 parties: package_data.parties.clone(),
750 keywords: package_data.keywords.clone(),
751 homepage_url: package_data.homepage_url.clone(),
752 download_url: package_data.download_url.clone(),
753 size: package_data.size,
754 sha1: package_data.sha1,
755 md5: package_data.md5,
756 sha256: package_data.sha256,
757 sha512: package_data.sha512,
758 bug_tracking_url: package_data.bug_tracking_url.clone(),
759 code_view_url: package_data.code_view_url.clone(),
760 vcs_url: package_data.vcs_url.clone(),
761 copyright: package_data.copyright.clone(),
762 holder: package_data.holder.clone(),
763 declared_license_expression: package_data.declared_license_expression.clone(),
764 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
765 license_detections: package_data.license_detections.clone(),
766 other_license_expression: package_data.other_license_expression.clone(),
767 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
768 other_license_detections: package_data.other_license_detections.clone(),
769 extracted_license_statement: package_data.extracted_license_statement.clone(),
770 notice_text: package_data.notice_text.clone(),
771 source_packages: package_data.source_packages.clone(),
772 file_references: package_data.file_references.clone(),
773 is_private: package_data.is_private,
774 is_virtual: package_data.is_virtual,
775 extra_data: package_data.extra_data.clone(),
776 dependencies: package_data.dependencies.clone(),
777 repository_homepage_url: package_data.repository_homepage_url.clone(),
778 repository_download_url: package_data.repository_download_url.clone(),
779 api_data_url: package_data.api_data_url.clone(),
780 datasource_id: package_data.datasource_id,
781 purl: package_data.purl.clone(),
782 }
783 }
784}
785
/// A party attached to a package (see the `parties` fields).
///
/// All fields are optional. `Package::update` treats two parties as the
/// same one when their `role` is equal and they share a non-empty `name`
/// or `email`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    /// Raw identifier because `type` is a Rust keyword; `Party::person`
    /// sets it to `"person"`.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
800
801impl Party {
802 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
803 Self {
804 r#type: Some("person".to_string()),
805 role: Some(role.to_string()),
806 name,
807 email,
808 url: None,
809 organization: None,
810 organization_url: None,
811 timezone: None,
812 }
813 }
814}
815
/// A reference to a file by `path`, with optional size, digests and extra
/// data. Only `path` is required.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
829
830impl FileReference {
831 pub(crate) fn from_path(path: String) -> Self {
832 Self {
833 path,
834 size: None,
835 sha1: None,
836 md5: None,
837 sha256: None,
838 sha512: None,
839 extra_data: None,
840 }
841 }
842}
843
/// A package assembled from one or more [`PackageData`] records.
///
/// `Package::from_package_data` creates it from a first record and
/// `Package::update` merges further records into it. Compared with
/// `PackageData` it has no `file_references`, `dependencies` or single
/// `datasource_id`, and adds `package_uid`, `datafile_paths` and
/// `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub purl: Option<String>,
    /// Set from the package URL when one can be built, otherwise from a
    /// generated fallback (see `Package::from_package_data`).
    pub package_uid: PackageUid,
    /// Paths of the datafiles merged into this package, in merge order; the
    /// first entry is the one given to `Package::from_package_data`.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the merged records that had one, in merge order.
    pub datasource_ids: Vec<DatasourceId>,
}
911
912impl Package {
913 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
919 let mut package_data = package_data.clone();
920 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
921
922 let mut package = Package {
923 package_type: package_data.package_type,
924 namespace: package_data.namespace.clone(),
925 name: package_data.name.clone(),
926 version: package_data.version.clone(),
927 qualifiers: package_data.qualifiers.clone(),
928 subpath: package_data.subpath.clone(),
929 primary_language: package_data.primary_language.clone(),
930 description: package_data.description.clone(),
931 release_date: package_data.release_date.clone(),
932 parties: package_data.parties.clone(),
933 keywords: package_data.keywords.clone(),
934 homepage_url: package_data.homepage_url.clone(),
935 download_url: package_data.download_url.clone(),
936 size: package_data.size,
937 sha1: package_data.sha1,
938 md5: package_data.md5,
939 sha256: package_data.sha256,
940 sha512: package_data.sha512,
941 bug_tracking_url: package_data.bug_tracking_url.clone(),
942 code_view_url: package_data.code_view_url.clone(),
943 vcs_url: package_data.vcs_url.clone(),
944 copyright: package_data.copyright.clone(),
945 holder: package_data.holder.clone(),
946 declared_license_expression: package_data.declared_license_expression.clone(),
947 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
948 license_detections: package_data.license_detections.clone(),
949 other_license_expression: package_data.other_license_expression.clone(),
950 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
951 other_license_detections: package_data.other_license_detections.clone(),
952 extracted_license_statement: package_data.extracted_license_statement.clone(),
953 notice_text: package_data.notice_text.clone(),
954 source_packages: package_data.source_packages.clone(),
955 is_private: package_data.is_private,
956 is_virtual: package_data.is_virtual,
957 extra_data: package_data.extra_data.clone(),
958 repository_homepage_url: package_data.repository_homepage_url.clone(),
959 repository_download_url: package_data.repository_download_url.clone(),
960 api_data_url: package_data.api_data_url.clone(),
961 purl: package_data.purl.clone(),
962 package_uid: PackageUid::empty(),
963 datafile_paths: vec![datafile_path],
964 datasource_ids: if let Some(dsid) = package_data.datasource_id {
965 vec![dsid]
966 } else {
967 vec![]
968 },
969 };
970
971 package.refresh_identity();
972 if package.package_uid.is_empty() {
973 package.package_uid = package.fallback_package_uid();
974 }
975
976 package
977 }
978
979 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
985 let mut package_data = package_data.clone();
986 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
987
988 if let Some(dsid) = package_data.datasource_id {
989 self.datasource_ids.push(dsid);
990 }
991 self.datafile_paths.push(datafile_path);
992
993 macro_rules! fill_if_empty {
994 ($field:ident) => {
995 if self.$field.is_none() {
996 self.$field = package_data.$field;
997 }
998 };
999 }
1000
1001 fill_if_empty!(package_type);
1002 fill_if_empty!(name);
1003 fill_if_empty!(namespace);
1004 fill_if_empty!(version);
1005 fill_if_empty!(qualifiers);
1006 fill_if_empty!(subpath);
1007 fill_if_empty!(primary_language);
1008 fill_if_empty!(description);
1009 fill_if_empty!(release_date);
1010 fill_if_empty!(homepage_url);
1011 fill_if_empty!(download_url);
1012 fill_if_empty!(size);
1013 fill_if_empty!(sha1);
1014 fill_if_empty!(md5);
1015 fill_if_empty!(sha256);
1016 fill_if_empty!(sha512);
1017 fill_if_empty!(bug_tracking_url);
1018 fill_if_empty!(code_view_url);
1019 fill_if_empty!(vcs_url);
1020 fill_if_empty!(copyright);
1021 fill_if_empty!(holder);
1022 fill_if_empty!(declared_license_expression);
1023 fill_if_empty!(declared_license_expression_spdx);
1024 fill_if_empty!(other_license_expression);
1025 fill_if_empty!(other_license_expression_spdx);
1026 fill_if_empty!(extracted_license_statement);
1027 fill_if_empty!(notice_text);
1028 match (&mut self.extra_data, &package_data.extra_data) {
1029 (None, Some(extra_data)) => {
1030 self.extra_data = Some(extra_data.clone());
1031 }
1032 (Some(existing), Some(incoming)) => {
1033 for (key, value) in incoming {
1034 existing.entry(key.clone()).or_insert_with(|| value.clone());
1035 }
1036 }
1037 _ => {}
1038 }
1039 fill_if_empty!(repository_homepage_url);
1040 fill_if_empty!(repository_download_url);
1041 fill_if_empty!(api_data_url);
1042
1043 for party in &package_data.parties {
1044 if let Some(existing) = self.parties.iter_mut().find(|p| {
1045 p.role == party.role
1046 && ((p.name.is_some() && p.name == party.name)
1047 || (p.email.is_some() && p.email == party.email))
1048 }) {
1049 if existing.name.is_none() {
1050 existing.name = party.name.clone();
1051 }
1052 if existing.email.is_none() {
1053 existing.email = party.email.clone();
1054 }
1055 } else {
1056 self.parties.push(party.clone());
1057 }
1058 }
1059
1060 for keyword in &package_data.keywords {
1061 if !self.keywords.contains(keyword) {
1062 self.keywords.push(keyword.clone());
1063 }
1064 }
1065
1066 for detection in &package_data.license_detections {
1067 self.license_detections.push(detection.clone());
1068 }
1069
1070 for detection in &package_data.other_license_detections {
1071 self.other_license_detections.push(detection.clone());
1072 }
1073
1074 for source_pkg in &package_data.source_packages {
1075 if !self.source_packages.contains(source_pkg) {
1076 self.source_packages.push(source_pkg.clone());
1077 }
1078 }
1079
1080 self.refresh_identity();
1081 }
1082
1083 pub fn backfill_license_provenance(&mut self) {
1084 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1085 return;
1086 };
1087
1088 for detection in &mut self.license_detections {
1089 enrich_license_detection_provenance(detection, &datafile_path);
1090 }
1091 for detection in &mut self.other_license_detections {
1092 enrich_license_detection_provenance(detection, &datafile_path);
1093 }
1094 }
1095
1096 fn refresh_identity(&mut self) {
1097 let Some(next_purl) = self.build_current_purl() else {
1098 return;
1099 };
1100
1101 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1102 self.package_uid = PackageUid::new(&next_purl);
1103 }
1104
1105 self.purl = Some(next_purl);
1106 }
1107
1108 fn fallback_package_uid(&self) -> PackageUid {
1109 let name = self
1110 .name
1111 .as_deref()
1112 .map(str::trim)
1113 .filter(|value| !value.is_empty())
1114 .unwrap_or("unknown");
1115 let version = self
1116 .version
1117 .as_deref()
1118 .map(str::trim)
1119 .filter(|value| !value.is_empty())
1120 .unwrap_or("unknown");
1121 let datasource = self
1122 .datasource_ids
1123 .first()
1124 .map(DatasourceId::as_str)
1125 .unwrap_or("unknown");
1126
1127 PackageUid::new_opaque(&format!("generated-package:{datasource}/{name}@{version}"))
1128 }
1129
1130 fn build_current_purl(&self) -> Option<String> {
1131 if let Some(existing_purl) = self.purl.as_deref() {
1132 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1133
1134 if let Some(version) = self
1135 .version
1136 .as_deref()
1137 .filter(|value| !value.trim().is_empty())
1138 {
1139 purl.with_version(version).ok()?;
1140 } else {
1141 purl.without_version();
1142 }
1143
1144 return Some(purl.to_string());
1145 }
1146
1147 if let (Some(package_type), Some(name)) = (
1148 self.package_type.as_ref(),
1149 self.name
1150 .as_deref()
1151 .filter(|value| !value.trim().is_empty()),
1152 ) {
1153 let purl_type = match package_type {
1154 PackageType::Deno => "generic",
1155 _ => package_type.as_str(),
1156 };
1157
1158 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1159
1160 if let Some(namespace) = self
1161 .namespace
1162 .as_deref()
1163 .filter(|value| !value.trim().is_empty())
1164 {
1165 purl.with_namespace(namespace).ok()?;
1166 }
1167
1168 if let Some(version) = self
1169 .version
1170 .as_deref()
1171 .filter(|value| !value.trim().is_empty())
1172 {
1173 purl.with_version(version).ok()?;
1174 }
1175
1176 if let Some(qualifiers) = &self.qualifiers {
1177 for (key, value) in qualifiers {
1178 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1179 }
1180 }
1181
1182 if let Some(subpath) = self
1183 .subpath
1184 .as_deref()
1185 .filter(|value| !value.trim().is_empty())
1186 {
1187 purl.with_subpath(subpath).ok()?;
1188 }
1189
1190 return Some(purl.to_string());
1191 }
1192 None
1193 }
1194}
1195
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds npm package data holding a single parser-declared MIT detection
    /// whose `from_file`, `rule_identifier` and `identifier` are all unset, so
    /// tests can observe those fields being filled in.
    fn npm_package_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some("parser-declared-license".to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let manifest_path = "project/package.json";

        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            manifest_path.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_with_declared_mit()],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_level = &file_info.license_detections[0];
        assert_eq!(
            file_level.matches[0].from_file.as_deref(),
            Some(manifest_path)
        );
        assert!(file_level.identifier.is_some());

        // Detections nested inside the package data.
        let package_level = &file_info.package_data[0].license_detections[0];
        assert_eq!(
            package_level.matches[0].from_file.as_deref(),
            Some(manifest_path)
        );
        assert_eq!(
            package_level.matches[0].rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(package_level.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_with_declared_mit();

        let package = Package::from_package_data(&package_data, "project/package.json".to_string());

        let detection = &package.license_detections[0];
        let first_match = &detection.matches[0];

        assert_eq!(
            first_match.from_file.as_deref(),
            Some("project/package.json")
        );
        assert_eq!(
            first_match.rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_preserves_existing_purl_qualifiers() {
        let expected_purl = "pkg:alpine/busybox@1.35.0-r17?arch=x86_64";

        let package_data = PackageData {
            package_type: Some(PackageType::Alpine),
            namespace: Some("alpine".to_string()),
            name: Some("busybox".to_string()),
            version: Some("1.35.0-r17".to_string()),
            purl: Some(expected_purl.to_string()),
            ..PackageData::default()
        };

        let package = Package::from_package_data(&package_data, "lib/apk/db/installed".to_string());

        assert_eq!(package.purl.as_deref(), Some(expected_purl));
        // The uid is the purl followed by a generated `uuid` qualifier.
        assert!(
            package
                .package_uid
                .starts_with("pkg:alpine/busybox@1.35.0-r17?arch=x86_64&uuid=")
        );
    }
}
1355
1356#[derive(Serialize, Deserialize, Debug, Clone)]
1361pub struct TopLevelDependency {
1362 pub purl: Option<String>,
1363 pub extracted_requirement: Option<String>,
1364 pub scope: Option<String>,
1365 pub is_runtime: Option<bool>,
1366 pub is_optional: Option<bool>,
1367 pub is_pinned: Option<bool>,
1368 pub is_direct: Option<bool>,
1369 pub resolved_package: Option<Box<ResolvedPackage>>,
1370 #[serde(default)]
1371 pub extra_data: Option<HashMap<String, serde_json::Value>>,
1372 pub dependency_uid: DependencyUid,
1374 pub for_package_uid: Option<PackageUid>,
1376 pub datafile_path: String,
1378 pub datasource_id: DatasourceId,
1380 pub namespace: Option<String>,
1382}
1383
1384impl TopLevelDependency {
1385 pub fn from_dependency(
1387 dep: &Dependency,
1388 datafile_path: String,
1389 datasource_id: DatasourceId,
1390 for_package_uid: Option<PackageUid>,
1391 ) -> Self {
1392 let dependency_uid = dep
1393 .purl
1394 .as_ref()
1395 .map(|p| DependencyUid::new(p))
1396 .unwrap_or_else(DependencyUid::empty);
1397
1398 TopLevelDependency {
1399 purl: dep.purl.clone(),
1400 extracted_requirement: dep.extracted_requirement.clone(),
1401 scope: dep.scope.clone(),
1402 is_runtime: dep.is_runtime,
1403 is_optional: dep.is_optional,
1404 is_pinned: dep.is_pinned,
1405 is_direct: dep.is_direct,
1406 resolved_package: dep.resolved_package.clone(),
1407 extra_data: dep.extra_data.clone(),
1408 dependency_uid,
1409 for_package_uid,
1410 datafile_path,
1411 datasource_id,
1412 namespace: None,
1413 }
1414 }
1415}
1416
1417#[derive(Serialize, Deserialize, Debug, Clone)]
1418pub struct OutputEmail {
1419 pub email: String,
1420 pub start_line: LineNumber,
1421 pub end_line: LineNumber,
1422}
1423
1424#[derive(Serialize, Deserialize, Debug, Clone)]
1425pub struct OutputURL {
1426 pub url: String,
1427 pub start_line: LineNumber,
1428 pub end_line: LineNumber,
1429}
1430
1431#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
1432pub struct LicensePolicyEntry {
1433 pub license_key: String,
1434 pub label: String,
1435 pub color_code: String,
1436 pub icon: String,
1437}
1438
/// Kind of filesystem entry.
///
/// The hand-written serde impls in this file represent the variants as the
/// strings `"file"` and `"directory"`.
#[derive(Clone, Debug, PartialEq)]
pub enum FileType {
    /// Serialized as `"file"`.
    File,
    /// Serialized as `"directory"`.
    Directory,
}
1444
1445impl serde::Serialize for FileType {
1446 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1447 where
1448 S: serde::Serializer,
1449 {
1450 match self {
1451 FileType::File => serializer.serialize_str("file"),
1452 FileType::Directory => serializer.serialize_str("directory"),
1453 }
1454 }
1455}
1456
1457impl<'de> Deserialize<'de> for FileType {
1458 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1459 where
1460 D: serde::Deserializer<'de>,
1461 {
1462 let value = String::deserialize(deserializer)?;
1463 match value.as_str() {
1464 "file" => Ok(FileType::File),
1465 "directory" => Ok(FileType::Directory),
1466 _ => Err(serde::de::Error::custom("invalid file type")),
1467 }
1468 }
1469}