1use derive_builder::Builder;
5use packageurl::PackageUrl;
6use serde::{Deserialize, Serialize};
7use sha1::{Digest, Sha1};
8use std::collections::HashMap;
9use std::str::FromStr;
10
11use super::DatasourceId;
12use super::DependencyUid;
13use super::DiagnosticSeverity;
14use super::GitSha1;
15use super::LineNumber;
16use super::MatchScore;
17use super::Md5Digest;
18use super::PackageType;
19use super::PackageUid;
20use super::ScanDiagnostic;
21use super::Sha1Digest;
22use super::Sha256Digest;
23use super::Sha512Digest;
24use super::diagnostics_from_legacy_scan_errors;
25use crate::license_detection::tokenize::tokenize_without_stopwords;
26use crate::models::output::Tallies;
27use crate::utils::spdx::combine_license_expressions;
28
/// Serializable record describing one path in a scan result.
///
/// Holds the path identity (`name`, `base_name`, `extension`, `path`, type),
/// its size and optional digests, and every finding attached to it: package
/// data, license detections and clues, copyrights, holders, authors, e-mails,
/// URLs, diagnostics, classification flags and tallies.
///
/// A `FileInfoBuilder` is derived for this type, but the generated `build`
/// method is suppressed (`build_fn(skip)`), so `build` has to be written by
/// hand on the builder.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `file_type` (not to be confused with the
    /// `file_type` field above, which is serialized as `type`).
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")]
    #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    /// Plain-text error messages; `FileInfo::new` derives `scan_diagnostics`
    /// from these via `diagnostics_from_legacy_scan_errors`.
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    /// Structured diagnostics; each entry carries at least a `severity` and a
    /// `message`.
    #[builder(default)]
    #[serde(default)]
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    // NOTE(review): the `*_count` fields look like directory-level aggregates;
    // confirm against the code that populates them.
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
164
165impl FileInfoBuilder {
166 pub fn build(&self) -> Result<FileInfo, String> {
168 let mut file_info = FileInfo::new(
169 self.name.clone().ok_or("Missing field: name")?,
170 self.base_name.clone().ok_or("Missing field: base_name")?,
171 self.extension.clone().ok_or("Missing field: extension")?,
172 self.path.clone().ok_or("Missing field: path")?,
173 self.file_type.clone().ok_or("Missing field: file_type")?,
174 self.mime_type.clone().flatten(),
175 self.file_type_label.clone().flatten(),
176 self.size.ok_or("Missing field: size")?,
177 self.date.clone().flatten(),
178 self.sha1.flatten(),
179 self.md5.flatten(),
180 self.sha256.flatten(),
181 self.programming_language.clone().flatten(),
182 self.package_data.clone().unwrap_or_default(),
183 self.license_expression.clone().flatten(),
184 self.license_detections.clone().unwrap_or_default(),
185 self.license_clues.clone().unwrap_or_default(),
186 self.copyrights.clone().unwrap_or_default(),
187 self.holders.clone().unwrap_or_default(),
188 self.authors.clone().unwrap_or_default(),
189 self.emails.clone().unwrap_or_default(),
190 self.urls.clone().unwrap_or_default(),
191 self.for_packages.clone().unwrap_or_default(),
192 self.scan_errors.clone().unwrap_or_default(),
193 );
194 file_info.scan_diagnostics = if let Some(diagnostics) = &self.scan_diagnostics {
195 diagnostics.clone()
196 } else {
197 diagnostics_from_legacy_scan_errors(&file_info.scan_errors)
198 };
199 file_info.scan_errors = file_info
200 .scan_diagnostics
201 .iter()
202 .map(|diagnostic| diagnostic.message.clone())
203 .collect();
204 file_info.license_policy = self.license_policy.clone().flatten();
205 file_info.sha1_git = self.sha1_git.flatten();
206 file_info.is_binary = self.is_binary.flatten();
207 file_info.is_text = self.is_text.flatten();
208 file_info.is_archive = self.is_archive.flatten();
209 file_info.is_media = self.is_media.flatten();
210 file_info.is_script = self.is_script.flatten();
211 file_info.files_count = self.files_count.flatten();
212 file_info.dirs_count = self.dirs_count.flatten();
213 file_info.size_count = self.size_count.flatten();
214 Ok(file_info)
215 }
216}
217
218impl FileInfo {
219 #[allow(clippy::too_many_arguments)]
220 pub fn new(
222 name: String,
223 base_name: String,
224 extension: String,
225 path: String,
226 file_type: FileType,
227 mime_type: Option<String>,
228 file_type_label: Option<String>,
229 size: u64,
230 date: Option<String>,
231 sha1: Option<Sha1Digest>,
232 md5: Option<Md5Digest>,
233 sha256: Option<Sha256Digest>,
234 programming_language: Option<String>,
235 package_data: Vec<PackageData>,
236 mut license_expression: Option<String>,
237 mut license_detections: Vec<LicenseDetection>,
238 license_clues: Vec<Match>,
239 copyrights: Vec<Copyright>,
240 holders: Vec<Holder>,
241 authors: Vec<Author>,
242 emails: Vec<OutputEmail>,
243 urls: Vec<OutputURL>,
244 for_packages: Vec<PackageUid>,
245 scan_errors: Vec<String>,
246 ) -> Self {
247 let mut package_data = package_data;
248 for package in &mut package_data {
249 enrich_package_data_license_provenance(package, &path);
250 }
251
252 license_expression = license_expression.or_else(|| {
254 let expressions = package_data
255 .iter()
256 .filter_map(|pkg| pkg.get_license_expression());
257 combine_license_expressions(expressions)
258 });
259
260 if license_detections.is_empty() {
262 for pkg in &package_data {
263 license_detections.extend(pkg.license_detections.clone());
264 }
265 }
266
267 if license_expression.is_none() && !license_detections.is_empty() {
269 let expressions = license_detections
270 .iter()
271 .map(|detection| detection.license_expression.clone());
272 let expressions: Vec<String> = expressions.collect();
273 license_expression = crate::utils::spdx::select_primary_license_expression(
274 expressions.clone(),
275 )
276 .or_else(|| {
277 crate::utils::spdx::combine_license_expressions_preserving_structure(expressions)
278 });
279 }
280
281 let mut file_info = FileInfo {
282 name,
283 base_name,
284 extension,
285 path,
286 file_type,
287 mime_type,
288 file_type_label,
289 size,
290 date,
291 sha1,
292 md5,
293 sha256,
294 sha1_git: None,
295 programming_language,
296 package_data,
297 license_expression,
298 license_detections,
299 license_clues,
300 percentage_of_license_text: None,
301 copyrights,
302 holders,
303 authors,
304 emails,
305 urls,
306 for_packages,
307 scan_diagnostics: diagnostics_from_legacy_scan_errors(&scan_errors),
308 scan_errors,
309 license_policy: None,
310 is_generated: None,
311 is_binary: None,
312 is_text: None,
313 is_archive: None,
314 is_media: None,
315 is_source: None,
316 is_script: None,
317 files_count: None,
318 dirs_count: None,
319 size_count: None,
320 source_count: None,
321 is_legal: false,
322 is_manifest: false,
323 is_readme: false,
324 is_top_level: false,
325 is_key_file: false,
326 is_community: false,
327 facets: vec![],
328 tallies: None,
329 };
330
331 file_info.backfill_license_provenance();
332 file_info
333 }
334
335 pub fn backfill_license_provenance(&mut self) {
336 for detection in &mut self.license_detections {
337 enrich_license_detection_provenance(detection, &self.path);
338 }
339
340 for package in &mut self.package_data {
341 enrich_package_data_license_provenance(package, &self.path);
342 }
343 }
344}
345
346impl FileInfo {
347 pub fn warning_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
348 self.scan_diagnostics
349 .iter()
350 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Warning)
351 }
352
353 pub fn error_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
354 self.scan_diagnostics
355 .iter()
356 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
357 }
358}
359
360fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
361 for detection in &mut package_data.license_detections {
362 enrich_license_detection_provenance(detection, path);
363 }
364 for detection in &mut package_data.other_license_detections {
365 enrich_license_detection_provenance(detection, path);
366 }
367}
368
369pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
370 for detection_match in &mut detection.matches {
371 if detection_match.from_file.is_none() {
372 detection_match.from_file = Some(path.to_string());
373 }
374
375 if detection_match.rule_identifier.is_none() {
376 detection_match.rule_identifier = detection_match.matcher.clone();
377 }
378 }
379
380 if detection.identifier.is_none() {
381 detection.identifier = Some(compute_public_detection_identifier(detection));
382 }
383}
384
385fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
386 let expression = python_safe_name(&detection.license_expression);
387 let mut hasher = Sha1::new();
388 hasher.update(format_public_detection_content(detection).as_bytes());
389 let hex_str = hex::encode(hasher.finalize());
390 let uuid_hex = &hex_str[..32];
391 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
392 .map(|uuid| uuid.to_string())
393 .unwrap_or_else(|_| uuid_hex.to_string());
394
395 format!("{}-{}", expression, content_uuid)
396}
397
398fn format_public_detection_content(detection: &LicenseDetection) -> String {
399 let mut result = String::from("(");
400
401 for (index, detection_match) in detection.matches.iter().enumerate() {
402 if index > 0 {
403 result.push_str(", ");
404 }
405 result.push_str(&format!(
406 "({}, {}, {})",
407 python_str_repr(
408 detection_match
409 .rule_identifier
410 .as_deref()
411 .or(detection_match.matcher.as_deref())
412 .unwrap_or("parser-declared-license")
413 ),
414 detection_match.score.value() as f32,
415 python_token_tuple_repr(&tokenize_without_stopwords(
416 detection_match.matched_text.as_deref().unwrap_or_default(),
417 )),
418 ));
419 }
420
421 if detection.matches.len() == 1 {
422 result.push(',');
423 }
424 result.push(')');
425 result
426}
427
/// Reduces `value` to alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters (underscores included) acts as
/// one separator; leading and trailing separators are dropped, so the result
/// never starts or ends with `_` and is empty when `value` has no
/// alphanumeric character.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|run| !run.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
449
/// Quotes `value` the way Python's `repr` picks quotes for a string.
///
/// Double quotes are used only when the text contains a single quote and no
/// double quote; otherwise single quotes are used. Backslashes and the chosen
/// quote character are backslash-escaped. No other characters are escaped.
fn python_str_repr(value: &str) -> String {
    let quote = if value.contains('\'') && !value.contains('"') {
        '"'
    } else {
        '\''
    };

    let mut rendered = String::with_capacity(value.len() + 2);
    rendered.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            rendered.push('\\');
        }
        rendered.push(character);
    }
    rendered.push(quote);
    rendered
}
457
458fn python_token_tuple_repr(tokens: &[String]) -> String {
459 if tokens.is_empty() {
460 return String::from("()");
461 }
462
463 let mut result = String::from("(");
464 for (index, token) in tokens.iter().enumerate() {
465 if index > 0 {
466 result.push_str(", ");
467 }
468 result.push_str(&python_str_repr(token));
469 }
470
471 if tokens.len() == 1 {
472 result.push(',');
473 }
474 result.push(')');
475 result
476}
477
/// Package metadata record.
///
/// Every field is optional, a collection, or a `bool`, so `Default` yields an
/// empty record. Collections and flags marked `#[serde(default)]` may be
/// omitted from the input when deserializing.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Source of the expression returned by `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
540
541impl PackageData {
542 pub fn get_license_expression(&self) -> Option<String> {
545 if self.license_detections.is_empty() {
546 return None;
547 }
548
549 let expressions = self
550 .license_detections
551 .iter()
552 .map(|detection| detection.license_expression.clone());
553 combine_license_expressions(expressions)
554 }
555}
556
/// A license detection: an expression (plus its SPDX form) and the matches
/// it was built from.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    /// May be omitted from the input; defaults to an empty list.
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// Back-filled by `enrich_license_detection_provenance` when `None`.
    pub identifier: Option<String>,
}
569
/// A single license match, covering `start_line` through `end_line`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// Back-filled with the owning file's path when `None`.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// Back-filled with a copy of `matcher` when `None`.
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
592
/// A copyright statement and the line range it covers.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    /// Optional normalized form; left out of serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub normalized_copyright: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
601
602impl Copyright {
603 pub fn normalized_text(&self) -> &str {
604 self.normalized_copyright
605 .as_deref()
606 .unwrap_or(self.copyright.as_str())
607 }
608}
609
/// A holder name and the line range it covers.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
616
/// An author name and the line range it covers.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
623
/// A dependency entry of a package.
///
/// All fields are optional; the boolean qualifiers are `Option<bool>` so
/// "unknown" stays distinguishable from `false`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Boxed package record for this dependency, when one is available.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
641
/// Package record attached to a [`Dependency`].
///
/// Has the same field set as [`PackageData`], except that `package_type`,
/// `namespace`, `name` and `version` are required rather than optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
699
700impl ResolvedPackage {
701 pub fn new(
702 package_type: PackageType,
703 namespace: String,
704 name: String,
705 version: String,
706 ) -> Self {
707 Self {
708 package_type,
709 namespace,
710 name,
711 version,
712 qualifiers: None,
713 subpath: None,
714 primary_language: None,
715 description: None,
716 release_date: None,
717 parties: vec![],
718 keywords: vec![],
719 homepage_url: None,
720 download_url: None,
721 size: None,
722 sha1: None,
723 md5: None,
724 sha256: None,
725 sha512: None,
726 bug_tracking_url: None,
727 code_view_url: None,
728 vcs_url: None,
729 copyright: None,
730 holder: None,
731 declared_license_expression: None,
732 declared_license_expression_spdx: None,
733 license_detections: vec![],
734 other_license_expression: None,
735 other_license_expression_spdx: None,
736 other_license_detections: vec![],
737 extracted_license_statement: None,
738 notice_text: None,
739 source_packages: vec![],
740 file_references: vec![],
741 is_private: false,
742 is_virtual: false,
743 extra_data: None,
744 dependencies: vec![],
745 repository_homepage_url: None,
746 repository_download_url: None,
747 api_data_url: None,
748 datasource_id: None,
749 purl: None,
750 }
751 }
752
753 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
754 Self {
755 package_type: package_data.package_type.unwrap_or(fallback_type),
756 namespace: package_data.namespace.clone().unwrap_or_default(),
757 name: package_data.name.clone().unwrap_or_default(),
758 version: package_data.version.clone().unwrap_or_default(),
759 qualifiers: package_data.qualifiers.clone(),
760 subpath: package_data.subpath.clone(),
761 primary_language: package_data.primary_language.clone(),
762 description: package_data.description.clone(),
763 release_date: package_data.release_date.clone(),
764 parties: package_data.parties.clone(),
765 keywords: package_data.keywords.clone(),
766 homepage_url: package_data.homepage_url.clone(),
767 download_url: package_data.download_url.clone(),
768 size: package_data.size,
769 sha1: package_data.sha1,
770 md5: package_data.md5,
771 sha256: package_data.sha256,
772 sha512: package_data.sha512,
773 bug_tracking_url: package_data.bug_tracking_url.clone(),
774 code_view_url: package_data.code_view_url.clone(),
775 vcs_url: package_data.vcs_url.clone(),
776 copyright: package_data.copyright.clone(),
777 holder: package_data.holder.clone(),
778 declared_license_expression: package_data.declared_license_expression.clone(),
779 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
780 license_detections: package_data.license_detections.clone(),
781 other_license_expression: package_data.other_license_expression.clone(),
782 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
783 other_license_detections: package_data.other_license_detections.clone(),
784 extracted_license_statement: package_data.extracted_license_statement.clone(),
785 notice_text: package_data.notice_text.clone(),
786 source_packages: package_data.source_packages.clone(),
787 file_references: package_data.file_references.clone(),
788 is_private: package_data.is_private,
789 is_virtual: package_data.is_virtual,
790 extra_data: package_data.extra_data.clone(),
791 dependencies: package_data.dependencies.clone(),
792 repository_homepage_url: package_data.repository_homepage_url.clone(),
793 repository_download_url: package_data.repository_download_url.clone(),
794 api_data_url: package_data.api_data_url.clone(),
795 datasource_id: package_data.datasource_id,
796 purl: package_data.purl.clone(),
797 }
798 }
799}
800
/// A party attached to a package, identified by role plus name and/or e-mail.
///
/// All fields are optional.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    /// Kind of party; `Party::person` sets this to `"person"`.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
815
816impl Party {
817 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
818 Self {
819 r#type: Some("person".to_string()),
820 role: Some(role.to_string()),
821 name,
822 email,
823 url: None,
824 organization: None,
825 organization_url: None,
826 timezone: None,
827 }
828 }
829}
830
/// A reference to a file by path, with optional size, digests and extra data.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
844
845impl FileReference {
846 pub(crate) fn from_path(path: String) -> Self {
847 Self {
848 path,
849 size: None,
850 sha1: None,
851 md5: None,
852 sha256: None,
853 sha512: None,
854 extra_data: None,
855 }
856 }
857}
858
/// A package assembled from one or more [`PackageData`] records.
///
/// Carries the descriptive fields of `PackageData` (without
/// `file_references`, `dependencies` and the single `datasource_id`) and adds
/// `package_uid`, `datafile_paths` and `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub purl: Option<String>,
    /// Unique id of this package; (re)assigned when the package identity is
    /// refreshed.
    pub package_uid: PackageUid,
    /// Paths of the data files that contributed to this package, in the
    /// order they were merged.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the contributing records, in the order they were
    /// merged.
    pub datasource_ids: Vec<DatasourceId>,
}
926
927impl Package {
928 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
934 let mut package_data = package_data.clone();
935 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
936
937 let mut package = Package {
938 package_type: package_data.package_type,
939 namespace: package_data.namespace.clone(),
940 name: package_data.name.clone(),
941 version: package_data.version.clone(),
942 qualifiers: package_data.qualifiers.clone(),
943 subpath: package_data.subpath.clone(),
944 primary_language: package_data.primary_language.clone(),
945 description: package_data.description.clone(),
946 release_date: package_data.release_date.clone(),
947 parties: package_data.parties.clone(),
948 keywords: package_data.keywords.clone(),
949 homepage_url: package_data.homepage_url.clone(),
950 download_url: package_data.download_url.clone(),
951 size: package_data.size,
952 sha1: package_data.sha1,
953 md5: package_data.md5,
954 sha256: package_data.sha256,
955 sha512: package_data.sha512,
956 bug_tracking_url: package_data.bug_tracking_url.clone(),
957 code_view_url: package_data.code_view_url.clone(),
958 vcs_url: package_data.vcs_url.clone(),
959 copyright: package_data.copyright.clone(),
960 holder: package_data.holder.clone(),
961 declared_license_expression: package_data.declared_license_expression.clone(),
962 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
963 license_detections: package_data.license_detections.clone(),
964 other_license_expression: package_data.other_license_expression.clone(),
965 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
966 other_license_detections: package_data.other_license_detections.clone(),
967 extracted_license_statement: package_data.extracted_license_statement.clone(),
968 notice_text: package_data.notice_text.clone(),
969 source_packages: package_data.source_packages.clone(),
970 is_private: package_data.is_private,
971 is_virtual: package_data.is_virtual,
972 extra_data: package_data.extra_data.clone(),
973 repository_homepage_url: package_data.repository_homepage_url.clone(),
974 repository_download_url: package_data.repository_download_url.clone(),
975 api_data_url: package_data.api_data_url.clone(),
976 purl: package_data.purl.clone(),
977 package_uid: PackageUid::empty(),
978 datafile_paths: vec![datafile_path],
979 datasource_ids: if let Some(dsid) = package_data.datasource_id {
980 vec![dsid]
981 } else {
982 vec![]
983 },
984 };
985
986 package.refresh_identity();
987 if package.package_uid.is_empty() {
988 package.package_uid = package.fallback_package_uid();
989 }
990
991 package
992 }
993
994 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
1000 let mut package_data = package_data.clone();
1001 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
1002
1003 if let Some(dsid) = package_data.datasource_id {
1004 self.datasource_ids.push(dsid);
1005 }
1006 self.datafile_paths.push(datafile_path);
1007
1008 macro_rules! fill_if_empty {
1009 ($field:ident) => {
1010 if self.$field.is_none() {
1011 self.$field = package_data.$field;
1012 }
1013 };
1014 }
1015
1016 fill_if_empty!(package_type);
1017 fill_if_empty!(name);
1018 fill_if_empty!(namespace);
1019 fill_if_empty!(version);
1020 fill_if_empty!(qualifiers);
1021 fill_if_empty!(subpath);
1022 fill_if_empty!(primary_language);
1023 fill_if_empty!(description);
1024 fill_if_empty!(release_date);
1025 fill_if_empty!(homepage_url);
1026 fill_if_empty!(download_url);
1027 fill_if_empty!(size);
1028 fill_if_empty!(sha1);
1029 fill_if_empty!(md5);
1030 fill_if_empty!(sha256);
1031 fill_if_empty!(sha512);
1032 fill_if_empty!(bug_tracking_url);
1033 fill_if_empty!(code_view_url);
1034 fill_if_empty!(vcs_url);
1035 fill_if_empty!(copyright);
1036 fill_if_empty!(holder);
1037 fill_if_empty!(declared_license_expression);
1038 fill_if_empty!(declared_license_expression_spdx);
1039 fill_if_empty!(other_license_expression);
1040 fill_if_empty!(other_license_expression_spdx);
1041 fill_if_empty!(extracted_license_statement);
1042 fill_if_empty!(notice_text);
1043 match (&mut self.extra_data, &package_data.extra_data) {
1044 (None, Some(extra_data)) => {
1045 self.extra_data = Some(extra_data.clone());
1046 }
1047 (Some(existing), Some(incoming)) => {
1048 for (key, value) in incoming {
1049 existing.entry(key.clone()).or_insert_with(|| value.clone());
1050 }
1051 }
1052 _ => {}
1053 }
1054 fill_if_empty!(repository_homepage_url);
1055 fill_if_empty!(repository_download_url);
1056 fill_if_empty!(api_data_url);
1057
1058 for party in &package_data.parties {
1059 if let Some(existing) = self.parties.iter_mut().find(|p| {
1060 p.role == party.role
1061 && ((p.name.is_some() && p.name == party.name)
1062 || (p.email.is_some() && p.email == party.email))
1063 }) {
1064 if existing.name.is_none() {
1065 existing.name = party.name.clone();
1066 }
1067 if existing.email.is_none() {
1068 existing.email = party.email.clone();
1069 }
1070 } else {
1071 self.parties.push(party.clone());
1072 }
1073 }
1074
1075 for keyword in &package_data.keywords {
1076 if !self.keywords.contains(keyword) {
1077 self.keywords.push(keyword.clone());
1078 }
1079 }
1080
1081 for detection in &package_data.license_detections {
1082 self.license_detections.push(detection.clone());
1083 }
1084
1085 for detection in &package_data.other_license_detections {
1086 self.other_license_detections.push(detection.clone());
1087 }
1088
1089 for source_pkg in &package_data.source_packages {
1090 if !self.source_packages.contains(source_pkg) {
1091 self.source_packages.push(source_pkg.clone());
1092 }
1093 }
1094
1095 self.refresh_identity();
1096 }
1097
1098 pub fn backfill_license_provenance(&mut self) {
1099 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1100 return;
1101 };
1102
1103 for detection in &mut self.license_detections {
1104 enrich_license_detection_provenance(detection, &datafile_path);
1105 }
1106 for detection in &mut self.other_license_detections {
1107 enrich_license_detection_provenance(detection, &datafile_path);
1108 }
1109 }
1110
1111 fn refresh_identity(&mut self) {
1112 let Some(next_purl) = self.build_current_purl() else {
1113 return;
1114 };
1115
1116 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1117 self.package_uid = PackageUid::new(&next_purl);
1118 }
1119
1120 self.purl = Some(next_purl);
1121 }
1122
1123 fn fallback_package_uid(&self) -> PackageUid {
1124 let name = self
1125 .name
1126 .as_deref()
1127 .map(str::trim)
1128 .filter(|value| !value.is_empty())
1129 .unwrap_or("unknown");
1130 let version = self
1131 .version
1132 .as_deref()
1133 .map(str::trim)
1134 .filter(|value| !value.is_empty())
1135 .unwrap_or("unknown");
1136 let datasource = self
1137 .datasource_ids
1138 .first()
1139 .map(DatasourceId::as_str)
1140 .unwrap_or("unknown");
1141
1142 PackageUid::new_opaque(&format!("generated-package:{datasource}/{name}@{version}"))
1143 }
1144
1145 fn build_current_purl(&self) -> Option<String> {
1146 if let Some(existing_purl) = self.purl.as_deref() {
1147 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1148
1149 if let Some(version) = self
1150 .version
1151 .as_deref()
1152 .filter(|value| !value.trim().is_empty())
1153 {
1154 purl.with_version(version).ok()?;
1155 } else {
1156 purl.without_version();
1157 }
1158
1159 return Some(purl.to_string());
1160 }
1161
1162 if let (Some(package_type), Some(name)) = (
1163 self.package_type.as_ref(),
1164 self.name
1165 .as_deref()
1166 .filter(|value| !value.trim().is_empty()),
1167 ) {
1168 let purl_type = match package_type {
1169 PackageType::Deno => "generic",
1170 _ => package_type.as_str(),
1171 };
1172
1173 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1174
1175 if let Some(namespace) = self
1176 .namespace
1177 .as_deref()
1178 .filter(|value| !value.trim().is_empty())
1179 {
1180 purl.with_namespace(namespace).ok()?;
1181 }
1182
1183 if let Some(version) = self
1184 .version
1185 .as_deref()
1186 .filter(|value| !value.trim().is_empty())
1187 {
1188 purl.with_version(version).ok()?;
1189 }
1190
1191 if let Some(qualifiers) = &self.qualifiers {
1192 for (key, value) in qualifiers {
1193 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1194 }
1195 }
1196
1197 if let Some(subpath) = self
1198 .subpath
1199 .as_deref()
1200 .filter(|value| !value.trim().is_empty())
1201 {
1202 purl.with_subpath(subpath).ok()?;
1203 }
1204
1205 return Some(purl.to_string());
1206 }
1207 None
1208 }
1209}
1210
#[cfg(test)]
mod tests {
    use super::*;

    /// Manifest location used by the provenance tests.
    const MANIFEST_PATH: &str = "project/package.json";

    /// Builds npm package data holding a single parser-declared MIT detection
    /// whose match has no `from_file` or `rule_identifier` and whose
    /// detection has no `identifier` yet.
    fn npm_package_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: String::from("mit"),
            license_expression_spdx: String::from("MIT"),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some(String::from("parser-declared-license")),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some(String::from("MIT")),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };
        let detection = LicenseDetection {
            license_expression: String::from("mit"),
            license_expression_spdx: String::from("MIT"),
            matches: vec![declared_match],
            detection_log: Vec::new(),
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let file_info = FileInfo::new(
            String::from("package.json"),
            String::from("package"),
            String::from(".json"),
            String::from(MANIFEST_PATH),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_with_declared_mit()],
            None,
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
            Vec::new(),
        );

        // File-level detections are expected to carry the manifest path.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_detection = &file_info.license_detections[0];
        assert_eq!(
            file_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(file_detection.identifier.is_some());

        // The detection stored on the package data must be enriched as well.
        let package_detection = &file_info.package_data[0].license_detections[0];
        let package_match = &package_detection.matches[0];
        assert_eq!(package_match.from_file.as_deref(), Some(MANIFEST_PATH));
        assert_eq!(
            package_match.rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(package_detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_with_declared_mit();

        let package = Package::from_package_data(&package_data, String::from(MANIFEST_PATH));

        let detection = &package.license_detections[0];
        let first_match = &detection.matches[0];
        assert_eq!(first_match.from_file.as_deref(), Some(MANIFEST_PATH));
        assert_eq!(
            first_match.rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_preserves_existing_purl_qualifiers() {
        let expected_purl = "pkg:alpine/busybox@1.35.0-r17?arch=x86_64";
        let package_data = PackageData {
            package_type: Some(PackageType::Alpine),
            namespace: Some(String::from("alpine")),
            name: Some(String::from("busybox")),
            version: Some(String::from("1.35.0-r17")),
            purl: Some(String::from(expected_purl)),
            ..PackageData::default()
        };

        let package =
            Package::from_package_data(&package_data, String::from("lib/apk/db/installed"));

        assert_eq!(package.purl.as_deref(), Some(expected_purl));
        assert!(
            package
                .package_uid
                .starts_with("pkg:alpine/busybox@1.35.0-r17?arch=x86_64&uuid=")
        );
    }
}
1370
1371#[derive(Serialize, Deserialize, Debug, Clone)]
1376pub struct TopLevelDependency {
1377 pub purl: Option<String>,
1378 pub extracted_requirement: Option<String>,
1379 pub scope: Option<String>,
1380 pub is_runtime: Option<bool>,
1381 pub is_optional: Option<bool>,
1382 pub is_pinned: Option<bool>,
1383 pub is_direct: Option<bool>,
1384 pub resolved_package: Option<Box<ResolvedPackage>>,
1385 #[serde(default)]
1386 pub extra_data: Option<HashMap<String, serde_json::Value>>,
1387 pub dependency_uid: DependencyUid,
1389 pub for_package_uid: Option<PackageUid>,
1391 pub datafile_path: String,
1393 pub datasource_id: DatasourceId,
1395 pub namespace: Option<String>,
1397}
1398
1399impl TopLevelDependency {
1400 pub fn from_dependency(
1402 dep: &Dependency,
1403 datafile_path: String,
1404 datasource_id: DatasourceId,
1405 for_package_uid: Option<PackageUid>,
1406 ) -> Self {
1407 let dependency_uid = dep
1408 .purl
1409 .as_ref()
1410 .map(|p| DependencyUid::new(p))
1411 .unwrap_or_else(DependencyUid::empty);
1412
1413 TopLevelDependency {
1414 purl: dep.purl.clone(),
1415 extracted_requirement: dep.extracted_requirement.clone(),
1416 scope: dep.scope.clone(),
1417 is_runtime: dep.is_runtime,
1418 is_optional: dep.is_optional,
1419 is_pinned: dep.is_pinned,
1420 is_direct: dep.is_direct,
1421 resolved_package: dep.resolved_package.clone(),
1422 extra_data: dep.extra_data.clone(),
1423 dependency_uid,
1424 for_package_uid,
1425 datafile_path,
1426 datasource_id,
1427 namespace: None,
1428 }
1429 }
1430}
1431
1432#[derive(Serialize, Deserialize, Debug, Clone)]
1433pub struct OutputEmail {
1434 pub email: String,
1435 pub start_line: LineNumber,
1436 pub end_line: LineNumber,
1437}
1438
1439#[derive(Serialize, Deserialize, Debug, Clone)]
1440pub struct OutputURL {
1441 pub url: String,
1442 pub start_line: LineNumber,
1443 pub end_line: LineNumber,
1444}
1445
1446#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
1447pub struct LicensePolicyEntry {
1448 pub license_key: String,
1449 pub label: String,
1450 pub color_code: String,
1451 pub icon: String,
1452}
1453
/// Kind of entry described by a `FileInfo`.
///
/// The serde impls in this file map the variants to the strings `"file"` and
/// `"directory"`. Equality on this fieldless enum is total, so `Eq` and
/// `Hash` are derived alongside `PartialEq`; this lets the type be used in
/// `HashSet`/`HashMap` keys and inside other `Eq` types.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileType {
    /// A file entry.
    File,
    /// A directory entry.
    Directory,
}
1459
1460impl serde::Serialize for FileType {
1461 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1462 where
1463 S: serde::Serializer,
1464 {
1465 match self {
1466 FileType::File => serializer.serialize_str("file"),
1467 FileType::Directory => serializer.serialize_str("directory"),
1468 }
1469 }
1470}
1471
1472impl<'de> Deserialize<'de> for FileType {
1473 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1474 where
1475 D: serde::Deserializer<'de>,
1476 {
1477 let value = String::deserialize(deserializer)?;
1478 match value.as_str() {
1479 "file" => Ok(FileType::File),
1480 "directory" => Ok(FileType::Directory),
1481 _ => Err(serde::de::Error::custom("invalid file type")),
1482 }
1483 }
1484}