1use derive_builder::Builder;
5use packageurl::PackageUrl;
6use serde::{Deserialize, Serialize};
7use sha1::{Digest, Sha1};
8use std::collections::HashMap;
9use std::fmt;
10use std::str::FromStr;
11
12use super::DatasourceId;
13use super::DependencyUid;
14use super::DiagnosticSeverity;
15use super::GitSha1;
16use super::LineNumber;
17use super::MatchScore;
18use super::Md5Digest;
19use super::PackageType;
20use super::PackageUid;
21use super::ScanDiagnostic;
22use super::Sha1Digest;
23use super::Sha256Digest;
24use super::Sha512Digest;
25
26use crate::license_detection::MatcherKind;
27use crate::license_detection::tokenize::tokenize_without_stopwords;
28use crate::models::output::Tallies;
29use crate::utils::spdx::combine_license_expressions;
30
/// Scan record for a single path.
///
/// Values are produced by [`FileInfo::new`] or by the hand-written
/// `FileInfoBuilder::build` (`build_fn(skip)` suppresses the generated one).
/// A field marked `#[serde(default)]` may be missing from serialized input
/// and then takes its type's default; `#[builder(default)]` makes the
/// corresponding builder setter optional.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    // --- Path identity: no defaults, so these must always be supplied. ---
    pub name: String,
    pub base_name: String,
    pub extension: String,
    /// Path of this entry; also used as the `from_file` provenance value when
    /// license matches are backfilled.
    pub path: String,
    pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub file_type_label: Option<String>,
    /// Size value; required (no default). Presumably bytes — TODO confirm.
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    // --- Content digests. ---
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    /// Not a parameter of [`FileInfo::new`]; starts as `None` there.
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    /// Package records attached to this path.
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    // --- License results. ---
    /// When not supplied, [`FileInfo::new`] derives this from the package
    /// data or from `license_detections`.
    #[builder(default)]
    #[serde(default)]
    pub detected_license_expression: Option<String>,
    /// When supplied empty, [`FileInfo::new`] fills this from the detections
    /// of `package_data`.
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    /// Not a parameter of [`FileInfo::new`]; starts as `None` there.
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    // --- Copyright, contact and link findings. ---
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    /// Identifiers of the packages this entry is associated with.
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    /// Diagnostics recorded for this entry; see `warning_diagnostics` and
    /// `error_diagnostics` for severity-filtered views.
    #[builder(default)]
    #[serde(default)]
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    // --- Everything below is not a parameter of `FileInfo::new`, which
    // --- initialises these to `None`, `false` or empty.
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    // Aggregate counters; presumably only meaningful for directories —
    // TODO confirm against the code that fills them.
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // Classification flags; plain `bool`, so absence deserializes as `false`.
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
162
163impl FileInfoBuilder {
164 pub fn build(&self) -> Result<FileInfo, String> {
166 let mut file_info = FileInfo::new(
167 self.name.clone().ok_or("Missing field: name")?,
168 self.base_name.clone().ok_or("Missing field: base_name")?,
169 self.extension.clone().ok_or("Missing field: extension")?,
170 self.path.clone().ok_or("Missing field: path")?,
171 self.file_type.clone().ok_or("Missing field: file_type")?,
172 self.mime_type.clone().flatten(),
173 self.file_type_label.clone().flatten(),
174 self.size.ok_or("Missing field: size")?,
175 self.date.clone().flatten(),
176 self.sha1.flatten(),
177 self.md5.flatten(),
178 self.sha256.flatten(),
179 self.programming_language.clone().flatten(),
180 self.package_data.clone().unwrap_or_default(),
181 self.detected_license_expression.clone().flatten(),
182 self.license_detections.clone().unwrap_or_default(),
183 self.license_clues.clone().unwrap_or_default(),
184 self.copyrights.clone().unwrap_or_default(),
185 self.holders.clone().unwrap_or_default(),
186 self.authors.clone().unwrap_or_default(),
187 self.emails.clone().unwrap_or_default(),
188 self.urls.clone().unwrap_or_default(),
189 self.for_packages.clone().unwrap_or_default(),
190 self.scan_diagnostics.clone().unwrap_or_default(),
191 );
192 file_info.license_policy = self.license_policy.clone().flatten();
193 file_info.sha1_git = self.sha1_git.flatten();
194 file_info.is_binary = self.is_binary.flatten();
195 file_info.is_text = self.is_text.flatten();
196 file_info.is_archive = self.is_archive.flatten();
197 file_info.is_media = self.is_media.flatten();
198 file_info.is_script = self.is_script.flatten();
199 file_info.files_count = self.files_count.flatten();
200 file_info.dirs_count = self.dirs_count.flatten();
201 file_info.size_count = self.size_count.flatten();
202 Ok(file_info)
203 }
204}
205
206impl FileInfo {
207 #[allow(clippy::too_many_arguments)]
208 pub fn new(
210 name: String,
211 base_name: String,
212 extension: String,
213 path: String,
214 file_type: FileType,
215 mime_type: Option<String>,
216 file_type_label: Option<String>,
217 size: u64,
218 date: Option<String>,
219 sha1: Option<Sha1Digest>,
220 md5: Option<Md5Digest>,
221 sha256: Option<Sha256Digest>,
222 programming_language: Option<String>,
223 package_data: Vec<PackageData>,
224 mut detected_license_expression: Option<String>,
225 mut license_detections: Vec<LicenseDetection>,
226 license_clues: Vec<Match>,
227 copyrights: Vec<Copyright>,
228 holders: Vec<Holder>,
229 authors: Vec<Author>,
230 emails: Vec<OutputEmail>,
231 urls: Vec<OutputURL>,
232 for_packages: Vec<PackageUid>,
233 scan_diagnostics: Vec<ScanDiagnostic>,
234 ) -> Self {
235 let mut package_data = package_data;
236 for package in &mut package_data {
237 enrich_package_data_license_provenance(package, &path);
238 }
239
240 detected_license_expression = detected_license_expression.or_else(|| {
242 let expressions = package_data
243 .iter()
244 .filter_map(|pkg| pkg.get_license_expression());
245 combine_license_expressions(expressions)
246 });
247
248 if license_detections.is_empty() {
250 for pkg in &package_data {
251 license_detections.extend(pkg.license_detections.clone());
252 }
253 }
254
255 if detected_license_expression.is_none() && !license_detections.is_empty() {
257 let expressions = license_detections
258 .iter()
259 .map(|detection| detection.license_expression.clone());
260 let expressions: Vec<String> = expressions.collect();
261 detected_license_expression = crate::utils::spdx::select_primary_license_expression(
262 expressions.clone(),
263 )
264 .or_else(|| {
265 crate::utils::spdx::combine_license_expressions_preserving_structure(expressions)
266 });
267 }
268
269 let mut file_info = FileInfo {
270 name,
271 base_name,
272 extension,
273 path,
274 file_type,
275 mime_type,
276 file_type_label,
277 size,
278 date,
279 sha1,
280 md5,
281 sha256,
282 sha1_git: None,
283 programming_language,
284 package_data,
285 detected_license_expression,
286 license_detections,
287 license_clues,
288 percentage_of_license_text: None,
289 copyrights,
290 holders,
291 authors,
292 emails,
293 urls,
294 for_packages,
295 scan_diagnostics,
296 license_policy: None,
297 is_generated: None,
298 is_binary: None,
299 is_text: None,
300 is_archive: None,
301 is_media: None,
302 is_source: None,
303 is_script: None,
304 files_count: None,
305 dirs_count: None,
306 size_count: None,
307 source_count: None,
308 is_legal: false,
309 is_manifest: false,
310 is_readme: false,
311 is_top_level: false,
312 is_key_file: false,
313 is_community: false,
314 facets: vec![],
315 tallies: None,
316 };
317
318 file_info.backfill_license_provenance();
319 file_info
320 }
321
322 pub fn backfill_license_provenance(&mut self) {
323 for detection in &mut self.license_detections {
324 enrich_license_detection_provenance(detection, &self.path);
325 }
326
327 for package in &mut self.package_data {
328 enrich_package_data_license_provenance(package, &self.path);
329 }
330 }
331}
332
333impl FileInfo {
334 pub fn warning_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
335 self.scan_diagnostics
336 .iter()
337 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Warning)
338 }
339
340 pub fn error_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
341 self.scan_diagnostics.iter().filter(|diagnostic| {
342 diagnostic.severity == DiagnosticSeverity::Error
343 || diagnostic.severity == DiagnosticSeverity::Timeout
344 })
345 }
346}
347
348fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
349 for detection in &mut package_data.license_detections {
350 enrich_license_detection_provenance(detection, path);
351 }
352 for detection in &mut package_data.other_license_detections {
353 enrich_license_detection_provenance(detection, path);
354 }
355}
356
357pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
358 for detection_match in &mut detection.matches {
359 if detection_match.from_file.is_none() {
360 detection_match.from_file = Some(path.to_string());
361 }
362
363 if detection_match.rule_identifier.is_empty() {
364 detection_match.rule_identifier = detection_match.matcher.to_string();
365 }
366 }
367
368 if detection.identifier.is_empty() {
369 detection.identifier = compute_public_detection_identifier(detection);
370 }
371}
372
373fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
374 let expression = python_safe_name(&detection.license_expression);
375 let mut hasher = Sha1::new();
376 hasher.update(format_public_detection_content(detection).as_bytes());
377 let hex_str = hex::encode(hasher.finalize());
378 let uuid_hex = &hex_str[..32];
379 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
380 .map(|uuid| uuid.to_string())
381 .unwrap_or_else(|_| uuid_hex.to_string());
382
383 format!("{}-{}", expression, content_uuid)
384}
385
386fn format_public_detection_content(detection: &LicenseDetection) -> String {
387 let mut result = String::from("(");
388
389 for (index, detection_match) in detection.matches.iter().enumerate() {
390 if index > 0 {
391 result.push_str(", ");
392 }
393 result.push_str(&format!(
394 "({}, {}, {})",
395 python_str_repr(if detection_match.rule_identifier.is_empty() {
396 detection_match.matcher.as_str()
397 } else {
398 detection_match.rule_identifier.as_str()
399 }),
400 detection_match.score.value() as f32,
401 python_token_tuple_repr(&tokenize_without_stopwords(
402 detection_match.matched_text.as_deref().unwrap_or_default(),
403 )),
404 ));
405 }
406
407 if detection.matches.len() == 1 {
408 result.push(',');
409 }
410 result.push(')');
411 result
412}
413
/// Turns `value` into an identifier-like name: every run of
/// non-alphanumeric characters becomes a single `_`, and no `_` is left at
/// either end. Returns an empty string when `value` has no alphanumeric
/// characters.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        // Empty segments come from leading, trailing or consecutive separators.
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
435
/// Quotes `value` the way Python's `repr` chooses quotes for a string:
/// single quotes by default, double quotes when the text contains a single
/// quote but no double quote. Backslashes and the chosen quote character are
/// backslash-escaped; no other character is escaped.
fn python_str_repr(value: &str) -> String {
    let prefer_double = value.contains('\'') && !value.contains('"');
    let quote = if prefer_double { '"' } else { '\'' };

    let mut repr = String::with_capacity(value.len() + 2);
    repr.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            repr.push('\\');
        }
        repr.push(character);
    }
    repr.push(quote);
    repr
}
443
444fn python_token_tuple_repr(tokens: &[String]) -> String {
445 if tokens.is_empty() {
446 return String::from("()");
447 }
448
449 let mut result = String::from("(");
450 for (index, token) in tokens.iter().enumerate() {
451 if index > 0 {
452 result.push_str(", ");
453 }
454 result.push_str(&python_str_repr(token));
455 }
456
457 if tokens.len() == 1 {
458 result.push(',');
459 }
460 result.push(')');
461 result
462}
463
/// Package metadata record, as attached to a [`FileInfo`] through
/// `package_data`.
///
/// Derives `Default`, so `PackageData::default()` is the all-`None`,
/// all-empty, all-`false` value. Fields marked `#[serde(default)]` may be
/// omitted from serialized input.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    // Identity. Presumably these mirror the Package URL components that
    // `purl` encodes — TODO confirm.
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    // Descriptive metadata.
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    // Size and digests.
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    // Licensing.
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Detections whose expressions `get_license_expression` combines.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    /// Secondary detections; provenance is backfilled on them as well, but
    /// they do not feed `get_license_expression`.
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
525
526impl PackageData {
527 pub fn get_license_expression(&self) -> Option<String> {
530 if self.license_detections.is_empty() {
531 return None;
532 }
533
534 let expressions = self
535 .license_detections
536 .iter()
537 .map(|detection| detection.license_expression.clone());
538 combine_license_expressions(expressions)
539 }
540}
541
/// A license detection: a license expression together with the matches that
/// support it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// Detection identifier. Defaults to an empty string when absent from
    /// the input; `enrich_license_detection_provenance` computes it when it
    /// is empty.
    #[serde(default = "String::new")]
    pub identifier: String,
}
555
/// One license match inside a [`LicenseDetection`] (also used for
/// `FileInfo::license_clues`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// Source file of the match; when `None`,
    /// `enrich_license_detection_provenance` sets it to the path it is given.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    /// Matcher that produced this match; takes `MatcherKind`'s default when
    /// absent from the input.
    #[serde(default)]
    pub matcher: MatcherKind,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// Identifier of the matched rule. Defaults to an empty string; an empty
    /// value is replaced by the matcher's display name during provenance
    /// enrichment.
    #[serde(default = "String::new")]
    pub rule_identifier: String,
    pub rule_url: Option<String>,
    /// Matched text; its tokens feed the detection identifier hash.
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
580
/// A copyright statement and the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    /// The statement text.
    pub copyright: String,
    /// Optional normalized form of the statement; `normalized_text` prefers
    /// it over `copyright` when present.
    #[serde(default)]
    pub normalized_copyright: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
589
590impl Copyright {
591 pub fn normalized_text(&self) -> &str {
592 self.normalized_copyright
593 .as_deref()
594 .unwrap_or(self.copyright.as_str())
595 }
596}
597
/// A holder name and the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
604
/// An author name and the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
611
/// A dependency entry of a package (see `PackageData::dependencies`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    // Tri-state flags: `None` is distinct from `Some(false)`; presumably it
    // means "not known" — TODO confirm.
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Full package record for the dependency, when available. Boxed because
    /// `ResolvedPackage` itself contains `Dependency` values.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
629
/// Package record attached to a [`Dependency`] through `resolved_package`.
///
/// It has the same fields as [`PackageData`], except that `package_type`,
/// `namespace`, `name` and `version` are mandatory here rather than
/// optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    // Mandatory identity.
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    // Descriptive metadata.
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    // Size and digests.
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    // Licensing.
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
686
687impl ResolvedPackage {
688 pub fn new(
689 package_type: PackageType,
690 namespace: String,
691 name: String,
692 version: String,
693 ) -> Self {
694 Self {
695 package_type,
696 namespace,
697 name,
698 version,
699 qualifiers: None,
700 subpath: None,
701 primary_language: None,
702 description: None,
703 release_date: None,
704 parties: vec![],
705 keywords: vec![],
706 homepage_url: None,
707 download_url: None,
708 size: None,
709 sha1: None,
710 md5: None,
711 sha256: None,
712 sha512: None,
713 bug_tracking_url: None,
714 code_view_url: None,
715 vcs_url: None,
716 copyright: None,
717 holder: None,
718 declared_license_expression: None,
719 declared_license_expression_spdx: None,
720 license_detections: vec![],
721 other_license_expression: None,
722 other_license_expression_spdx: None,
723 other_license_detections: vec![],
724 extracted_license_statement: None,
725 notice_text: None,
726 source_packages: vec![],
727 file_references: vec![],
728 is_private: false,
729 is_virtual: false,
730 extra_data: None,
731 dependencies: vec![],
732 repository_homepage_url: None,
733 repository_download_url: None,
734 api_data_url: None,
735 datasource_id: None,
736 purl: None,
737 }
738 }
739
740 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
741 Self {
742 package_type: package_data.package_type.unwrap_or(fallback_type),
743 namespace: package_data.namespace.clone().unwrap_or_default(),
744 name: package_data.name.clone().unwrap_or_default(),
745 version: package_data.version.clone().unwrap_or_default(),
746 qualifiers: package_data.qualifiers.clone(),
747 subpath: package_data.subpath.clone(),
748 primary_language: package_data.primary_language.clone(),
749 description: package_data.description.clone(),
750 release_date: package_data.release_date.clone(),
751 parties: package_data.parties.clone(),
752 keywords: package_data.keywords.clone(),
753 homepage_url: package_data.homepage_url.clone(),
754 download_url: package_data.download_url.clone(),
755 size: package_data.size,
756 sha1: package_data.sha1,
757 md5: package_data.md5,
758 sha256: package_data.sha256,
759 sha512: package_data.sha512,
760 bug_tracking_url: package_data.bug_tracking_url.clone(),
761 code_view_url: package_data.code_view_url.clone(),
762 vcs_url: package_data.vcs_url.clone(),
763 copyright: package_data.copyright.clone(),
764 holder: package_data.holder.clone(),
765 declared_license_expression: package_data.declared_license_expression.clone(),
766 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
767 license_detections: package_data.license_detections.clone(),
768 other_license_expression: package_data.other_license_expression.clone(),
769 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
770 other_license_detections: package_data.other_license_detections.clone(),
771 extracted_license_statement: package_data.extracted_license_statement.clone(),
772 notice_text: package_data.notice_text.clone(),
773 source_packages: package_data.source_packages.clone(),
774 file_references: package_data.file_references.clone(),
775 is_private: package_data.is_private,
776 is_virtual: package_data.is_virtual,
777 extra_data: package_data.extra_data.clone(),
778 dependencies: package_data.dependencies.clone(),
779 repository_homepage_url: package_data.repository_homepage_url.clone(),
780 repository_download_url: package_data.repository_download_url.clone(),
781 api_data_url: package_data.api_data_url.clone(),
782 datasource_id: package_data.datasource_id,
783 purl: package_data.purl.clone(),
784 }
785 }
786}
787
/// Kind of a [`Party`].
///
/// The `Display` and `FromStr` implementations in this module use the
/// lowercase names `person` and `organization`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum PartyType {
    Person,
    Organization,
}
793
794impl fmt::Display for PartyType {
795 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
796 match self {
797 PartyType::Person => write!(f, "person"),
798 PartyType::Organization => write!(f, "organization"),
799 }
800 }
801}
802
803impl FromStr for PartyType {
804 type Err = String;
805 fn from_str(s: &str) -> Result<Self, Self::Err> {
806 match s {
807 "person" => Ok(PartyType::Person),
808 "organization" => Ok(PartyType::Organization),
809 other => Err(format!("unknown party type: {other}")),
810 }
811 }
812}
813
/// A person or organization associated with a package in some role.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    /// Kind of party. `r#type` is the raw-identifier spelling of `type`,
    /// which is a Rust keyword.
    pub r#type: Option<PartyType>,
    /// Role of the party. `Package::update` only merges two parties when
    /// their roles are equal.
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
828
829impl Party {
830 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
831 Self {
832 r#type: Some(PartyType::Person),
833 role: Some(role.to_string()),
834 name,
835 email,
836 url: None,
837 organization: None,
838 organization_url: None,
839 timezone: None,
840 }
841 }
842}
843
/// Reference to a file listed by a package (see
/// `PackageData::file_references`): a path plus optional size, digests and
/// free-form extra data.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
857
858impl FileReference {
859 pub(crate) fn from_path(path: String) -> Self {
860 Self {
861 path,
862 size: None,
863 sha1: None,
864 md5: None,
865 sha256: None,
866 sha512: None,
867 extra_data: None,
868 }
869 }
870}
871
/// Top-level package record, created from one datafile's [`PackageData`]
/// (`Package::from_package_data`) and extended with further datafiles
/// (`Package::update`).
///
/// Compared with [`PackageData`] it has no `file_references`,
/// `dependencies` or single `datasource_id`; instead it carries
/// `package_uid`, `datafile_paths` and `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    // Identity.
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    // Descriptive metadata.
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    // Size and digests.
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    // Licensing.
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    /// Package URL string; rewritten by `refresh_identity` whenever a purl
    /// can be built from the current fields.
    pub purl: Option<String>,
    /// Identifier of this package instance, derived from the purl, or a
    /// fallback value when no purl can be built at creation time.
    pub package_uid: PackageUid,
    /// Paths of the datafiles that contributed to this package, in the order
    /// they were added.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the contributing datafiles that declared one.
    pub datasource_ids: Vec<DatasourceId>,
}
938
939impl Package {
940 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
946 let mut package_data = package_data.clone();
947 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
948
949 let mut package = Package {
950 package_type: package_data.package_type,
951 namespace: package_data.namespace.clone(),
952 name: package_data.name.clone(),
953 version: package_data.version.clone(),
954 qualifiers: package_data.qualifiers.clone(),
955 subpath: package_data.subpath.clone(),
956 primary_language: package_data.primary_language.clone(),
957 description: package_data.description.clone(),
958 release_date: package_data.release_date.clone(),
959 parties: package_data.parties.clone(),
960 keywords: package_data.keywords.clone(),
961 homepage_url: package_data.homepage_url.clone(),
962 download_url: package_data.download_url.clone(),
963 size: package_data.size,
964 sha1: package_data.sha1,
965 md5: package_data.md5,
966 sha256: package_data.sha256,
967 sha512: package_data.sha512,
968 bug_tracking_url: package_data.bug_tracking_url.clone(),
969 code_view_url: package_data.code_view_url.clone(),
970 vcs_url: package_data.vcs_url.clone(),
971 copyright: package_data.copyright.clone(),
972 holder: package_data.holder.clone(),
973 declared_license_expression: package_data.declared_license_expression.clone(),
974 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
975 license_detections: package_data.license_detections.clone(),
976 other_license_expression: package_data.other_license_expression.clone(),
977 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
978 other_license_detections: package_data.other_license_detections.clone(),
979 extracted_license_statement: package_data.extracted_license_statement.clone(),
980 notice_text: package_data.notice_text.clone(),
981 source_packages: package_data.source_packages.clone(),
982 is_private: package_data.is_private,
983 is_virtual: package_data.is_virtual,
984 extra_data: package_data.extra_data.clone(),
985 repository_homepage_url: package_data.repository_homepage_url.clone(),
986 repository_download_url: package_data.repository_download_url.clone(),
987 api_data_url: package_data.api_data_url.clone(),
988 purl: package_data.purl.clone(),
989 package_uid: PackageUid::empty(),
990 datafile_paths: vec![datafile_path],
991 datasource_ids: if let Some(dsid) = package_data.datasource_id {
992 vec![dsid]
993 } else {
994 vec![]
995 },
996 };
997
998 package.refresh_identity();
999 if package.package_uid.is_empty() {
1000 package.package_uid = package.fallback_package_uid();
1001 }
1002
1003 package
1004 }
1005
1006 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
1012 let mut package_data = package_data.clone();
1013 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
1014
1015 if let Some(dsid) = package_data.datasource_id {
1016 self.datasource_ids.push(dsid);
1017 }
1018 self.datafile_paths.push(datafile_path);
1019
1020 macro_rules! fill_if_empty {
1021 ($field:ident) => {
1022 if self.$field.is_none() {
1023 self.$field = package_data.$field;
1024 }
1025 };
1026 }
1027
1028 fill_if_empty!(package_type);
1029 fill_if_empty!(name);
1030 fill_if_empty!(namespace);
1031 fill_if_empty!(version);
1032 fill_if_empty!(qualifiers);
1033 fill_if_empty!(subpath);
1034 fill_if_empty!(primary_language);
1035 fill_if_empty!(description);
1036 fill_if_empty!(release_date);
1037 fill_if_empty!(homepage_url);
1038 fill_if_empty!(download_url);
1039 fill_if_empty!(size);
1040 fill_if_empty!(sha1);
1041 fill_if_empty!(md5);
1042 fill_if_empty!(sha256);
1043 fill_if_empty!(sha512);
1044 fill_if_empty!(bug_tracking_url);
1045 fill_if_empty!(code_view_url);
1046 fill_if_empty!(vcs_url);
1047 fill_if_empty!(copyright);
1048 fill_if_empty!(holder);
1049 fill_if_empty!(declared_license_expression);
1050 fill_if_empty!(declared_license_expression_spdx);
1051 fill_if_empty!(other_license_expression);
1052 fill_if_empty!(other_license_expression_spdx);
1053 fill_if_empty!(extracted_license_statement);
1054 fill_if_empty!(notice_text);
1055 match (&mut self.extra_data, &package_data.extra_data) {
1056 (None, Some(extra_data)) => {
1057 self.extra_data = Some(extra_data.clone());
1058 }
1059 (Some(existing), Some(incoming)) => {
1060 for (key, value) in incoming {
1061 existing.entry(key.clone()).or_insert_with(|| value.clone());
1062 }
1063 }
1064 _ => {}
1065 }
1066 fill_if_empty!(repository_homepage_url);
1067 fill_if_empty!(repository_download_url);
1068 fill_if_empty!(api_data_url);
1069
1070 for party in &package_data.parties {
1071 if let Some(existing) = self.parties.iter_mut().find(|p| {
1072 p.role == party.role
1073 && ((p.name.is_some() && p.name == party.name)
1074 || (p.email.is_some() && p.email == party.email))
1075 }) {
1076 if existing.name.is_none() {
1077 existing.name = party.name.clone();
1078 }
1079 if existing.email.is_none() {
1080 existing.email = party.email.clone();
1081 }
1082 } else {
1083 self.parties.push(party.clone());
1084 }
1085 }
1086
1087 for keyword in &package_data.keywords {
1088 if !self.keywords.contains(keyword) {
1089 self.keywords.push(keyword.clone());
1090 }
1091 }
1092
1093 for detection in &package_data.license_detections {
1094 self.license_detections.push(detection.clone());
1095 }
1096
1097 for detection in &package_data.other_license_detections {
1098 self.other_license_detections.push(detection.clone());
1099 }
1100
1101 for source_pkg in &package_data.source_packages {
1102 if !self.source_packages.contains(source_pkg) {
1103 self.source_packages.push(source_pkg.clone());
1104 }
1105 }
1106
1107 self.refresh_identity();
1108 }
1109
1110 pub fn backfill_license_provenance(&mut self) {
1111 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1112 return;
1113 };
1114
1115 for detection in &mut self.license_detections {
1116 enrich_license_detection_provenance(detection, &datafile_path);
1117 }
1118 for detection in &mut self.other_license_detections {
1119 enrich_license_detection_provenance(detection, &datafile_path);
1120 }
1121 }
1122
1123 fn refresh_identity(&mut self) {
1124 let Some(next_purl) = self.build_current_purl() else {
1125 return;
1126 };
1127
1128 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1129 self.package_uid = PackageUid::new(&next_purl);
1130 }
1131
1132 self.purl = Some(next_purl);
1133 }
1134
1135 fn fallback_package_uid(&self) -> PackageUid {
1136 let name = self
1137 .name
1138 .as_deref()
1139 .map(str::trim)
1140 .filter(|value| !value.is_empty())
1141 .unwrap_or("unknown");
1142 let version = self
1143 .version
1144 .as_deref()
1145 .map(str::trim)
1146 .filter(|value| !value.is_empty())
1147 .unwrap_or("unknown");
1148 let datasource = self
1149 .datasource_ids
1150 .first()
1151 .map(DatasourceId::as_str)
1152 .unwrap_or("unknown");
1153
1154 PackageUid::new_opaque(&format!("generated-package:{datasource}/{name}@{version}"))
1155 }
1156
1157 fn build_current_purl(&self) -> Option<String> {
1158 if let Some(existing_purl) = self.purl.as_deref() {
1159 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1160
1161 if let Some(version) = self
1162 .version
1163 .as_deref()
1164 .filter(|value| !value.trim().is_empty())
1165 {
1166 purl.with_version(version).ok()?;
1167 } else {
1168 purl.without_version();
1169 }
1170
1171 return Some(purl.to_string());
1172 }
1173
1174 if let (Some(package_type), Some(name)) = (
1175 self.package_type.as_ref(),
1176 self.name
1177 .as_deref()
1178 .filter(|value| !value.trim().is_empty()),
1179 ) {
1180 let purl_type = match package_type {
1181 PackageType::Deno => "generic",
1182 _ => package_type.as_str(),
1183 };
1184
1185 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1186
1187 if let Some(namespace) = self
1188 .namespace
1189 .as_deref()
1190 .filter(|value| !value.trim().is_empty())
1191 {
1192 purl.with_namespace(namespace).ok()?;
1193 }
1194
1195 if let Some(version) = self
1196 .version
1197 .as_deref()
1198 .filter(|value| !value.trim().is_empty())
1199 {
1200 purl.with_version(version).ok()?;
1201 }
1202
1203 if let Some(qualifiers) = &self.qualifiers {
1204 for (key, value) in qualifiers {
1205 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1206 }
1207 }
1208
1209 if let Some(subpath) = self
1210 .subpath
1211 .as_deref()
1212 .filter(|value| !value.trim().is_empty())
1213 {
1214 purl.with_subpath(subpath).ok()?;
1215 }
1216
1217 return Some(purl.to_string());
1218 }
1219 None
1220 }
1221}
1222
#[cfg(test)]
mod tests {
    use super::*;

    const MANIFEST_PATH: &str = "project/package.json";
    const DECLARED_RULE_ID: &str = "parser-declared-license";
    const ALPINE_PURL: &str = "pkg:alpine/busybox@1.35.0-r17?arch=x86_64";

    /// Npm package data holding one declared `mit` detection whose `from_file`,
    /// `rule_identifier` and `identifier` all start out blank.
    fn npm_package_data_with_blank_provenance() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: MatcherKind::Declared,
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: String::new(),
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: String::new(),
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            MANIFEST_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_data_with_blank_provenance()],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_detection = &file_info.license_detections[0];
        assert_eq!(
            file_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(!file_detection.identifier.is_empty());

        // Detections kept on the package data itself.
        let package_detection = &file_info.package_data[0].license_detections[0];
        assert_eq!(
            package_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert_eq!(
            package_detection.matches[0].rule_identifier,
            DECLARED_RULE_ID
        );
        assert!(!package_detection.identifier.is_empty());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_data_with_blank_provenance();

        let package = Package::from_package_data(&package_data, MANIFEST_PATH.to_string());

        let detection = &package.license_detections[0];
        assert_eq!(
            detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert_eq!(detection.matches[0].rule_identifier, DECLARED_RULE_ID);
        assert!(!detection.identifier.is_empty());
    }

    #[test]
    fn package_from_package_data_preserves_existing_purl_qualifiers() {
        let package_data = PackageData {
            package_type: Some(PackageType::Alpine),
            namespace: Some("alpine".to_string()),
            name: Some("busybox".to_string()),
            version: Some("1.35.0-r17".to_string()),
            purl: Some(ALPINE_PURL.to_string()),
            ..PackageData::default()
        };

        let package = Package::from_package_data(&package_data, "lib/apk/db/installed".to_string());

        assert_eq!(package.purl.as_deref(), Some(ALPINE_PURL));
        assert!(
            package
                .package_uid
                .starts_with("pkg:alpine/busybox@1.35.0-r17?arch=x86_64&uuid=")
        );
    }
}
1378
1379#[derive(Serialize, Deserialize, Debug, Clone)]
1384pub struct TopLevelDependency {
1385 pub purl: Option<String>,
1386 pub extracted_requirement: Option<String>,
1387 pub scope: Option<String>,
1388 pub is_runtime: Option<bool>,
1389 pub is_optional: Option<bool>,
1390 pub is_pinned: Option<bool>,
1391 pub is_direct: Option<bool>,
1392 pub resolved_package: Option<Box<ResolvedPackage>>,
1393 #[serde(default)]
1394 pub extra_data: Option<HashMap<String, serde_json::Value>>,
1395 pub dependency_uid: DependencyUid,
1397 pub for_package_uid: Option<PackageUid>,
1399 pub datafile_path: String,
1401 pub datasource_id: DatasourceId,
1403 pub namespace: Option<String>,
1405}
1406
1407impl TopLevelDependency {
1408 pub fn from_dependency(
1410 dep: &Dependency,
1411 datafile_path: String,
1412 datasource_id: DatasourceId,
1413 for_package_uid: Option<PackageUid>,
1414 ) -> Self {
1415 let dependency_uid = dep
1416 .purl
1417 .as_ref()
1418 .map(|p| DependencyUid::new(p))
1419 .unwrap_or_else(DependencyUid::empty);
1420
1421 TopLevelDependency {
1422 purl: dep.purl.clone(),
1423 extracted_requirement: dep.extracted_requirement.clone(),
1424 scope: dep.scope.clone(),
1425 is_runtime: dep.is_runtime,
1426 is_optional: dep.is_optional,
1427 is_pinned: dep.is_pinned,
1428 is_direct: dep.is_direct,
1429 resolved_package: dep.resolved_package.clone(),
1430 extra_data: dep.extra_data.clone(),
1431 dependency_uid,
1432 for_package_uid,
1433 datafile_path,
1434 datasource_id,
1435 namespace: None,
1436 }
1437 }
1438}
1439
1440#[derive(Serialize, Deserialize, Debug, Clone)]
1441pub struct OutputEmail {
1442 pub email: String,
1443 pub start_line: LineNumber,
1444 pub end_line: LineNumber,
1445}
1446
1447#[derive(Serialize, Deserialize, Debug, Clone)]
1448pub struct OutputURL {
1449 pub url: String,
1450 pub start_line: LineNumber,
1451 pub end_line: LineNumber,
1452}
1453
1454#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
1455pub struct LicensePolicyEntry {
1456 pub license_key: String,
1457 pub label: String,
1458 pub color_code: String,
1459 pub icon: String,
1460}
1461
1462#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
1463pub enum FileType {
1464 File,
1465 Directory,
1466}