1use derive_builder::Builder;
5use packageurl::PackageUrl;
6use serde::{Deserialize, Serialize};
7use sha1::{Digest, Sha1};
8use std::collections::HashMap;
9use std::str::FromStr;
10
11use super::DatasourceId;
12use super::DependencyUid;
13use super::DiagnosticSeverity;
14use super::GitSha1;
15use super::LineNumber;
16use super::MatchScore;
17use super::Md5Digest;
18use super::PackageType;
19use super::PackageUid;
20use super::ScanDiagnostic;
21use super::Sha1Digest;
22use super::Sha256Digest;
23use super::Sha512Digest;
24use super::diagnostics_from_legacy_scan_errors;
25use crate::license_detection::tokenize::tokenize_without_stopwords;
26use crate::models::output::Tallies;
27use crate::utils::spdx::combine_license_expressions;
28
/// Record describing one scanned path (see `path` and `file_type`), serialized and
/// deserialized with serde.
///
/// `derive_builder` generates a `FileInfoBuilder` for this struct. Its generated `build`
/// function is suppressed (`build_fn(skip)`); the hand-written `FileInfoBuilder::build`
/// in this file is used instead. Fields marked `#[builder(default)]` are optional on the
/// builder, and fields marked `#[serde(default)]` fall back to their `Default` value when
/// they are missing from deserialized input.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    // The next six fields are the ones `FileInfoBuilder::build` reports as
    // "Missing field: ..." when unset (together with `size`).
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `file_type` (not to be confused with the Rust field
    /// `file_type`, which maps to `type`).
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    // Optional content digests.
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`. When not supplied,
    /// `FileInfo::new` derives it from package data or from `license_detections`.
    #[serde(rename = "detected_license_expression_spdx")]
    #[builder(default)]
    pub license_expression: Option<String>,
    /// When empty at construction, `FileInfo::new` copies the detections found in
    /// `package_data` into this list.
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    /// Plain-string error messages. `FileInfoBuilder::build` rewrites this list from the
    /// messages of `scan_diagnostics`.
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    /// Structured diagnostics; `FileInfo::new` derives them from `scan_errors` via
    /// `diagnostics_from_legacy_scan_errors`.
    #[builder(default)]
    #[serde(default)]
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    // Optional tri-state flags: `None` means "not set"; `FileInfo::new` initializes all
    // of them to `None`.
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    // Optional counters; presumably aggregate values for directory entries — TODO confirm
    // against the code that populates them.
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // Plain boolean flags; `FileInfo::new` initializes all of them to `false`.
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
164
165impl FileInfoBuilder {
166 pub fn build(&self) -> Result<FileInfo, String> {
168 let mut file_info = FileInfo::new(
169 self.name.clone().ok_or("Missing field: name")?,
170 self.base_name.clone().ok_or("Missing field: base_name")?,
171 self.extension.clone().ok_or("Missing field: extension")?,
172 self.path.clone().ok_or("Missing field: path")?,
173 self.file_type.clone().ok_or("Missing field: file_type")?,
174 self.mime_type.clone().flatten(),
175 self.file_type_label.clone().flatten(),
176 self.size.ok_or("Missing field: size")?,
177 self.date.clone().flatten(),
178 self.sha1.flatten(),
179 self.md5.flatten(),
180 self.sha256.flatten(),
181 self.programming_language.clone().flatten(),
182 self.package_data.clone().unwrap_or_default(),
183 self.license_expression.clone().flatten(),
184 self.license_detections.clone().unwrap_or_default(),
185 self.license_clues.clone().unwrap_or_default(),
186 self.copyrights.clone().unwrap_or_default(),
187 self.holders.clone().unwrap_or_default(),
188 self.authors.clone().unwrap_or_default(),
189 self.emails.clone().unwrap_or_default(),
190 self.urls.clone().unwrap_or_default(),
191 self.for_packages.clone().unwrap_or_default(),
192 self.scan_errors.clone().unwrap_or_default(),
193 );
194 file_info.scan_diagnostics = if let Some(diagnostics) = &self.scan_diagnostics {
195 diagnostics.clone()
196 } else {
197 diagnostics_from_legacy_scan_errors(&file_info.scan_errors)
198 };
199 file_info.scan_errors = file_info
200 .scan_diagnostics
201 .iter()
202 .map(|diagnostic| diagnostic.message.clone())
203 .collect();
204 file_info.license_policy = self.license_policy.clone().flatten();
205 file_info.sha1_git = self.sha1_git.flatten();
206 file_info.is_binary = self.is_binary.flatten();
207 file_info.is_text = self.is_text.flatten();
208 file_info.is_archive = self.is_archive.flatten();
209 file_info.is_media = self.is_media.flatten();
210 file_info.is_script = self.is_script.flatten();
211 file_info.files_count = self.files_count.flatten();
212 file_info.dirs_count = self.dirs_count.flatten();
213 file_info.size_count = self.size_count.flatten();
214 Ok(file_info)
215 }
216}
217
218impl FileInfo {
219 #[allow(clippy::too_many_arguments)]
220 pub fn new(
222 name: String,
223 base_name: String,
224 extension: String,
225 path: String,
226 file_type: FileType,
227 mime_type: Option<String>,
228 file_type_label: Option<String>,
229 size: u64,
230 date: Option<String>,
231 sha1: Option<Sha1Digest>,
232 md5: Option<Md5Digest>,
233 sha256: Option<Sha256Digest>,
234 programming_language: Option<String>,
235 package_data: Vec<PackageData>,
236 mut license_expression: Option<String>,
237 mut license_detections: Vec<LicenseDetection>,
238 license_clues: Vec<Match>,
239 copyrights: Vec<Copyright>,
240 holders: Vec<Holder>,
241 authors: Vec<Author>,
242 emails: Vec<OutputEmail>,
243 urls: Vec<OutputURL>,
244 for_packages: Vec<PackageUid>,
245 scan_errors: Vec<String>,
246 ) -> Self {
247 let mut package_data = package_data;
248 for package in &mut package_data {
249 enrich_package_data_license_provenance(package, &path);
250 }
251
252 license_expression = license_expression.or_else(|| {
254 let expressions = package_data
255 .iter()
256 .filter_map(|pkg| pkg.get_license_expression());
257 combine_license_expressions(expressions)
258 });
259
260 if license_detections.is_empty() {
262 for pkg in &package_data {
263 license_detections.extend(pkg.license_detections.clone());
264 }
265 }
266
267 if license_expression.is_none() && !license_detections.is_empty() {
269 let expressions = license_detections
270 .iter()
271 .map(|detection| detection.license_expression.clone());
272 license_expression = combine_license_expressions(expressions);
273 }
274
275 let mut file_info = FileInfo {
276 name,
277 base_name,
278 extension,
279 path,
280 file_type,
281 mime_type,
282 file_type_label,
283 size,
284 date,
285 sha1,
286 md5,
287 sha256,
288 sha1_git: None,
289 programming_language,
290 package_data,
291 license_expression,
292 license_detections,
293 license_clues,
294 percentage_of_license_text: None,
295 copyrights,
296 holders,
297 authors,
298 emails,
299 urls,
300 for_packages,
301 scan_diagnostics: diagnostics_from_legacy_scan_errors(&scan_errors),
302 scan_errors,
303 license_policy: None,
304 is_generated: None,
305 is_binary: None,
306 is_text: None,
307 is_archive: None,
308 is_media: None,
309 is_source: None,
310 is_script: None,
311 files_count: None,
312 dirs_count: None,
313 size_count: None,
314 source_count: None,
315 is_legal: false,
316 is_manifest: false,
317 is_readme: false,
318 is_top_level: false,
319 is_key_file: false,
320 is_community: false,
321 facets: vec![],
322 tallies: None,
323 };
324
325 file_info.backfill_license_provenance();
326 file_info
327 }
328
329 pub fn backfill_license_provenance(&mut self) {
330 for detection in &mut self.license_detections {
331 enrich_license_detection_provenance(detection, &self.path);
332 }
333
334 for package in &mut self.package_data {
335 enrich_package_data_license_provenance(package, &self.path);
336 }
337 }
338}
339
340impl FileInfo {
341 pub fn warning_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
342 self.scan_diagnostics
343 .iter()
344 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Warning)
345 }
346
347 pub fn error_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
348 self.scan_diagnostics
349 .iter()
350 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
351 }
352}
353
354fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
355 for detection in &mut package_data.license_detections {
356 enrich_license_detection_provenance(detection, path);
357 }
358 for detection in &mut package_data.other_license_detections {
359 enrich_license_detection_provenance(detection, path);
360 }
361}
362
363pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
364 for detection_match in &mut detection.matches {
365 if detection_match.from_file.is_none() {
366 detection_match.from_file = Some(path.to_string());
367 }
368
369 if detection_match.rule_identifier.is_none() {
370 detection_match.rule_identifier = detection_match.matcher.clone();
371 }
372 }
373
374 if detection.identifier.is_none() {
375 detection.identifier = Some(compute_public_detection_identifier(detection));
376 }
377}
378
379fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
380 let expression = python_safe_name(&detection.license_expression);
381 let mut hasher = Sha1::new();
382 hasher.update(format_public_detection_content(detection).as_bytes());
383 let hex_str = hex::encode(hasher.finalize());
384 let uuid_hex = &hex_str[..32];
385 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
386 .map(|uuid| uuid.to_string())
387 .unwrap_or_else(|_| uuid_hex.to_string());
388
389 format!("{}-{}", expression, content_uuid)
390}
391
392fn format_public_detection_content(detection: &LicenseDetection) -> String {
393 let mut result = String::from("(");
394
395 for (index, detection_match) in detection.matches.iter().enumerate() {
396 if index > 0 {
397 result.push_str(", ");
398 }
399 result.push_str(&format!(
400 "({}, {}, {})",
401 python_str_repr(
402 detection_match
403 .rule_identifier
404 .as_deref()
405 .or(detection_match.matcher.as_deref())
406 .unwrap_or("parser-declared-license")
407 ),
408 detection_match.score.value() as f32,
409 python_token_tuple_repr(&tokenize_without_stopwords(
410 detection_match.matched_text.as_deref().unwrap_or_default(),
411 )),
412 ));
413 }
414
415 if detection.matches.len() == 1 {
416 result.push(',');
417 }
418 result.push(')');
419 result
420}
421
/// Reduces `value` to its alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters (underscores included) acts as one separator,
/// and separators at the start or end are dropped. Input without any alphanumeric
/// character yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
443
/// Quotes `value` in the style of a Python string repr.
///
/// Double quotes are used only when `value` contains a single quote and no double quote;
/// otherwise single quotes are used. Backslashes and the chosen quote character are
/// backslash-escaped. No other characters (e.g. control characters) are escaped.
fn python_str_repr(value: &str) -> String {
    let use_double_quotes = value.contains('\'') && !value.contains('"');
    let quote = if use_double_quotes { '"' } else { '\'' };

    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            quoted.push('\\');
        }
        quoted.push(character);
    }
    quoted.push(quote);
    quoted
}
451
452fn python_token_tuple_repr(tokens: &[String]) -> String {
453 if tokens.is_empty() {
454 return String::from("()");
455 }
456
457 let mut result = String::from("(");
458 for (index, token) in tokens.iter().enumerate() {
459 if index > 0 {
460 result.push_str(", ");
461 }
462 result.push_str(&python_str_repr(token));
463 }
464
465 if tokens.len() == 1 {
466 result.push(',');
467 }
468 result.push(')');
469 result
470}
471
/// Package metadata record, (de)serialized with serde.
///
/// `Default` is derived, so a default value has every `Option` set to `None`, every list
/// empty and both flags `false`. Fields marked `#[serde(default)]` fall back to their
/// `Default` value when missing from input.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Detections combined by `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Free-form additional values as JSON.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
534
535impl PackageData {
536 pub fn get_license_expression(&self) -> Option<String> {
539 if self.license_detections.is_empty() {
540 return None;
541 }
542
543 let expressions = self
544 .license_detections
545 .iter()
546 .map(|detection| detection.license_expression.clone());
547 combine_license_expressions(expressions)
548 }
549}
550
/// One license detection: an overall expression plus the matches backing it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    /// Empty when missing from deserialized input.
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// When `None`, `enrich_license_detection_provenance` fills it with a computed
    /// `<safe expression>-<uuid>` identifier.
    pub identifier: Option<String>,
}
563
/// A single license match inside a [`LicenseDetection`].
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// When `None`, `enrich_license_detection_provenance` sets it to the path it is given.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// When `None`, `enrich_license_detection_provenance` copies `matcher` into it.
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    /// Text used (after tokenization) when computing a detection identifier.
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    /// `None` when missing from deserialized input.
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
586
/// A copyright statement together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
593
/// A holder entry together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
600
/// An author entry together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
607
/// A dependency entry of a package; every attribute is optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    // Optional flags: `None` means the value was not provided.
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Full package record for this dependency, when one is available.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form additional values as JSON; `None` when missing from input.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
625
/// Package record attached to a [`Dependency`].
///
/// Carries the same fields as [`PackageData`], except that `package_type`, `namespace`,
/// `name` and `version` are required values rather than `Option`s.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Free-form additional values as JSON.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
683
684impl ResolvedPackage {
685 pub fn new(
686 package_type: PackageType,
687 namespace: String,
688 name: String,
689 version: String,
690 ) -> Self {
691 Self {
692 package_type,
693 namespace,
694 name,
695 version,
696 qualifiers: None,
697 subpath: None,
698 primary_language: None,
699 description: None,
700 release_date: None,
701 parties: vec![],
702 keywords: vec![],
703 homepage_url: None,
704 download_url: None,
705 size: None,
706 sha1: None,
707 md5: None,
708 sha256: None,
709 sha512: None,
710 bug_tracking_url: None,
711 code_view_url: None,
712 vcs_url: None,
713 copyright: None,
714 holder: None,
715 declared_license_expression: None,
716 declared_license_expression_spdx: None,
717 license_detections: vec![],
718 other_license_expression: None,
719 other_license_expression_spdx: None,
720 other_license_detections: vec![],
721 extracted_license_statement: None,
722 notice_text: None,
723 source_packages: vec![],
724 file_references: vec![],
725 is_private: false,
726 is_virtual: false,
727 extra_data: None,
728 dependencies: vec![],
729 repository_homepage_url: None,
730 repository_download_url: None,
731 api_data_url: None,
732 datasource_id: None,
733 purl: None,
734 }
735 }
736
737 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
738 Self {
739 package_type: package_data.package_type.unwrap_or(fallback_type),
740 namespace: package_data.namespace.clone().unwrap_or_default(),
741 name: package_data.name.clone().unwrap_or_default(),
742 version: package_data.version.clone().unwrap_or_default(),
743 qualifiers: package_data.qualifiers.clone(),
744 subpath: package_data.subpath.clone(),
745 primary_language: package_data.primary_language.clone(),
746 description: package_data.description.clone(),
747 release_date: package_data.release_date.clone(),
748 parties: package_data.parties.clone(),
749 keywords: package_data.keywords.clone(),
750 homepage_url: package_data.homepage_url.clone(),
751 download_url: package_data.download_url.clone(),
752 size: package_data.size,
753 sha1: package_data.sha1,
754 md5: package_data.md5,
755 sha256: package_data.sha256,
756 sha512: package_data.sha512,
757 bug_tracking_url: package_data.bug_tracking_url.clone(),
758 code_view_url: package_data.code_view_url.clone(),
759 vcs_url: package_data.vcs_url.clone(),
760 copyright: package_data.copyright.clone(),
761 holder: package_data.holder.clone(),
762 declared_license_expression: package_data.declared_license_expression.clone(),
763 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
764 license_detections: package_data.license_detections.clone(),
765 other_license_expression: package_data.other_license_expression.clone(),
766 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
767 other_license_detections: package_data.other_license_detections.clone(),
768 extracted_license_statement: package_data.extracted_license_statement.clone(),
769 notice_text: package_data.notice_text.clone(),
770 source_packages: package_data.source_packages.clone(),
771 file_references: package_data.file_references.clone(),
772 is_private: package_data.is_private,
773 is_virtual: package_data.is_virtual,
774 extra_data: package_data.extra_data.clone(),
775 dependencies: package_data.dependencies.clone(),
776 repository_homepage_url: package_data.repository_homepage_url.clone(),
777 repository_download_url: package_data.repository_download_url.clone(),
778 api_data_url: package_data.api_data_url.clone(),
779 datasource_id: package_data.datasource_id,
780 purl: package_data.purl.clone(),
781 }
782 }
783}
784
/// A party attached to a package; every attribute is optional.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    /// Raw identifier because `type` is a Rust keyword; the field name seen by serde is
    /// `type`. `Party::person` sets it to `"person"`.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
799
800impl Party {
801 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
802 Self {
803 r#type: Some("person".to_string()),
804 role: Some(role.to_string()),
805 name,
806 email,
807 url: None,
808 organization: None,
809 organization_url: None,
810 timezone: None,
811 }
812 }
813}
814
/// A file referenced by a package: a required `path` plus optional size, digests and
/// free-form extra data.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    /// Free-form additional values as JSON.
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
828
829impl FileReference {
830 pub(crate) fn from_path(path: String) -> Self {
831 Self {
832 path,
833 size: None,
834 sha1: None,
835 md5: None,
836 sha256: None,
837 sha512: None,
838 extra_data: None,
839 }
840 }
841}
842
/// Package record that can be assembled from one or more [`PackageData`] values (see
/// `Package::from_package_data` and `Package::update`).
///
/// Compared with `PackageData` it has no `file_references`, `dependencies` or single
/// `datasource_id`; instead it carries `package_uid`, `datafile_paths` and
/// `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Free-form additional values as JSON.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    /// Kept in sync with name/version changes by `refresh_identity`.
    pub purl: Option<String>,
    /// Derived from the purl when one can be built, otherwise a generated fallback.
    pub package_uid: PackageUid,
    /// Paths of the data files this package was built from, in the order they were added.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the contributing package data, in the order they were added.
    pub datasource_ids: Vec<DatasourceId>,
}
910
911impl Package {
912 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
918 let mut package_data = package_data.clone();
919 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
920
921 let mut package = Package {
922 package_type: package_data.package_type,
923 namespace: package_data.namespace.clone(),
924 name: package_data.name.clone(),
925 version: package_data.version.clone(),
926 qualifiers: package_data.qualifiers.clone(),
927 subpath: package_data.subpath.clone(),
928 primary_language: package_data.primary_language.clone(),
929 description: package_data.description.clone(),
930 release_date: package_data.release_date.clone(),
931 parties: package_data.parties.clone(),
932 keywords: package_data.keywords.clone(),
933 homepage_url: package_data.homepage_url.clone(),
934 download_url: package_data.download_url.clone(),
935 size: package_data.size,
936 sha1: package_data.sha1,
937 md5: package_data.md5,
938 sha256: package_data.sha256,
939 sha512: package_data.sha512,
940 bug_tracking_url: package_data.bug_tracking_url.clone(),
941 code_view_url: package_data.code_view_url.clone(),
942 vcs_url: package_data.vcs_url.clone(),
943 copyright: package_data.copyright.clone(),
944 holder: package_data.holder.clone(),
945 declared_license_expression: package_data.declared_license_expression.clone(),
946 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
947 license_detections: package_data.license_detections.clone(),
948 other_license_expression: package_data.other_license_expression.clone(),
949 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
950 other_license_detections: package_data.other_license_detections.clone(),
951 extracted_license_statement: package_data.extracted_license_statement.clone(),
952 notice_text: package_data.notice_text.clone(),
953 source_packages: package_data.source_packages.clone(),
954 is_private: package_data.is_private,
955 is_virtual: package_data.is_virtual,
956 extra_data: package_data.extra_data.clone(),
957 repository_homepage_url: package_data.repository_homepage_url.clone(),
958 repository_download_url: package_data.repository_download_url.clone(),
959 api_data_url: package_data.api_data_url.clone(),
960 purl: package_data.purl.clone(),
961 package_uid: PackageUid::empty(),
962 datafile_paths: vec![datafile_path],
963 datasource_ids: if let Some(dsid) = package_data.datasource_id {
964 vec![dsid]
965 } else {
966 vec![]
967 },
968 };
969
970 package.refresh_identity();
971 if package.package_uid.is_empty() {
972 package.package_uid = package.fallback_package_uid();
973 }
974
975 package
976 }
977
    /// Merges another `PackageData`, found in `datafile_path`, into this package.
    ///
    /// Existing values always win: scalar fields are only filled when currently `None`,
    /// `extra_data` keys are only added when absent, and matching parties only receive a
    /// missing name or email. Keywords and source packages are appended without
    /// duplicates, while license detections, `datasource_ids` and `datafile_paths` are
    /// appended unconditionally. Finally the purl and package uid are refreshed.
    pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
        // Work on an owned copy so provenance can be added and fields can be moved out.
        let mut package_data = package_data.clone();
        enrich_package_data_license_provenance(&mut package_data, &datafile_path);

        if let Some(dsid) = package_data.datasource_id {
            self.datasource_ids.push(dsid);
        }
        self.datafile_paths.push(datafile_path);

        // Takes the incoming value for `$field` only when this package has none yet.
        // The value is moved out of the local `package_data` copy.
        macro_rules! fill_if_empty {
            ($field:ident) => {
                if self.$field.is_none() {
                    self.$field = package_data.$field;
                }
            };
        }

        fill_if_empty!(package_type);
        fill_if_empty!(name);
        fill_if_empty!(namespace);
        fill_if_empty!(version);
        fill_if_empty!(qualifiers);
        fill_if_empty!(subpath);
        fill_if_empty!(primary_language);
        fill_if_empty!(description);
        fill_if_empty!(release_date);
        fill_if_empty!(homepage_url);
        fill_if_empty!(download_url);
        fill_if_empty!(size);
        fill_if_empty!(sha1);
        fill_if_empty!(md5);
        fill_if_empty!(sha256);
        fill_if_empty!(sha512);
        fill_if_empty!(bug_tracking_url);
        fill_if_empty!(code_view_url);
        fill_if_empty!(vcs_url);
        fill_if_empty!(copyright);
        fill_if_empty!(holder);
        fill_if_empty!(declared_license_expression);
        fill_if_empty!(declared_license_expression_spdx);
        fill_if_empty!(other_license_expression);
        fill_if_empty!(other_license_expression_spdx);
        fill_if_empty!(extracted_license_statement);
        fill_if_empty!(notice_text);
        // `extra_data` is merged key by key rather than replaced wholesale; keys that
        // already exist keep their current value.
        match (&mut self.extra_data, &package_data.extra_data) {
            (None, Some(extra_data)) => {
                self.extra_data = Some(extra_data.clone());
            }
            (Some(existing), Some(incoming)) => {
                for (key, value) in incoming {
                    existing.entry(key.clone()).or_insert_with(|| value.clone());
                }
            }
            _ => {}
        }
        fill_if_empty!(repository_homepage_url);
        fill_if_empty!(repository_download_url);
        fill_if_empty!(api_data_url);

        // A party counts as already known when the role matches and either the name or
        // the email is present on the existing party and equal to the incoming one.
        for party in &package_data.parties {
            if let Some(existing) = self.parties.iter_mut().find(|p| {
                p.role == party.role
                    && ((p.name.is_some() && p.name == party.name)
                        || (p.email.is_some() && p.email == party.email))
            }) {
                // Only complete the known party; never overwrite what it already has.
                if existing.name.is_none() {
                    existing.name = party.name.clone();
                }
                if existing.email.is_none() {
                    existing.email = party.email.clone();
                }
            } else {
                self.parties.push(party.clone());
            }
        }

        for keyword in &package_data.keywords {
            if !self.keywords.contains(keyword) {
                self.keywords.push(keyword.clone());
            }
        }

        // Detections are appended as-is, without any de-duplication.
        for detection in &package_data.license_detections {
            self.license_detections.push(detection.clone());
        }

        for detection in &package_data.other_license_detections {
            self.other_license_detections.push(detection.clone());
        }

        for source_pkg in &package_data.source_packages {
            if !self.source_packages.contains(source_pkg) {
                self.source_packages.push(source_pkg.clone());
            }
        }

        // Name/version may just have been filled in, which can change the purl and uid.
        self.refresh_identity();
    }
1081
1082 pub fn backfill_license_provenance(&mut self) {
1083 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1084 return;
1085 };
1086
1087 for detection in &mut self.license_detections {
1088 enrich_license_detection_provenance(detection, &datafile_path);
1089 }
1090 for detection in &mut self.other_license_detections {
1091 enrich_license_detection_provenance(detection, &datafile_path);
1092 }
1093 }
1094
1095 fn refresh_identity(&mut self) {
1096 let Some(next_purl) = self.build_current_purl() else {
1097 return;
1098 };
1099
1100 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1101 self.package_uid = PackageUid::new(&next_purl);
1102 }
1103
1104 self.purl = Some(next_purl);
1105 }
1106
1107 fn fallback_package_uid(&self) -> PackageUid {
1108 let name = self
1109 .name
1110 .as_deref()
1111 .map(str::trim)
1112 .filter(|value| !value.is_empty())
1113 .unwrap_or("unknown");
1114 let version = self
1115 .version
1116 .as_deref()
1117 .map(str::trim)
1118 .filter(|value| !value.is_empty())
1119 .unwrap_or("unknown");
1120 let datasource = self
1121 .datasource_ids
1122 .first()
1123 .map(DatasourceId::as_str)
1124 .unwrap_or("unknown");
1125
1126 PackageUid::new_opaque(&format!("generated-package:{datasource}/{name}@{version}"))
1127 }
1128
1129 fn build_current_purl(&self) -> Option<String> {
1130 if let Some(existing_purl) = self.purl.as_deref() {
1131 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1132
1133 if let Some(version) = self
1134 .version
1135 .as_deref()
1136 .filter(|value| !value.trim().is_empty())
1137 {
1138 purl.with_version(version).ok()?;
1139 } else {
1140 purl.without_version();
1141 }
1142
1143 return Some(purl.to_string());
1144 }
1145
1146 if let (Some(package_type), Some(name)) = (
1147 self.package_type.as_ref(),
1148 self.name
1149 .as_deref()
1150 .filter(|value| !value.trim().is_empty()),
1151 ) {
1152 let purl_type = match package_type {
1153 PackageType::Deno => "generic",
1154 _ => package_type.as_str(),
1155 };
1156
1157 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1158
1159 if let Some(namespace) = self
1160 .namespace
1161 .as_deref()
1162 .filter(|value| !value.trim().is_empty())
1163 {
1164 purl.with_namespace(namespace).ok()?;
1165 }
1166
1167 if let Some(version) = self
1168 .version
1169 .as_deref()
1170 .filter(|value| !value.trim().is_empty())
1171 {
1172 purl.with_version(version).ok()?;
1173 }
1174
1175 if let Some(qualifiers) = &self.qualifiers {
1176 for (key, value) in qualifiers {
1177 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1178 }
1179 }
1180
1181 if let Some(subpath) = self
1182 .subpath
1183 .as_deref()
1184 .filter(|value| !value.trim().is_empty())
1185 {
1186 purl.with_subpath(subpath).ok()?;
1187 }
1188
1189 return Some(purl.to_string());
1190 }
1191 None
1192 }
1193}
1194
#[cfg(test)]
mod tests {
    use super::*;

    /// Datafile path shared by the provenance tests.
    const MANIFEST_PATH: &str = "project/package.json";

    /// Builds npm package data carrying a single parser-declared MIT detection
    /// whose `from_file`, `rule_identifier` and `identifier` are still unset.
    fn npm_package_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some("parser-declared-license".to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            MANIFEST_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_with_declared_mit()],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections are filled in from the package data.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_detection = &file_info.license_detections[0];
        assert_eq!(
            file_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(file_detection.identifier.is_some());

        // The detections stored on the package data itself are enriched too.
        let package_detection = &file_info.package_data[0].license_detections[0];
        let package_match = &package_detection.matches[0];
        assert_eq!(package_match.from_file.as_deref(), Some(MANIFEST_PATH));
        assert_eq!(
            package_match.rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(package_detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_with_declared_mit();

        let package = Package::from_package_data(&package_data, MANIFEST_PATH.to_string());

        let detection = &package.license_detections[0];
        let first_match = &detection.matches[0];
        assert_eq!(first_match.from_file.as_deref(), Some(MANIFEST_PATH));
        assert_eq!(
            first_match.rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_preserves_existing_purl_qualifiers() {
        let existing_purl = "pkg:alpine/busybox@1.35.0-r17?arch=x86_64";
        let package_data = PackageData {
            package_type: Some(PackageType::Alpine),
            namespace: Some("alpine".to_string()),
            name: Some("busybox".to_string()),
            version: Some("1.35.0-r17".to_string()),
            purl: Some(existing_purl.to_string()),
            ..PackageData::default()
        };

        let package = Package::from_package_data(&package_data, "lib/apk/db/installed".to_string());

        // The `arch` qualifier of the incoming purl must survive unchanged.
        assert_eq!(package.purl.as_deref(), Some(existing_purl));
        assert!(
            package
                .package_uid
                .starts_with("pkg:alpine/busybox@1.35.0-r17?arch=x86_64&uuid=")
        );
    }
}
1354
/// A dependency record that carries, next to the descriptive fields copied from
/// a `Dependency`, the identifiers linking it to a datafile, a datasource and
/// (optionally) a package.
///
/// Instances are created by `TopLevelDependency::from_dependency`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL string of the dependency, when one is known.
    pub purl: Option<String>,
    /// NOTE(review): presumably the requirement text as written in the manifest — confirm.
    pub extracted_requirement: Option<String>,
    /// NOTE(review): presumably the dependency scope/group name — confirm.
    pub scope: Option<String>,
    /// Optional flag copied verbatim from the source `Dependency`.
    pub is_runtime: Option<bool>,
    /// Optional flag copied verbatim from the source `Dependency`.
    pub is_optional: Option<bool>,
    /// Optional flag copied verbatim from the source `Dependency`.
    pub is_pinned: Option<bool>,
    /// Optional flag copied verbatim from the source `Dependency`.
    pub is_direct: Option<bool>,
    /// Boxed resolved-package payload cloned from the source `Dependency`, if any.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON values keyed by name; falls back to the default (`None`)
    /// when the key is absent from deserialized input.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// UID created from `purl` by `from_dependency`, or `DependencyUid::empty()`
    /// when the dependency has no purl.
    pub dependency_uid: DependencyUid,
    /// NOTE(review): presumably the UID of the package this dependency belongs to — confirm.
    pub for_package_uid: Option<PackageUid>,
    /// Datafile path supplied by the caller of `from_dependency`.
    pub datafile_path: String,
    /// Datasource identifier supplied by the caller of `from_dependency`.
    pub datasource_id: DatasourceId,
    /// Always `None` when the value is built through `from_dependency`.
    pub namespace: Option<String>,
}
1382
1383impl TopLevelDependency {
1384 pub fn from_dependency(
1386 dep: &Dependency,
1387 datafile_path: String,
1388 datasource_id: DatasourceId,
1389 for_package_uid: Option<PackageUid>,
1390 ) -> Self {
1391 let dependency_uid = dep
1392 .purl
1393 .as_ref()
1394 .map(|p| DependencyUid::new(p))
1395 .unwrap_or_else(DependencyUid::empty);
1396
1397 TopLevelDependency {
1398 purl: dep.purl.clone(),
1399 extracted_requirement: dep.extracted_requirement.clone(),
1400 scope: dep.scope.clone(),
1401 is_runtime: dep.is_runtime,
1402 is_optional: dep.is_optional,
1403 is_pinned: dep.is_pinned,
1404 is_direct: dep.is_direct,
1405 resolved_package: dep.resolved_package.clone(),
1406 extra_data: dep.extra_data.clone(),
1407 dependency_uid,
1408 for_package_uid,
1409 datafile_path,
1410 datasource_id,
1411 namespace: None,
1412 }
1413 }
1414}
1415
/// An email address together with a start and end line number.
///
/// NOTE(review): the line numbers presumably give where the address was found
/// in the scanned file — confirm against the producer.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the span associated with the address.
    pub start_line: LineNumber,
    /// Last line of the span associated with the address.
    pub end_line: LineNumber,
}
1422
/// A URL together with a start and end line number.
///
/// NOTE(review): the line numbers presumably give where the URL was found in
/// the scanned file — confirm against the producer.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the span associated with the URL.
    pub start_line: LineNumber,
    /// Last line of the span associated with the URL.
    pub end_line: LineNumber,
}
1429
/// One license-policy record: a license key plus the presentation attributes
/// (label, color code, icon) attached to it.
///
/// All fields are plain strings; two entries are equal when every field matches.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    /// Key of the license this entry applies to.
    pub license_key: String,
    /// Label text for the entry.
    pub label: String,
    /// Color code for the entry. NOTE(review): exact format (hex, name, ...) is not visible here — confirm.
    pub color_code: String,
    /// Icon identifier for the entry. NOTE(review): exact format is not visible here — confirm.
    pub icon: String,
}
1437
/// Kind of filesystem entry described by a `FileInfo`.
///
/// The hand-written serde implementations in this file map the variants to the
/// strings `"file"` and `"directory"`.
///
/// Equality on a field-less enum is total, so `Eq` is derived alongside
/// `PartialEq`, matching the other comparable types in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// A file entry; serialized as `"file"`.
    File,
    /// A directory entry; serialized as `"directory"`.
    Directory,
}
1443
1444impl serde::Serialize for FileType {
1445 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1446 where
1447 S: serde::Serializer,
1448 {
1449 match self {
1450 FileType::File => serializer.serialize_str("file"),
1451 FileType::Directory => serializer.serialize_str("directory"),
1452 }
1453 }
1454}
1455
1456impl<'de> Deserialize<'de> for FileType {
1457 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1458 where
1459 D: serde::Deserializer<'de>,
1460 {
1461 let value = String::deserialize(deserializer)?;
1462 match value.as_str() {
1463 "file" => Ok(FileType::File),
1464 "directory" => Ok(FileType::Directory),
1465 _ => Err(serde::de::Error::custom("invalid file type")),
1466 }
1467 }
1468}