1use derive_builder::Builder;
5use packageurl::PackageUrl;
6use serde::{Deserialize, Serialize};
7use sha1::{Digest, Sha1};
8use std::collections::HashMap;
9use std::str::FromStr;
10
11use super::DatasourceId;
12use super::DependencyUid;
13use super::DiagnosticSeverity;
14use super::GitSha1;
15use super::LineNumber;
16use super::MatchScore;
17use super::Md5Digest;
18use super::PackageType;
19use super::PackageUid;
20use super::ScanDiagnostic;
21use super::Sha1Digest;
22use super::Sha256Digest;
23use super::Sha512Digest;
24use super::diagnostics_from_legacy_scan_errors;
25use crate::license_detection::tokenize::tokenize_without_stopwords;
26use crate::models::output::Tallies;
27use crate::utils::spdx::combine_license_expressions;
28
/// Scan record for a single scanned resource.
///
/// The struct is (de)serialized with serde. A `FileInfoBuilder` is generated by
/// `derive_builder`, but its `build` method is written by hand
/// (`build_fn(skip)`), see `impl FileInfoBuilder` in this file.
///
/// Fields marked `#[serde(default)]` fall back to their `Default` value when
/// they are absent from the input; fields marked `#[builder(default)]` are
/// optional on the builder.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    // --- Identity (required by the builder) ---
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `"type"`.
    #[serde(rename = "type")] pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `"file_type"` (not to be confused with the
    /// `file_type` Rust field above, which is serialized as `"type"`).
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    // --- Checksums ---
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    // --- Package and license results ---
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `"detected_license_expression_spdx"`.
    /// `FileInfo::new` derives a value from package data or detections when
    /// the caller passes `None`.
    #[serde(rename = "detected_license_expression_spdx")] #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    // --- Copyright and other textual detections ---
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    // --- Errors and diagnostics ---
    /// Plain-text error messages. `FileInfo::new` derives `scan_diagnostics`
    /// from these via `diagnostics_from_legacy_scan_errors`.
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    /// Structured diagnostics; filtered by severity in `warning_diagnostics`
    /// and `error_diagnostics`.
    #[builder(default)]
    #[serde(default)]
    pub scan_diagnostics: Vec<ScanDiagnostic>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    // --- Optional classification flags (`None` when not set) ---
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    // --- Optional aggregate counters (`None` when not set) ---
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // --- Boolean markers; `FileInfo::new` initializes all of them to `false` ---
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
164
165impl FileInfoBuilder {
166 pub fn build(&self) -> Result<FileInfo, String> {
168 let mut file_info = FileInfo::new(
169 self.name.clone().ok_or("Missing field: name")?,
170 self.base_name.clone().ok_or("Missing field: base_name")?,
171 self.extension.clone().ok_or("Missing field: extension")?,
172 self.path.clone().ok_or("Missing field: path")?,
173 self.file_type.clone().ok_or("Missing field: file_type")?,
174 self.mime_type.clone().flatten(),
175 self.file_type_label.clone().flatten(),
176 self.size.ok_or("Missing field: size")?,
177 self.date.clone().flatten(),
178 self.sha1.flatten(),
179 self.md5.flatten(),
180 self.sha256.flatten(),
181 self.programming_language.clone().flatten(),
182 self.package_data.clone().unwrap_or_default(),
183 self.license_expression.clone().flatten(),
184 self.license_detections.clone().unwrap_or_default(),
185 self.license_clues.clone().unwrap_or_default(),
186 self.copyrights.clone().unwrap_or_default(),
187 self.holders.clone().unwrap_or_default(),
188 self.authors.clone().unwrap_or_default(),
189 self.emails.clone().unwrap_or_default(),
190 self.urls.clone().unwrap_or_default(),
191 self.for_packages.clone().unwrap_or_default(),
192 self.scan_errors.clone().unwrap_or_default(),
193 );
194 file_info.scan_diagnostics = if let Some(diagnostics) = &self.scan_diagnostics {
195 diagnostics.clone()
196 } else {
197 diagnostics_from_legacy_scan_errors(&file_info.scan_errors)
198 };
199 file_info.scan_errors = file_info
200 .scan_diagnostics
201 .iter()
202 .map(|diagnostic| diagnostic.message.clone())
203 .collect();
204 file_info.license_policy = self.license_policy.clone().flatten();
205 file_info.sha1_git = self.sha1_git.flatten();
206 file_info.is_binary = self.is_binary.flatten();
207 file_info.is_text = self.is_text.flatten();
208 file_info.is_archive = self.is_archive.flatten();
209 file_info.is_media = self.is_media.flatten();
210 file_info.is_script = self.is_script.flatten();
211 file_info.files_count = self.files_count.flatten();
212 file_info.dirs_count = self.dirs_count.flatten();
213 file_info.size_count = self.size_count.flatten();
214 Ok(file_info)
215 }
216}
217
218impl FileInfo {
219 #[allow(clippy::too_many_arguments)]
220 pub fn new(
222 name: String,
223 base_name: String,
224 extension: String,
225 path: String,
226 file_type: FileType,
227 mime_type: Option<String>,
228 file_type_label: Option<String>,
229 size: u64,
230 date: Option<String>,
231 sha1: Option<Sha1Digest>,
232 md5: Option<Md5Digest>,
233 sha256: Option<Sha256Digest>,
234 programming_language: Option<String>,
235 package_data: Vec<PackageData>,
236 mut license_expression: Option<String>,
237 mut license_detections: Vec<LicenseDetection>,
238 license_clues: Vec<Match>,
239 copyrights: Vec<Copyright>,
240 holders: Vec<Holder>,
241 authors: Vec<Author>,
242 emails: Vec<OutputEmail>,
243 urls: Vec<OutputURL>,
244 for_packages: Vec<PackageUid>,
245 scan_errors: Vec<String>,
246 ) -> Self {
247 let mut package_data = package_data;
248 for package in &mut package_data {
249 enrich_package_data_license_provenance(package, &path);
250 }
251
252 license_expression = license_expression.or_else(|| {
254 let expressions = package_data
255 .iter()
256 .filter_map(|pkg| pkg.get_license_expression());
257 combine_license_expressions(expressions)
258 });
259
260 if license_detections.is_empty() {
262 for pkg in &package_data {
263 license_detections.extend(pkg.license_detections.clone());
264 }
265 }
266
267 if license_expression.is_none() && !license_detections.is_empty() {
269 let expressions = license_detections
270 .iter()
271 .map(|detection| detection.license_expression.clone());
272 let expressions: Vec<String> = expressions.collect();
273 license_expression = crate::utils::spdx::select_primary_license_expression(
274 expressions.clone(),
275 )
276 .or_else(|| {
277 crate::utils::spdx::combine_license_expressions_preserving_structure(expressions)
278 });
279 }
280
281 let mut file_info = FileInfo {
282 name,
283 base_name,
284 extension,
285 path,
286 file_type,
287 mime_type,
288 file_type_label,
289 size,
290 date,
291 sha1,
292 md5,
293 sha256,
294 sha1_git: None,
295 programming_language,
296 package_data,
297 license_expression,
298 license_detections,
299 license_clues,
300 percentage_of_license_text: None,
301 copyrights,
302 holders,
303 authors,
304 emails,
305 urls,
306 for_packages,
307 scan_diagnostics: diagnostics_from_legacy_scan_errors(&scan_errors),
308 scan_errors,
309 license_policy: None,
310 is_generated: None,
311 is_binary: None,
312 is_text: None,
313 is_archive: None,
314 is_media: None,
315 is_source: None,
316 is_script: None,
317 files_count: None,
318 dirs_count: None,
319 size_count: None,
320 source_count: None,
321 is_legal: false,
322 is_manifest: false,
323 is_readme: false,
324 is_top_level: false,
325 is_key_file: false,
326 is_community: false,
327 facets: vec![],
328 tallies: None,
329 };
330
331 file_info.backfill_license_provenance();
332 file_info
333 }
334
335 pub fn backfill_license_provenance(&mut self) {
336 for detection in &mut self.license_detections {
337 enrich_license_detection_provenance(detection, &self.path);
338 }
339
340 for package in &mut self.package_data {
341 enrich_package_data_license_provenance(package, &self.path);
342 }
343 }
344}
345
346impl FileInfo {
347 pub fn warning_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
348 self.scan_diagnostics
349 .iter()
350 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Warning)
351 }
352
353 pub fn error_diagnostics(&self) -> impl Iterator<Item = &ScanDiagnostic> {
354 self.scan_diagnostics
355 .iter()
356 .filter(|diagnostic| diagnostic.severity == DiagnosticSeverity::Error)
357 }
358}
359
360fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
361 for detection in &mut package_data.license_detections {
362 enrich_license_detection_provenance(detection, path);
363 }
364 for detection in &mut package_data.other_license_detections {
365 enrich_license_detection_provenance(detection, path);
366 }
367}
368
369pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
370 for detection_match in &mut detection.matches {
371 if detection_match.from_file.is_none() {
372 detection_match.from_file = Some(path.to_string());
373 }
374
375 if detection_match.rule_identifier.is_none() {
376 detection_match.rule_identifier = detection_match.matcher.clone();
377 }
378 }
379
380 if detection.identifier.is_none() {
381 detection.identifier = Some(compute_public_detection_identifier(detection));
382 }
383}
384
385fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
386 let expression = python_safe_name(&detection.license_expression);
387 let mut hasher = Sha1::new();
388 hasher.update(format_public_detection_content(detection).as_bytes());
389 let hex_str = hex::encode(hasher.finalize());
390 let uuid_hex = &hex_str[..32];
391 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
392 .map(|uuid| uuid.to_string())
393 .unwrap_or_else(|_| uuid_hex.to_string());
394
395 format!("{}-{}", expression, content_uuid)
396}
397
398fn format_public_detection_content(detection: &LicenseDetection) -> String {
399 let mut result = String::from("(");
400
401 for (index, detection_match) in detection.matches.iter().enumerate() {
402 if index > 0 {
403 result.push_str(", ");
404 }
405 result.push_str(&format!(
406 "({}, {}, {})",
407 python_str_repr(
408 detection_match
409 .rule_identifier
410 .as_deref()
411 .or(detection_match.matcher.as_deref())
412 .unwrap_or("parser-declared-license")
413 ),
414 detection_match.score.value() as f32,
415 python_token_tuple_repr(&tokenize_without_stopwords(
416 detection_match.matched_text.as_deref().unwrap_or_default(),
417 )),
418 ));
419 }
420
421 if detection.matches.len() == 1 {
422 result.push(',');
423 }
424 result.push(')');
425 result
426}
427
/// Converts `value` into a name made of alphanumeric characters and single
/// underscores.
///
/// Every run of non-alphanumeric characters collapses into one `_`, and
/// leading/trailing underscores are removed. An input without alphanumeric
/// characters yields an empty string.
fn python_safe_name(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());

    for character in value.chars() {
        if character.is_alphanumeric() {
            collapsed.push(character);
        } else if !collapsed.ends_with('_') {
            // `_` is not alphanumeric, so a trailing `_` always marks a
            // separator run that has already been emitted.
            collapsed.push('_');
        }
    }

    collapsed.trim_matches('_').to_string()
}
449
/// Quotes `value` the way this file's Python-style rendering expects.
///
/// Double quotes are used only when the text contains a single quote and no
/// double quote; otherwise single quotes are used. Backslashes and the chosen
/// quote character are backslash-escaped.
fn python_str_repr(value: &str) -> String {
    let quote = if value.contains('\'') && !value.contains('"') {
        '"'
    } else {
        '\''
    };

    let mut repr = String::with_capacity(value.len() + 2);
    repr.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            repr.push('\\');
        }
        repr.push(character);
    }
    repr.push(quote);
    repr
}
457
458fn python_token_tuple_repr(tokens: &[String]) -> String {
459 if tokens.is_empty() {
460 return String::from("()");
461 }
462
463 let mut result = String::from("(");
464 for (index, token) in tokens.iter().enumerate() {
465 if index > 0 {
466 result.push_str(", ");
467 }
468 result.push_str(&python_str_repr(token));
469 }
470
471 if tokens.len() == 1 {
472 result.push(',');
473 }
474 result.push(')');
475 result
476}
477
/// Package metadata as extracted from a single datafile.
///
/// All fields are optional or have an empty default, so
/// `PackageData::default()` is an entirely empty record. `Package` and
/// `ResolvedPackage` in this file are built from values of this type.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `"type"`.
    #[serde(rename = "type")] pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Detections read by `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    /// Becomes the first entry of `Package::datasource_ids` in
    /// `Package::from_package_data`.
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
540
541impl PackageData {
542 pub fn get_license_expression(&self) -> Option<String> {
545 if self.license_detections.is_empty() {
546 return None;
547 }
548
549 let expressions = self
550 .license_detections
551 .iter()
552 .map(|detection| detection.license_expression.clone());
553 combine_license_expressions(expressions)
554 }
555}
556
/// A detected license expression together with the matches that support it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// Filled by `enrich_license_detection_provenance` with a computed
    /// identifier when it is `None`.
    pub identifier: Option<String>,
}
569
/// A single license match inside a `LicenseDetection` (also used for
/// `FileInfo::license_clues`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// Defaults to the owning file's path during provenance enrichment when
    /// it is `None`.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// Defaults to `matcher` during provenance enrichment when it is `None`.
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    /// Tokenized to build the detection identifier content.
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
592
/// A copyright statement with the line range where it was found.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
599
/// A copyright holder with the line range where it was found.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
606
/// An author with the line range where it was found.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
613
/// A dependency declared by a package.
///
/// Every field is optional; the flags are tri-state (`None` when not set).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Boxed because `ResolvedPackage` itself contains `Dependency` values.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
631
/// The package a `Dependency` resolves to.
///
/// Mirrors the fields of `PackageData`, except that `package_type`,
/// `namespace`, `name` and `version` are required rather than optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
689
690impl ResolvedPackage {
691 pub fn new(
692 package_type: PackageType,
693 namespace: String,
694 name: String,
695 version: String,
696 ) -> Self {
697 Self {
698 package_type,
699 namespace,
700 name,
701 version,
702 qualifiers: None,
703 subpath: None,
704 primary_language: None,
705 description: None,
706 release_date: None,
707 parties: vec![],
708 keywords: vec![],
709 homepage_url: None,
710 download_url: None,
711 size: None,
712 sha1: None,
713 md5: None,
714 sha256: None,
715 sha512: None,
716 bug_tracking_url: None,
717 code_view_url: None,
718 vcs_url: None,
719 copyright: None,
720 holder: None,
721 declared_license_expression: None,
722 declared_license_expression_spdx: None,
723 license_detections: vec![],
724 other_license_expression: None,
725 other_license_expression_spdx: None,
726 other_license_detections: vec![],
727 extracted_license_statement: None,
728 notice_text: None,
729 source_packages: vec![],
730 file_references: vec![],
731 is_private: false,
732 is_virtual: false,
733 extra_data: None,
734 dependencies: vec![],
735 repository_homepage_url: None,
736 repository_download_url: None,
737 api_data_url: None,
738 datasource_id: None,
739 purl: None,
740 }
741 }
742
743 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
744 Self {
745 package_type: package_data.package_type.unwrap_or(fallback_type),
746 namespace: package_data.namespace.clone().unwrap_or_default(),
747 name: package_data.name.clone().unwrap_or_default(),
748 version: package_data.version.clone().unwrap_or_default(),
749 qualifiers: package_data.qualifiers.clone(),
750 subpath: package_data.subpath.clone(),
751 primary_language: package_data.primary_language.clone(),
752 description: package_data.description.clone(),
753 release_date: package_data.release_date.clone(),
754 parties: package_data.parties.clone(),
755 keywords: package_data.keywords.clone(),
756 homepage_url: package_data.homepage_url.clone(),
757 download_url: package_data.download_url.clone(),
758 size: package_data.size,
759 sha1: package_data.sha1,
760 md5: package_data.md5,
761 sha256: package_data.sha256,
762 sha512: package_data.sha512,
763 bug_tracking_url: package_data.bug_tracking_url.clone(),
764 code_view_url: package_data.code_view_url.clone(),
765 vcs_url: package_data.vcs_url.clone(),
766 copyright: package_data.copyright.clone(),
767 holder: package_data.holder.clone(),
768 declared_license_expression: package_data.declared_license_expression.clone(),
769 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
770 license_detections: package_data.license_detections.clone(),
771 other_license_expression: package_data.other_license_expression.clone(),
772 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
773 other_license_detections: package_data.other_license_detections.clone(),
774 extracted_license_statement: package_data.extracted_license_statement.clone(),
775 notice_text: package_data.notice_text.clone(),
776 source_packages: package_data.source_packages.clone(),
777 file_references: package_data.file_references.clone(),
778 is_private: package_data.is_private,
779 is_virtual: package_data.is_virtual,
780 extra_data: package_data.extra_data.clone(),
781 dependencies: package_data.dependencies.clone(),
782 repository_homepage_url: package_data.repository_homepage_url.clone(),
783 repository_download_url: package_data.repository_download_url.clone(),
784 api_data_url: package_data.api_data_url.clone(),
785 datasource_id: package_data.datasource_id,
786 purl: package_data.purl.clone(),
787 }
788 }
789}
790
/// A person or organization associated with a package.
///
/// `Package::update` treats two parties as the same when their `role`
/// matches and they share a non-empty `name` or `email`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    /// Kind of party; `Party::person` sets it to `"person"`.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
805
806impl Party {
807 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
808 Self {
809 r#type: Some("person".to_string()),
810 role: Some(role.to_string()),
811 name,
812 email,
813 url: None,
814 organization: None,
815 organization_url: None,
816 timezone: None,
817 }
818 }
819}
820
/// A file referenced by a package, identified by its path, with optional
/// size, checksums and extra data.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
834
835impl FileReference {
836 pub(crate) fn from_path(path: String) -> Self {
837 Self {
838 path,
839 size: None,
840 sha1: None,
841 md5: None,
842 sha256: None,
843 sha512: None,
844 extra_data: None,
845 }
846 }
847}
848
/// A package assembled from one or more `PackageData` records.
///
/// Created by `Package::from_package_data` and extended by `Package::update`.
/// Unlike `PackageData` it has no `file_references`, `dependencies` or single
/// `datasource_id`; instead it carries `package_uid`, `datafile_paths` and
/// `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `"type"`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    /// Rewritten by `refresh_identity` whenever a purl can be built.
    pub purl: Option<String>,
    /// Derived from the purl; `from_package_data` substitutes a generated
    /// fallback when it would otherwise be empty.
    pub package_uid: PackageUid,
    /// Paths of the datafiles this package was built from, in the order
    /// they were added.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the contributing `PackageData` records, in the
    /// order they were added.
    pub datasource_ids: Vec<DatasourceId>,
}
916
917impl Package {
918 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
924 let mut package_data = package_data.clone();
925 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
926
927 let mut package = Package {
928 package_type: package_data.package_type,
929 namespace: package_data.namespace.clone(),
930 name: package_data.name.clone(),
931 version: package_data.version.clone(),
932 qualifiers: package_data.qualifiers.clone(),
933 subpath: package_data.subpath.clone(),
934 primary_language: package_data.primary_language.clone(),
935 description: package_data.description.clone(),
936 release_date: package_data.release_date.clone(),
937 parties: package_data.parties.clone(),
938 keywords: package_data.keywords.clone(),
939 homepage_url: package_data.homepage_url.clone(),
940 download_url: package_data.download_url.clone(),
941 size: package_data.size,
942 sha1: package_data.sha1,
943 md5: package_data.md5,
944 sha256: package_data.sha256,
945 sha512: package_data.sha512,
946 bug_tracking_url: package_data.bug_tracking_url.clone(),
947 code_view_url: package_data.code_view_url.clone(),
948 vcs_url: package_data.vcs_url.clone(),
949 copyright: package_data.copyright.clone(),
950 holder: package_data.holder.clone(),
951 declared_license_expression: package_data.declared_license_expression.clone(),
952 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
953 license_detections: package_data.license_detections.clone(),
954 other_license_expression: package_data.other_license_expression.clone(),
955 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
956 other_license_detections: package_data.other_license_detections.clone(),
957 extracted_license_statement: package_data.extracted_license_statement.clone(),
958 notice_text: package_data.notice_text.clone(),
959 source_packages: package_data.source_packages.clone(),
960 is_private: package_data.is_private,
961 is_virtual: package_data.is_virtual,
962 extra_data: package_data.extra_data.clone(),
963 repository_homepage_url: package_data.repository_homepage_url.clone(),
964 repository_download_url: package_data.repository_download_url.clone(),
965 api_data_url: package_data.api_data_url.clone(),
966 purl: package_data.purl.clone(),
967 package_uid: PackageUid::empty(),
968 datafile_paths: vec![datafile_path],
969 datasource_ids: if let Some(dsid) = package_data.datasource_id {
970 vec![dsid]
971 } else {
972 vec![]
973 },
974 };
975
976 package.refresh_identity();
977 if package.package_uid.is_empty() {
978 package.package_uid = package.fallback_package_uid();
979 }
980
981 package
982 }
983
984 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
990 let mut package_data = package_data.clone();
991 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
992
993 if let Some(dsid) = package_data.datasource_id {
994 self.datasource_ids.push(dsid);
995 }
996 self.datafile_paths.push(datafile_path);
997
998 macro_rules! fill_if_empty {
999 ($field:ident) => {
1000 if self.$field.is_none() {
1001 self.$field = package_data.$field;
1002 }
1003 };
1004 }
1005
1006 fill_if_empty!(package_type);
1007 fill_if_empty!(name);
1008 fill_if_empty!(namespace);
1009 fill_if_empty!(version);
1010 fill_if_empty!(qualifiers);
1011 fill_if_empty!(subpath);
1012 fill_if_empty!(primary_language);
1013 fill_if_empty!(description);
1014 fill_if_empty!(release_date);
1015 fill_if_empty!(homepage_url);
1016 fill_if_empty!(download_url);
1017 fill_if_empty!(size);
1018 fill_if_empty!(sha1);
1019 fill_if_empty!(md5);
1020 fill_if_empty!(sha256);
1021 fill_if_empty!(sha512);
1022 fill_if_empty!(bug_tracking_url);
1023 fill_if_empty!(code_view_url);
1024 fill_if_empty!(vcs_url);
1025 fill_if_empty!(copyright);
1026 fill_if_empty!(holder);
1027 fill_if_empty!(declared_license_expression);
1028 fill_if_empty!(declared_license_expression_spdx);
1029 fill_if_empty!(other_license_expression);
1030 fill_if_empty!(other_license_expression_spdx);
1031 fill_if_empty!(extracted_license_statement);
1032 fill_if_empty!(notice_text);
1033 match (&mut self.extra_data, &package_data.extra_data) {
1034 (None, Some(extra_data)) => {
1035 self.extra_data = Some(extra_data.clone());
1036 }
1037 (Some(existing), Some(incoming)) => {
1038 for (key, value) in incoming {
1039 existing.entry(key.clone()).or_insert_with(|| value.clone());
1040 }
1041 }
1042 _ => {}
1043 }
1044 fill_if_empty!(repository_homepage_url);
1045 fill_if_empty!(repository_download_url);
1046 fill_if_empty!(api_data_url);
1047
1048 for party in &package_data.parties {
1049 if let Some(existing) = self.parties.iter_mut().find(|p| {
1050 p.role == party.role
1051 && ((p.name.is_some() && p.name == party.name)
1052 || (p.email.is_some() && p.email == party.email))
1053 }) {
1054 if existing.name.is_none() {
1055 existing.name = party.name.clone();
1056 }
1057 if existing.email.is_none() {
1058 existing.email = party.email.clone();
1059 }
1060 } else {
1061 self.parties.push(party.clone());
1062 }
1063 }
1064
1065 for keyword in &package_data.keywords {
1066 if !self.keywords.contains(keyword) {
1067 self.keywords.push(keyword.clone());
1068 }
1069 }
1070
1071 for detection in &package_data.license_detections {
1072 self.license_detections.push(detection.clone());
1073 }
1074
1075 for detection in &package_data.other_license_detections {
1076 self.other_license_detections.push(detection.clone());
1077 }
1078
1079 for source_pkg in &package_data.source_packages {
1080 if !self.source_packages.contains(source_pkg) {
1081 self.source_packages.push(source_pkg.clone());
1082 }
1083 }
1084
1085 self.refresh_identity();
1086 }
1087
1088 pub fn backfill_license_provenance(&mut self) {
1089 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1090 return;
1091 };
1092
1093 for detection in &mut self.license_detections {
1094 enrich_license_detection_provenance(detection, &datafile_path);
1095 }
1096 for detection in &mut self.other_license_detections {
1097 enrich_license_detection_provenance(detection, &datafile_path);
1098 }
1099 }
1100
1101 fn refresh_identity(&mut self) {
1102 let Some(next_purl) = self.build_current_purl() else {
1103 return;
1104 };
1105
1106 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1107 self.package_uid = PackageUid::new(&next_purl);
1108 }
1109
1110 self.purl = Some(next_purl);
1111 }
1112
1113 fn fallback_package_uid(&self) -> PackageUid {
1114 let name = self
1115 .name
1116 .as_deref()
1117 .map(str::trim)
1118 .filter(|value| !value.is_empty())
1119 .unwrap_or("unknown");
1120 let version = self
1121 .version
1122 .as_deref()
1123 .map(str::trim)
1124 .filter(|value| !value.is_empty())
1125 .unwrap_or("unknown");
1126 let datasource = self
1127 .datasource_ids
1128 .first()
1129 .map(DatasourceId::as_str)
1130 .unwrap_or("unknown");
1131
1132 PackageUid::new_opaque(&format!("generated-package:{datasource}/{name}@{version}"))
1133 }
1134
1135 fn build_current_purl(&self) -> Option<String> {
1136 if let Some(existing_purl) = self.purl.as_deref() {
1137 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1138
1139 if let Some(version) = self
1140 .version
1141 .as_deref()
1142 .filter(|value| !value.trim().is_empty())
1143 {
1144 purl.with_version(version).ok()?;
1145 } else {
1146 purl.without_version();
1147 }
1148
1149 return Some(purl.to_string());
1150 }
1151
1152 if let (Some(package_type), Some(name)) = (
1153 self.package_type.as_ref(),
1154 self.name
1155 .as_deref()
1156 .filter(|value| !value.trim().is_empty()),
1157 ) {
1158 let purl_type = match package_type {
1159 PackageType::Deno => "generic",
1160 _ => package_type.as_str(),
1161 };
1162
1163 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1164
1165 if let Some(namespace) = self
1166 .namespace
1167 .as_deref()
1168 .filter(|value| !value.trim().is_empty())
1169 {
1170 purl.with_namespace(namespace).ok()?;
1171 }
1172
1173 if let Some(version) = self
1174 .version
1175 .as_deref()
1176 .filter(|value| !value.trim().is_empty())
1177 {
1178 purl.with_version(version).ok()?;
1179 }
1180
1181 if let Some(qualifiers) = &self.qualifiers {
1182 for (key, value) in qualifiers {
1183 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1184 }
1185 }
1186
1187 if let Some(subpath) = self
1188 .subpath
1189 .as_deref()
1190 .filter(|value| !value.trim().is_empty())
1191 {
1192 purl.with_subpath(subpath).ok()?;
1193 }
1194
1195 return Some(purl.to_string());
1196 }
1197 None
1198 }
1199}
1200
#[cfg(test)]
mod tests {
    use super::*;

    /// Datafile location shared by the provenance tests.
    const DATAFILE_PATH: &str = "project/package.json";

    /// Npm package data carrying one parser-declared MIT detection whose
    /// provenance fields (`from_file`, `rule_identifier`, `identifier`) are unset.
    fn npm_package_data_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some("parser-declared-license".to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![LicenseDetection {
                license_expression: "mit".to_string(),
                license_expression_spdx: "MIT".to_string(),
                matches: vec![declared_match],
                detection_log: vec![],
                identifier: None,
            }],
            ..PackageData::default()
        }
    }

    /// `FileInfo::new` must fill in provenance both on the file-level
    /// detections and on the detections nested in its package data.
    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            DATAFILE_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_data_with_declared_mit()],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        assert_eq!(file_info.license_detections.len(), 1);

        let file_level = &file_info.license_detections[0];
        assert_eq!(file_level.matches[0].from_file.as_deref(), Some(DATAFILE_PATH));
        assert!(file_level.identifier.is_some());

        let package_level = &file_info.package_data[0].license_detections[0];
        assert_eq!(
            package_level.matches[0].from_file.as_deref(),
            Some(DATAFILE_PATH)
        );
        assert_eq!(
            package_level.matches[0].rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(package_level.identifier.is_some());
    }

    /// `Package::from_package_data` must fill in the same provenance fields.
    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let source = npm_package_data_with_declared_mit();

        let package = Package::from_package_data(&source, DATAFILE_PATH.to_string());

        let detection = &package.license_detections[0];
        assert_eq!(detection.matches[0].from_file.as_deref(), Some(DATAFILE_PATH));
        assert_eq!(
            detection.matches[0].rule_identifier.as_deref(),
            Some("parser-declared-license")
        );
        assert!(detection.identifier.is_some());
    }

    /// Qualifiers on a purl supplied by the parser must survive identity refresh.
    #[test]
    fn package_from_package_data_preserves_existing_purl_qualifiers() {
        let expected_purl = "pkg:alpine/busybox@1.35.0-r17?arch=x86_64";
        let source = PackageData {
            package_type: Some(PackageType::Alpine),
            namespace: Some("alpine".to_string()),
            name: Some("busybox".to_string()),
            version: Some("1.35.0-r17".to_string()),
            purl: Some(expected_purl.to_string()),
            ..PackageData::default()
        };

        let package = Package::from_package_data(&source, "lib/apk/db/installed".to_string());

        assert_eq!(package.purl.as_deref(), Some(expected_purl));
        assert!(
            package
                .package_uid
                .starts_with("pkg:alpine/busybox@1.35.0-r17?arch=x86_64&uuid=")
        );
    }
}
1360
/// A dependency record that carries, in addition to the fields copied from a
/// `Dependency`, its own uid and the datafile/datasource it was reported from.
///
/// Instances are normally created through [`TopLevelDependency::from_dependency`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL of the dependency, copied from the source `Dependency`.
    pub purl: Option<String>,
    /// Copied from the source `Dependency`.
    /// NOTE(review): presumably the raw requirement string as written in the manifest — confirm.
    pub extracted_requirement: Option<String>,
    /// Copied from the source `Dependency`.
    pub scope: Option<String>,
    /// Copied from the source `Dependency`.
    pub is_runtime: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_optional: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_pinned: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_direct: Option<bool>,
    /// Copied from the source `Dependency`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form extra values; a missing key deserializes to `None`.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// Uid of this dependency; `from_dependency` derives it from `purl`, or
    /// uses `DependencyUid::empty()` when there is no purl.
    pub dependency_uid: DependencyUid,
    /// Uid of the package this dependency is attached to, if any.
    pub for_package_uid: Option<PackageUid>,
    /// Path of the datafile this dependency was reported from.
    pub datafile_path: String,
    /// Identifier of the datasource that produced this dependency.
    pub datasource_id: DatasourceId,
    /// Always `None` when built by `from_dependency`.
    /// NOTE(review): presumably filled in later by a caller — confirm.
    pub namespace: Option<String>,
}
1388
1389impl TopLevelDependency {
1390 pub fn from_dependency(
1392 dep: &Dependency,
1393 datafile_path: String,
1394 datasource_id: DatasourceId,
1395 for_package_uid: Option<PackageUid>,
1396 ) -> Self {
1397 let dependency_uid = dep
1398 .purl
1399 .as_ref()
1400 .map(|p| DependencyUid::new(p))
1401 .unwrap_or_else(DependencyUid::empty);
1402
1403 TopLevelDependency {
1404 purl: dep.purl.clone(),
1405 extracted_requirement: dep.extracted_requirement.clone(),
1406 scope: dep.scope.clone(),
1407 is_runtime: dep.is_runtime,
1408 is_optional: dep.is_optional,
1409 is_pinned: dep.is_pinned,
1410 is_direct: dep.is_direct,
1411 resolved_package: dep.resolved_package.clone(),
1412 extra_data: dep.extra_data.clone(),
1413 dependency_uid,
1414 for_package_uid,
1415 datafile_path,
1416 datasource_id,
1417 namespace: None,
1418 }
1419 }
1420}
1421
/// An email address entry in the scan output, together with a line span.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the span.
    /// NOTE(review): presumably where the email was detected in the scanned file — confirm.
    pub start_line: LineNumber,
    /// Last line of the span.
    pub end_line: LineNumber,
}
1428
/// A URL entry in the scan output, together with a line span.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the span.
    /// NOTE(review): presumably where the URL was detected in the scanned file — confirm.
    pub start_line: LineNumber,
    /// Last line of the span.
    pub end_line: LineNumber,
}
1435
/// One license policy record: a license key plus presentation attributes.
///
/// All fields are plain strings and are required when deserializing.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    /// Key of the license this entry applies to.
    pub license_key: String,
    /// Label attached to the license.
    /// NOTE(review): presumably a human-readable policy category — confirm.
    pub label: String,
    /// Color associated with the entry.
    /// NOTE(review): exact format (e.g. hex code) is not visible here — confirm.
    pub color_code: String,
    /// Icon associated with the entry.
    pub icon: String,
}
1443
/// Kind of filesystem entry.
///
/// The serde implementations in this file encode the variants as the strings
/// `"file"` and `"directory"`. Equality is total for this fieldless enum, so
/// `Eq` is derived alongside `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// A regular file entry.
    File,
    /// A directory entry.
    Directory,
}
1449
1450impl serde::Serialize for FileType {
1451 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1452 where
1453 S: serde::Serializer,
1454 {
1455 match self {
1456 FileType::File => serializer.serialize_str("file"),
1457 FileType::Directory => serializer.serialize_str("directory"),
1458 }
1459 }
1460}
1461
1462impl<'de> Deserialize<'de> for FileType {
1463 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1464 where
1465 D: serde::Deserializer<'de>,
1466 {
1467 let value = String::deserialize(deserializer)?;
1468 match value.as_str() {
1469 "file" => Ok(FileType::File),
1470 "directory" => Ok(FileType::Directory),
1471 _ => Err(serde::de::Error::custom("invalid file type")),
1472 }
1473 }
1474}