1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use sha1::{Digest, Sha1};
5use std::collections::HashMap;
6use std::str::FromStr;
7
8use super::DatasourceId;
9use super::DependencyUid;
10use super::GitSha1;
11use super::LineNumber;
12use super::MatchScore;
13use super::Md5Digest;
14use super::PackageType;
15use super::PackageUid;
16use super::Sha1Digest;
17use super::Sha256Digest;
18use super::Sha512Digest;
19use crate::license_detection::tokenize::tokenize_without_stopwords;
20use crate::models::output::Tallies;
21use crate::utils::spdx::combine_license_expressions;
22
23#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
24#[builder(build_fn(skip))]
25pub struct FileInfo {
27 pub name: String,
28 pub base_name: String,
29 pub extension: String,
30 pub path: String,
31 #[serde(rename = "type")] pub file_type: FileType,
33 #[builder(default)]
34 #[serde(default)]
35 pub mime_type: Option<String>,
36 #[builder(default)]
37 #[serde(rename = "file_type", default)]
38 pub file_type_label: Option<String>,
39 pub size: u64,
40 #[builder(default)]
41 #[serde(default)]
42 pub date: Option<String>,
43 #[builder(default)]
44 #[serde(default)]
45 pub sha1: Option<Sha1Digest>,
46 #[builder(default)]
47 #[serde(default)]
48 pub md5: Option<Md5Digest>,
49 #[builder(default)]
50 #[serde(default)]
51 pub sha256: Option<Sha256Digest>,
52 #[builder(default)]
53 #[serde(default)]
54 pub sha1_git: Option<GitSha1>,
55 #[builder(default)]
56 #[serde(default)]
57 pub programming_language: Option<String>,
58 #[builder(default)]
59 #[serde(default)]
60 pub package_data: Vec<PackageData>,
61 #[serde(rename = "detected_license_expression_spdx")] #[builder(default)]
63 pub license_expression: Option<String>,
64 #[builder(default)]
65 #[serde(default)]
66 pub license_detections: Vec<LicenseDetection>,
67 #[builder(default)]
68 #[serde(default)]
69 pub license_clues: Vec<Match>,
70 #[builder(default)]
71 #[serde(default)]
72 pub percentage_of_license_text: Option<f64>,
73 #[builder(default)]
74 #[serde(default)]
75 pub copyrights: Vec<Copyright>,
76 #[builder(default)]
77 #[serde(default)]
78 pub holders: Vec<Holder>,
79 #[builder(default)]
80 #[serde(default)]
81 pub authors: Vec<Author>,
82 #[builder(default)]
83 #[serde(default)]
84 pub emails: Vec<OutputEmail>,
85 #[builder(default)]
86 #[serde(default)]
87 pub urls: Vec<OutputURL>,
88 #[builder(default)]
89 #[serde(default)]
90 pub for_packages: Vec<PackageUid>,
91 #[builder(default)]
92 #[serde(default)]
93 pub scan_errors: Vec<String>,
94 #[builder(default)]
95 #[serde(default)]
96 pub license_policy: Option<Vec<LicensePolicyEntry>>,
97 #[builder(default)]
98 #[serde(default)]
99 pub is_generated: Option<bool>,
100 #[builder(default)]
101 #[serde(default)]
102 pub is_binary: Option<bool>,
103 #[builder(default)]
104 #[serde(default)]
105 pub is_text: Option<bool>,
106 #[builder(default)]
107 #[serde(default)]
108 pub is_archive: Option<bool>,
109 #[builder(default)]
110 #[serde(default)]
111 pub is_media: Option<bool>,
112 #[builder(default)]
113 #[serde(default)]
114 pub is_source: Option<bool>,
115 #[builder(default)]
116 #[serde(default)]
117 pub is_script: Option<bool>,
118 #[builder(default)]
119 #[serde(default)]
120 pub files_count: Option<usize>,
121 #[builder(default)]
122 #[serde(default)]
123 pub dirs_count: Option<usize>,
124 #[builder(default)]
125 #[serde(default)]
126 pub size_count: Option<u64>,
127 #[builder(default)]
128 #[serde(default)]
129 pub source_count: Option<usize>,
130 #[builder(default)]
131 #[serde(default)]
132 pub is_legal: bool,
133 #[builder(default)]
134 #[serde(default)]
135 pub is_manifest: bool,
136 #[builder(default)]
137 #[serde(default)]
138 pub is_readme: bool,
139 #[builder(default)]
140 #[serde(default)]
141 pub is_top_level: bool,
142 #[builder(default)]
143 #[serde(default)]
144 pub is_key_file: bool,
145 #[builder(default)]
146 #[serde(default)]
147 pub is_community: bool,
148 #[builder(default)]
149 #[serde(default)]
150 pub facets: Vec<String>,
151 #[builder(default)]
152 #[serde(default)]
153 pub tallies: Option<Tallies>,
154}
155
156impl FileInfoBuilder {
157 pub fn build(&self) -> Result<FileInfo, String> {
159 let mut file_info = FileInfo::new(
160 self.name.clone().ok_or("Missing field: name")?,
161 self.base_name.clone().ok_or("Missing field: base_name")?,
162 self.extension.clone().ok_or("Missing field: extension")?,
163 self.path.clone().ok_or("Missing field: path")?,
164 self.file_type.clone().ok_or("Missing field: file_type")?,
165 self.mime_type.clone().flatten(),
166 self.file_type_label.clone().flatten(),
167 self.size.ok_or("Missing field: size")?,
168 self.date.clone().flatten(),
169 self.sha1.flatten(),
170 self.md5.flatten(),
171 self.sha256.flatten(),
172 self.programming_language.clone().flatten(),
173 self.package_data.clone().unwrap_or_default(),
174 self.license_expression.clone().flatten(),
175 self.license_detections.clone().unwrap_or_default(),
176 self.license_clues.clone().unwrap_or_default(),
177 self.copyrights.clone().unwrap_or_default(),
178 self.holders.clone().unwrap_or_default(),
179 self.authors.clone().unwrap_or_default(),
180 self.emails.clone().unwrap_or_default(),
181 self.urls.clone().unwrap_or_default(),
182 self.for_packages.clone().unwrap_or_default(),
183 self.scan_errors.clone().unwrap_or_default(),
184 );
185 file_info.license_policy = self.license_policy.clone().flatten();
186 file_info.sha1_git = self.sha1_git.flatten();
187 file_info.is_binary = self.is_binary.flatten();
188 file_info.is_text = self.is_text.flatten();
189 file_info.is_archive = self.is_archive.flatten();
190 file_info.is_media = self.is_media.flatten();
191 file_info.is_script = self.is_script.flatten();
192 file_info.files_count = self.files_count.flatten();
193 file_info.dirs_count = self.dirs_count.flatten();
194 file_info.size_count = self.size_count.flatten();
195 Ok(file_info)
196 }
197}
198
199impl FileInfo {
200 #[allow(clippy::too_many_arguments)]
201 pub fn new(
203 name: String,
204 base_name: String,
205 extension: String,
206 path: String,
207 file_type: FileType,
208 mime_type: Option<String>,
209 file_type_label: Option<String>,
210 size: u64,
211 date: Option<String>,
212 sha1: Option<Sha1Digest>,
213 md5: Option<Md5Digest>,
214 sha256: Option<Sha256Digest>,
215 programming_language: Option<String>,
216 package_data: Vec<PackageData>,
217 mut license_expression: Option<String>,
218 mut license_detections: Vec<LicenseDetection>,
219 license_clues: Vec<Match>,
220 copyrights: Vec<Copyright>,
221 holders: Vec<Holder>,
222 authors: Vec<Author>,
223 emails: Vec<OutputEmail>,
224 urls: Vec<OutputURL>,
225 for_packages: Vec<PackageUid>,
226 scan_errors: Vec<String>,
227 ) -> Self {
228 let mut package_data = package_data;
229 for package in &mut package_data {
230 enrich_package_data_license_provenance(package, &path);
231 }
232
233 license_expression = license_expression.or_else(|| {
235 let expressions = package_data
236 .iter()
237 .filter_map(|pkg| pkg.get_license_expression());
238 combine_license_expressions(expressions)
239 });
240
241 if license_detections.is_empty() {
243 for pkg in &package_data {
244 license_detections.extend(pkg.license_detections.clone());
245 }
246 }
247
248 if license_expression.is_none() && !license_detections.is_empty() {
250 let expressions = license_detections
251 .iter()
252 .map(|detection| detection.license_expression.clone());
253 license_expression = combine_license_expressions(expressions);
254 }
255
256 let mut file_info = FileInfo {
257 name,
258 base_name,
259 extension,
260 path,
261 file_type,
262 mime_type,
263 file_type_label,
264 size,
265 date,
266 sha1,
267 md5,
268 sha256,
269 sha1_git: None,
270 programming_language,
271 package_data,
272 license_expression,
273 license_detections,
274 license_clues,
275 percentage_of_license_text: None,
276 copyrights,
277 holders,
278 authors,
279 emails,
280 urls,
281 for_packages,
282 scan_errors,
283 license_policy: None,
284 is_generated: None,
285 is_binary: None,
286 is_text: None,
287 is_archive: None,
288 is_media: None,
289 is_source: None,
290 is_script: None,
291 files_count: None,
292 dirs_count: None,
293 size_count: None,
294 source_count: None,
295 is_legal: false,
296 is_manifest: false,
297 is_readme: false,
298 is_top_level: false,
299 is_key_file: false,
300 is_community: false,
301 facets: vec![],
302 tallies: None,
303 };
304 file_info.backfill_license_provenance();
305 file_info
306 }
307
308 pub fn backfill_license_provenance(&mut self) {
309 for detection in &mut self.license_detections {
310 enrich_license_detection_provenance(detection, &self.path);
311 }
312
313 for package in &mut self.package_data {
314 enrich_package_data_license_provenance(package, &self.path);
315 }
316 }
317}
318
319fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
320 for detection in &mut package_data.license_detections {
321 enrich_license_detection_provenance(detection, path);
322 }
323 for detection in &mut package_data.other_license_detections {
324 enrich_license_detection_provenance(detection, path);
325 }
326}
327
328pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
329 for detection_match in &mut detection.matches {
330 if detection_match.from_file.is_none() {
331 detection_match.from_file = Some(path.to_string());
332 }
333
334 if detection_match.rule_identifier.is_none() {
335 detection_match.rule_identifier = detection_match.matcher.clone();
336 }
337 }
338
339 if detection.identifier.is_none() {
340 detection.identifier = Some(compute_public_detection_identifier(detection));
341 }
342}
343
344fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
345 let expression = python_safe_name(&detection.license_expression);
346 let mut hasher = Sha1::new();
347 hasher.update(format_public_detection_content(detection).as_bytes());
348 let hex_str = hex::encode(hasher.finalize());
349 let uuid_hex = &hex_str[..32];
350 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
351 .map(|uuid| uuid.to_string())
352 .unwrap_or_else(|_| uuid_hex.to_string());
353
354 format!("{}-{}", expression, content_uuid)
355}
356
357fn format_public_detection_content(detection: &LicenseDetection) -> String {
358 let mut result = String::from("(");
359
360 for (index, detection_match) in detection.matches.iter().enumerate() {
361 if index > 0 {
362 result.push_str(", ");
363 }
364 result.push_str(&format!(
365 "({}, {}, {})",
366 python_str_repr(
367 detection_match
368 .rule_identifier
369 .as_deref()
370 .or(detection_match.matcher.as_deref())
371 .unwrap_or("parser-declared-license")
372 ),
373 detection_match.score.value() as f32,
374 python_token_tuple_repr(&tokenize_without_stopwords(
375 detection_match.matched_text.as_deref().unwrap_or_default(),
376 )),
377 ));
378 }
379
380 if detection.matches.len() == 1 {
381 result.push(',');
382 }
383 result.push(')');
384 result
385}
386
/// Reduces `value` to its alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters becomes one `_`, and no `_` is
/// kept at the start or end. An input without alphanumeric characters
/// yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|run| !run.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
408
/// Quotes `value` in the style of Python's `repr` for strings.
///
/// Double quotes are used only when the text contains `'` but no `"`;
/// otherwise single quotes are used and embedded `'` are escaped.
/// Backslashes are always doubled. No other characters are escaped.
fn python_str_repr(value: &str) -> String {
    let backslash_escaped = value.replace('\\', "\\\\");
    let prefers_double_quotes = value.contains('\'') && !value.contains('"');

    if prefers_double_quotes {
        // No `"` can occur in this branch, so nothing else needs escaping.
        format!("\"{}\"", backslash_escaped)
    } else {
        format!("'{}'", backslash_escaped.replace('\'', "\\'"))
    }
}
416
417fn python_token_tuple_repr(tokens: &[String]) -> String {
418 if tokens.is_empty() {
419 return String::from("()");
420 }
421
422 let mut result = String::from("(");
423 for (index, token) in tokens.iter().enumerate() {
424 if index > 0 {
425 result.push_str(", ");
426 }
427 result.push_str(&python_str_repr(token));
428 }
429
430 if tokens.len() == 1 {
431 result.push(',');
432 }
433 result.push(')');
434 result
435}
436
437#[derive(Serialize, Deserialize, Debug, Clone, Default)]
443pub struct PackageData {
444 #[serde(rename = "type")] pub package_type: Option<PackageType>,
446 pub namespace: Option<String>,
447 pub name: Option<String>,
448 pub version: Option<String>,
449 #[serde(default)]
450 pub qualifiers: Option<HashMap<String, String>>,
451 pub subpath: Option<String>,
452 pub primary_language: Option<String>,
453 pub description: Option<String>,
454 pub release_date: Option<String>,
455 #[serde(default)]
456 pub parties: Vec<Party>,
457 #[serde(default)]
458 pub keywords: Vec<String>,
459 pub homepage_url: Option<String>,
460 pub download_url: Option<String>,
461 pub size: Option<u64>,
462 pub sha1: Option<Sha1Digest>,
463 pub md5: Option<Md5Digest>,
464 pub sha256: Option<Sha256Digest>,
465 pub sha512: Option<Sha512Digest>,
466 pub bug_tracking_url: Option<String>,
467 pub code_view_url: Option<String>,
468 pub vcs_url: Option<String>,
469 pub copyright: Option<String>,
470 pub holder: Option<String>,
471 pub declared_license_expression: Option<String>,
472 pub declared_license_expression_spdx: Option<String>,
473 #[serde(default)]
474 pub license_detections: Vec<LicenseDetection>,
475 pub other_license_expression: Option<String>,
476 pub other_license_expression_spdx: Option<String>,
477 #[serde(default)]
478 pub other_license_detections: Vec<LicenseDetection>,
479 pub extracted_license_statement: Option<String>,
480 pub notice_text: Option<String>,
481 #[serde(default)]
482 pub source_packages: Vec<String>,
483 #[serde(default)]
484 pub file_references: Vec<FileReference>,
485 #[serde(default)]
486 pub is_private: bool,
487 #[serde(default)]
488 pub is_virtual: bool,
489 #[serde(default)]
490 pub extra_data: Option<HashMap<String, serde_json::Value>>,
491 #[serde(default)]
492 pub dependencies: Vec<Dependency>,
493 pub repository_homepage_url: Option<String>,
494 pub repository_download_url: Option<String>,
495 pub api_data_url: Option<String>,
496 pub datasource_id: Option<DatasourceId>,
497 pub purl: Option<String>,
498}
499
500impl PackageData {
501 pub fn get_license_expression(&self) -> Option<String> {
504 if self.license_detections.is_empty() {
505 return None;
506 }
507
508 let expressions = self
509 .license_detections
510 .iter()
511 .map(|detection| detection.license_expression.clone());
512 combine_license_expressions(expressions)
513 }
514}
515
516#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
520pub struct LicenseDetection {
521 pub license_expression: String,
522 pub license_expression_spdx: String,
523 pub matches: Vec<Match>,
524 #[serde(default)]
525 pub detection_log: Vec<String>,
526 pub identifier: Option<String>,
527}
528
529#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
533pub struct Match {
534 pub license_expression: String,
535 pub license_expression_spdx: String,
536 pub from_file: Option<String>,
537 pub start_line: LineNumber,
538 pub end_line: LineNumber,
539 pub matcher: Option<String>,
540 pub score: MatchScore,
541 pub matched_length: Option<usize>,
542 pub match_coverage: Option<f64>,
543 pub rule_relevance: Option<u8>,
544 pub rule_identifier: Option<String>,
545 pub rule_url: Option<String>,
546 pub matched_text: Option<String>,
547 pub matched_text_diagnostics: Option<String>,
548 #[serde(default)]
549 pub referenced_filenames: Option<Vec<String>>,
550}
551
552#[derive(Serialize, Deserialize, Debug, Clone)]
553pub struct Copyright {
554 pub copyright: String,
555 pub start_line: LineNumber,
556 pub end_line: LineNumber,
557}
558
559#[derive(Serialize, Deserialize, Debug, Clone)]
560pub struct Holder {
561 pub holder: String,
562 pub start_line: LineNumber,
563 pub end_line: LineNumber,
564}
565
566#[derive(Serialize, Deserialize, Debug, Clone)]
567pub struct Author {
568 pub author: String,
569 pub start_line: LineNumber,
570 pub end_line: LineNumber,
571}
572
573#[derive(Serialize, Deserialize, Debug, Clone)]
578pub struct Dependency {
579 pub purl: Option<String>,
580 pub extracted_requirement: Option<String>,
581 pub scope: Option<String>,
582 pub is_runtime: Option<bool>,
583 pub is_optional: Option<bool>,
584 pub is_pinned: Option<bool>,
585 pub is_direct: Option<bool>,
586 pub resolved_package: Option<Box<ResolvedPackage>>,
587 #[serde(default)]
588 pub extra_data: Option<HashMap<String, serde_json::Value>>,
589}
590
591#[derive(Serialize, Deserialize, Debug, Clone)]
592pub struct ResolvedPackage {
593 #[serde(rename = "type")]
594 pub package_type: PackageType,
595 pub namespace: String,
596 pub name: String,
597 pub version: String,
598 #[serde(default)]
599 pub qualifiers: Option<HashMap<String, String>>,
600 pub subpath: Option<String>,
601 pub primary_language: Option<String>,
602 pub description: Option<String>,
603 pub release_date: Option<String>,
604 #[serde(default)]
605 pub parties: Vec<Party>,
606 #[serde(default)]
607 pub keywords: Vec<String>,
608 pub homepage_url: Option<String>,
609 pub download_url: Option<String>,
610 pub size: Option<u64>,
611 pub sha1: Option<Sha1Digest>,
612 pub md5: Option<Md5Digest>,
613 pub sha256: Option<Sha256Digest>,
614 pub sha512: Option<Sha512Digest>,
615 pub bug_tracking_url: Option<String>,
616 pub code_view_url: Option<String>,
617 pub vcs_url: Option<String>,
618 pub copyright: Option<String>,
619 pub holder: Option<String>,
620 pub declared_license_expression: Option<String>,
621 pub declared_license_expression_spdx: Option<String>,
622 #[serde(default)]
623 pub license_detections: Vec<LicenseDetection>,
624 pub other_license_expression: Option<String>,
625 pub other_license_expression_spdx: Option<String>,
626 #[serde(default)]
627 pub other_license_detections: Vec<LicenseDetection>,
628 pub extracted_license_statement: Option<String>,
629 pub notice_text: Option<String>,
630 #[serde(default)]
631 pub source_packages: Vec<String>,
632 #[serde(default)]
633 pub file_references: Vec<FileReference>,
634 #[serde(default)]
635 pub is_private: bool,
636 #[serde(default)]
637 pub is_virtual: bool,
638 #[serde(default)]
639 pub extra_data: Option<HashMap<String, serde_json::Value>>,
640 #[serde(default)]
641 pub dependencies: Vec<Dependency>,
642 pub repository_homepage_url: Option<String>,
643 pub repository_download_url: Option<String>,
644 pub api_data_url: Option<String>,
645 pub datasource_id: Option<DatasourceId>,
646 pub purl: Option<String>,
647}
648
649impl ResolvedPackage {
650 pub fn new(
651 package_type: PackageType,
652 namespace: String,
653 name: String,
654 version: String,
655 ) -> Self {
656 Self {
657 package_type,
658 namespace,
659 name,
660 version,
661 qualifiers: None,
662 subpath: None,
663 primary_language: None,
664 description: None,
665 release_date: None,
666 parties: vec![],
667 keywords: vec![],
668 homepage_url: None,
669 download_url: None,
670 size: None,
671 sha1: None,
672 md5: None,
673 sha256: None,
674 sha512: None,
675 bug_tracking_url: None,
676 code_view_url: None,
677 vcs_url: None,
678 copyright: None,
679 holder: None,
680 declared_license_expression: None,
681 declared_license_expression_spdx: None,
682 license_detections: vec![],
683 other_license_expression: None,
684 other_license_expression_spdx: None,
685 other_license_detections: vec![],
686 extracted_license_statement: None,
687 notice_text: None,
688 source_packages: vec![],
689 file_references: vec![],
690 is_private: false,
691 is_virtual: false,
692 extra_data: None,
693 dependencies: vec![],
694 repository_homepage_url: None,
695 repository_download_url: None,
696 api_data_url: None,
697 datasource_id: None,
698 purl: None,
699 }
700 }
701
702 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
703 Self {
704 package_type: package_data.package_type.unwrap_or(fallback_type),
705 namespace: package_data.namespace.clone().unwrap_or_default(),
706 name: package_data.name.clone().unwrap_or_default(),
707 version: package_data.version.clone().unwrap_or_default(),
708 qualifiers: package_data.qualifiers.clone(),
709 subpath: package_data.subpath.clone(),
710 primary_language: package_data.primary_language.clone(),
711 description: package_data.description.clone(),
712 release_date: package_data.release_date.clone(),
713 parties: package_data.parties.clone(),
714 keywords: package_data.keywords.clone(),
715 homepage_url: package_data.homepage_url.clone(),
716 download_url: package_data.download_url.clone(),
717 size: package_data.size,
718 sha1: package_data.sha1,
719 md5: package_data.md5,
720 sha256: package_data.sha256,
721 sha512: package_data.sha512,
722 bug_tracking_url: package_data.bug_tracking_url.clone(),
723 code_view_url: package_data.code_view_url.clone(),
724 vcs_url: package_data.vcs_url.clone(),
725 copyright: package_data.copyright.clone(),
726 holder: package_data.holder.clone(),
727 declared_license_expression: package_data.declared_license_expression.clone(),
728 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
729 license_detections: package_data.license_detections.clone(),
730 other_license_expression: package_data.other_license_expression.clone(),
731 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
732 other_license_detections: package_data.other_license_detections.clone(),
733 extracted_license_statement: package_data.extracted_license_statement.clone(),
734 notice_text: package_data.notice_text.clone(),
735 source_packages: package_data.source_packages.clone(),
736 file_references: package_data.file_references.clone(),
737 is_private: package_data.is_private,
738 is_virtual: package_data.is_virtual,
739 extra_data: package_data.extra_data.clone(),
740 dependencies: package_data.dependencies.clone(),
741 repository_homepage_url: package_data.repository_homepage_url.clone(),
742 repository_download_url: package_data.repository_download_url.clone(),
743 api_data_url: package_data.api_data_url.clone(),
744 datasource_id: package_data.datasource_id,
745 purl: package_data.purl.clone(),
746 }
747 }
748}
749
750#[derive(Serialize, Deserialize, Debug, Clone)]
754pub struct Party {
755 pub r#type: Option<String>,
756 pub role: Option<String>,
757 pub name: Option<String>,
758 pub email: Option<String>,
759 pub url: Option<String>,
760 pub organization: Option<String>,
761 pub organization_url: Option<String>,
762 pub timezone: Option<String>,
763}
764
765#[derive(Serialize, Deserialize, Debug, Clone)]
769pub struct FileReference {
770 pub path: String,
771 pub size: Option<u64>,
772 pub sha1: Option<Sha1Digest>,
773 pub md5: Option<Md5Digest>,
774 pub sha256: Option<Sha256Digest>,
775 pub sha512: Option<Sha512Digest>,
776 pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
777}
778
779#[derive(Serialize, Deserialize, Debug, Clone)]
789pub struct Package {
790 #[serde(rename = "type")]
791 pub package_type: Option<PackageType>,
792 pub namespace: Option<String>,
793 pub name: Option<String>,
794 pub version: Option<String>,
795 #[serde(default)]
796 pub qualifiers: Option<HashMap<String, String>>,
797 pub subpath: Option<String>,
798 pub primary_language: Option<String>,
799 pub description: Option<String>,
800 pub release_date: Option<String>,
801 #[serde(default)]
802 pub parties: Vec<Party>,
803 #[serde(default)]
804 pub keywords: Vec<String>,
805 pub homepage_url: Option<String>,
806 pub download_url: Option<String>,
807 pub size: Option<u64>,
808 pub sha1: Option<Sha1Digest>,
809 pub md5: Option<Md5Digest>,
810 pub sha256: Option<Sha256Digest>,
811 pub sha512: Option<Sha512Digest>,
812 pub bug_tracking_url: Option<String>,
813 pub code_view_url: Option<String>,
814 pub vcs_url: Option<String>,
815 pub copyright: Option<String>,
816 pub holder: Option<String>,
817 pub declared_license_expression: Option<String>,
818 pub declared_license_expression_spdx: Option<String>,
819 #[serde(default)]
820 pub license_detections: Vec<LicenseDetection>,
821 pub other_license_expression: Option<String>,
822 pub other_license_expression_spdx: Option<String>,
823 #[serde(default)]
824 pub other_license_detections: Vec<LicenseDetection>,
825 pub extracted_license_statement: Option<String>,
826 pub notice_text: Option<String>,
827 #[serde(default)]
828 pub source_packages: Vec<String>,
829 #[serde(default)]
830 pub is_private: bool,
831 #[serde(default)]
832 pub is_virtual: bool,
833 #[serde(default)]
834 pub extra_data: Option<HashMap<String, serde_json::Value>>,
835 pub repository_homepage_url: Option<String>,
836 pub repository_download_url: Option<String>,
837 pub api_data_url: Option<String>,
838 pub purl: Option<String>,
839 pub package_uid: PackageUid,
841 pub datafile_paths: Vec<String>,
843 pub datasource_ids: Vec<DatasourceId>,
845}
846
847impl Package {
848 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
853 let mut package_data = package_data.clone();
854 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
855
856 let package_uid = package_data
857 .purl
858 .as_ref()
859 .map(|p| PackageUid::new(p))
860 .unwrap_or_else(PackageUid::empty);
861
862 Package {
863 package_type: package_data.package_type,
864 namespace: package_data.namespace.clone(),
865 name: package_data.name.clone(),
866 version: package_data.version.clone(),
867 qualifiers: package_data.qualifiers.clone(),
868 subpath: package_data.subpath.clone(),
869 primary_language: package_data.primary_language.clone(),
870 description: package_data.description.clone(),
871 release_date: package_data.release_date.clone(),
872 parties: package_data.parties.clone(),
873 keywords: package_data.keywords.clone(),
874 homepage_url: package_data.homepage_url.clone(),
875 download_url: package_data.download_url.clone(),
876 size: package_data.size,
877 sha1: package_data.sha1,
878 md5: package_data.md5,
879 sha256: package_data.sha256,
880 sha512: package_data.sha512,
881 bug_tracking_url: package_data.bug_tracking_url.clone(),
882 code_view_url: package_data.code_view_url.clone(),
883 vcs_url: package_data.vcs_url.clone(),
884 copyright: package_data.copyright.clone(),
885 holder: package_data.holder.clone(),
886 declared_license_expression: package_data.declared_license_expression.clone(),
887 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
888 license_detections: package_data.license_detections.clone(),
889 other_license_expression: package_data.other_license_expression.clone(),
890 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
891 other_license_detections: package_data.other_license_detections.clone(),
892 extracted_license_statement: package_data.extracted_license_statement.clone(),
893 notice_text: package_data.notice_text.clone(),
894 source_packages: package_data.source_packages.clone(),
895 is_private: package_data.is_private,
896 is_virtual: package_data.is_virtual,
897 extra_data: package_data.extra_data.clone(),
898 repository_homepage_url: package_data.repository_homepage_url.clone(),
899 repository_download_url: package_data.repository_download_url.clone(),
900 api_data_url: package_data.api_data_url.clone(),
901 purl: package_data.purl.clone(),
902 package_uid,
903 datafile_paths: vec![datafile_path],
904 datasource_ids: if let Some(dsid) = package_data.datasource_id {
905 vec![dsid]
906 } else {
907 vec![]
908 },
909 }
910 }
911
912 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
918 let mut package_data = package_data.clone();
919 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
920
921 if let Some(dsid) = package_data.datasource_id {
922 self.datasource_ids.push(dsid);
923 }
924 self.datafile_paths.push(datafile_path);
925
926 macro_rules! fill_if_empty {
927 ($field:ident) => {
928 if self.$field.is_none() {
929 self.$field = package_data.$field;
930 }
931 };
932 }
933
934 fill_if_empty!(package_type);
935 fill_if_empty!(name);
936 fill_if_empty!(namespace);
937 fill_if_empty!(version);
938 fill_if_empty!(qualifiers);
939 fill_if_empty!(subpath);
940 fill_if_empty!(primary_language);
941 fill_if_empty!(description);
942 fill_if_empty!(release_date);
943 fill_if_empty!(homepage_url);
944 fill_if_empty!(download_url);
945 fill_if_empty!(size);
946 fill_if_empty!(sha1);
947 fill_if_empty!(md5);
948 fill_if_empty!(sha256);
949 fill_if_empty!(sha512);
950 fill_if_empty!(bug_tracking_url);
951 fill_if_empty!(code_view_url);
952 fill_if_empty!(vcs_url);
953 fill_if_empty!(copyright);
954 fill_if_empty!(holder);
955 fill_if_empty!(declared_license_expression);
956 fill_if_empty!(declared_license_expression_spdx);
957 fill_if_empty!(other_license_expression);
958 fill_if_empty!(other_license_expression_spdx);
959 fill_if_empty!(extracted_license_statement);
960 fill_if_empty!(notice_text);
961 match (&mut self.extra_data, &package_data.extra_data) {
962 (None, Some(extra_data)) => {
963 self.extra_data = Some(extra_data.clone());
964 }
965 (Some(existing), Some(incoming)) => {
966 for (key, value) in incoming {
967 existing.entry(key.clone()).or_insert_with(|| value.clone());
968 }
969 }
970 _ => {}
971 }
972 fill_if_empty!(repository_homepage_url);
973 fill_if_empty!(repository_download_url);
974 fill_if_empty!(api_data_url);
975
976 for party in &package_data.parties {
977 if let Some(existing) = self.parties.iter_mut().find(|p| {
978 p.role == party.role
979 && ((p.name.is_some() && p.name == party.name)
980 || (p.email.is_some() && p.email == party.email))
981 }) {
982 if existing.name.is_none() {
983 existing.name = party.name.clone();
984 }
985 if existing.email.is_none() {
986 existing.email = party.email.clone();
987 }
988 } else {
989 self.parties.push(party.clone());
990 }
991 }
992
993 for keyword in &package_data.keywords {
994 if !self.keywords.contains(keyword) {
995 self.keywords.push(keyword.clone());
996 }
997 }
998
999 for detection in &package_data.license_detections {
1000 self.license_detections.push(detection.clone());
1001 }
1002
1003 for detection in &package_data.other_license_detections {
1004 self.other_license_detections.push(detection.clone());
1005 }
1006
1007 for source_pkg in &package_data.source_packages {
1008 if !self.source_packages.contains(source_pkg) {
1009 self.source_packages.push(source_pkg.clone());
1010 }
1011 }
1012
1013 self.refresh_identity();
1014 }
1015
1016 pub fn backfill_license_provenance(&mut self) {
1017 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1018 return;
1019 };
1020
1021 for detection in &mut self.license_detections {
1022 enrich_license_detection_provenance(detection, &datafile_path);
1023 }
1024 for detection in &mut self.other_license_detections {
1025 enrich_license_detection_provenance(detection, &datafile_path);
1026 }
1027 }
1028
1029 fn refresh_identity(&mut self) {
1030 let Some(next_purl) = self.build_current_purl() else {
1031 return;
1032 };
1033
1034 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1035 self.package_uid = PackageUid::new(&next_purl);
1036 }
1037
1038 self.purl = Some(next_purl);
1039 }
1040
1041 fn build_current_purl(&self) -> Option<String> {
1042 if let (Some(package_type), Some(name)) = (
1043 self.package_type.as_ref(),
1044 self.name
1045 .as_deref()
1046 .filter(|value| !value.trim().is_empty()),
1047 ) {
1048 let purl_type = match package_type {
1049 PackageType::Deno => "generic",
1050 _ => package_type.as_str(),
1051 };
1052
1053 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1054
1055 if let Some(namespace) = self
1056 .namespace
1057 .as_deref()
1058 .filter(|value| !value.trim().is_empty())
1059 {
1060 purl.with_namespace(namespace).ok()?;
1061 }
1062
1063 if let Some(version) = self
1064 .version
1065 .as_deref()
1066 .filter(|value| !value.trim().is_empty())
1067 {
1068 purl.with_version(version).ok()?;
1069 }
1070
1071 if let Some(qualifiers) = &self.qualifiers {
1072 for (key, value) in qualifiers {
1073 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1074 }
1075 }
1076
1077 if let Some(subpath) = self
1078 .subpath
1079 .as_deref()
1080 .filter(|value| !value.trim().is_empty())
1081 {
1082 purl.with_subpath(subpath).ok()?;
1083 }
1084
1085 return Some(purl.to_string());
1086 }
1087
1088 let existing_purl = self.purl.as_deref()?;
1089 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1090
1091 if let Some(version) = self
1092 .version
1093 .as_deref()
1094 .filter(|value| !value.trim().is_empty())
1095 {
1096 purl.with_version(version).ok()?;
1097 } else {
1098 purl.without_version();
1099 }
1100
1101 Some(purl.to_string())
1102 }
1103}
1104
#[cfg(test)]
mod tests {
    use super::*;

    /// Path used as the data file location in both tests.
    const DATAFILE_PATH: &str = "project/package.json";

    /// Matcher name given to the fixture match; the tests expect it to show
    /// up as the match's `rule_identifier` after backfilling.
    const DECLARED_MATCHER: &str = "parser-declared-license";

    /// Npm package data holding a single declared MIT detection whose match
    /// has no `from_file`, no `rule_identifier`, and whose detection has no
    /// `identifier` yet.
    fn npm_package_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some(DECLARED_MATCHER.to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    /// Checks that the first match of `detection` points at the data file,
    /// carries the matcher-derived rule identifier, and that the detection
    /// itself received an identifier.
    #[track_caller]
    fn assert_fully_backfilled(detection: &LicenseDetection) {
        let first_match = &detection.matches[0];
        assert_eq!(first_match.from_file.as_deref(), Some(DATAFILE_PATH));
        assert_eq!(
            first_match.rule_identifier.as_deref(),
            Some(DECLARED_MATCHER)
        );
        assert!(detection.identifier.is_some());
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            DATAFILE_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![npm_package_with_declared_mit()],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections: only provenance path and identifier are checked.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_level = &file_info.license_detections[0];
        assert_eq!(
            file_level.matches[0].from_file.as_deref(),
            Some(DATAFILE_PATH)
        );
        assert!(file_level.identifier.is_some());

        // Package-level detections additionally get a rule identifier.
        assert_fully_backfilled(&file_info.package_data[0].license_detections[0]);
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_with_declared_mit();

        let package = Package::from_package_data(&package_data, DATAFILE_PATH.to_string());

        assert_fully_backfilled(&package.license_detections[0]);
    }
}
1240
/// A dependency record that carries, next to the fields copied from a
/// [`Dependency`], the identifiers linking it to a data file, a datasource
/// and (optionally) a package.
///
/// Instances are built with [`TopLevelDependency::from_dependency`].
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL string of the dependency, when one is known.
    pub purl: Option<String>,
    /// Requirement text copied from the source `Dependency`.
    /// NOTE(review): exact format presumably depends on the parser — confirm.
    pub extracted_requirement: Option<String>,
    /// Dependency scope copied from the source `Dependency`.
    /// NOTE(review): allowed values are not visible here — confirm.
    pub scope: Option<String>,
    /// Runtime flag; `None` presumably means "unknown" — confirm.
    pub is_runtime: Option<bool>,
    /// Optional flag; `None` presumably means "unknown" — confirm.
    pub is_optional: Option<bool>,
    /// Pinned flag; `None` presumably means "unknown" — confirm.
    pub is_pinned: Option<bool>,
    /// Direct-dependency flag; `None` presumably means "unknown" — confirm.
    pub is_direct: Option<bool>,
    /// Boxed resolved-package details copied from the source `Dependency`, if any.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON values keyed by string. Because of `#[serde(default)]`
    /// the key may be absent on input, in which case this is `None`.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// Identifier of this dependency. `from_dependency` derives it from
    /// `purl`, or uses `DependencyUid::empty()` when there is no purl.
    pub dependency_uid: DependencyUid,
    /// UID of the package this dependency is associated with, if any.
    pub for_package_uid: Option<PackageUid>,
    /// Path of the data file associated with this dependency.
    /// NOTE(review): presumably the manifest it was read from — confirm.
    pub datafile_path: String,
    /// Identifier of the datasource associated with this dependency.
    pub datasource_id: DatasourceId,
    /// Namespace value; `from_dependency` always initialises it to `None`.
    pub namespace: Option<String>,
}
1268
1269impl TopLevelDependency {
1270 pub fn from_dependency(
1272 dep: &Dependency,
1273 datafile_path: String,
1274 datasource_id: DatasourceId,
1275 for_package_uid: Option<PackageUid>,
1276 ) -> Self {
1277 let dependency_uid = dep
1278 .purl
1279 .as_ref()
1280 .map(|p| DependencyUid::new(p))
1281 .unwrap_or_else(DependencyUid::empty);
1282
1283 TopLevelDependency {
1284 purl: dep.purl.clone(),
1285 extracted_requirement: dep.extracted_requirement.clone(),
1286 scope: dep.scope.clone(),
1287 is_runtime: dep.is_runtime,
1288 is_optional: dep.is_optional,
1289 is_pinned: dep.is_pinned,
1290 is_direct: dep.is_direct,
1291 resolved_package: dep.resolved_package.clone(),
1292 extra_data: dep.extra_data.clone(),
1293 dependency_uid,
1294 for_package_uid,
1295 datafile_path,
1296 datasource_id,
1297 namespace: None,
1298 }
1299 }
1300}
1301
/// An email address together with a start/end line pair.
///
/// NOTE(review): the lines are presumably where the address was found in the
/// scanned file — confirm against the code that fills this in.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the associated range.
    pub start_line: LineNumber,
    /// Last line of the associated range.
    pub end_line: LineNumber,
}
1308
/// A URL together with a start/end line pair.
///
/// NOTE(review): the lines are presumably where the URL was found in the
/// scanned file — confirm against the code that fills this in.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the associated range.
    pub start_line: LineNumber,
    /// Last line of the associated range.
    pub end_line: LineNumber,
}
1315
/// One license-policy record: a license key plus presentation attributes.
///
/// NOTE(review): the exact meaning and format of each attribute comes from
/// the policy source, which is not visible here — confirm.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    /// Key of the license this entry applies to.
    pub license_key: String,
    /// Label attached to the license.
    pub label: String,
    /// Color code attached to the license (format not established here).
    pub color_code: String,
    /// Icon attached to the license (format not established here).
    pub icon: String,
}
1323
/// Kind of entry a `FileInfo` describes.
///
/// The hand-written serde impls for this type map the variants to the
/// strings `"file"` and `"directory"`.
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
    /// A file entry; serialized as `"file"`.
    File,
    /// A directory entry; serialized as `"directory"`.
    Directory,
}
1329
1330impl serde::Serialize for FileType {
1331 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1332 where
1333 S: serde::Serializer,
1334 {
1335 match self {
1336 FileType::File => serializer.serialize_str("file"),
1337 FileType::Directory => serializer.serialize_str("directory"),
1338 }
1339 }
1340}
1341
1342impl<'de> Deserialize<'de> for FileType {
1343 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1344 where
1345 D: serde::Deserializer<'de>,
1346 {
1347 let value = String::deserialize(deserializer)?;
1348 match value.as_str() {
1349 "file" => Ok(FileType::File),
1350 "directory" => Ok(FileType::Directory),
1351 _ => Err(serde::de::Error::custom("invalid file type")),
1352 }
1353 }
1354}