1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use sha1::{Digest, Sha1};
5use std::collections::HashMap;
6use std::str::FromStr;
7
8use super::DatasourceId;
9use super::DependencyUid;
10use super::GitSha1;
11use super::LineNumber;
12use super::MatchScore;
13use super::Md5Digest;
14use super::PackageType;
15use super::PackageUid;
16use super::Sha1Digest;
17use super::Sha256Digest;
18use super::Sha512Digest;
19use crate::license_detection::tokenize::tokenize_without_stopwords;
20use crate::models::output::Tallies;
21use crate::utils::spdx::combine_license_expressions;
22
/// Information recorded about a single scanned path (presumably one file or
/// directory entry — see `file_type`).
///
/// The struct is (de)serializable with serde and has a `derive_builder`
/// builder. The generated `build` function is skipped (`build_fn(skip)`), so
/// the builder's `build` method has to be written by hand.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    // Required fields: no builder/serde default is declared for these.
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")] pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `file_type` (not to be confused with the
    /// Rust field `file_type`, which is serialized as `type`).
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    // Optional content digests.
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")] #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    // Optional tri-state classification flags (`None` when not set).
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    // Optional counters (presumably aggregated for directories — TODO confirm).
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // Plain boolean flags; they default to `false` when absent.
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
155
156impl FileInfoBuilder {
157 pub fn build(&self) -> Result<FileInfo, String> {
159 let mut file_info = FileInfo::new(
160 self.name.clone().ok_or("Missing field: name")?,
161 self.base_name.clone().ok_or("Missing field: base_name")?,
162 self.extension.clone().ok_or("Missing field: extension")?,
163 self.path.clone().ok_or("Missing field: path")?,
164 self.file_type.clone().ok_or("Missing field: file_type")?,
165 self.mime_type.clone().flatten(),
166 self.file_type_label.clone().flatten(),
167 self.size.ok_or("Missing field: size")?,
168 self.date.clone().flatten(),
169 self.sha1.flatten(),
170 self.md5.flatten(),
171 self.sha256.flatten(),
172 self.programming_language.clone().flatten(),
173 self.package_data.clone().unwrap_or_default(),
174 self.license_expression.clone().flatten(),
175 self.license_detections.clone().unwrap_or_default(),
176 self.license_clues.clone().unwrap_or_default(),
177 self.copyrights.clone().unwrap_or_default(),
178 self.holders.clone().unwrap_or_default(),
179 self.authors.clone().unwrap_or_default(),
180 self.emails.clone().unwrap_or_default(),
181 self.urls.clone().unwrap_or_default(),
182 self.for_packages.clone().unwrap_or_default(),
183 self.scan_errors.clone().unwrap_or_default(),
184 );
185 file_info.license_policy = self.license_policy.clone().flatten();
186 file_info.sha1_git = self.sha1_git.flatten();
187 file_info.is_binary = self.is_binary.flatten();
188 file_info.is_text = self.is_text.flatten();
189 file_info.is_archive = self.is_archive.flatten();
190 file_info.is_media = self.is_media.flatten();
191 file_info.is_script = self.is_script.flatten();
192 file_info.files_count = self.files_count.flatten();
193 file_info.dirs_count = self.dirs_count.flatten();
194 file_info.size_count = self.size_count.flatten();
195 Ok(file_info)
196 }
197}
198
199impl FileInfo {
200 #[allow(clippy::too_many_arguments)]
201 pub fn new(
203 name: String,
204 base_name: String,
205 extension: String,
206 path: String,
207 file_type: FileType,
208 mime_type: Option<String>,
209 file_type_label: Option<String>,
210 size: u64,
211 date: Option<String>,
212 sha1: Option<Sha1Digest>,
213 md5: Option<Md5Digest>,
214 sha256: Option<Sha256Digest>,
215 programming_language: Option<String>,
216 package_data: Vec<PackageData>,
217 mut license_expression: Option<String>,
218 mut license_detections: Vec<LicenseDetection>,
219 license_clues: Vec<Match>,
220 copyrights: Vec<Copyright>,
221 holders: Vec<Holder>,
222 authors: Vec<Author>,
223 emails: Vec<OutputEmail>,
224 urls: Vec<OutputURL>,
225 for_packages: Vec<PackageUid>,
226 scan_errors: Vec<String>,
227 ) -> Self {
228 let mut package_data = package_data;
229 for package in &mut package_data {
230 enrich_package_data_license_provenance(package, &path);
231 }
232
233 license_expression = license_expression.or_else(|| {
235 let expressions = package_data
236 .iter()
237 .filter_map(|pkg| pkg.get_license_expression());
238 combine_license_expressions(expressions)
239 });
240
241 if license_detections.is_empty() {
243 for pkg in &package_data {
244 license_detections.extend(pkg.license_detections.clone());
245 }
246 }
247
248 if license_expression.is_none() && !license_detections.is_empty() {
250 let expressions = license_detections
251 .iter()
252 .map(|detection| detection.license_expression.clone());
253 license_expression = combine_license_expressions(expressions);
254 }
255
256 let mut file_info = FileInfo {
257 name,
258 base_name,
259 extension,
260 path,
261 file_type,
262 mime_type,
263 file_type_label,
264 size,
265 date,
266 sha1,
267 md5,
268 sha256,
269 sha1_git: None,
270 programming_language,
271 package_data,
272 license_expression,
273 license_detections,
274 license_clues,
275 percentage_of_license_text: None,
276 copyrights,
277 holders,
278 authors,
279 emails,
280 urls,
281 for_packages,
282 scan_errors,
283 license_policy: None,
284 is_generated: None,
285 is_binary: None,
286 is_text: None,
287 is_archive: None,
288 is_media: None,
289 is_source: None,
290 is_script: None,
291 files_count: None,
292 dirs_count: None,
293 size_count: None,
294 source_count: None,
295 is_legal: false,
296 is_manifest: false,
297 is_readme: false,
298 is_top_level: false,
299 is_key_file: false,
300 is_community: false,
301 facets: vec![],
302 tallies: None,
303 };
304 file_info.backfill_license_provenance();
305 file_info
306 }
307
308 pub fn backfill_license_provenance(&mut self) {
309 for detection in &mut self.license_detections {
310 enrich_license_detection_provenance(detection, &self.path);
311 }
312
313 for package in &mut self.package_data {
314 enrich_package_data_license_provenance(package, &self.path);
315 }
316 }
317}
318
319fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
320 for detection in &mut package_data.license_detections {
321 enrich_license_detection_provenance(detection, path);
322 }
323 for detection in &mut package_data.other_license_detections {
324 enrich_license_detection_provenance(detection, path);
325 }
326}
327
328pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
329 for detection_match in &mut detection.matches {
330 if detection_match.from_file.is_none() {
331 detection_match.from_file = Some(path.to_string());
332 }
333 }
334
335 if detection.identifier.is_none() {
336 detection.identifier = Some(compute_public_detection_identifier(detection));
337 }
338}
339
340fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
341 let expression = python_safe_name(&detection.license_expression);
342 let mut hasher = Sha1::new();
343 hasher.update(format_public_detection_content(detection).as_bytes());
344 let hex_str = hex::encode(hasher.finalize());
345 let uuid_hex = &hex_str[..32];
346 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
347 .map(|uuid| uuid.to_string())
348 .unwrap_or_else(|_| uuid_hex.to_string());
349
350 format!("{}-{}", expression, content_uuid)
351}
352
353fn format_public_detection_content(detection: &LicenseDetection) -> String {
354 let mut result = String::from("(");
355
356 for (index, detection_match) in detection.matches.iter().enumerate() {
357 if index > 0 {
358 result.push_str(", ");
359 }
360 result.push_str(&format!(
361 "({}, {}, {})",
362 python_str_repr(
363 detection_match
364 .rule_identifier
365 .as_deref()
366 .or(detection_match.matcher.as_deref())
367 .unwrap_or("parser-declared-license")
368 ),
369 detection_match.score.value() as f32,
370 python_token_tuple_repr(&tokenize_without_stopwords(
371 detection_match.matched_text.as_deref().unwrap_or_default(),
372 )),
373 ));
374 }
375
376 if detection.matches.len() == 1 {
377 result.push(',');
378 }
379 result.push(')');
380 result
381}
382
/// Turns `value` into a name made only of alphanumeric runs joined by single
/// underscores.
///
/// Every run of non-alphanumeric characters (including `_` itself) collapses
/// into one `_`, and leading/trailing underscores are dropped. An input
/// without any alphanumeric character yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
404
/// Quotes `value` following the quote selection of Python's `repr` for `str`.
///
/// Double quotes are used only when the text contains a single quote and no
/// double quote; otherwise single quotes are used. Backslashes and the chosen
/// quote character are backslash-escaped; every other character (control
/// characters included) is emitted unchanged.
fn python_str_repr(value: &str) -> String {
    let prefers_double = value.contains('\'') && !value.contains('"');
    let quote = if prefers_double { '"' } else { '\'' };

    let mut rendered = String::with_capacity(value.len() + 2);
    rendered.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            rendered.push('\\');
        }
        rendered.push(character);
    }
    rendered.push(quote);
    rendered
}
412
413fn python_token_tuple_repr(tokens: &[String]) -> String {
414 if tokens.is_empty() {
415 return String::from("()");
416 }
417
418 let mut result = String::from("(");
419 for (index, token) in tokens.iter().enumerate() {
420 if index > 0 {
421 result.push_str(", ");
422 }
423 result.push_str(&python_str_repr(token));
424 }
425
426 if tokens.len() == 1 {
427 result.push(',');
428 }
429 result.push(')');
430 result
431}
432
/// Package metadata (presumably as extracted from a single package
/// manifest/datafile — see `datasource_id`).
///
/// Every field is optional or has a default, so the type derives `Default`
/// and can be built with struct-update syntax.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type`.
    #[serde(rename = "type")] pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Detections whose expressions are combined by `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    /// Package URL string; `Package::from_package_data` derives the package
    /// UID from it.
    pub purl: Option<String>,
}
495
496impl PackageData {
497 pub fn get_license_expression(&self) -> Option<String> {
500 if self.license_detections.is_empty() {
501 return None;
502 }
503
504 let expressions = self
505 .license_detections
506 .iter()
507 .map(|detection| detection.license_expression.clone());
508 combine_license_expressions(expressions)
509 }
510}
511
/// A license detection: an overall expression together with the matches it
/// was built from.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    #[serde(default)]
    pub detection_log: Vec<String>,
    /// Filled in by `enrich_license_detection_provenance` when it is `None`.
    pub identifier: Option<String>,
}
524
/// A single license match inside a [`LicenseDetection`] (also used for
/// `FileInfo::license_clues`).
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// Filled in by `enrich_license_detection_provenance` when it is `None`.
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    /// Used as the identifier label when `rule_identifier` is absent.
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    /// Preferred label when the detection identifier is computed.
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    /// Tokenized and hashed into the detection identifier.
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
547
/// A copyright statement together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
554
/// A holder entry together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
561
/// An author entry together with the line range it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
568
/// A dependency entry of a package (see `PackageData::dependencies`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    // Boxed: `ResolvedPackage` itself contains a `Vec<Dependency>`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
586
/// Package record attached to a [`Dependency`] via `resolved_package`.
///
/// It has the same fields as [`PackageData`], except that `package_type`,
/// `namespace`, `name` and `version` are mandatory here.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
644
645impl ResolvedPackage {
646 pub fn new(
647 package_type: PackageType,
648 namespace: String,
649 name: String,
650 version: String,
651 ) -> Self {
652 Self {
653 package_type,
654 namespace,
655 name,
656 version,
657 qualifiers: None,
658 subpath: None,
659 primary_language: None,
660 description: None,
661 release_date: None,
662 parties: vec![],
663 keywords: vec![],
664 homepage_url: None,
665 download_url: None,
666 size: None,
667 sha1: None,
668 md5: None,
669 sha256: None,
670 sha512: None,
671 bug_tracking_url: None,
672 code_view_url: None,
673 vcs_url: None,
674 copyright: None,
675 holder: None,
676 declared_license_expression: None,
677 declared_license_expression_spdx: None,
678 license_detections: vec![],
679 other_license_expression: None,
680 other_license_expression_spdx: None,
681 other_license_detections: vec![],
682 extracted_license_statement: None,
683 notice_text: None,
684 source_packages: vec![],
685 file_references: vec![],
686 is_private: false,
687 is_virtual: false,
688 extra_data: None,
689 dependencies: vec![],
690 repository_homepage_url: None,
691 repository_download_url: None,
692 api_data_url: None,
693 datasource_id: None,
694 purl: None,
695 }
696 }
697
698 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
699 Self {
700 package_type: package_data.package_type.unwrap_or(fallback_type),
701 namespace: package_data.namespace.clone().unwrap_or_default(),
702 name: package_data.name.clone().unwrap_or_default(),
703 version: package_data.version.clone().unwrap_or_default(),
704 qualifiers: package_data.qualifiers.clone(),
705 subpath: package_data.subpath.clone(),
706 primary_language: package_data.primary_language.clone(),
707 description: package_data.description.clone(),
708 release_date: package_data.release_date.clone(),
709 parties: package_data.parties.clone(),
710 keywords: package_data.keywords.clone(),
711 homepage_url: package_data.homepage_url.clone(),
712 download_url: package_data.download_url.clone(),
713 size: package_data.size,
714 sha1: package_data.sha1,
715 md5: package_data.md5,
716 sha256: package_data.sha256,
717 sha512: package_data.sha512,
718 bug_tracking_url: package_data.bug_tracking_url.clone(),
719 code_view_url: package_data.code_view_url.clone(),
720 vcs_url: package_data.vcs_url.clone(),
721 copyright: package_data.copyright.clone(),
722 holder: package_data.holder.clone(),
723 declared_license_expression: package_data.declared_license_expression.clone(),
724 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
725 license_detections: package_data.license_detections.clone(),
726 other_license_expression: package_data.other_license_expression.clone(),
727 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
728 other_license_detections: package_data.other_license_detections.clone(),
729 extracted_license_statement: package_data.extracted_license_statement.clone(),
730 notice_text: package_data.notice_text.clone(),
731 source_packages: package_data.source_packages.clone(),
732 file_references: package_data.file_references.clone(),
733 is_private: package_data.is_private,
734 is_virtual: package_data.is_virtual,
735 extra_data: package_data.extra_data.clone(),
736 dependencies: package_data.dependencies.clone(),
737 repository_homepage_url: package_data.repository_homepage_url.clone(),
738 repository_download_url: package_data.repository_download_url.clone(),
739 api_data_url: package_data.api_data_url.clone(),
740 datasource_id: package_data.datasource_id,
741 purl: package_data.purl.clone(),
742 }
743 }
744}
745
/// A party listed on a package (see `PackageData::parties`).
///
/// `Package::update` treats two parties as the same one when their `role`
/// is equal and they share a non-empty `name` or `email`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Party {
    // Raw identifier because `type` is a Rust keyword.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
760
/// A file referenced by a package (see `PackageData::file_references`),
/// with optional size and digests.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
774
/// A package built from one or more [`PackageData`] records.
///
/// Compared with `PackageData` it has no `file_references`, `dependencies`
/// or single `datasource_id`; instead it carries a `package_uid` plus the
/// lists of datafile paths and datasource ids that contributed to it.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    /// Rebuilt from the identity fields after every `update`.
    pub purl: Option<String>,
    /// Created from the purl; empty when no purl is available.
    pub package_uid: PackageUid,
    /// One entry per `PackageData` that was merged into this package.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the merged `PackageData` records that had one.
    pub datasource_ids: Vec<DatasourceId>,
}
842
843impl Package {
844 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
849 let mut package_data = package_data.clone();
850 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
851
852 let package_uid = package_data
853 .purl
854 .as_ref()
855 .map(|p| PackageUid::new(p))
856 .unwrap_or_else(PackageUid::empty);
857
858 Package {
859 package_type: package_data.package_type,
860 namespace: package_data.namespace.clone(),
861 name: package_data.name.clone(),
862 version: package_data.version.clone(),
863 qualifiers: package_data.qualifiers.clone(),
864 subpath: package_data.subpath.clone(),
865 primary_language: package_data.primary_language.clone(),
866 description: package_data.description.clone(),
867 release_date: package_data.release_date.clone(),
868 parties: package_data.parties.clone(),
869 keywords: package_data.keywords.clone(),
870 homepage_url: package_data.homepage_url.clone(),
871 download_url: package_data.download_url.clone(),
872 size: package_data.size,
873 sha1: package_data.sha1,
874 md5: package_data.md5,
875 sha256: package_data.sha256,
876 sha512: package_data.sha512,
877 bug_tracking_url: package_data.bug_tracking_url.clone(),
878 code_view_url: package_data.code_view_url.clone(),
879 vcs_url: package_data.vcs_url.clone(),
880 copyright: package_data.copyright.clone(),
881 holder: package_data.holder.clone(),
882 declared_license_expression: package_data.declared_license_expression.clone(),
883 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
884 license_detections: package_data.license_detections.clone(),
885 other_license_expression: package_data.other_license_expression.clone(),
886 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
887 other_license_detections: package_data.other_license_detections.clone(),
888 extracted_license_statement: package_data.extracted_license_statement.clone(),
889 notice_text: package_data.notice_text.clone(),
890 source_packages: package_data.source_packages.clone(),
891 is_private: package_data.is_private,
892 is_virtual: package_data.is_virtual,
893 extra_data: package_data.extra_data.clone(),
894 repository_homepage_url: package_data.repository_homepage_url.clone(),
895 repository_download_url: package_data.repository_download_url.clone(),
896 api_data_url: package_data.api_data_url.clone(),
897 purl: package_data.purl.clone(),
898 package_uid,
899 datafile_paths: vec![datafile_path],
900 datasource_ids: if let Some(dsid) = package_data.datasource_id {
901 vec![dsid]
902 } else {
903 vec![]
904 },
905 }
906 }
907
908 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
914 let mut package_data = package_data.clone();
915 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
916
917 if let Some(dsid) = package_data.datasource_id {
918 self.datasource_ids.push(dsid);
919 }
920 self.datafile_paths.push(datafile_path);
921
922 macro_rules! fill_if_empty {
923 ($field:ident) => {
924 if self.$field.is_none() {
925 self.$field = package_data.$field;
926 }
927 };
928 }
929
930 fill_if_empty!(package_type);
931 fill_if_empty!(name);
932 fill_if_empty!(namespace);
933 fill_if_empty!(version);
934 fill_if_empty!(qualifiers);
935 fill_if_empty!(subpath);
936 fill_if_empty!(primary_language);
937 fill_if_empty!(description);
938 fill_if_empty!(release_date);
939 fill_if_empty!(homepage_url);
940 fill_if_empty!(download_url);
941 fill_if_empty!(size);
942 fill_if_empty!(sha1);
943 fill_if_empty!(md5);
944 fill_if_empty!(sha256);
945 fill_if_empty!(sha512);
946 fill_if_empty!(bug_tracking_url);
947 fill_if_empty!(code_view_url);
948 fill_if_empty!(vcs_url);
949 fill_if_empty!(copyright);
950 fill_if_empty!(holder);
951 fill_if_empty!(declared_license_expression);
952 fill_if_empty!(declared_license_expression_spdx);
953 fill_if_empty!(other_license_expression);
954 fill_if_empty!(other_license_expression_spdx);
955 fill_if_empty!(extracted_license_statement);
956 fill_if_empty!(notice_text);
957 match (&mut self.extra_data, &package_data.extra_data) {
958 (None, Some(extra_data)) => {
959 self.extra_data = Some(extra_data.clone());
960 }
961 (Some(existing), Some(incoming)) => {
962 for (key, value) in incoming {
963 existing.entry(key.clone()).or_insert_with(|| value.clone());
964 }
965 }
966 _ => {}
967 }
968 fill_if_empty!(repository_homepage_url);
969 fill_if_empty!(repository_download_url);
970 fill_if_empty!(api_data_url);
971
972 for party in &package_data.parties {
973 if let Some(existing) = self.parties.iter_mut().find(|p| {
974 p.role == party.role
975 && ((p.name.is_some() && p.name == party.name)
976 || (p.email.is_some() && p.email == party.email))
977 }) {
978 if existing.name.is_none() {
979 existing.name = party.name.clone();
980 }
981 if existing.email.is_none() {
982 existing.email = party.email.clone();
983 }
984 } else {
985 self.parties.push(party.clone());
986 }
987 }
988
989 for keyword in &package_data.keywords {
990 if !self.keywords.contains(keyword) {
991 self.keywords.push(keyword.clone());
992 }
993 }
994
995 for detection in &package_data.license_detections {
996 self.license_detections.push(detection.clone());
997 }
998
999 for detection in &package_data.other_license_detections {
1000 self.other_license_detections.push(detection.clone());
1001 }
1002
1003 for source_pkg in &package_data.source_packages {
1004 if !self.source_packages.contains(source_pkg) {
1005 self.source_packages.push(source_pkg.clone());
1006 }
1007 }
1008
1009 self.refresh_identity();
1010 }
1011
1012 pub fn backfill_license_provenance(&mut self) {
1013 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1014 return;
1015 };
1016
1017 for detection in &mut self.license_detections {
1018 enrich_license_detection_provenance(detection, &datafile_path);
1019 }
1020 for detection in &mut self.other_license_detections {
1021 enrich_license_detection_provenance(detection, &datafile_path);
1022 }
1023 }
1024
1025 fn refresh_identity(&mut self) {
1026 let Some(next_purl) = self.build_current_purl() else {
1027 return;
1028 };
1029
1030 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1031 self.package_uid = PackageUid::new(&next_purl);
1032 }
1033
1034 self.purl = Some(next_purl);
1035 }
1036
1037 fn build_current_purl(&self) -> Option<String> {
1038 if let (Some(package_type), Some(name)) = (
1039 self.package_type.as_ref(),
1040 self.name
1041 .as_deref()
1042 .filter(|value| !value.trim().is_empty()),
1043 ) {
1044 let purl_type = match package_type {
1045 PackageType::Deno => "generic",
1046 _ => package_type.as_str(),
1047 };
1048
1049 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1050
1051 if let Some(namespace) = self
1052 .namespace
1053 .as_deref()
1054 .filter(|value| !value.trim().is_empty())
1055 {
1056 purl.with_namespace(namespace).ok()?;
1057 }
1058
1059 if let Some(version) = self
1060 .version
1061 .as_deref()
1062 .filter(|value| !value.trim().is_empty())
1063 {
1064 purl.with_version(version).ok()?;
1065 }
1066
1067 if let Some(qualifiers) = &self.qualifiers {
1068 for (key, value) in qualifiers {
1069 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1070 }
1071 }
1072
1073 if let Some(subpath) = self
1074 .subpath
1075 .as_deref()
1076 .filter(|value| !value.trim().is_empty())
1077 {
1078 purl.with_subpath(subpath).ok()?;
1079 }
1080
1081 return Some(purl.to_string());
1082 }
1083
1084 let existing_purl = self.purl.as_deref()?;
1085 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1086
1087 if let Some(version) = self
1088 .version
1089 .as_deref()
1090 .filter(|value| !value.trim().is_empty())
1091 {
1092 purl.with_version(version).ok()?;
1093 } else {
1094 purl.without_version();
1095 }
1096
1097 Some(purl.to_string())
1098 }
1099}
1100
#[cfg(test)]
mod tests {
    use super::*;

    /// Datafile path that both tests expect to find on the backfilled matches.
    const MANIFEST_PATH: &str = "project/package.json";

    /// Builds npm package data holding a single parser-declared MIT detection
    /// whose match has no `from_file` and whose detection has no `identifier`.
    fn npm_package_with_unattributed_mit_detection() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some("parser-declared-license".to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let fixture = npm_package_with_unattributed_mit_detection();

        let info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            MANIFEST_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![fixture],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections carry the provenance...
        assert_eq!(info.license_detections.len(), 1);
        let file_level = &info.license_detections[0];
        assert_eq!(
            file_level.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(file_level.identifier.is_some());

        // ...and so do the detections kept inside the package data.
        let package_level = &info.package_data[0].license_detections[0];
        assert_eq!(
            package_level.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(package_level.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let fixture = npm_package_with_unattributed_mit_detection();

        let built = Package::from_package_data(&fixture, MANIFEST_PATH.to_string());

        let detection = &built.license_detections[0];
        assert_eq!(
            detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(detection.identifier.is_some());
    }
}
1224
/// Dependency record that pairs the fields of a [`Dependency`] with the
/// datafile path, datasource id and optional package UID it is reported under.
///
/// See [`TopLevelDependency::from_dependency`] for how each field is filled
/// from a `Dependency`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL of the dependency, copied from the source `Dependency`.
    pub purl: Option<String>,
    /// Copied from the source `Dependency`.
    pub extracted_requirement: Option<String>,
    /// Copied from the source `Dependency`.
    pub scope: Option<String>,
    /// Copied from the source `Dependency`.
    pub is_runtime: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_optional: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_pinned: Option<bool>,
    /// Copied from the source `Dependency`.
    pub is_direct: Option<bool>,
    /// Boxed `ResolvedPackage` cloned from the source `Dependency`, if it has one.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON values cloned from the source `Dependency`.
    /// `#[serde(default)]` is set, so a missing key deserializes as `None`.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// `from_dependency` derives this from `purl` via `DependencyUid::new`;
    /// without a purl it is `DependencyUid::empty()`.
    pub dependency_uid: DependencyUid,
    /// Package UID handed to `from_dependency`, if any.
    /// NOTE(review): presumably the package that owns this dependency — confirm.
    pub for_package_uid: Option<PackageUid>,
    /// Datafile path handed to `from_dependency`.
    /// NOTE(review): presumably the file that declared the dependency — confirm.
    pub datafile_path: String,
    /// Datasource identifier handed to `from_dependency`.
    pub datasource_id: DatasourceId,
    /// `from_dependency` always leaves this as `None`.
    /// NOTE(review): confirm which code path populates it.
    pub namespace: Option<String>,
}
1252
1253impl TopLevelDependency {
1254 pub fn from_dependency(
1256 dep: &Dependency,
1257 datafile_path: String,
1258 datasource_id: DatasourceId,
1259 for_package_uid: Option<PackageUid>,
1260 ) -> Self {
1261 let dependency_uid = dep
1262 .purl
1263 .as_ref()
1264 .map(|p| DependencyUid::new(p))
1265 .unwrap_or_else(DependencyUid::empty);
1266
1267 TopLevelDependency {
1268 purl: dep.purl.clone(),
1269 extracted_requirement: dep.extracted_requirement.clone(),
1270 scope: dep.scope.clone(),
1271 is_runtime: dep.is_runtime,
1272 is_optional: dep.is_optional,
1273 is_pinned: dep.is_pinned,
1274 is_direct: dep.is_direct,
1275 resolved_package: dep.resolved_package.clone(),
1276 extra_data: dep.extra_data.clone(),
1277 dependency_uid,
1278 for_package_uid,
1279 datafile_path,
1280 datasource_id,
1281 namespace: None,
1282 }
1283 }
1284}
1285
/// An email address together with a start/end line range.
///
/// NOTE(review): the lines presumably locate the address inside the scanned
/// file — confirm against the code that builds these records.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the range.
    pub start_line: LineNumber,
    /// Last line of the range.
    pub end_line: LineNumber,
}
1292
/// A URL together with a start/end line range.
///
/// NOTE(review): the lines presumably locate the URL inside the scanned
/// file — confirm against the code that builds these records.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the range.
    pub start_line: LineNumber,
    /// Last line of the range.
    pub end_line: LineNumber,
}
1299
/// A license key together with the label, color code and icon attached to it.
///
/// NOTE(review): presumably one entry of a license policy applied to scan
/// results — confirm against the code that loads these entries.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    /// Key of the license this entry refers to.
    pub license_key: String,
    /// Label text attached to the license.
    pub label: String,
    /// Color code attached to the license (format not visible here).
    pub color_code: String,
    /// Icon attached to the license (format not visible here).
    pub icon: String,
}
1307
/// Kind of filesystem entry a record describes.
///
/// The hand-written serde implementations in this file represent the variants
/// as the strings `"file"` and `"directory"`.
///
/// `Eq` and `Hash` are derived alongside `PartialEq` so the type can be used
/// as a key in hashed collections; equality on a fieldless enum is total.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum FileType {
    /// A regular file.
    File,
    /// A directory.
    Directory,
}
1313
1314impl serde::Serialize for FileType {
1315 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1316 where
1317 S: serde::Serializer,
1318 {
1319 match self {
1320 FileType::File => serializer.serialize_str("file"),
1321 FileType::Directory => serializer.serialize_str("directory"),
1322 }
1323 }
1324}
1325
1326impl<'de> Deserialize<'de> for FileType {
1327 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1328 where
1329 D: serde::Deserializer<'de>,
1330 {
1331 let value = String::deserialize(deserializer)?;
1332 match value.as_str() {
1333 "file" => Ok(FileType::File),
1334 "directory" => Ok(FileType::Directory),
1335 _ => Err(serde::de::Error::custom("invalid file type")),
1336 }
1337 }
1338}