1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use sha1::{Digest, Sha1};
5use std::collections::HashMap;
6use std::str::FromStr;
7
8use super::DatasourceId;
9use super::DependencyUid;
10use super::GitSha1;
11use super::LineNumber;
12use super::MatchScore;
13use super::Md5Digest;
14use super::PackageType;
15use super::PackageUid;
16use super::Sha1Digest;
17use super::Sha256Digest;
18use super::Sha512Digest;
19use crate::license_detection::tokenize::tokenize_without_stopwords;
20use crate::models::output::Tallies;
21use crate::utils::spdx::combine_license_expressions;
22
/// Scan record for a single path: file metadata, digests, detected license
/// and copyright data, package data and a set of boolean/count attributes.
///
/// The type round-trips through serde. A `FileInfoBuilder` is derived via
/// `derive_builder`, but its generated `build` method is skipped
/// (`build_fn(skip)`), so `build` has to be provided by hand.
#[derive(Debug, Builder, Serialize, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")] pub file_type: FileType,
    #[builder(default)]
    #[serde(default)]
    pub mime_type: Option<String>,
    /// Serialized under the key `file_type` (distinct from `file_type` above,
    /// which is serialized as `type`).
    #[builder(default)]
    #[serde(rename = "file_type", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(default)]
    pub date: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub sha1: Option<Sha1Digest>,
    #[builder(default)]
    #[serde(default)]
    pub md5: Option<Md5Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha256: Option<Sha256Digest>,
    #[builder(default)]
    #[serde(default)]
    pub sha1_git: Option<GitSha1>,
    #[builder(default)]
    #[serde(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")] #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<PackageUid>,
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    // The `Option<bool>` / `Option<count>` attributes below default to `None`
    // both in the builder and when the key is absent during deserialization.
    #[builder(default)]
    #[serde(default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub is_script: Option<bool>,
    #[builder(default)]
    #[serde(default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(default)]
    pub source_count: Option<usize>,
    // The plain `bool` flags below default to `false` when absent.
    #[builder(default)]
    #[serde(default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub tallies: Option<Tallies>,
}
155
156impl FileInfoBuilder {
157 pub fn build(&self) -> Result<FileInfo, String> {
159 let mut file_info = FileInfo::new(
160 self.name.clone().ok_or("Missing field: name")?,
161 self.base_name.clone().ok_or("Missing field: base_name")?,
162 self.extension.clone().ok_or("Missing field: extension")?,
163 self.path.clone().ok_or("Missing field: path")?,
164 self.file_type.clone().ok_or("Missing field: file_type")?,
165 self.mime_type.clone().flatten(),
166 self.file_type_label.clone().flatten(),
167 self.size.ok_or("Missing field: size")?,
168 self.date.clone().flatten(),
169 self.sha1.flatten(),
170 self.md5.flatten(),
171 self.sha256.flatten(),
172 self.programming_language.clone().flatten(),
173 self.package_data.clone().unwrap_or_default(),
174 self.license_expression.clone().flatten(),
175 self.license_detections.clone().unwrap_or_default(),
176 self.license_clues.clone().unwrap_or_default(),
177 self.copyrights.clone().unwrap_or_default(),
178 self.holders.clone().unwrap_or_default(),
179 self.authors.clone().unwrap_or_default(),
180 self.emails.clone().unwrap_or_default(),
181 self.urls.clone().unwrap_or_default(),
182 self.for_packages.clone().unwrap_or_default(),
183 self.scan_errors.clone().unwrap_or_default(),
184 );
185 file_info.license_policy = self.license_policy.clone().flatten();
186 file_info.sha1_git = self.sha1_git.flatten();
187 file_info.is_binary = self.is_binary.flatten();
188 file_info.is_text = self.is_text.flatten();
189 file_info.is_archive = self.is_archive.flatten();
190 file_info.is_media = self.is_media.flatten();
191 file_info.is_script = self.is_script.flatten();
192 file_info.files_count = self.files_count.flatten();
193 file_info.dirs_count = self.dirs_count.flatten();
194 file_info.size_count = self.size_count.flatten();
195 Ok(file_info)
196 }
197}
198
199impl FileInfo {
200 #[allow(clippy::too_many_arguments)]
201 pub fn new(
203 name: String,
204 base_name: String,
205 extension: String,
206 path: String,
207 file_type: FileType,
208 mime_type: Option<String>,
209 file_type_label: Option<String>,
210 size: u64,
211 date: Option<String>,
212 sha1: Option<Sha1Digest>,
213 md5: Option<Md5Digest>,
214 sha256: Option<Sha256Digest>,
215 programming_language: Option<String>,
216 package_data: Vec<PackageData>,
217 mut license_expression: Option<String>,
218 mut license_detections: Vec<LicenseDetection>,
219 license_clues: Vec<Match>,
220 copyrights: Vec<Copyright>,
221 holders: Vec<Holder>,
222 authors: Vec<Author>,
223 emails: Vec<OutputEmail>,
224 urls: Vec<OutputURL>,
225 for_packages: Vec<PackageUid>,
226 scan_errors: Vec<String>,
227 ) -> Self {
228 let mut package_data = package_data;
229 for package in &mut package_data {
230 enrich_package_data_license_provenance(package, &path);
231 }
232
233 license_expression = license_expression.or_else(|| {
235 let expressions = package_data
236 .iter()
237 .filter_map(|pkg| pkg.get_license_expression());
238 combine_license_expressions(expressions)
239 });
240
241 if license_detections.is_empty() {
243 for pkg in &package_data {
244 license_detections.extend(pkg.license_detections.clone());
245 }
246 }
247
248 if license_expression.is_none() && !license_detections.is_empty() {
250 let expressions = license_detections
251 .iter()
252 .map(|detection| detection.license_expression.clone());
253 license_expression = combine_license_expressions(expressions);
254 }
255
256 let mut file_info = FileInfo {
257 name,
258 base_name,
259 extension,
260 path,
261 file_type,
262 mime_type,
263 file_type_label,
264 size,
265 date,
266 sha1,
267 md5,
268 sha256,
269 sha1_git: None,
270 programming_language,
271 package_data,
272 license_expression,
273 license_detections,
274 license_clues,
275 percentage_of_license_text: None,
276 copyrights,
277 holders,
278 authors,
279 emails,
280 urls,
281 for_packages,
282 scan_errors,
283 license_policy: None,
284 is_generated: None,
285 is_binary: None,
286 is_text: None,
287 is_archive: None,
288 is_media: None,
289 is_source: None,
290 is_script: None,
291 files_count: None,
292 dirs_count: None,
293 size_count: None,
294 source_count: None,
295 is_legal: false,
296 is_manifest: false,
297 is_readme: false,
298 is_top_level: false,
299 is_key_file: false,
300 is_community: false,
301 facets: vec![],
302 tallies: None,
303 };
304 file_info.backfill_license_provenance();
305 file_info
306 }
307
308 pub fn backfill_license_provenance(&mut self) {
309 for detection in &mut self.license_detections {
310 enrich_license_detection_provenance(detection, &self.path);
311 }
312
313 for package in &mut self.package_data {
314 enrich_package_data_license_provenance(package, &self.path);
315 }
316 }
317}
318
319fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
320 for detection in &mut package_data.license_detections {
321 enrich_license_detection_provenance(detection, path);
322 }
323 for detection in &mut package_data.other_license_detections {
324 enrich_license_detection_provenance(detection, path);
325 }
326}
327
328pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
329 for detection_match in &mut detection.matches {
330 if detection_match.from_file.is_none() {
331 detection_match.from_file = Some(path.to_string());
332 }
333
334 if detection_match.rule_identifier.is_none() {
335 detection_match.rule_identifier = detection_match.matcher.clone();
336 }
337 }
338
339 if detection.identifier.is_none() {
340 detection.identifier = Some(compute_public_detection_identifier(detection));
341 }
342}
343
344fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
345 let expression = python_safe_name(&detection.license_expression);
346 let mut hasher = Sha1::new();
347 hasher.update(format_public_detection_content(detection).as_bytes());
348 let hex_str = hex::encode(hasher.finalize());
349 let uuid_hex = &hex_str[..32];
350 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
351 .map(|uuid| uuid.to_string())
352 .unwrap_or_else(|_| uuid_hex.to_string());
353
354 format!("{}-{}", expression, content_uuid)
355}
356
357fn format_public_detection_content(detection: &LicenseDetection) -> String {
358 let mut result = String::from("(");
359
360 for (index, detection_match) in detection.matches.iter().enumerate() {
361 if index > 0 {
362 result.push_str(", ");
363 }
364 result.push_str(&format!(
365 "({}, {}, {})",
366 python_str_repr(
367 detection_match
368 .rule_identifier
369 .as_deref()
370 .or(detection_match.matcher.as_deref())
371 .unwrap_or("parser-declared-license")
372 ),
373 detection_match.score.value() as f32,
374 python_token_tuple_repr(&tokenize_without_stopwords(
375 detection_match.matched_text.as_deref().unwrap_or_default(),
376 )),
377 ));
378 }
379
380 if detection.matches.len() == 1 {
381 result.push(',');
382 }
383 result.push(')');
384 result
385}
386
/// Reduces `value` to alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters (including `_`) collapses to one
/// `_`, and leading/trailing underscores are dropped. Input without any
/// alphanumeric character yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
408
/// Quotes `value` in the style of a Python string literal.
///
/// Double quotes are used only when the text contains `'` but no `"`;
/// otherwise single quotes are used. Backslashes and the chosen quote
/// character are backslash-escaped; no other characters are escaped.
fn python_str_repr(value: &str) -> String {
    let has_single = value.contains('\'');
    let has_double = value.contains('"');
    let quote = if has_single && !has_double { '"' } else { '\'' };

    let mut rendered = String::with_capacity(value.len() + 2);
    rendered.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            rendered.push('\\');
        }
        rendered.push(character);
    }
    rendered.push(quote);
    rendered
}
416
417fn python_token_tuple_repr(tokens: &[String]) -> String {
418 if tokens.is_empty() {
419 return String::from("()");
420 }
421
422 let mut result = String::from("(");
423 for (index, token) in tokens.iter().enumerate() {
424 if index > 0 {
425 result.push_str(", ");
426 }
427 result.push_str(&python_str_repr(token));
428 }
429
430 if tokens.len() == 1 {
431 result.push(',');
432 }
433 result.push(')');
434 result
435}
436
/// Package metadata record with optional identity, descriptive, digest,
/// license and dependency fields.
///
/// Derives `Default`, so every field may be left unset. Fields marked
/// `#[serde(default)]` fall back to their default value when the key is
/// absent during deserialization.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type`.
    #[serde(rename = "type")] pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
499
500impl PackageData {
501 pub fn get_license_expression(&self) -> Option<String> {
504 if self.license_detections.is_empty() {
505 return None;
506 }
507
508 let expressions = self
509 .license_detections
510 .iter()
511 .map(|detection| detection.license_expression.clone());
512 combine_license_expressions(expressions)
513 }
514}
515
/// A license detection: an expression (plain and SPDX form), the matches it
/// groups, a log of detection messages and an optional identifier.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    /// Defaults to an empty list when absent during deserialization.
    #[serde(default)]
    pub detection_log: Vec<String>,
    pub identifier: Option<String>,
}
528
/// A single license match: its expression (plain and SPDX form), line range,
/// score and optional details about the matcher, rule and matched text.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub from_file: Option<String>,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
    pub matcher: Option<String>,
    pub score: MatchScore,
    pub matched_length: Option<usize>,
    pub match_coverage: Option<f64>,
    pub rule_relevance: Option<u8>,
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    pub matched_text: Option<String>,
    pub matched_text_diagnostics: Option<String>,
    /// Defaults to `None` when absent during deserialization.
    #[serde(default)]
    pub referenced_filenames: Option<Vec<String>>,
}
551
/// Copyright text together with its `start_line`/`end_line` position.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
558
/// Holder text together with its `start_line`/`end_line` position.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
565
/// Author text together with its `start_line`/`end_line` position.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: LineNumber,
    pub end_line: LineNumber,
}
572
/// A dependency entry: optional purl, requirement string, scope and flags,
/// plus an optional boxed [`ResolvedPackage`] and free-form `extra_data`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    // Boxed because `ResolvedPackage` itself contains `Vec<Dependency>`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Defaults to `None` when absent during deserialization.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
590
/// Package record attached to a [`Dependency`].
///
/// Carries the same fields as [`PackageData`], except that `package_type`,
/// `namespace`, `name` and `version` are required rather than optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
648
649impl ResolvedPackage {
650 pub fn new(
651 package_type: PackageType,
652 namespace: String,
653 name: String,
654 version: String,
655 ) -> Self {
656 Self {
657 package_type,
658 namespace,
659 name,
660 version,
661 qualifiers: None,
662 subpath: None,
663 primary_language: None,
664 description: None,
665 release_date: None,
666 parties: vec![],
667 keywords: vec![],
668 homepage_url: None,
669 download_url: None,
670 size: None,
671 sha1: None,
672 md5: None,
673 sha256: None,
674 sha512: None,
675 bug_tracking_url: None,
676 code_view_url: None,
677 vcs_url: None,
678 copyright: None,
679 holder: None,
680 declared_license_expression: None,
681 declared_license_expression_spdx: None,
682 license_detections: vec![],
683 other_license_expression: None,
684 other_license_expression_spdx: None,
685 other_license_detections: vec![],
686 extracted_license_statement: None,
687 notice_text: None,
688 source_packages: vec![],
689 file_references: vec![],
690 is_private: false,
691 is_virtual: false,
692 extra_data: None,
693 dependencies: vec![],
694 repository_homepage_url: None,
695 repository_download_url: None,
696 api_data_url: None,
697 datasource_id: None,
698 purl: None,
699 }
700 }
701
702 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
703 Self {
704 package_type: package_data.package_type.unwrap_or(fallback_type),
705 namespace: package_data.namespace.clone().unwrap_or_default(),
706 name: package_data.name.clone().unwrap_or_default(),
707 version: package_data.version.clone().unwrap_or_default(),
708 qualifiers: package_data.qualifiers.clone(),
709 subpath: package_data.subpath.clone(),
710 primary_language: package_data.primary_language.clone(),
711 description: package_data.description.clone(),
712 release_date: package_data.release_date.clone(),
713 parties: package_data.parties.clone(),
714 keywords: package_data.keywords.clone(),
715 homepage_url: package_data.homepage_url.clone(),
716 download_url: package_data.download_url.clone(),
717 size: package_data.size,
718 sha1: package_data.sha1,
719 md5: package_data.md5,
720 sha256: package_data.sha256,
721 sha512: package_data.sha512,
722 bug_tracking_url: package_data.bug_tracking_url.clone(),
723 code_view_url: package_data.code_view_url.clone(),
724 vcs_url: package_data.vcs_url.clone(),
725 copyright: package_data.copyright.clone(),
726 holder: package_data.holder.clone(),
727 declared_license_expression: package_data.declared_license_expression.clone(),
728 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
729 license_detections: package_data.license_detections.clone(),
730 other_license_expression: package_data.other_license_expression.clone(),
731 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
732 other_license_detections: package_data.other_license_detections.clone(),
733 extracted_license_statement: package_data.extracted_license_statement.clone(),
734 notice_text: package_data.notice_text.clone(),
735 source_packages: package_data.source_packages.clone(),
736 file_references: package_data.file_references.clone(),
737 is_private: package_data.is_private,
738 is_virtual: package_data.is_virtual,
739 extra_data: package_data.extra_data.clone(),
740 dependencies: package_data.dependencies.clone(),
741 repository_homepage_url: package_data.repository_homepage_url.clone(),
742 repository_download_url: package_data.repository_download_url.clone(),
743 api_data_url: package_data.api_data_url.clone(),
744 datasource_id: package_data.datasource_id,
745 purl: package_data.purl.clone(),
746 }
747 }
748}
749
/// A party attached to a package; every attribute is optional.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Party {
    // Raw identifier because `type` is a Rust keyword; serde emits it as `type`.
    pub r#type: Option<String>,
    pub role: Option<String>,
    pub name: Option<String>,
    pub email: Option<String>,
    pub url: Option<String>,
    pub organization: Option<String>,
    pub organization_url: Option<String>,
    pub timezone: Option<String>,
}
764
765impl Party {
766 pub(crate) fn person(role: &str, name: Option<String>, email: Option<String>) -> Self {
767 Self {
768 r#type: Some("person".to_string()),
769 role: Some(role.to_string()),
770 name,
771 email,
772 url: None,
773 organization: None,
774 organization_url: None,
775 timezone: None,
776 }
777 }
778}
779
/// A file path with optional size, digests and free-form `extra_data`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
793
impl FileReference {
    /// Creates a reference that carries only `path`; size, digests and
    /// `extra_data` are all `None`.
    pub(crate) fn from_path(path: String) -> Self {
        Self {
            path,
            size: None,
            sha1: None,
            md5: None,
            sha256: None,
            sha512: None,
            extra_data: None,
        }
    }
}
807
/// Package record that can be assembled from one or more [`PackageData`]
/// entries.
///
/// Compared with `PackageData` it has no `file_references`, `dependencies` or
/// single `datasource_id`; instead it carries a `package_uid`, the
/// `datafile_paths` it was built from and the matching `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    #[serde(default)]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<Sha1Digest>,
    pub md5: Option<Md5Digest>,
    pub sha256: Option<Sha256Digest>,
    pub sha512: Option<Sha512Digest>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub purl: Option<String>,
    pub package_uid: PackageUid,
    pub datafile_paths: Vec<String>,
    pub datasource_ids: Vec<DatasourceId>,
}
875
876impl Package {
877 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
882 let mut package_data = package_data.clone();
883 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
884
885 let package_uid = package_data
886 .purl
887 .as_ref()
888 .map(|p| PackageUid::new(p))
889 .unwrap_or_else(PackageUid::empty);
890
891 Package {
892 package_type: package_data.package_type,
893 namespace: package_data.namespace.clone(),
894 name: package_data.name.clone(),
895 version: package_data.version.clone(),
896 qualifiers: package_data.qualifiers.clone(),
897 subpath: package_data.subpath.clone(),
898 primary_language: package_data.primary_language.clone(),
899 description: package_data.description.clone(),
900 release_date: package_data.release_date.clone(),
901 parties: package_data.parties.clone(),
902 keywords: package_data.keywords.clone(),
903 homepage_url: package_data.homepage_url.clone(),
904 download_url: package_data.download_url.clone(),
905 size: package_data.size,
906 sha1: package_data.sha1,
907 md5: package_data.md5,
908 sha256: package_data.sha256,
909 sha512: package_data.sha512,
910 bug_tracking_url: package_data.bug_tracking_url.clone(),
911 code_view_url: package_data.code_view_url.clone(),
912 vcs_url: package_data.vcs_url.clone(),
913 copyright: package_data.copyright.clone(),
914 holder: package_data.holder.clone(),
915 declared_license_expression: package_data.declared_license_expression.clone(),
916 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
917 license_detections: package_data.license_detections.clone(),
918 other_license_expression: package_data.other_license_expression.clone(),
919 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
920 other_license_detections: package_data.other_license_detections.clone(),
921 extracted_license_statement: package_data.extracted_license_statement.clone(),
922 notice_text: package_data.notice_text.clone(),
923 source_packages: package_data.source_packages.clone(),
924 is_private: package_data.is_private,
925 is_virtual: package_data.is_virtual,
926 extra_data: package_data.extra_data.clone(),
927 repository_homepage_url: package_data.repository_homepage_url.clone(),
928 repository_download_url: package_data.repository_download_url.clone(),
929 api_data_url: package_data.api_data_url.clone(),
930 purl: package_data.purl.clone(),
931 package_uid,
932 datafile_paths: vec![datafile_path],
933 datasource_ids: if let Some(dsid) = package_data.datasource_id {
934 vec![dsid]
935 } else {
936 vec![]
937 },
938 }
939 }
940
941 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
947 let mut package_data = package_data.clone();
948 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
949
950 if let Some(dsid) = package_data.datasource_id {
951 self.datasource_ids.push(dsid);
952 }
953 self.datafile_paths.push(datafile_path);
954
955 macro_rules! fill_if_empty {
956 ($field:ident) => {
957 if self.$field.is_none() {
958 self.$field = package_data.$field;
959 }
960 };
961 }
962
963 fill_if_empty!(package_type);
964 fill_if_empty!(name);
965 fill_if_empty!(namespace);
966 fill_if_empty!(version);
967 fill_if_empty!(qualifiers);
968 fill_if_empty!(subpath);
969 fill_if_empty!(primary_language);
970 fill_if_empty!(description);
971 fill_if_empty!(release_date);
972 fill_if_empty!(homepage_url);
973 fill_if_empty!(download_url);
974 fill_if_empty!(size);
975 fill_if_empty!(sha1);
976 fill_if_empty!(md5);
977 fill_if_empty!(sha256);
978 fill_if_empty!(sha512);
979 fill_if_empty!(bug_tracking_url);
980 fill_if_empty!(code_view_url);
981 fill_if_empty!(vcs_url);
982 fill_if_empty!(copyright);
983 fill_if_empty!(holder);
984 fill_if_empty!(declared_license_expression);
985 fill_if_empty!(declared_license_expression_spdx);
986 fill_if_empty!(other_license_expression);
987 fill_if_empty!(other_license_expression_spdx);
988 fill_if_empty!(extracted_license_statement);
989 fill_if_empty!(notice_text);
990 match (&mut self.extra_data, &package_data.extra_data) {
991 (None, Some(extra_data)) => {
992 self.extra_data = Some(extra_data.clone());
993 }
994 (Some(existing), Some(incoming)) => {
995 for (key, value) in incoming {
996 existing.entry(key.clone()).or_insert_with(|| value.clone());
997 }
998 }
999 _ => {}
1000 }
1001 fill_if_empty!(repository_homepage_url);
1002 fill_if_empty!(repository_download_url);
1003 fill_if_empty!(api_data_url);
1004
1005 for party in &package_data.parties {
1006 if let Some(existing) = self.parties.iter_mut().find(|p| {
1007 p.role == party.role
1008 && ((p.name.is_some() && p.name == party.name)
1009 || (p.email.is_some() && p.email == party.email))
1010 }) {
1011 if existing.name.is_none() {
1012 existing.name = party.name.clone();
1013 }
1014 if existing.email.is_none() {
1015 existing.email = party.email.clone();
1016 }
1017 } else {
1018 self.parties.push(party.clone());
1019 }
1020 }
1021
1022 for keyword in &package_data.keywords {
1023 if !self.keywords.contains(keyword) {
1024 self.keywords.push(keyword.clone());
1025 }
1026 }
1027
1028 for detection in &package_data.license_detections {
1029 self.license_detections.push(detection.clone());
1030 }
1031
1032 for detection in &package_data.other_license_detections {
1033 self.other_license_detections.push(detection.clone());
1034 }
1035
1036 for source_pkg in &package_data.source_packages {
1037 if !self.source_packages.contains(source_pkg) {
1038 self.source_packages.push(source_pkg.clone());
1039 }
1040 }
1041
1042 self.refresh_identity();
1043 }
1044
1045 pub fn backfill_license_provenance(&mut self) {
1046 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1047 return;
1048 };
1049
1050 for detection in &mut self.license_detections {
1051 enrich_license_detection_provenance(detection, &datafile_path);
1052 }
1053 for detection in &mut self.other_license_detections {
1054 enrich_license_detection_provenance(detection, &datafile_path);
1055 }
1056 }
1057
1058 fn refresh_identity(&mut self) {
1059 let Some(next_purl) = self.build_current_purl() else {
1060 return;
1061 };
1062
1063 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1064 self.package_uid = PackageUid::new(&next_purl);
1065 }
1066
1067 self.purl = Some(next_purl);
1068 }
1069
1070 fn build_current_purl(&self) -> Option<String> {
1071 if let (Some(package_type), Some(name)) = (
1072 self.package_type.as_ref(),
1073 self.name
1074 .as_deref()
1075 .filter(|value| !value.trim().is_empty()),
1076 ) {
1077 let purl_type = match package_type {
1078 PackageType::Deno => "generic",
1079 _ => package_type.as_str(),
1080 };
1081
1082 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1083
1084 if let Some(namespace) = self
1085 .namespace
1086 .as_deref()
1087 .filter(|value| !value.trim().is_empty())
1088 {
1089 purl.with_namespace(namespace).ok()?;
1090 }
1091
1092 if let Some(version) = self
1093 .version
1094 .as_deref()
1095 .filter(|value| !value.trim().is_empty())
1096 {
1097 purl.with_version(version).ok()?;
1098 }
1099
1100 if let Some(qualifiers) = &self.qualifiers {
1101 for (key, value) in qualifiers {
1102 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1103 }
1104 }
1105
1106 if let Some(subpath) = self
1107 .subpath
1108 .as_deref()
1109 .filter(|value| !value.trim().is_empty())
1110 {
1111 purl.with_subpath(subpath).ok()?;
1112 }
1113
1114 return Some(purl.to_string());
1115 }
1116
1117 let existing_purl = self.purl.as_deref()?;
1118 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1119
1120 if let Some(version) = self
1121 .version
1122 .as_deref()
1123 .filter(|value| !value.trim().is_empty())
1124 {
1125 purl.with_version(version).ok()?;
1126 } else {
1127 purl.without_version();
1128 }
1129
1130 Some(purl.to_string())
1131 }
1132}
1133
#[cfg(test)]
mod tests {
    use super::*;

    /// Path of the manifest the fixtures pretend to have been parsed from.
    const MANIFEST_PATH: &str = "project/package.json";
    /// Matcher name used by the fixture's single license match.
    const DECLARED_LICENSE_MATCHER: &str = "parser-declared-license";

    /// Builds npm package data with one MIT detection whose match has no
    /// `from_file`, no `rule_identifier`, and whose detection has no `identifier`.
    fn npm_package_data_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: LineNumber::ONE,
            end_line: LineNumber::ONE,
            matcher: Some(DECLARED_LICENSE_MATCHER.to_string()),
            score: MatchScore::MAX,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let package_data = npm_package_data_with_declared_mit();

        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            MANIFEST_PATH.to_string(),
            FileType::File,
            None,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![package_data],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_detection = &file_info.license_detections[0];
        assert_eq!(
            file_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert!(file_detection.identifier.is_some());

        // Detections kept on the embedded package data.
        let package_detection = &file_info.package_data[0].license_detections[0];
        assert_eq!(
            package_detection.matches[0].from_file.as_deref(),
            Some(MANIFEST_PATH)
        );
        assert_eq!(
            package_detection.matches[0].rule_identifier.as_deref(),
            Some(DECLARED_LICENSE_MATCHER)
        );
        assert!(package_detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_data_with_declared_mit();

        let package = Package::from_package_data(&package_data, MANIFEST_PATH.to_string());

        let detection = &package.license_detections[0];
        let first_match = &detection.matches[0];

        assert_eq!(first_match.from_file.as_deref(), Some(MANIFEST_PATH));
        assert_eq!(
            first_match.rule_identifier.as_deref(),
            Some(DECLARED_LICENSE_MATCHER)
        );
        assert!(detection.identifier.is_some());
    }
}
1269
/// A dependency record carrying its own identifier and provenance.
///
/// `TopLevelDependency::from_dependency` fills the first group of fields by copying
/// them from a `Dependency`, derives `dependency_uid` from the purl, and takes the
/// remaining provenance fields from its arguments.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL string of the dependency, if one is known.
    pub purl: Option<String>,
    /// Copied from `Dependency::extracted_requirement`.
    pub extracted_requirement: Option<String>,
    /// Copied from `Dependency::scope`.
    pub scope: Option<String>,
    /// Copied from `Dependency::is_runtime`.
    pub is_runtime: Option<bool>,
    /// Copied from `Dependency::is_optional`.
    pub is_optional: Option<bool>,
    /// Copied from `Dependency::is_pinned`.
    pub is_pinned: Option<bool>,
    /// Copied from `Dependency::is_direct`.
    pub is_direct: Option<bool>,
    /// Boxed resolved-package payload, cloned from `Dependency::resolved_package`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON values keyed by name; a missing key deserializes as `None`.
    #[serde(default)]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// Identifier of this dependency; `from_dependency` builds it from the purl, or
    /// uses `DependencyUid::empty()` when there is no purl.
    pub dependency_uid: DependencyUid,
    /// UID of the package this dependency is attributed to, when known.
    pub for_package_uid: Option<PackageUid>,
    /// Path of the datafile this dependency is associated with (supplied by the caller).
    pub datafile_path: String,
    /// Datasource identifier supplied by the caller of `from_dependency`.
    pub datasource_id: DatasourceId,
    /// Optional namespace; `from_dependency` always leaves this as `None`.
    pub namespace: Option<String>,
}
1297
1298impl TopLevelDependency {
1299 pub fn from_dependency(
1301 dep: &Dependency,
1302 datafile_path: String,
1303 datasource_id: DatasourceId,
1304 for_package_uid: Option<PackageUid>,
1305 ) -> Self {
1306 let dependency_uid = dep
1307 .purl
1308 .as_ref()
1309 .map(|p| DependencyUid::new(p))
1310 .unwrap_or_else(DependencyUid::empty);
1311
1312 TopLevelDependency {
1313 purl: dep.purl.clone(),
1314 extracted_requirement: dep.extracted_requirement.clone(),
1315 scope: dep.scope.clone(),
1316 is_runtime: dep.is_runtime,
1317 is_optional: dep.is_optional,
1318 is_pinned: dep.is_pinned,
1319 is_direct: dep.is_direct,
1320 resolved_package: dep.resolved_package.clone(),
1321 extra_data: dep.extra_data.clone(),
1322 dependency_uid,
1323 for_package_uid,
1324 datafile_path,
1325 datasource_id,
1326 namespace: None,
1327 }
1328 }
1329}
1330
/// Serializable record pairing an email address string with a line range.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the range associated with `email`.
    pub start_line: LineNumber,
    /// Last line of the range associated with `email`.
    pub end_line: LineNumber,
}
1337
/// Serializable record pairing a URL string with a line range.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the range associated with `url`.
    pub start_line: LineNumber,
    /// Last line of the range associated with `url`.
    pub end_line: LineNumber,
}
1344
/// Serializable, equality-comparable record of four string attributes tied to a
/// license key.
///
/// NOTE(review): presumably one entry of a license policy describing how a license
/// should be labelled/displayed — confirm against the code that produces it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    /// Key of the license this entry refers to.
    pub license_key: String,
    /// Label text for the entry.
    pub label: String,
    /// Color code string (format not established here — TODO confirm).
    pub color_code: String,
    /// Icon identifier string.
    pub icon: String,
}
1352
/// Kind of filesystem entry.
///
/// The hand-written serde impls in this file map the variants to the strings
/// `"file"` and `"directory"`.
///
/// `Eq` is derived alongside `PartialEq` because equality on these unit variants is
/// total; this lets the type be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileType {
    /// A regular file entry.
    File,
    /// A directory entry.
    Directory,
}
1358
1359impl serde::Serialize for FileType {
1360 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1361 where
1362 S: serde::Serializer,
1363 {
1364 match self {
1365 FileType::File => serializer.serialize_str("file"),
1366 FileType::Directory => serializer.serialize_str("directory"),
1367 }
1368 }
1369}
1370
1371impl<'de> Deserialize<'de> for FileType {
1372 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1373 where
1374 D: serde::Deserializer<'de>,
1375 {
1376 let value = String::deserialize(deserializer)?;
1377 match value.as_str() {
1378 "file" => Ok(FileType::File),
1379 "directory" => Ok(FileType::Directory),
1380 _ => Err(serde::de::Error::custom("invalid file type")),
1381 }
1382 }
1383}