1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use std::str::FromStr;
5use uuid::Uuid;
6
7use sha1::{Digest, Sha1};
8
9use super::DatasourceId;
10use super::PackageType;
11use crate::license_detection::tokenize::tokenize_without_stopwords;
12use crate::models::output::Tallies;
13use crate::utils::spdx::combine_license_expressions;
14
/// File-level record: identity (`name`, `path`, ...), checksums, and the
/// detections attached to that path.
///
/// The type is (de)serializable through serde. `derive_builder` generates a
/// `FileInfoBuilder`, but its `build` function is hand-written
/// (`build_fn(skip)`) in this module.
#[derive(Debug, Builder, Serialize, Deserialize)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub file_type: FileType,
    #[builder(default)]
    pub mime_type: Option<String>,
    pub size: u64,
    #[builder(default)]
    pub date: Option<String>,
    #[builder(default)]
    pub sha1: Option<String>,
    #[builder(default)]
    pub md5: Option<String>,
    #[builder(default)]
    pub sha256: Option<String>,
    #[builder(default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Serialized under the key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")]
    #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    /// Omitted from the serialized form when empty.
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub license_clues: Vec<Match>,
    /// Omitted from the serialized form when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    /// Omitted from the serialized form when empty.
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    // The optional values below are omitted from the serialized form when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub source_count: Option<usize>,
    // The boolean flags below are omitted from the serialized form when `false`
    // and default to `false` when absent on input.
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_community: bool,
    /// Omitted from the serialized form when empty.
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub facets: Vec<String>,
    /// Omitted from the serialized form when `None`.
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub tallies: Option<Tallies>,
}
108
109impl FileInfoBuilder {
110 pub fn build(&self) -> Result<FileInfo, String> {
112 Ok(FileInfo::new(
113 self.name.clone().ok_or("Missing field: name")?,
114 self.base_name.clone().ok_or("Missing field: base_name")?,
115 self.extension.clone().ok_or("Missing field: extension")?,
116 self.path.clone().ok_or("Missing field: path")?,
117 self.file_type.clone().ok_or("Missing field: file_type")?,
118 self.mime_type.clone().flatten(),
119 self.size.ok_or("Missing field: size")?,
120 self.date.clone().flatten(),
121 self.sha1.clone().flatten(),
122 self.md5.clone().flatten(),
123 self.sha256.clone().flatten(),
124 self.programming_language.clone().flatten(),
125 self.package_data.clone().unwrap_or_default(),
126 self.license_expression.clone().flatten(),
127 self.license_detections.clone().unwrap_or_default(),
128 self.license_clues.clone().unwrap_or_default(),
129 self.copyrights.clone().unwrap_or_default(),
130 self.holders.clone().unwrap_or_default(),
131 self.authors.clone().unwrap_or_default(),
132 self.emails.clone().unwrap_or_default(),
133 self.urls.clone().unwrap_or_default(),
134 self.for_packages.clone().unwrap_or_default(),
135 self.scan_errors.clone().unwrap_or_default(),
136 ))
137 }
138}
139
140impl FileInfo {
141 #[allow(clippy::too_many_arguments)]
142 pub fn new(
144 name: String,
145 base_name: String,
146 extension: String,
147 path: String,
148 file_type: FileType,
149 mime_type: Option<String>,
150 size: u64,
151 date: Option<String>,
152 sha1: Option<String>,
153 md5: Option<String>,
154 sha256: Option<String>,
155 programming_language: Option<String>,
156 package_data: Vec<PackageData>,
157 mut license_expression: Option<String>,
158 mut license_detections: Vec<LicenseDetection>,
159 license_clues: Vec<Match>,
160 copyrights: Vec<Copyright>,
161 holders: Vec<Holder>,
162 authors: Vec<Author>,
163 emails: Vec<OutputEmail>,
164 urls: Vec<OutputURL>,
165 for_packages: Vec<String>,
166 scan_errors: Vec<String>,
167 ) -> Self {
168 let mut package_data = package_data;
169 for package in &mut package_data {
170 enrich_package_data_license_provenance(package, &path);
171 }
172
173 license_expression = license_expression.or_else(|| {
175 let expressions = package_data
176 .iter()
177 .filter_map(|pkg| pkg.get_license_expression());
178 combine_license_expressions(expressions)
179 });
180
181 if license_detections.is_empty() {
183 for pkg in &package_data {
184 license_detections.extend(pkg.license_detections.clone());
185 }
186 }
187
188 if license_expression.is_none() && !license_detections.is_empty() {
190 let expressions = license_detections
191 .iter()
192 .map(|detection| detection.license_expression.clone());
193 license_expression = combine_license_expressions(expressions);
194 }
195
196 let mut file_info = FileInfo {
197 name,
198 base_name,
199 extension,
200 path,
201 file_type,
202 mime_type,
203 size,
204 date,
205 sha1,
206 md5,
207 sha256,
208 programming_language,
209 package_data,
210 license_expression,
211 license_detections,
212 license_clues,
213 percentage_of_license_text: None,
214 copyrights,
215 holders,
216 authors,
217 emails,
218 urls,
219 for_packages,
220 scan_errors,
221 is_generated: None,
222 is_source: None,
223 source_count: None,
224 is_legal: false,
225 is_manifest: false,
226 is_readme: false,
227 is_top_level: false,
228 is_key_file: false,
229 is_community: false,
230 facets: vec![],
231 tallies: None,
232 };
233 file_info.backfill_license_provenance();
234 file_info
235 }
236
237 pub fn backfill_license_provenance(&mut self) {
238 for detection in &mut self.license_detections {
239 enrich_license_detection_provenance(detection, &self.path);
240 }
241
242 for package in &mut self.package_data {
243 enrich_package_data_license_provenance(package, &self.path);
244 }
245 }
246}
247
248fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
249 for detection in &mut package_data.license_detections {
250 enrich_license_detection_provenance(detection, path);
251 }
252 for detection in &mut package_data.other_license_detections {
253 enrich_license_detection_provenance(detection, path);
254 }
255}
256
257pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
258 for detection_match in &mut detection.matches {
259 if detection_match.from_file.is_none() {
260 detection_match.from_file = Some(path.to_string());
261 }
262 }
263
264 if detection.identifier.is_none() {
265 detection.identifier = Some(compute_public_detection_identifier(detection));
266 }
267}
268
269fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
270 let expression = python_safe_name(&detection.license_expression);
271 let mut hasher = Sha1::new();
272 hasher.update(format_public_detection_content(detection).as_bytes());
273 let hex_str = hex::encode(hasher.finalize());
274 let uuid_hex = &hex_str[..32];
275 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
276 .map(|uuid| uuid.to_string())
277 .unwrap_or_else(|_| uuid_hex.to_string());
278
279 format!("{}-{}", expression, content_uuid)
280}
281
282fn format_public_detection_content(detection: &LicenseDetection) -> String {
283 let mut result = String::from("(");
284
285 for (index, detection_match) in detection.matches.iter().enumerate() {
286 if index > 0 {
287 result.push_str(", ");
288 }
289 result.push_str(&format!(
290 "({}, {}, {})",
291 python_str_repr(
292 detection_match
293 .rule_identifier
294 .as_deref()
295 .or(detection_match.matcher.as_deref())
296 .unwrap_or("parser-declared-license")
297 ),
298 detection_match.score as f32,
299 python_token_tuple_repr(&tokenize_without_stopwords(
300 detection_match.matched_text.as_deref().unwrap_or_default(),
301 )),
302 ));
303 }
304
305 if detection.matches.len() == 1 {
306 result.push(',');
307 }
308 result.push(')');
309 result
310}
311
/// Reduces `value` to its alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters becomes one `_`; leading and
/// trailing separators are dropped, so the result never starts or ends
/// with `_`. An input without alphanumeric characters yields `""`.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
333
/// Formats `value` the way Python's `repr()` formats a `str`.
///
/// * Double quotes are used only when `value` contains `'` and no `"`;
///   otherwise single quotes are used and embedded `'` is escaped.
/// * Backslashes are doubled.
/// * Control characters are escaped as Python does: `\n`, `\r`, `\t`, and
///   `\xNN` (lowercase hex) for every other control character.
///
/// Other characters Python treats as non-printable (for example
/// non-breaking spaces or format characters) are emitted unchanged, since
/// classifying them needs Unicode tables the standard library does not expose.
fn python_str_repr(value: &str) -> String {
    let quote = if value.contains('\'') && !value.contains('"') {
        '"'
    } else {
        '\''
    };

    let mut repr = String::with_capacity(value.len() + 2);
    repr.push(quote);
    for character in value.chars() {
        match character {
            '\\' => repr.push_str("\\\\"),
            '\n' => repr.push_str("\\n"),
            '\r' => repr.push_str("\\r"),
            '\t' => repr.push_str("\\t"),
            c if c == quote => {
                repr.push('\\');
                repr.push(c);
            }
            // All control characters are below U+0100, so two hex digits suffice.
            c if c.is_control() => repr.push_str(&format!("\\x{:02x}", u32::from(c))),
            c => repr.push(c),
        }
    }
    repr.push(quote);
    repr
}
341
342fn python_token_tuple_repr(tokens: &[String]) -> String {
343 if tokens.is_empty() {
344 return String::from("()");
345 }
346
347 let mut result = String::from("(");
348 for (index, token) in tokens.iter().enumerate() {
349 if index > 0 {
350 result.push_str(", ");
351 }
352 result.push_str(&python_str_repr(token));
353 }
354
355 if tokens.len() == 1 {
356 result.push(',');
357 }
358 result.push(')');
359 result
360}
361
/// Package metadata record, (de)serializable through serde.
///
/// Most optional fields are omitted from the serialized form when `None`;
/// list and flag fields marked `skip_serializing_if` are omitted when empty or
/// `false`. `Default` yields a record with every field empty.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Serialized under the key `type` (always written, even when `None`).
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub namespace: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub qualifiers: Option<std::collections::HashMap<String, String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subpath: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_language: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub release_date: Option<String>,
    /// Always serialized; has no serde default, so it must be present on input.
    pub parties: Vec<Party>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub keywords: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub homepage_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub download_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha1: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha512: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bug_tracking_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_view_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vcs_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub copyright: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub holder: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub declared_license_expression: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub declared_license_expression_spdx: Option<String>,
    /// Always serialized; defaults to empty when absent on input.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub other_license_expression: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub other_license_expression_spdx: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub other_license_detections: Vec<LicenseDetection>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted_license_statement: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub notice_text: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub source_packages: Vec<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub file_references: Vec<FileReference>,
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_private: bool,
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_virtual: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
    /// Always serialized; defaults to empty when absent on input.
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_homepage_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_download_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_data_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub datasource_id: Option<DatasourceId>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub purl: Option<String>,
}
453
/// Returns `true` when `*b` is `false`.
///
/// Takes a reference because it is used as a serde `skip_serializing_if`
/// predicate, which is called with `&T`.
fn is_false(b: &bool) -> bool {
    !*b
}
458
459impl PackageData {
460 pub fn get_license_expression(&self) -> Option<String> {
463 if self.license_detections.is_empty() {
464 return None;
465 }
466
467 let expressions = self
468 .license_detections
469 .iter()
470 .map(|detection| detection.license_expression.clone());
471 combine_license_expressions(expressions)
472 }
473}
474
/// A license detection: an expression (plus its SPDX form) and the matches
/// it is made of.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    /// Omitted from the serialized form when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub detection_log: Vec<String>,
    /// Omitted from the serialized form when `None`; filled in by
    /// `enrich_license_detection_provenance` when missing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub identifier: Option<String>,
}
488
/// One license match inside a [`LicenseDetection`].
///
/// Optional fields are omitted from the serialized form when `None`, except
/// `rule_url`, which has no `skip_serializing_if` and is always written.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// Filled with the owning path by `enrich_license_detection_provenance`
    /// when missing.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub from_file: Option<String>,
    pub start_line: usize,
    pub end_line: usize,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matcher: Option<String>,
    pub score: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_length: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub match_coverage: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rule_relevance: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rule_identifier: Option<String>,
    /// Always serialized, even when `None`.
    pub rule_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_text: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_text_diagnostics: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub referenced_filenames: Option<Vec<String>>,
}
519
/// A copyright statement together with the line range it was found on.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: usize,
    pub end_line: usize,
}
526
/// A holder name together with the line range it was found on.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: usize,
    pub end_line: usize,
}
533
/// An author name together with the line range it was found on.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: usize,
    pub end_line: usize,
}
540
/// A dependency entry of a package.
///
/// `purl` and `scope` are always serialized (as `null` when `None`); the other
/// optional fields are omitted when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_runtime: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_optional: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_pinned: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_direct: Option<bool>,
    /// Boxed because `ResolvedPackage` itself contains `Dependency` values.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resolved_package: Option<Box<ResolvedPackage>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
564
565#[derive(Serialize, Deserialize, Debug, Clone)]
566pub struct ResolvedPackage {
567 #[serde(rename = "type")]
568 pub package_type: PackageType,
569 #[serde(skip_serializing_if = "String::is_empty")]
570 pub namespace: String,
571 pub name: String,
572 pub version: String,
573 #[serde(skip_serializing_if = "Option::is_none")]
574 pub primary_language: Option<String>,
575 #[serde(skip_serializing_if = "Option::is_none")]
576 pub download_url: Option<String>,
577 #[serde(skip_serializing_if = "Option::is_none")]
578 pub sha1: Option<String>,
579 #[serde(skip_serializing_if = "Option::is_none")]
580 pub sha256: Option<String>,
581 #[serde(skip_serializing_if = "Option::is_none")]
582 pub sha512: Option<String>,
583 #[serde(skip_serializing_if = "Option::is_none")]
584 pub md5: Option<String>,
585 pub is_virtual: bool,
586 #[serde(skip_serializing_if = "Option::is_none")]
587 pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
588 pub dependencies: Vec<Dependency>,
589 #[serde(skip_serializing_if = "Option::is_none")]
590 pub repository_homepage_url: Option<String>,
591 #[serde(skip_serializing_if = "Option::is_none")]
592 pub repository_download_url: Option<String>,
593 #[serde(skip_serializing_if = "Option::is_none")]
594 pub api_data_url: Option<String>,
595 #[serde(skip_serializing_if = "Option::is_none")]
596 pub datasource_id: Option<DatasourceId>,
597 #[serde(skip_serializing_if = "Option::is_none")]
598 pub purl: Option<String>,
599}
600
/// A party attached to a package. Every field is optional and omitted from
/// the serialized form when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Party {
    // Raw identifier because `type` is a Rust keyword.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub organization: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub organization_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timezone: Option<String>,
}
623
/// A reference to a file by `path`, with optional size, checksums and extra
/// data. Optional fields are omitted from the serialized form when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha1: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha512: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
643
/// A package assembled from one or more [`PackageData`] records.
///
/// It carries the same descriptive fields as `PackageData` (minus
/// `file_references`, `dependencies` and `datasource_id`), plus a
/// `package_uid` and the lists of datafile paths and datasource ids that
/// contributed to it. Optional fields are omitted from the serialized form
/// when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Serialized under the key `type` (always written, even when `None`).
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub namespace: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub version: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub qualifiers: Option<std::collections::HashMap<String, String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub subpath: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub primary_language: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub release_date: Option<String>,
    /// Always serialized; has no serde default, so it must be present on input.
    pub parties: Vec<Party>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub keywords: Vec<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub homepage_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub download_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha1: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha512: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bug_tracking_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code_view_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub vcs_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub copyright: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub holder: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub declared_license_expression: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub declared_license_expression_spdx: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub license_detections: Vec<LicenseDetection>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub other_license_expression: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub other_license_expression_spdx: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub other_license_detections: Vec<LicenseDetection>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted_license_statement: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub notice_text: Option<String>,
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub source_packages: Vec<String>,
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_private: bool,
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_virtual: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_homepage_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_download_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_data_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub purl: Option<String>,
    /// Built from the purl by `build_package_uid`; empty when the package was
    /// created without a purl.
    pub package_uid: String,
    /// One entry per `PackageData` merged into this package, in merge order.
    pub datafile_paths: Vec<String>,
    /// Datasource ids of the merged `PackageData` records that carried one.
    pub datasource_ids: Vec<DatasourceId>,
}
739
740impl Package {
741 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
746 let mut package_data = package_data.clone();
747 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
748
749 let package_uid = package_data
750 .purl
751 .as_ref()
752 .map(|p| build_package_uid(p))
753 .unwrap_or_default();
754
755 Package {
756 package_type: package_data.package_type,
757 namespace: package_data.namespace.clone(),
758 name: package_data.name.clone(),
759 version: package_data.version.clone(),
760 qualifiers: package_data.qualifiers.clone(),
761 subpath: package_data.subpath.clone(),
762 primary_language: package_data.primary_language.clone(),
763 description: package_data.description.clone(),
764 release_date: package_data.release_date.clone(),
765 parties: package_data.parties.clone(),
766 keywords: package_data.keywords.clone(),
767 homepage_url: package_data.homepage_url.clone(),
768 download_url: package_data.download_url.clone(),
769 size: package_data.size,
770 sha1: package_data.sha1.clone(),
771 md5: package_data.md5.clone(),
772 sha256: package_data.sha256.clone(),
773 sha512: package_data.sha512.clone(),
774 bug_tracking_url: package_data.bug_tracking_url.clone(),
775 code_view_url: package_data.code_view_url.clone(),
776 vcs_url: package_data.vcs_url.clone(),
777 copyright: package_data.copyright.clone(),
778 holder: package_data.holder.clone(),
779 declared_license_expression: package_data.declared_license_expression.clone(),
780 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
781 license_detections: package_data.license_detections.clone(),
782 other_license_expression: package_data.other_license_expression.clone(),
783 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
784 other_license_detections: package_data.other_license_detections.clone(),
785 extracted_license_statement: package_data.extracted_license_statement.clone(),
786 notice_text: package_data.notice_text.clone(),
787 source_packages: package_data.source_packages.clone(),
788 is_private: package_data.is_private,
789 is_virtual: package_data.is_virtual,
790 extra_data: package_data.extra_data.clone(),
791 repository_homepage_url: package_data.repository_homepage_url.clone(),
792 repository_download_url: package_data.repository_download_url.clone(),
793 api_data_url: package_data.api_data_url.clone(),
794 purl: package_data.purl.clone(),
795 package_uid,
796 datafile_paths: vec![datafile_path],
797 datasource_ids: if let Some(dsid) = package_data.datasource_id {
798 vec![dsid]
799 } else {
800 vec![]
801 },
802 }
803 }
804
805 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
811 let mut package_data = package_data.clone();
812 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
813
814 if let Some(dsid) = package_data.datasource_id {
815 self.datasource_ids.push(dsid);
816 }
817 self.datafile_paths.push(datafile_path);
818
819 macro_rules! fill_if_empty {
820 ($field:ident) => {
821 if self.$field.is_none() {
822 self.$field = package_data.$field.clone();
823 }
824 };
825 }
826
827 fill_if_empty!(package_type);
828 fill_if_empty!(name);
829 fill_if_empty!(namespace);
830 fill_if_empty!(version);
831 fill_if_empty!(qualifiers);
832 fill_if_empty!(subpath);
833 fill_if_empty!(primary_language);
834 fill_if_empty!(description);
835 fill_if_empty!(release_date);
836 fill_if_empty!(homepage_url);
837 fill_if_empty!(download_url);
838 fill_if_empty!(size);
839 fill_if_empty!(sha1);
840 fill_if_empty!(md5);
841 fill_if_empty!(sha256);
842 fill_if_empty!(sha512);
843 fill_if_empty!(bug_tracking_url);
844 fill_if_empty!(code_view_url);
845 fill_if_empty!(vcs_url);
846 fill_if_empty!(copyright);
847 fill_if_empty!(holder);
848 fill_if_empty!(declared_license_expression);
849 fill_if_empty!(declared_license_expression_spdx);
850 fill_if_empty!(other_license_expression);
851 fill_if_empty!(other_license_expression_spdx);
852 fill_if_empty!(extracted_license_statement);
853 fill_if_empty!(notice_text);
854 match (&mut self.extra_data, &package_data.extra_data) {
855 (None, Some(extra_data)) => {
856 self.extra_data = Some(extra_data.clone());
857 }
858 (Some(existing), Some(incoming)) => {
859 for (key, value) in incoming {
860 existing.entry(key.clone()).or_insert_with(|| value.clone());
861 }
862 }
863 _ => {}
864 }
865 fill_if_empty!(repository_homepage_url);
866 fill_if_empty!(repository_download_url);
867 fill_if_empty!(api_data_url);
868
869 for party in &package_data.parties {
870 if let Some(existing) = self.parties.iter_mut().find(|p| {
871 p.role == party.role
872 && ((p.name.is_some() && p.name == party.name)
873 || (p.email.is_some() && p.email == party.email))
874 }) {
875 if existing.name.is_none() {
876 existing.name = party.name.clone();
877 }
878 if existing.email.is_none() {
879 existing.email = party.email.clone();
880 }
881 } else {
882 self.parties.push(party.clone());
883 }
884 }
885
886 for keyword in &package_data.keywords {
887 if !self.keywords.contains(keyword) {
888 self.keywords.push(keyword.clone());
889 }
890 }
891
892 for detection in &package_data.license_detections {
893 self.license_detections.push(detection.clone());
894 }
895
896 for detection in &package_data.other_license_detections {
897 self.other_license_detections.push(detection.clone());
898 }
899
900 for source_pkg in &package_data.source_packages {
901 if !self.source_packages.contains(source_pkg) {
902 self.source_packages.push(source_pkg.clone());
903 }
904 }
905
906 self.refresh_identity();
907 }
908
909 pub fn backfill_license_provenance(&mut self) {
910 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
911 return;
912 };
913
914 for detection in &mut self.license_detections {
915 enrich_license_detection_provenance(detection, &datafile_path);
916 }
917 for detection in &mut self.other_license_detections {
918 enrich_license_detection_provenance(detection, &datafile_path);
919 }
920 }
921
922 fn refresh_identity(&mut self) {
923 let Some(next_purl) = self.build_current_purl() else {
924 return;
925 };
926
927 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
928 self.package_uid = build_package_uid(&next_purl);
929 }
930
931 self.purl = Some(next_purl);
932 }
933
934 fn build_current_purl(&self) -> Option<String> {
935 if let (Some(package_type), Some(name)) = (
936 self.package_type.as_ref(),
937 self.name
938 .as_deref()
939 .filter(|value| !value.trim().is_empty()),
940 ) {
941 let purl_type = match package_type {
942 PackageType::Deno => "generic",
943 _ => package_type.as_str(),
944 };
945
946 let mut purl = PackageUrl::new(purl_type, name).ok()?;
947
948 if let Some(namespace) = self
949 .namespace
950 .as_deref()
951 .filter(|value| !value.trim().is_empty())
952 {
953 purl.with_namespace(namespace).ok()?;
954 }
955
956 if let Some(version) = self
957 .version
958 .as_deref()
959 .filter(|value| !value.trim().is_empty())
960 {
961 purl.with_version(version).ok()?;
962 }
963
964 if let Some(qualifiers) = &self.qualifiers {
965 for (key, value) in qualifiers {
966 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
967 }
968 }
969
970 if let Some(subpath) = self
971 .subpath
972 .as_deref()
973 .filter(|value| !value.trim().is_empty())
974 {
975 purl.with_subpath(subpath).ok()?;
976 }
977
978 return Some(purl.to_string());
979 }
980
981 let existing_purl = self.purl.as_deref()?;
982 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
983
984 if let Some(version) = self
985 .version
986 .as_deref()
987 .filter(|value| !value.trim().is_empty())
988 {
989 purl.with_version(version).ok()?;
990 } else {
991 purl.without_version();
992 }
993
994 Some(purl.to_string())
995 }
996}
997
#[cfg(test)]
mod tests {
    use super::*;

    /// Path used as the datafile location in both tests.
    const MANIFEST_PATH: &str = "project/package.json";

    /// Builds npm package data holding one parser-declared MIT detection whose
    /// match has no `from_file` and whose detection has no `identifier` yet.
    fn npm_package_data_with_declared_mit() -> PackageData {
        let declared_match = Match {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            from_file: None,
            start_line: 1,
            end_line: 1,
            matcher: Some("parser-declared-license".to_string()),
            score: 100.0,
            matched_length: Some(1),
            match_coverage: Some(100.0),
            rule_relevance: Some(100),
            rule_identifier: None,
            rule_url: None,
            matched_text: Some("MIT".to_string()),
            referenced_filenames: None,
            matched_text_diagnostics: None,
        };

        let detection = LicenseDetection {
            license_expression: "mit".to_string(),
            license_expression_spdx: "MIT".to_string(),
            matches: vec![declared_match],
            detection_log: vec![],
            identifier: None,
        };

        PackageData {
            package_type: Some(PackageType::Npm),
            license_detections: vec![detection],
            ..PackageData::default()
        }
    }

    #[test]
    fn file_info_new_backfills_package_detection_provenance() {
        let package_data = npm_package_data_with_declared_mit();

        let file_info = FileInfo::new(
            "package.json".to_string(),
            "package".to_string(),
            ".json".to_string(),
            MANIFEST_PATH.to_string(),
            FileType::File,
            None,
            1,
            None,
            None,
            None,
            None,
            None,
            vec![package_data],
            None,
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
            vec![],
        );

        // File-level detections are populated and carry provenance.
        assert_eq!(file_info.license_detections.len(), 1);
        let file_detection = &file_info.license_detections[0];
        assert_eq!(
            file_detection.matches[0].from_file.as_deref(),
            Some("project/package.json")
        );
        assert!(file_detection.identifier.is_some());

        // The nested package data receives the same provenance.
        let package_detection = &file_info.package_data[0].license_detections[0];
        assert_eq!(
            package_detection.matches[0].from_file.as_deref(),
            Some("project/package.json")
        );
        assert!(package_detection.identifier.is_some());
    }

    #[test]
    fn package_from_package_data_backfills_detection_provenance() {
        let package_data = npm_package_data_with_declared_mit();

        let package = Package::from_package_data(&package_data, MANIFEST_PATH.to_string());

        let detection = &package.license_detections[0];
        assert_eq!(
            detection.matches[0].from_file.as_deref(),
            Some("project/package.json")
        );
        assert!(detection.identifier.is_some());
    }
}
1120
/// A [`Dependency`] annotated with where it was found (`datafile_path`,
/// `datasource_id`) and, optionally, the uid of a package it is attached to.
///
/// Fields marked `skip_serializing_if = "Option::is_none"` are left out of the
/// serialized output when unset; `purl` and `scope` carry no such attribute and
/// are therefore always written.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// Package URL of the dependency, if one is known.
    pub purl: Option<String>,
    /// NOTE(review): presumably the requirement string as written in the manifest — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted_requirement: Option<String>,
    /// Dependency scope; always serialized, even when `None`.
    pub scope: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_runtime: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_optional: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_pinned: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_direct: Option<bool>,
    /// Boxed resolved package carried over from the source dependency, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON values keyed by name.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
    /// Identifier of this dependency record. `from_dependency` derives it from
    /// `purl` via `build_package_uid` and leaves it empty when there is no purl.
    pub dependency_uid: String,
    /// Uid of the package this dependency is attached to, when there is one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub for_package_uid: Option<String>,
    /// Path of the datafile this dependency was taken from, as supplied by the caller.
    pub datafile_path: String,
    /// Identifier of the datasource that produced this dependency.
    pub datasource_id: DatasourceId,
    /// Namespace of the dependency; `from_dependency` always sets this to `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub namespace: Option<String>,
}
1156
1157impl TopLevelDependency {
1158 pub fn from_dependency(
1160 dep: &Dependency,
1161 datafile_path: String,
1162 datasource_id: DatasourceId,
1163 for_package_uid: Option<String>,
1164 ) -> Self {
1165 let dependency_uid = dep
1166 .purl
1167 .as_ref()
1168 .map(|p| build_package_uid(p))
1169 .unwrap_or_default();
1170
1171 TopLevelDependency {
1172 purl: dep.purl.clone(),
1173 extracted_requirement: dep.extracted_requirement.clone(),
1174 scope: dep.scope.clone(),
1175 is_runtime: dep.is_runtime,
1176 is_optional: dep.is_optional,
1177 is_pinned: dep.is_pinned,
1178 is_direct: dep.is_direct,
1179 resolved_package: dep.resolved_package.clone(),
1180 extra_data: dep.extra_data.clone(),
1181 dependency_uid,
1182 for_package_uid,
1183 datafile_path,
1184 datasource_id,
1185 namespace: None,
1186 }
1187 }
1188}
1189
1190pub fn build_package_uid(purl: &str) -> String {
1194 let uuid = Uuid::new_v4();
1195 if purl.contains('?') {
1196 format!("{}&uuid={}", purl, uuid)
1197 } else {
1198 format!("{}?uuid={}", purl, uuid)
1199 }
1200}
1201
/// An email address together with the line range it is associated with.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the range. NOTE(review): presumably a line number in the
    /// scanned file — confirm whether it is 1-based.
    pub start_line: usize,
    /// Last line of the range.
    pub end_line: usize,
}
1208
/// A URL together with the line range it is associated with.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the range. NOTE(review): presumably a line number in the
    /// scanned file — confirm whether it is 1-based.
    pub start_line: usize,
    /// Last line of the range.
    pub end_line: usize,
}
1215
/// Kind of entry described by a `FileInfo` (its `file_type` field, serialized as `type`).
///
/// Serde support is implemented by hand rather than derived: the variants are
/// written and read as the lowercase strings `"file"` and `"directory"`.
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
    /// Serialized as `"file"`.
    File,
    /// Serialized as `"directory"`.
    Directory,
}
1221
1222impl Serialize for FileType {
1223 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1224 where
1225 S: serde::Serializer,
1226 {
1227 let value = match self {
1228 FileType::File => "file",
1229 FileType::Directory => "directory",
1230 };
1231 serializer.serialize_str(value)
1232 }
1233}
1234
1235impl<'de> Deserialize<'de> for FileType {
1236 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1237 where
1238 D: serde::Deserializer<'de>,
1239 {
1240 let value = String::deserialize(deserializer)?;
1241 match value.as_str() {
1242 "file" => Ok(FileType::File),
1243 "directory" => Ok(FileType::Directory),
1244 _ => Err(serde::de::Error::custom("invalid file type")),
1245 }
1246 }
1247}