1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::ser::Error as SerError;
4use serde::{Deserialize, Serialize, Serializer};
5use serde_json::{Map, Value};
6use std::collections::HashMap;
7use std::str::FromStr;
8use uuid::Uuid;
9
10use sha1::{Digest, Sha1};
11
12use super::DatasourceId;
13use super::PackageType;
14use crate::license_detection::tokenize::tokenize_without_stopwords;
15use crate::models::output::Tallies;
16use crate::utils::spdx::combine_license_expressions;
17
/// Scan record for a single path.
///
/// `Deserialize` is derived from the field attributes below. `Serialize` is
/// NOT derived: this file provides a hand-written `impl Serialize for FileInfo`,
/// so the `skip_serializing_if` attributes here do not drive the serialized
/// output by themselves.
///
/// A `FileInfoBuilder` is generated by `derive_builder`; `build_fn(skip)`
/// suppresses the generated `build` method and this file supplies its own
/// `FileInfoBuilder::build`.
#[derive(Debug, Builder, Deserialize, Clone)]
#[builder(build_fn(skip))]
pub struct FileInfo {
    // --- Required fields (no `#[builder(default)]`; `build` reports them as missing) ---
    pub name: String,
    pub base_name: String,
    pub extension: String,
    pub path: String,
    /// Read from the JSON key `type`.
    #[serde(rename = "type")] pub file_type: FileType,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub mime_type: Option<String>,
    /// Read from the JSON key `file_type` (distinct from `type` above).
    #[builder(default)]
    #[serde(rename = "file_type", skip_serializing_if = "Option::is_none", default)]
    pub file_type_label: Option<String>,
    pub size: u64,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub date: Option<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub sha1: Option<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub md5: Option<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub sha256: Option<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub sha1_git: Option<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub programming_language: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub package_data: Vec<PackageData>,
    /// Read from the JSON key `detected_license_expression_spdx`.
    #[serde(rename = "detected_license_expression_spdx")] #[builder(default)]
    pub license_expression: Option<String>,
    #[builder(default)]
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub license_clues: Vec<Match>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub percentage_of_license_text: Option<f64>,
    #[builder(default)]
    #[serde(default)]
    pub copyrights: Vec<Copyright>,
    #[builder(default)]
    #[serde(default)]
    pub holders: Vec<Holder>,
    #[builder(default)]
    #[serde(default)]
    pub authors: Vec<Author>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub emails: Vec<OutputEmail>,
    #[builder(default)]
    #[serde(default)]
    pub urls: Vec<OutputURL>,
    #[builder(default)]
    #[serde(default)]
    pub for_packages: Vec<String>,
    #[builder(default)]
    #[serde(default)]
    pub scan_errors: Vec<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub license_policy: Option<Vec<LicensePolicyEntry>>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_generated: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_binary: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_text: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_archive: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_media: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_source: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub is_script: Option<bool>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub files_count: Option<usize>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub dirs_count: Option<usize>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub size_count: Option<u64>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub source_count: Option<usize>,
    // --- Boolean classification flags; absent keys deserialize to `false` ---
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_legal: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_manifest: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_readme: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_top_level: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_key_file: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "is_false", default)]
    pub is_community: bool,
    #[builder(default)]
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub facets: Vec<String>,
    #[builder(default)]
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub tallies: Option<Tallies>,
}
150
151impl FileInfoBuilder {
152 pub fn build(&self) -> Result<FileInfo, String> {
154 let mut file_info = FileInfo::new(
155 self.name.clone().ok_or("Missing field: name")?,
156 self.base_name.clone().ok_or("Missing field: base_name")?,
157 self.extension.clone().ok_or("Missing field: extension")?,
158 self.path.clone().ok_or("Missing field: path")?,
159 self.file_type.clone().ok_or("Missing field: file_type")?,
160 self.mime_type.clone().flatten(),
161 self.file_type_label.clone().flatten(),
162 self.size.ok_or("Missing field: size")?,
163 self.date.clone().flatten(),
164 self.sha1.clone().flatten(),
165 self.md5.clone().flatten(),
166 self.sha256.clone().flatten(),
167 self.programming_language.clone().flatten(),
168 self.package_data.clone().unwrap_or_default(),
169 self.license_expression.clone().flatten(),
170 self.license_detections.clone().unwrap_or_default(),
171 self.license_clues.clone().unwrap_or_default(),
172 self.copyrights.clone().unwrap_or_default(),
173 self.holders.clone().unwrap_or_default(),
174 self.authors.clone().unwrap_or_default(),
175 self.emails.clone().unwrap_or_default(),
176 self.urls.clone().unwrap_or_default(),
177 self.for_packages.clone().unwrap_or_default(),
178 self.scan_errors.clone().unwrap_or_default(),
179 );
180 file_info.license_policy = self.license_policy.clone().flatten();
181 file_info.sha1_git = self.sha1_git.clone().flatten();
182 file_info.is_binary = self.is_binary.flatten();
183 file_info.is_text = self.is_text.flatten();
184 file_info.is_archive = self.is_archive.flatten();
185 file_info.is_media = self.is_media.flatten();
186 file_info.is_script = self.is_script.flatten();
187 file_info.files_count = self.files_count.flatten();
188 file_info.dirs_count = self.dirs_count.flatten();
189 file_info.size_count = self.size_count.flatten();
190 Ok(file_info)
191 }
192}
193
194impl Serialize for FileInfo {
195 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
196 where
197 S: Serializer,
198 {
199 let mut map = Map::new();
200 insert_json(&mut map, "path", &self.path)?;
201 insert_json(&mut map, "type", &self.file_type)?;
202 insert_json(&mut map, "name", &self.name)?;
203 insert_json(&mut map, "base_name", &self.base_name)?;
204 insert_json(&mut map, "extension", &self.extension)?;
205 insert_json(&mut map, "size", self.size)?;
206
207 if self.should_serialize_info_surface() {
208 insert_json(&mut map, "date", &self.date)?;
209 insert_json(&mut map, "sha1", &self.sha1)?;
210 insert_json(&mut map, "md5", &self.md5)?;
211 insert_json(&mut map, "sha256", &self.sha256)?;
212 insert_json(&mut map, "sha1_git", &self.sha1_git)?;
213 insert_json(&mut map, "mime_type", &self.mime_type)?;
214 insert_json(&mut map, "file_type", &self.file_type_label)?;
215 insert_json(&mut map, "programming_language", &self.programming_language)?;
216 insert_json(&mut map, "is_binary", self.is_binary)?;
217 insert_json(&mut map, "is_text", self.is_text)?;
218 insert_json(&mut map, "is_archive", self.is_archive)?;
219 insert_json(&mut map, "is_media", self.is_media)?;
220 insert_json(&mut map, "is_source", self.is_source)?;
221 insert_json(&mut map, "is_script", self.is_script)?;
222 insert_json(&mut map, "files_count", self.files_count)?;
223 insert_json(&mut map, "dirs_count", self.dirs_count)?;
224 insert_json(&mut map, "size_count", self.size_count)?;
225 }
226
227 insert_json(&mut map, "package_data", &self.package_data)?;
228 insert_json(
229 &mut map,
230 "detected_license_expression_spdx",
231 &self.license_expression,
232 )?;
233 insert_json(&mut map, "license_detections", &self.license_detections)?;
234 if !self.license_clues.is_empty() {
235 insert_json(&mut map, "license_clues", &self.license_clues)?;
236 }
237 if self.percentage_of_license_text.is_some() {
238 insert_json(
239 &mut map,
240 "percentage_of_license_text",
241 self.percentage_of_license_text,
242 )?;
243 }
244 insert_json(&mut map, "copyrights", &self.copyrights)?;
245 insert_json(&mut map, "holders", &self.holders)?;
246 insert_json(&mut map, "authors", &self.authors)?;
247 if !self.emails.is_empty() {
248 insert_json(&mut map, "emails", &self.emails)?;
249 }
250 insert_json(&mut map, "urls", &self.urls)?;
251 insert_json(&mut map, "for_packages", &self.for_packages)?;
252 insert_json(&mut map, "scan_errors", &self.scan_errors)?;
253 if self.license_policy.is_some() {
254 insert_json(&mut map, "license_policy", &self.license_policy)?;
255 }
256 if self.is_generated.is_some() {
257 insert_json(&mut map, "is_generated", self.is_generated)?;
258 }
259 if self.source_count.is_some() {
260 insert_json(&mut map, "source_count", self.source_count)?;
261 }
262 if self.is_legal {
263 insert_json(&mut map, "is_legal", self.is_legal)?;
264 }
265 if self.is_manifest {
266 insert_json(&mut map, "is_manifest", self.is_manifest)?;
267 }
268 if self.is_readme {
269 insert_json(&mut map, "is_readme", self.is_readme)?;
270 }
271 if self.is_top_level {
272 insert_json(&mut map, "is_top_level", self.is_top_level)?;
273 }
274 if self.is_key_file {
275 insert_json(&mut map, "is_key_file", self.is_key_file)?;
276 }
277 if self.is_community {
278 insert_json(&mut map, "is_community", self.is_community)?;
279 }
280 if !self.facets.is_empty() {
281 insert_json(&mut map, "facets", &self.facets)?;
282 }
283 if self.tallies.is_some() {
284 insert_json(&mut map, "tallies", &self.tallies)?;
285 }
286
287 map.serialize(serializer)
288 }
289}
290
291impl FileInfo {
292 fn should_serialize_info_surface(&self) -> bool {
293 self.date.is_some()
294 || self.sha1.is_some()
295 || self.md5.is_some()
296 || self.sha256.is_some()
297 || self.sha1_git.is_some()
298 || self.mime_type.is_some()
299 || self.file_type_label.is_some()
300 || self.programming_language.is_some()
301 || self.is_binary.is_some()
302 || self.is_text.is_some()
303 || self.is_archive.is_some()
304 || self.is_media.is_some()
305 || self.is_source.is_some()
306 || self.is_script.is_some()
307 || self.files_count.is_some()
308 || self.dirs_count.is_some()
309 || self.size_count.is_some()
310 }
311}
312
313fn insert_json<S: Serialize, E: SerError>(
314 map: &mut Map<String, Value>,
315 key: &str,
316 value: S,
317) -> Result<(), E> {
318 map.insert(
319 key.to_string(),
320 serde_json::to_value(value).map_err(E::custom)?,
321 );
322 Ok(())
323}
324
325impl FileInfo {
326 #[allow(clippy::too_many_arguments)]
327 pub fn new(
329 name: String,
330 base_name: String,
331 extension: String,
332 path: String,
333 file_type: FileType,
334 mime_type: Option<String>,
335 file_type_label: Option<String>,
336 size: u64,
337 date: Option<String>,
338 sha1: Option<String>,
339 md5: Option<String>,
340 sha256: Option<String>,
341 programming_language: Option<String>,
342 package_data: Vec<PackageData>,
343 mut license_expression: Option<String>,
344 mut license_detections: Vec<LicenseDetection>,
345 license_clues: Vec<Match>,
346 copyrights: Vec<Copyright>,
347 holders: Vec<Holder>,
348 authors: Vec<Author>,
349 emails: Vec<OutputEmail>,
350 urls: Vec<OutputURL>,
351 for_packages: Vec<String>,
352 scan_errors: Vec<String>,
353 ) -> Self {
354 let mut package_data = package_data;
355 for package in &mut package_data {
356 enrich_package_data_license_provenance(package, &path);
357 }
358
359 license_expression = license_expression.or_else(|| {
361 let expressions = package_data
362 .iter()
363 .filter_map(|pkg| pkg.get_license_expression());
364 combine_license_expressions(expressions)
365 });
366
367 if license_detections.is_empty() {
369 for pkg in &package_data {
370 license_detections.extend(pkg.license_detections.clone());
371 }
372 }
373
374 if license_expression.is_none() && !license_detections.is_empty() {
376 let expressions = license_detections
377 .iter()
378 .map(|detection| detection.license_expression.clone());
379 license_expression = combine_license_expressions(expressions);
380 }
381
382 let mut file_info = FileInfo {
383 name,
384 base_name,
385 extension,
386 path,
387 file_type,
388 mime_type,
389 file_type_label,
390 size,
391 date,
392 sha1,
393 md5,
394 sha256,
395 sha1_git: None,
396 programming_language,
397 package_data,
398 license_expression,
399 license_detections,
400 license_clues,
401 percentage_of_license_text: None,
402 copyrights,
403 holders,
404 authors,
405 emails,
406 urls,
407 for_packages,
408 scan_errors,
409 license_policy: None,
410 is_generated: None,
411 is_binary: None,
412 is_text: None,
413 is_archive: None,
414 is_media: None,
415 is_source: None,
416 is_script: None,
417 files_count: None,
418 dirs_count: None,
419 size_count: None,
420 source_count: None,
421 is_legal: false,
422 is_manifest: false,
423 is_readme: false,
424 is_top_level: false,
425 is_key_file: false,
426 is_community: false,
427 facets: vec![],
428 tallies: None,
429 };
430 file_info.backfill_license_provenance();
431 file_info
432 }
433
434 pub fn backfill_license_provenance(&mut self) {
435 for detection in &mut self.license_detections {
436 enrich_license_detection_provenance(detection, &self.path);
437 }
438
439 for package in &mut self.package_data {
440 enrich_package_data_license_provenance(package, &self.path);
441 }
442 }
443}
444
445fn enrich_package_data_license_provenance(package_data: &mut PackageData, path: &str) {
446 for detection in &mut package_data.license_detections {
447 enrich_license_detection_provenance(detection, path);
448 }
449 for detection in &mut package_data.other_license_detections {
450 enrich_license_detection_provenance(detection, path);
451 }
452}
453
454pub(crate) fn enrich_license_detection_provenance(detection: &mut LicenseDetection, path: &str) {
455 for detection_match in &mut detection.matches {
456 if detection_match.from_file.is_none() {
457 detection_match.from_file = Some(path.to_string());
458 }
459 }
460
461 if detection.identifier.is_none() {
462 detection.identifier = Some(compute_public_detection_identifier(detection));
463 }
464}
465
466fn compute_public_detection_identifier(detection: &LicenseDetection) -> String {
467 let expression = python_safe_name(&detection.license_expression);
468 let mut hasher = Sha1::new();
469 hasher.update(format_public_detection_content(detection).as_bytes());
470 let hex_str = hex::encode(hasher.finalize());
471 let uuid_hex = &hex_str[..32];
472 let content_uuid = uuid::Uuid::parse_str(uuid_hex)
473 .map(|uuid| uuid.to_string())
474 .unwrap_or_else(|_| uuid_hex.to_string());
475
476 format!("{}-{}", expression, content_uuid)
477}
478
479fn format_public_detection_content(detection: &LicenseDetection) -> String {
480 let mut result = String::from("(");
481
482 for (index, detection_match) in detection.matches.iter().enumerate() {
483 if index > 0 {
484 result.push_str(", ");
485 }
486 result.push_str(&format!(
487 "({}, {}, {})",
488 python_str_repr(
489 detection_match
490 .rule_identifier
491 .as_deref()
492 .or(detection_match.matcher.as_deref())
493 .unwrap_or("parser-declared-license")
494 ),
495 detection_match.score as f32,
496 python_token_tuple_repr(&tokenize_without_stopwords(
497 detection_match.matched_text.as_deref().unwrap_or_default(),
498 )),
499 ));
500 }
501
502 if detection.matches.len() == 1 {
503 result.push(',');
504 }
505 result.push(')');
506 result
507}
508
/// Reduces `value` to alphanumeric runs joined by single underscores.
///
/// Every run of non-alphanumeric characters (underscores included) acts as
/// one separator, and separators at the start or end are dropped. Input
/// without any alphanumeric character yields an empty string.
fn python_safe_name(value: &str) -> String {
    value
        .split(|character: char| !character.is_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("_")
}
530
/// Quotes `value` the way Python's `repr` picks quotes for a plain string.
///
/// Double quotes are used only when the text contains a single quote and no
/// double quote; otherwise single quotes are used. Backslashes and the chosen
/// quote character are escaped with a backslash. No other characters are
/// escaped.
fn python_str_repr(value: &str) -> String {
    let prefer_double = value.contains('\'') && !value.contains('"');
    let quote = if prefer_double { '"' } else { '\'' };

    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push(quote);
    for character in value.chars() {
        if character == '\\' || character == quote {
            quoted.push('\\');
        }
        quoted.push(character);
    }
    quoted.push(quote);
    quoted
}
538
539fn python_token_tuple_repr(tokens: &[String]) -> String {
540 if tokens.is_empty() {
541 return String::from("()");
542 }
543
544 let mut result = String::from("(");
545 for (index, token) in tokens.iter().enumerate() {
546 if index > 0 {
547 result.push_str(", ");
548 }
549 result.push_str(&python_str_repr(token));
550 }
551
552 if tokens.len() == 1 {
553 result.push(',');
554 }
555 result.push(')');
556 result
557}
558
/// Package metadata record. Every field is optional or defaultable, and the
/// type derives `Default`, so a value can be created empty and filled in.
///
/// Both `Serialize` and `Deserialize` are derived; the serde attributes on the
/// fields therefore define the JSON shape directly.
#[derive(Serialize, Deserialize, Debug, Clone, Default)]
pub struct PackageData {
    /// Written to and read from the JSON key `type`.
    #[serde(rename = "type")] pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<String>,
    pub md5: Option<String>,
    pub sha256: Option<String>,
    pub sha512: Option<String>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    /// Source of the expression returned by `get_license_expression`.
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
621
622fn serialize_optional_map_as_object<S, T>(
623 value: &Option<HashMap<String, T>>,
624 serializer: S,
625) -> Result<S::Ok, S::Error>
626where
627 S: Serializer,
628 T: Serialize,
629{
630 match value {
631 Some(map) => map.serialize(serializer),
632 None => HashMap::<String, T>::new().serialize(serializer),
633 }
634}
635
636impl PackageData {
637 pub fn get_license_expression(&self) -> Option<String> {
640 if self.license_detections.is_empty() {
641 return None;
642 }
643
644 let expressions = self
645 .license_detections
646 .iter()
647 .map(|detection| detection.license_expression.clone());
648 combine_license_expressions(expressions)
649 }
650}
651
/// One license detection: a license expression together with the matches
/// that support it.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LicenseDetection {
    pub license_expression: String,
    pub license_expression_spdx: String,
    pub matches: Vec<Match>,
    /// Omitted from serialized output when empty.
    #[serde(skip_serializing_if = "Vec::is_empty", default)]
    pub detection_log: Vec<String>,
    /// Omitted from serialized output when `None`. When absent, it is filled
    /// in by `enrich_license_detection_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub identifier: Option<String>,
}
665
/// One license match inside a [`LicenseDetection`] (also used for
/// `FileInfo::license_clues`).
///
/// Optional fields marked `skip_serializing_if` are left out of serialized
/// output when `None`; `rule_url` has no such attribute and is therefore
/// always written, as `null` when absent.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Match {
    pub license_expression: String,
    pub license_expression_spdx: String,
    /// When `None`, set to the owning file's path by
    /// `enrich_license_detection_provenance`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub from_file: Option<String>,
    pub start_line: usize,
    pub end_line: usize,
    /// Fallback label for the detection identifier when `rule_identifier` is
    /// absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matcher: Option<String>,
    pub score: f64,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_length: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub match_coverage: Option<f64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rule_relevance: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rule_identifier: Option<String>,
    pub rule_url: Option<String>,
    /// Tokenized when computing the detection identifier.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_text: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub matched_text_diagnostics: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none", default)]
    pub referenced_filenames: Option<Vec<String>>,
}
696
/// A copyright statement together with the line span it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Copyright {
    pub copyright: String,
    pub start_line: usize,
    pub end_line: usize,
}
703
/// A copyright holder together with the line span it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Holder {
    pub holder: String,
    pub start_line: usize,
    pub end_line: usize,
}
710
/// An author together with the line span it was reported for.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Author {
    pub author: String,
    pub start_line: usize,
    pub end_line: usize,
}
717
/// A dependency entry of a package. All fields are optional; none of them is
/// skipped on serialization, so absent values are written as `null`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Dependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    /// Boxed because `ResolvedPackage` itself contains `Vec<Dependency>`.
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
}
735
/// Package record attached to a [`Dependency`] through `resolved_package`.
///
/// It carries the same fields as [`PackageData`], except that `package_type`,
/// `namespace`, `name` and `version` are required here rather than optional.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct ResolvedPackage {
    /// Written to and read from the JSON key `type`.
    #[serde(rename = "type")]
    pub package_type: PackageType,
    pub namespace: String,
    pub name: String,
    pub version: String,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<String>,
    pub md5: Option<String>,
    pub sha256: Option<String>,
    pub sha512: Option<String>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub file_references: Vec<FileReference>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    #[serde(default)]
    pub dependencies: Vec<Dependency>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub datasource_id: Option<DatasourceId>,
    pub purl: Option<String>,
}
793
794impl ResolvedPackage {
795 pub fn new(
796 package_type: PackageType,
797 namespace: String,
798 name: String,
799 version: String,
800 ) -> Self {
801 Self {
802 package_type,
803 namespace,
804 name,
805 version,
806 qualifiers: None,
807 subpath: None,
808 primary_language: None,
809 description: None,
810 release_date: None,
811 parties: vec![],
812 keywords: vec![],
813 homepage_url: None,
814 download_url: None,
815 size: None,
816 sha1: None,
817 md5: None,
818 sha256: None,
819 sha512: None,
820 bug_tracking_url: None,
821 code_view_url: None,
822 vcs_url: None,
823 copyright: None,
824 holder: None,
825 declared_license_expression: None,
826 declared_license_expression_spdx: None,
827 license_detections: vec![],
828 other_license_expression: None,
829 other_license_expression_spdx: None,
830 other_license_detections: vec![],
831 extracted_license_statement: None,
832 notice_text: None,
833 source_packages: vec![],
834 file_references: vec![],
835 is_private: false,
836 is_virtual: false,
837 extra_data: None,
838 dependencies: vec![],
839 repository_homepage_url: None,
840 repository_download_url: None,
841 api_data_url: None,
842 datasource_id: None,
843 purl: None,
844 }
845 }
846
847 pub fn from_package_data(package_data: &PackageData, fallback_type: PackageType) -> Self {
848 Self {
849 package_type: package_data.package_type.unwrap_or(fallback_type),
850 namespace: package_data.namespace.clone().unwrap_or_default(),
851 name: package_data.name.clone().unwrap_or_default(),
852 version: package_data.version.clone().unwrap_or_default(),
853 qualifiers: package_data.qualifiers.clone(),
854 subpath: package_data.subpath.clone(),
855 primary_language: package_data.primary_language.clone(),
856 description: package_data.description.clone(),
857 release_date: package_data.release_date.clone(),
858 parties: package_data.parties.clone(),
859 keywords: package_data.keywords.clone(),
860 homepage_url: package_data.homepage_url.clone(),
861 download_url: package_data.download_url.clone(),
862 size: package_data.size,
863 sha1: package_data.sha1.clone(),
864 md5: package_data.md5.clone(),
865 sha256: package_data.sha256.clone(),
866 sha512: package_data.sha512.clone(),
867 bug_tracking_url: package_data.bug_tracking_url.clone(),
868 code_view_url: package_data.code_view_url.clone(),
869 vcs_url: package_data.vcs_url.clone(),
870 copyright: package_data.copyright.clone(),
871 holder: package_data.holder.clone(),
872 declared_license_expression: package_data.declared_license_expression.clone(),
873 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
874 license_detections: package_data.license_detections.clone(),
875 other_license_expression: package_data.other_license_expression.clone(),
876 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
877 other_license_detections: package_data.other_license_detections.clone(),
878 extracted_license_statement: package_data.extracted_license_statement.clone(),
879 notice_text: package_data.notice_text.clone(),
880 source_packages: package_data.source_packages.clone(),
881 file_references: package_data.file_references.clone(),
882 is_private: package_data.is_private,
883 is_virtual: package_data.is_virtual,
884 extra_data: package_data.extra_data.clone(),
885 dependencies: package_data.dependencies.clone(),
886 repository_homepage_url: package_data.repository_homepage_url.clone(),
887 repository_download_url: package_data.repository_download_url.clone(),
888 api_data_url: package_data.api_data_url.clone(),
889 datasource_id: package_data.datasource_id,
890 purl: package_data.purl.clone(),
891 }
892 }
893}
894
/// A party listed in a package's `parties`. Every field is optional and is
/// left out of serialized output when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Party {
    /// Raw identifier because `type` is a Rust keyword; the JSON key is `type`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub r#type: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub email: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub organization: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub organization_url: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timezone: Option<String>,
}
917
/// A file listed in a package's `file_references`. Only `path` is required;
/// the optional fields are left out of serialized output when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct FileReference {
    pub path: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub size: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha1: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub md5: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha256: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub sha512: Option<String>,
    /// Unlike `extra_data` on the package types, this one is skipped when
    /// `None` instead of being written as an empty object.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
}
937
/// Package record built from a [`PackageData`] by `Package::from_package_data`.
///
/// It repeats the descriptive fields of `PackageData` but has no
/// `file_references`, `dependencies` or `datasource_id`; instead it adds
/// `package_uid`, `datafile_paths` and `datasource_ids`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct Package {
    /// Written to and read from the JSON key `type`.
    #[serde(rename = "type")]
    pub package_type: Option<PackageType>,
    pub namespace: Option<String>,
    pub name: Option<String>,
    pub version: Option<String>,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub qualifiers: Option<HashMap<String, String>>,
    pub subpath: Option<String>,
    pub primary_language: Option<String>,
    pub description: Option<String>,
    pub release_date: Option<String>,
    #[serde(default)]
    pub parties: Vec<Party>,
    #[serde(default)]
    pub keywords: Vec<String>,
    pub homepage_url: Option<String>,
    pub download_url: Option<String>,
    pub size: Option<u64>,
    pub sha1: Option<String>,
    pub md5: Option<String>,
    pub sha256: Option<String>,
    pub sha512: Option<String>,
    pub bug_tracking_url: Option<String>,
    pub code_view_url: Option<String>,
    pub vcs_url: Option<String>,
    pub copyright: Option<String>,
    pub holder: Option<String>,
    pub declared_license_expression: Option<String>,
    pub declared_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub license_detections: Vec<LicenseDetection>,
    pub other_license_expression: Option<String>,
    pub other_license_expression_spdx: Option<String>,
    #[serde(default)]
    pub other_license_detections: Vec<LicenseDetection>,
    pub extracted_license_statement: Option<String>,
    pub notice_text: Option<String>,
    #[serde(default)]
    pub source_packages: Vec<String>,
    #[serde(default)]
    pub is_private: bool,
    #[serde(default)]
    pub is_virtual: bool,
    /// Serialized as an object even when `None` (then an empty object).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    pub repository_homepage_url: Option<String>,
    pub repository_download_url: Option<String>,
    pub api_data_url: Option<String>,
    pub purl: Option<String>,
    /// Derived from `purl` via `build_package_uid` in `from_package_data`;
    /// empty when the source data has no purl. Required on deserialization.
    pub package_uid: String,
    /// Required on deserialization (no `#[serde(default)]`).
    pub datafile_paths: Vec<String>,
    /// Required on deserialization (no `#[serde(default)]`).
    pub datasource_ids: Vec<DatasourceId>,
}
1005
1006impl Package {
1007 pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
1012 let mut package_data = package_data.clone();
1013 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
1014
1015 let package_uid = package_data
1016 .purl
1017 .as_ref()
1018 .map(|p| build_package_uid(p))
1019 .unwrap_or_default();
1020
1021 Package {
1022 package_type: package_data.package_type,
1023 namespace: package_data.namespace.clone(),
1024 name: package_data.name.clone(),
1025 version: package_data.version.clone(),
1026 qualifiers: package_data.qualifiers.clone(),
1027 subpath: package_data.subpath.clone(),
1028 primary_language: package_data.primary_language.clone(),
1029 description: package_data.description.clone(),
1030 release_date: package_data.release_date.clone(),
1031 parties: package_data.parties.clone(),
1032 keywords: package_data.keywords.clone(),
1033 homepage_url: package_data.homepage_url.clone(),
1034 download_url: package_data.download_url.clone(),
1035 size: package_data.size,
1036 sha1: package_data.sha1.clone(),
1037 md5: package_data.md5.clone(),
1038 sha256: package_data.sha256.clone(),
1039 sha512: package_data.sha512.clone(),
1040 bug_tracking_url: package_data.bug_tracking_url.clone(),
1041 code_view_url: package_data.code_view_url.clone(),
1042 vcs_url: package_data.vcs_url.clone(),
1043 copyright: package_data.copyright.clone(),
1044 holder: package_data.holder.clone(),
1045 declared_license_expression: package_data.declared_license_expression.clone(),
1046 declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
1047 license_detections: package_data.license_detections.clone(),
1048 other_license_expression: package_data.other_license_expression.clone(),
1049 other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
1050 other_license_detections: package_data.other_license_detections.clone(),
1051 extracted_license_statement: package_data.extracted_license_statement.clone(),
1052 notice_text: package_data.notice_text.clone(),
1053 source_packages: package_data.source_packages.clone(),
1054 is_private: package_data.is_private,
1055 is_virtual: package_data.is_virtual,
1056 extra_data: package_data.extra_data.clone(),
1057 repository_homepage_url: package_data.repository_homepage_url.clone(),
1058 repository_download_url: package_data.repository_download_url.clone(),
1059 api_data_url: package_data.api_data_url.clone(),
1060 purl: package_data.purl.clone(),
1061 package_uid,
1062 datafile_paths: vec![datafile_path],
1063 datasource_ids: if let Some(dsid) = package_data.datasource_id {
1064 vec![dsid]
1065 } else {
1066 vec![]
1067 },
1068 }
1069 }
1070
1071 pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
1077 let mut package_data = package_data.clone();
1078 enrich_package_data_license_provenance(&mut package_data, &datafile_path);
1079
1080 if let Some(dsid) = package_data.datasource_id {
1081 self.datasource_ids.push(dsid);
1082 }
1083 self.datafile_paths.push(datafile_path);
1084
1085 macro_rules! fill_if_empty {
1086 ($field:ident) => {
1087 if self.$field.is_none() {
1088 self.$field = package_data.$field.clone();
1089 }
1090 };
1091 }
1092
1093 fill_if_empty!(package_type);
1094 fill_if_empty!(name);
1095 fill_if_empty!(namespace);
1096 fill_if_empty!(version);
1097 fill_if_empty!(qualifiers);
1098 fill_if_empty!(subpath);
1099 fill_if_empty!(primary_language);
1100 fill_if_empty!(description);
1101 fill_if_empty!(release_date);
1102 fill_if_empty!(homepage_url);
1103 fill_if_empty!(download_url);
1104 fill_if_empty!(size);
1105 fill_if_empty!(sha1);
1106 fill_if_empty!(md5);
1107 fill_if_empty!(sha256);
1108 fill_if_empty!(sha512);
1109 fill_if_empty!(bug_tracking_url);
1110 fill_if_empty!(code_view_url);
1111 fill_if_empty!(vcs_url);
1112 fill_if_empty!(copyright);
1113 fill_if_empty!(holder);
1114 fill_if_empty!(declared_license_expression);
1115 fill_if_empty!(declared_license_expression_spdx);
1116 fill_if_empty!(other_license_expression);
1117 fill_if_empty!(other_license_expression_spdx);
1118 fill_if_empty!(extracted_license_statement);
1119 fill_if_empty!(notice_text);
1120 match (&mut self.extra_data, &package_data.extra_data) {
1121 (None, Some(extra_data)) => {
1122 self.extra_data = Some(extra_data.clone());
1123 }
1124 (Some(existing), Some(incoming)) => {
1125 for (key, value) in incoming {
1126 existing.entry(key.clone()).or_insert_with(|| value.clone());
1127 }
1128 }
1129 _ => {}
1130 }
1131 fill_if_empty!(repository_homepage_url);
1132 fill_if_empty!(repository_download_url);
1133 fill_if_empty!(api_data_url);
1134
1135 for party in &package_data.parties {
1136 if let Some(existing) = self.parties.iter_mut().find(|p| {
1137 p.role == party.role
1138 && ((p.name.is_some() && p.name == party.name)
1139 || (p.email.is_some() && p.email == party.email))
1140 }) {
1141 if existing.name.is_none() {
1142 existing.name = party.name.clone();
1143 }
1144 if existing.email.is_none() {
1145 existing.email = party.email.clone();
1146 }
1147 } else {
1148 self.parties.push(party.clone());
1149 }
1150 }
1151
1152 for keyword in &package_data.keywords {
1153 if !self.keywords.contains(keyword) {
1154 self.keywords.push(keyword.clone());
1155 }
1156 }
1157
1158 for detection in &package_data.license_detections {
1159 self.license_detections.push(detection.clone());
1160 }
1161
1162 for detection in &package_data.other_license_detections {
1163 self.other_license_detections.push(detection.clone());
1164 }
1165
1166 for source_pkg in &package_data.source_packages {
1167 if !self.source_packages.contains(source_pkg) {
1168 self.source_packages.push(source_pkg.clone());
1169 }
1170 }
1171
1172 self.refresh_identity();
1173 }
1174
1175 pub fn backfill_license_provenance(&mut self) {
1176 let Some(datafile_path) = self.datafile_paths.first().cloned() else {
1177 return;
1178 };
1179
1180 for detection in &mut self.license_detections {
1181 enrich_license_detection_provenance(detection, &datafile_path);
1182 }
1183 for detection in &mut self.other_license_detections {
1184 enrich_license_detection_provenance(detection, &datafile_path);
1185 }
1186 }
1187
1188 fn refresh_identity(&mut self) {
1189 let Some(next_purl) = self.build_current_purl() else {
1190 return;
1191 };
1192
1193 if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
1194 self.package_uid = build_package_uid(&next_purl);
1195 }
1196
1197 self.purl = Some(next_purl);
1198 }
1199
1200 fn build_current_purl(&self) -> Option<String> {
1201 if let (Some(package_type), Some(name)) = (
1202 self.package_type.as_ref(),
1203 self.name
1204 .as_deref()
1205 .filter(|value| !value.trim().is_empty()),
1206 ) {
1207 let purl_type = match package_type {
1208 PackageType::Deno => "generic",
1209 _ => package_type.as_str(),
1210 };
1211
1212 let mut purl = PackageUrl::new(purl_type, name).ok()?;
1213
1214 if let Some(namespace) = self
1215 .namespace
1216 .as_deref()
1217 .filter(|value| !value.trim().is_empty())
1218 {
1219 purl.with_namespace(namespace).ok()?;
1220 }
1221
1222 if let Some(version) = self
1223 .version
1224 .as_deref()
1225 .filter(|value| !value.trim().is_empty())
1226 {
1227 purl.with_version(version).ok()?;
1228 }
1229
1230 if let Some(qualifiers) = &self.qualifiers {
1231 for (key, value) in qualifiers {
1232 purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
1233 }
1234 }
1235
1236 if let Some(subpath) = self
1237 .subpath
1238 .as_deref()
1239 .filter(|value| !value.trim().is_empty())
1240 {
1241 purl.with_subpath(subpath).ok()?;
1242 }
1243
1244 return Some(purl.to_string());
1245 }
1246
1247 let existing_purl = self.purl.as_deref()?;
1248 let mut purl = PackageUrl::from_str(existing_purl).ok()?;
1249
1250 if let Some(version) = self
1251 .version
1252 .as_deref()
1253 .filter(|value| !value.trim().is_empty())
1254 {
1255 purl.with_version(version).ok()?;
1256 } else {
1257 purl.without_version();
1258 }
1259
1260 Some(purl.to_string())
1261 }
1262}
1263
1264#[cfg(test)]
1265mod tests {
1266 use super::*;
1267 use serde_json::json;
1268
1269 fn assert_has_key(value: &Value, key: &str) {
1270 assert!(
1271 value
1272 .as_object()
1273 .is_some_and(|object| object.contains_key(key)),
1274 "missing key {key} in {value:#?}"
1275 );
1276 }
1277
1278 #[test]
1279 fn file_info_new_backfills_package_detection_provenance() {
1280 let package_data = PackageData {
1281 package_type: Some(PackageType::Npm),
1282 license_detections: vec![LicenseDetection {
1283 license_expression: "mit".to_string(),
1284 license_expression_spdx: "MIT".to_string(),
1285 matches: vec![Match {
1286 license_expression: "mit".to_string(),
1287 license_expression_spdx: "MIT".to_string(),
1288 from_file: None,
1289 start_line: 1,
1290 end_line: 1,
1291 matcher: Some("parser-declared-license".to_string()),
1292 score: 100.0,
1293 matched_length: Some(1),
1294 match_coverage: Some(100.0),
1295 rule_relevance: Some(100),
1296 rule_identifier: None,
1297 rule_url: None,
1298 matched_text: Some("MIT".to_string()),
1299 referenced_filenames: None,
1300 matched_text_diagnostics: None,
1301 }],
1302 detection_log: vec![],
1303 identifier: None,
1304 }],
1305 ..PackageData::default()
1306 };
1307
1308 let file_info = FileInfo::new(
1309 "package.json".to_string(),
1310 "package".to_string(),
1311 ".json".to_string(),
1312 "project/package.json".to_string(),
1313 FileType::File,
1314 None,
1315 None,
1316 1,
1317 None,
1318 None,
1319 None,
1320 None,
1321 None,
1322 vec![package_data],
1323 None,
1324 vec![],
1325 vec![],
1326 vec![],
1327 vec![],
1328 vec![],
1329 vec![],
1330 vec![],
1331 vec![],
1332 vec![],
1333 );
1334
1335 assert_eq!(file_info.license_detections.len(), 1);
1336 assert_eq!(
1337 file_info.license_detections[0].matches[0]
1338 .from_file
1339 .as_deref(),
1340 Some("project/package.json")
1341 );
1342 assert!(file_info.license_detections[0].identifier.is_some());
1343 assert_eq!(
1344 file_info.package_data[0].license_detections[0].matches[0]
1345 .from_file
1346 .as_deref(),
1347 Some("project/package.json")
1348 );
1349 assert!(
1350 file_info.package_data[0].license_detections[0]
1351 .identifier
1352 .is_some()
1353 );
1354 }
1355
1356 #[test]
1357 fn package_from_package_data_backfills_detection_provenance() {
1358 let package_data = PackageData {
1359 package_type: Some(PackageType::Npm),
1360 license_detections: vec![LicenseDetection {
1361 license_expression: "mit".to_string(),
1362 license_expression_spdx: "MIT".to_string(),
1363 matches: vec![Match {
1364 license_expression: "mit".to_string(),
1365 license_expression_spdx: "MIT".to_string(),
1366 from_file: None,
1367 start_line: 1,
1368 end_line: 1,
1369 matcher: Some("parser-declared-license".to_string()),
1370 score: 100.0,
1371 matched_length: Some(1),
1372 match_coverage: Some(100.0),
1373 rule_relevance: Some(100),
1374 rule_identifier: None,
1375 rule_url: None,
1376 matched_text: Some("MIT".to_string()),
1377 referenced_filenames: None,
1378 matched_text_diagnostics: None,
1379 }],
1380 detection_log: vec![],
1381 identifier: None,
1382 }],
1383 ..PackageData::default()
1384 };
1385
1386 let package = Package::from_package_data(&package_data, "project/package.json".to_string());
1387
1388 assert_eq!(
1389 package.license_detections[0].matches[0]
1390 .from_file
1391 .as_deref(),
1392 Some("project/package.json")
1393 );
1394 assert!(package.license_detections[0].identifier.is_some());
1395 }
1396
1397 #[test]
1398 fn package_data_serialization_includes_scancode_style_defaults() {
1399 let package_data = PackageData {
1400 package_type: Some(PackageType::Npm),
1401 name: Some("left-pad".to_string()),
1402 datasource_id: Some(DatasourceId::NpmPackageJson),
1403 ..PackageData::default()
1404 };
1405
1406 let value = serde_json::to_value(&package_data).expect("package data should serialize");
1407
1408 assert_has_key(&value, "namespace");
1409 assert_eq!(value["namespace"], Value::Null);
1410 assert_eq!(value["qualifiers"], json!({}));
1411 assert_eq!(value["subpath"], Value::Null);
1412 assert_eq!(value["description"], Value::Null);
1413 assert_eq!(value["license_detections"], json!([]));
1414 assert_eq!(value["other_license_detections"], json!([]));
1415 assert_eq!(value["source_packages"], json!([]));
1416 assert_eq!(value["file_references"], json!([]));
1417 assert_eq!(value["is_private"], json!(false));
1418 assert_eq!(value["is_virtual"], json!(false));
1419 assert_eq!(value["extra_data"], json!({}));
1420 assert_eq!(value["repository_homepage_url"], Value::Null);
1421 assert_eq!(value["repository_download_url"], Value::Null);
1422 assert_eq!(value["api_data_url"], Value::Null);
1423 assert_has_key(&value, "datasource_id");
1424 assert_eq!(value["purl"], Value::Null);
1425 }
1426
1427 #[test]
1428 fn package_serialization_includes_scancode_style_defaults() {
1429 let package = Package::from_package_data(
1430 &PackageData {
1431 package_type: Some(PackageType::Npm),
1432 name: Some("left-pad".to_string()),
1433 datasource_id: Some(DatasourceId::NpmPackageJson),
1434 ..PackageData::default()
1435 },
1436 "project/package.json".to_string(),
1437 );
1438
1439 let value = serde_json::to_value(&package).expect("package should serialize");
1440
1441 assert_eq!(value["namespace"], Value::Null);
1442 assert_eq!(value["qualifiers"], json!({}));
1443 assert_eq!(value["subpath"], Value::Null);
1444 assert_eq!(value["keywords"], json!([]));
1445 assert_eq!(value["license_detections"], json!([]));
1446 assert_eq!(value["other_license_detections"], json!([]));
1447 assert_eq!(value["source_packages"], json!([]));
1448 assert_eq!(value["is_private"], json!(false));
1449 assert_eq!(value["is_virtual"], json!(false));
1450 assert_eq!(value["extra_data"], json!({}));
1451 assert_eq!(value["repository_homepage_url"], Value::Null);
1452 assert_eq!(value["repository_download_url"], Value::Null);
1453 assert_eq!(value["api_data_url"], Value::Null);
1454 assert_eq!(value["purl"], Value::Null);
1455 }
1456
1457 #[test]
1458 fn dependency_shapes_serialize_with_explicit_nulls_and_empty_objects() {
1459 let dependency = Dependency {
1460 purl: None,
1461 extracted_requirement: None,
1462 scope: None,
1463 is_runtime: None,
1464 is_optional: None,
1465 is_pinned: None,
1466 is_direct: None,
1467 resolved_package: None,
1468 extra_data: None,
1469 };
1470
1471 let dependency_value =
1472 serde_json::to_value(&dependency).expect("dependency should serialize");
1473 assert_eq!(dependency_value["extracted_requirement"], Value::Null);
1474 assert_eq!(dependency_value["is_runtime"], Value::Null);
1475 assert_eq!(dependency_value["is_optional"], Value::Null);
1476 assert_eq!(dependency_value["is_pinned"], Value::Null);
1477 assert_eq!(dependency_value["is_direct"], Value::Null);
1478 assert_eq!(dependency_value["resolved_package"], Value::Null);
1479 assert_eq!(dependency_value["extra_data"], json!({}));
1480
1481 let top_level = TopLevelDependency::from_dependency(
1482 &dependency,
1483 "project/package-lock.json".to_string(),
1484 DatasourceId::NpmPackageLockJson,
1485 None,
1486 );
1487 let top_level_value =
1488 serde_json::to_value(&top_level).expect("top-level dependency should serialize");
1489
1490 assert_eq!(top_level_value["resolved_package"], Value::Null);
1491 assert_eq!(top_level_value["extra_data"], json!({}));
1492 assert_eq!(top_level_value["for_package_uid"], Value::Null);
1493 assert_eq!(top_level_value["namespace"], Value::Null);
1494 }
1495
1496 #[test]
1497 fn nested_resolved_package_serialization_uses_full_package_shape() {
1498 let dependency = Dependency {
1499 purl: Some("pkg:npm/left-pad@1.3.0".to_string()),
1500 extracted_requirement: Some("1.3.0".to_string()),
1501 scope: Some("dependencies".to_string()),
1502 is_runtime: Some(true),
1503 is_optional: Some(false),
1504 is_pinned: Some(true),
1505 is_direct: Some(true),
1506 resolved_package: Some(Box::new(ResolvedPackage {
1507 primary_language: Some("JavaScript".to_string()),
1508 datasource_id: Some(DatasourceId::NpmPackageLockJson),
1509 purl: Some("pkg:npm/left-pad@1.3.0".to_string()),
1510 ..ResolvedPackage::new(
1511 PackageType::Npm,
1512 String::new(),
1513 "left-pad".to_string(),
1514 "1.3.0".to_string(),
1515 )
1516 })),
1517 extra_data: None,
1518 };
1519
1520 let value = serde_json::to_value(&dependency).expect("dependency should serialize");
1521 let resolved_package = &value["resolved_package"];
1522
1523 assert_eq!(resolved_package["namespace"], json!(""));
1524 assert_eq!(resolved_package["qualifiers"], json!({}));
1525 assert_eq!(resolved_package["subpath"], Value::Null);
1526 assert_eq!(resolved_package["description"], Value::Null);
1527 assert_eq!(resolved_package["license_detections"], json!([]));
1528 assert_eq!(resolved_package["other_license_detections"], json!([]));
1529 assert_eq!(resolved_package["source_packages"], json!([]));
1530 assert_eq!(resolved_package["file_references"], json!([]));
1531 assert_eq!(resolved_package["is_private"], json!(false));
1532 assert_eq!(resolved_package["is_virtual"], json!(false));
1533 assert_eq!(resolved_package["extra_data"], json!({}));
1534 assert_eq!(resolved_package["repository_homepage_url"], Value::Null);
1535 assert_eq!(resolved_package["repository_download_url"], Value::Null);
1536 assert_eq!(resolved_package["api_data_url"], Value::Null);
1537 assert_has_key(resolved_package, "datasource_id");
1538 assert_has_key(resolved_package, "purl");
1539 }
1540}
1541
/// A [`Dependency`] copied out of a datafile and tagged with where it came from.
///
/// Built by [`TopLevelDependency::from_dependency`]. No field is skipped during
/// serialization, so unset options are written as explicit `null`s.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    pub purl: Option<String>,
    pub extracted_requirement: Option<String>,
    pub scope: Option<String>,
    pub is_runtime: Option<bool>,
    pub is_optional: Option<bool>,
    pub is_pinned: Option<bool>,
    pub is_direct: Option<bool>,
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form extra values, written through `serialize_optional_map_as_object`
    /// (the tests in this file expect an unset map to appear as `{}`).
    #[serde(default, serialize_with = "serialize_optional_map_as_object")]
    pub extra_data: Option<HashMap<String, serde_json::Value>>,
    /// Identifier produced by [`build_package_uid`] from the dependency's purl; empty when
    /// the dependency has no purl.
    pub dependency_uid: String,
    /// Supplied by the caller of `from_dependency`; presumably the `package_uid` of the
    /// package that declares this dependency — TODO confirm against callers.
    pub for_package_uid: Option<String>,
    /// Path of the datafile the dependency was read from, as supplied by the caller.
    pub datafile_path: String,
    /// Datasource the dependency was read from, as supplied by the caller.
    pub datasource_id: DatasourceId,
    /// Always `None` when created through `from_dependency`.
    pub namespace: Option<String>,
}
1569
1570impl TopLevelDependency {
1571 pub fn from_dependency(
1573 dep: &Dependency,
1574 datafile_path: String,
1575 datasource_id: DatasourceId,
1576 for_package_uid: Option<String>,
1577 ) -> Self {
1578 let dependency_uid = dep
1579 .purl
1580 .as_ref()
1581 .map(|p| build_package_uid(p))
1582 .unwrap_or_default();
1583
1584 TopLevelDependency {
1585 purl: dep.purl.clone(),
1586 extracted_requirement: dep.extracted_requirement.clone(),
1587 scope: dep.scope.clone(),
1588 is_runtime: dep.is_runtime,
1589 is_optional: dep.is_optional,
1590 is_pinned: dep.is_pinned,
1591 is_direct: dep.is_direct,
1592 resolved_package: dep.resolved_package.clone(),
1593 extra_data: dep.extra_data.clone(),
1594 dependency_uid,
1595 for_package_uid,
1596 datafile_path,
1597 datasource_id,
1598 namespace: None,
1599 }
1600 }
1601}
1602
1603pub fn build_package_uid(purl: &str) -> String {
1607 let uuid = Uuid::new_v4();
1608 if purl.contains('?') {
1609 format!("{}&uuid={}", purl, uuid)
1610 } else {
1611 format!("{}?uuid={}", purl, uuid)
1612 }
1613}
1614
/// Email entry of the serialized output: the address plus a start/end line pair.
///
/// NOTE(review): the lines presumably delimit where the address was found in the scanned
/// file; numbering base and inclusiveness are not visible here — confirm with the producer.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    pub email: String,
    pub start_line: usize,
    pub end_line: usize,
}
1621
/// URL entry of the serialized output: the URL plus a start/end line pair.
///
/// NOTE(review): the lines presumably delimit where the URL was found in the scanned file;
/// numbering base and inclusiveness are not visible here — confirm with the producer.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    pub url: String,
    pub start_line: usize,
    pub end_line: usize,
}
1628
/// One license policy record, compared field by field for equality.
///
/// NOTE(review): all four fields are plain strings; their meaning (license key, display
/// label, color code, icon name) is inferred from the field names only.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
pub struct LicensePolicyEntry {
    pub license_key: String,
    pub label: String,
    pub color_code: String,
    pub icon: String,
}
1636
/// Kind of entry a `FileInfo` describes.
///
/// Serialized as the lowercase strings `"file"` and `"directory"` by the hand-written
/// `Serialize`/`Deserialize` implementations in this file.
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
    /// Written as `"file"`.
    File,
    /// Written as `"directory"`.
    Directory,
}
1642
1643impl Serialize for FileType {
1644 fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1645 where
1646 S: serde::Serializer,
1647 {
1648 let value = match self {
1649 FileType::File => "file",
1650 FileType::Directory => "directory",
1651 };
1652 serializer.serialize_str(value)
1653 }
1654}
1655
1656impl<'de> Deserialize<'de> for FileType {
1657 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1658 where
1659 D: serde::Deserializer<'de>,
1660 {
1661 let value = String::deserialize(deserializer)?;
1662 match value.as_str() {
1663 "file" => Ok(FileType::File),
1664 "directory" => Ok(FileType::Directory),
1665 _ => Err(serde::de::Error::custom("invalid file type")),
1666 }
1667 }
1668}