Skip to main content

provenant/models/
file_info.rs

1use derive_builder::Builder;
2use packageurl::PackageUrl;
3use serde::{Deserialize, Serialize};
4use std::str::FromStr;
5use uuid::Uuid;
6
7use super::DatasourceId;
8use super::PackageType;
9use crate::models::output::Tallies;
10use crate::utils::spdx::combine_license_expressions;
11
12#[derive(Debug, Builder, Serialize, Deserialize)]
13#[builder(build_fn(skip))]
14/// File-level scan result containing metadata and detected findings.
15pub struct FileInfo {
16    pub name: String,
17    pub base_name: String,
18    pub extension: String,
19    pub path: String,
20    #[serde(rename = "type")] // name used by ScanCode
21    pub file_type: FileType,
22    #[builder(default)]
23    pub mime_type: Option<String>,
24    pub size: u64,
25    #[builder(default)]
26    pub date: Option<String>,
27    #[builder(default)]
28    pub sha1: Option<String>,
29    #[builder(default)]
30    pub md5: Option<String>,
31    #[builder(default)]
32    pub sha256: Option<String>,
33    #[builder(default)]
34    pub programming_language: Option<String>,
35    #[builder(default)]
36    #[serde(default)]
37    pub package_data: Vec<PackageData>,
38    #[serde(rename = "detected_license_expression_spdx")] // name used by ScanCode
39    #[builder(default)]
40    pub license_expression: Option<String>,
41    #[builder(default)]
42    #[serde(default)]
43    pub license_detections: Vec<LicenseDetection>,
44    #[builder(default)]
45    #[serde(default)]
46    pub copyrights: Vec<Copyright>,
47    #[builder(default)]
48    #[serde(default)]
49    pub holders: Vec<Holder>,
50    #[builder(default)]
51    #[serde(default)]
52    pub authors: Vec<Author>,
53    #[builder(default)]
54    #[serde(skip_serializing_if = "Vec::is_empty", default)]
55    pub emails: Vec<OutputEmail>,
56    #[builder(default)]
57    #[serde(default)]
58    pub urls: Vec<OutputURL>,
59    #[builder(default)]
60    #[serde(default)]
61    pub for_packages: Vec<String>,
62    #[builder(default)]
63    #[serde(default)]
64    pub scan_errors: Vec<String>,
65    #[builder(default)]
66    #[serde(skip_serializing_if = "Option::is_none", default)]
67    pub is_generated: Option<bool>,
68    #[builder(default)]
69    #[serde(skip_serializing_if = "Option::is_none", default)]
70    pub is_source: Option<bool>,
71    #[builder(default)]
72    #[serde(skip_serializing_if = "Option::is_none", default)]
73    pub source_count: Option<usize>,
74    #[builder(default)]
75    #[serde(skip_serializing_if = "is_false", default)]
76    pub is_legal: bool,
77    #[builder(default)]
78    #[serde(skip_serializing_if = "is_false", default)]
79    pub is_manifest: bool,
80    #[builder(default)]
81    #[serde(skip_serializing_if = "is_false", default)]
82    pub is_readme: bool,
83    #[builder(default)]
84    #[serde(skip_serializing_if = "is_false", default)]
85    pub is_top_level: bool,
86    #[builder(default)]
87    #[serde(skip_serializing_if = "is_false", default)]
88    pub is_key_file: bool,
89    #[builder(default)]
90    #[serde(skip_serializing_if = "is_false", default)]
91    pub is_community: bool,
92    #[builder(default)]
93    #[serde(skip_serializing_if = "Vec::is_empty", default)]
94    pub facets: Vec<String>,
95    #[builder(default)]
96    #[serde(skip_serializing_if = "Option::is_none", default)]
97    pub tallies: Option<Tallies>,
98}
99
100impl FileInfoBuilder {
101    /// Build a [`FileInfo`] from the current builder state.
102    pub fn build(&self) -> Result<FileInfo, String> {
103        Ok(FileInfo::new(
104            self.name.clone().ok_or("Missing field: name")?,
105            self.base_name.clone().ok_or("Missing field: base_name")?,
106            self.extension.clone().ok_or("Missing field: extension")?,
107            self.path.clone().ok_or("Missing field: path")?,
108            self.file_type.clone().ok_or("Missing field: file_type")?,
109            self.mime_type.clone().flatten(),
110            self.size.ok_or("Missing field: size")?,
111            self.date.clone().flatten(),
112            self.sha1.clone().flatten(),
113            self.md5.clone().flatten(),
114            self.sha256.clone().flatten(),
115            self.programming_language.clone().flatten(),
116            self.package_data.clone().unwrap_or_default(),
117            self.license_expression.clone().flatten(),
118            self.license_detections.clone().unwrap_or_default(),
119            self.copyrights.clone().unwrap_or_default(),
120            self.holders.clone().unwrap_or_default(),
121            self.authors.clone().unwrap_or_default(),
122            self.emails.clone().unwrap_or_default(),
123            self.urls.clone().unwrap_or_default(),
124            self.for_packages.clone().unwrap_or_default(),
125            self.scan_errors.clone().unwrap_or_default(),
126        ))
127    }
128}
129
130impl FileInfo {
131    #[allow(clippy::too_many_arguments)]
132    /// Construct a [`FileInfo`] from fully resolved scanner fields.
133    pub fn new(
134        name: String,
135        base_name: String,
136        extension: String,
137        path: String,
138        file_type: FileType,
139        mime_type: Option<String>,
140        size: u64,
141        date: Option<String>,
142        sha1: Option<String>,
143        md5: Option<String>,
144        sha256: Option<String>,
145        programming_language: Option<String>,
146        package_data: Vec<PackageData>,
147        mut license_expression: Option<String>,
148        mut license_detections: Vec<LicenseDetection>,
149        copyrights: Vec<Copyright>,
150        holders: Vec<Holder>,
151        authors: Vec<Author>,
152        emails: Vec<OutputEmail>,
153        urls: Vec<OutputURL>,
154        for_packages: Vec<String>,
155        scan_errors: Vec<String>,
156    ) -> Self {
157        // Combine license expressions from package data if license_expression is None
158        license_expression = license_expression.or_else(|| {
159            let expressions = package_data
160                .iter()
161                .filter_map(|pkg| pkg.get_license_expression());
162            combine_license_expressions(expressions)
163        });
164
165        // Combine license detections from package data if none are provided
166        if license_detections.is_empty() {
167            for pkg in &package_data {
168                license_detections.extend(pkg.license_detections.clone());
169            }
170        }
171
172        // Combine license expressions from license detections if license_expression is still None
173        if license_expression.is_none() && !license_detections.is_empty() {
174            let expressions = license_detections
175                .iter()
176                .map(|detection| detection.license_expression.clone());
177            license_expression = combine_license_expressions(expressions);
178        }
179
180        FileInfo {
181            name,
182            base_name,
183            extension,
184            path,
185            file_type,
186            mime_type,
187            size,
188            date,
189            sha1,
190            md5,
191            sha256,
192            programming_language,
193            package_data,
194            license_expression,
195            license_detections,
196            copyrights,
197            holders,
198            authors,
199            emails,
200            urls,
201            for_packages,
202            scan_errors,
203            is_generated: None,
204            is_source: None,
205            source_count: None,
206            is_legal: false,
207            is_manifest: false,
208            is_readme: false,
209            is_top_level: false,
210            is_key_file: false,
211            is_community: false,
212            facets: vec![],
213            tallies: None,
214        }
215    }
216}
217
218/// Package metadata extracted from manifest files.
219///
220/// Compatible with ScanCode Toolkit output format. Contains standardized package
221/// information including name, version, dependencies, licenses, and other metadata.
222/// This is the primary data structure returned by all parsers.
223#[derive(Serialize, Deserialize, Debug, Clone, Default)]
224pub struct PackageData {
225    #[serde(rename = "type")] // name used by ScanCode
226    pub package_type: Option<PackageType>,
227    #[serde(skip_serializing_if = "Option::is_none")]
228    pub namespace: Option<String>,
229    #[serde(skip_serializing_if = "Option::is_none")]
230    pub name: Option<String>,
231    #[serde(skip_serializing_if = "Option::is_none")]
232    pub version: Option<String>,
233    #[serde(skip_serializing_if = "Option::is_none")]
234    pub qualifiers: Option<std::collections::HashMap<String, String>>,
235    #[serde(skip_serializing_if = "Option::is_none")]
236    pub subpath: Option<String>,
237    #[serde(skip_serializing_if = "Option::is_none")]
238    pub primary_language: Option<String>,
239    #[serde(skip_serializing_if = "Option::is_none")]
240    pub description: Option<String>,
241    #[serde(skip_serializing_if = "Option::is_none")]
242    pub release_date: Option<String>,
243    pub parties: Vec<Party>,
244    #[serde(skip_serializing_if = "Vec::is_empty", default)]
245    pub keywords: Vec<String>,
246    #[serde(skip_serializing_if = "Option::is_none")]
247    pub homepage_url: Option<String>,
248    #[serde(skip_serializing_if = "Option::is_none")]
249    pub download_url: Option<String>,
250    #[serde(skip_serializing_if = "Option::is_none")]
251    pub size: Option<u64>,
252    #[serde(skip_serializing_if = "Option::is_none")]
253    pub sha1: Option<String>,
254    #[serde(skip_serializing_if = "Option::is_none")]
255    pub md5: Option<String>,
256    #[serde(skip_serializing_if = "Option::is_none")]
257    pub sha256: Option<String>,
258    #[serde(skip_serializing_if = "Option::is_none")]
259    pub sha512: Option<String>,
260    #[serde(skip_serializing_if = "Option::is_none")]
261    pub bug_tracking_url: Option<String>,
262    #[serde(skip_serializing_if = "Option::is_none")]
263    pub code_view_url: Option<String>,
264    #[serde(skip_serializing_if = "Option::is_none")]
265    pub vcs_url: Option<String>,
266    #[serde(skip_serializing_if = "Option::is_none")]
267    pub copyright: Option<String>,
268    #[serde(skip_serializing_if = "Option::is_none")]
269    pub holder: Option<String>,
270    #[serde(skip_serializing_if = "Option::is_none")]
271    pub declared_license_expression: Option<String>,
272    #[serde(skip_serializing_if = "Option::is_none")]
273    pub declared_license_expression_spdx: Option<String>,
274    #[serde(skip_serializing_if = "Vec::is_empty", default)]
275    pub license_detections: Vec<LicenseDetection>,
276    #[serde(skip_serializing_if = "Option::is_none")]
277    pub other_license_expression: Option<String>,
278    #[serde(skip_serializing_if = "Option::is_none")]
279    pub other_license_expression_spdx: Option<String>,
280    #[serde(skip_serializing_if = "Vec::is_empty", default)]
281    pub other_license_detections: Vec<LicenseDetection>,
282    #[serde(skip_serializing_if = "Option::is_none")]
283    pub extracted_license_statement: Option<String>,
284    #[serde(skip_serializing_if = "Option::is_none")]
285    pub notice_text: Option<String>,
286    #[serde(skip_serializing_if = "Vec::is_empty", default)]
287    pub source_packages: Vec<String>,
288    #[serde(skip_serializing_if = "Vec::is_empty", default)]
289    pub file_references: Vec<FileReference>,
290    #[serde(skip_serializing_if = "is_false", default)]
291    pub is_private: bool,
292    #[serde(skip_serializing_if = "is_false", default)]
293    pub is_virtual: bool,
294    #[serde(skip_serializing_if = "Option::is_none")]
295    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
296    #[serde(skip_serializing_if = "Vec::is_empty", default)]
297    pub dependencies: Vec<Dependency>,
298    #[serde(skip_serializing_if = "Option::is_none")]
299    pub repository_homepage_url: Option<String>,
300    #[serde(skip_serializing_if = "Option::is_none")]
301    pub repository_download_url: Option<String>,
302    #[serde(skip_serializing_if = "Option::is_none")]
303    pub api_data_url: Option<String>,
304    #[serde(skip_serializing_if = "Option::is_none")]
305    pub datasource_id: Option<DatasourceId>,
306    #[serde(skip_serializing_if = "Option::is_none")]
307    pub purl: Option<String>,
308}
309
/// Predicate for `#[serde(skip_serializing_if = "is_false")]`: returns `true`
/// exactly when the flag is `false`, so unset flags are left out of the output.
/// Takes `&bool` because serde passes the field by reference.
fn is_false(flag: &bool) -> bool {
    !*flag
}
314
315impl PackageData {
316    /// Extracts a single license expression from all license detections in this package.
317    /// Returns None if there are no license detections.
318    pub fn get_license_expression(&self) -> Option<String> {
319        if self.license_detections.is_empty() {
320            return None;
321        }
322
323        let expressions = self
324            .license_detections
325            .iter()
326            .map(|detection| detection.license_expression.clone());
327        combine_license_expressions(expressions)
328    }
329}
330
331/// License detection result containing matched license expressions.
332///
333/// Aggregates multiple license matches into a single SPDX license expression.
334#[derive(Serialize, Deserialize, Debug, Clone)]
335pub struct LicenseDetection {
336    pub license_expression: String,
337    pub license_expression_spdx: String,
338    pub matches: Vec<Match>,
339    #[serde(skip_serializing_if = "Option::is_none")]
340    pub identifier: Option<String>,
341}
342
343/// Individual license text match with location and confidence score.
344///
345/// Represents a specific region of text that matched a known license pattern.
346#[derive(Serialize, Deserialize, Debug, Clone)]
347pub struct Match {
348    pub license_expression: String,
349    pub license_expression_spdx: String,
350    #[serde(skip_serializing_if = "Option::is_none")]
351    pub from_file: Option<String>,
352    pub start_line: usize,
353    pub end_line: usize,
354    #[serde(skip_serializing_if = "Option::is_none")]
355    pub matcher: Option<String>,
356    pub score: f64,
357    #[serde(skip_serializing_if = "Option::is_none")]
358    pub matched_length: Option<usize>,
359    #[serde(skip_serializing_if = "Option::is_none")]
360    pub match_coverage: Option<f64>,
361    #[serde(skip_serializing_if = "Option::is_none")]
362    pub rule_relevance: Option<usize>,
363    #[serde(skip_serializing_if = "Option::is_none")]
364    pub rule_identifier: Option<String>,
365    pub rule_url: Option<String>,
366    #[serde(skip_serializing_if = "Option::is_none")]
367    pub matched_text: Option<String>,
368}
369
370#[derive(Serialize, Deserialize, Debug, Clone)]
371pub struct Copyright {
372    pub copyright: String,
373    pub start_line: usize,
374    pub end_line: usize,
375}
376
377#[derive(Serialize, Deserialize, Debug, Clone)]
378pub struct Holder {
379    pub holder: String,
380    pub start_line: usize,
381    pub end_line: usize,
382}
383
384#[derive(Serialize, Deserialize, Debug, Clone)]
385pub struct Author {
386    pub author: String,
387    pub start_line: usize,
388    pub end_line: usize,
389}
390
391/// Package dependency information with version constraints.
392///
393/// Represents a declared dependency with scope (e.g., runtime, dev, optional)
394/// and optional resolved package details.
395#[derive(Serialize, Deserialize, Debug, Clone)]
396pub struct Dependency {
397    pub purl: Option<String>,
398    #[serde(skip_serializing_if = "Option::is_none")]
399    pub extracted_requirement: Option<String>,
400    pub scope: Option<String>,
401    #[serde(skip_serializing_if = "Option::is_none")]
402    pub is_runtime: Option<bool>,
403    #[serde(skip_serializing_if = "Option::is_none")]
404    pub is_optional: Option<bool>,
405    #[serde(skip_serializing_if = "Option::is_none")]
406    pub is_pinned: Option<bool>,
407    #[serde(skip_serializing_if = "Option::is_none")]
408    pub is_direct: Option<bool>,
409    #[serde(skip_serializing_if = "Option::is_none")]
410    pub resolved_package: Option<Box<ResolvedPackage>>,
411    #[serde(skip_serializing_if = "Option::is_none")]
412    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
413}
414
415#[derive(Serialize, Deserialize, Debug, Clone)]
416pub struct ResolvedPackage {
417    #[serde(rename = "type")]
418    pub package_type: PackageType,
419    #[serde(skip_serializing_if = "String::is_empty")]
420    pub namespace: String,
421    pub name: String,
422    pub version: String,
423    #[serde(skip_serializing_if = "Option::is_none")]
424    pub primary_language: Option<String>,
425    #[serde(skip_serializing_if = "Option::is_none")]
426    pub download_url: Option<String>,
427    #[serde(skip_serializing_if = "Option::is_none")]
428    pub sha1: Option<String>,
429    #[serde(skip_serializing_if = "Option::is_none")]
430    pub sha256: Option<String>,
431    #[serde(skip_serializing_if = "Option::is_none")]
432    pub sha512: Option<String>,
433    #[serde(skip_serializing_if = "Option::is_none")]
434    pub md5: Option<String>,
435    pub is_virtual: bool,
436    #[serde(skip_serializing_if = "Option::is_none")]
437    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
438    pub dependencies: Vec<Dependency>,
439    #[serde(skip_serializing_if = "Option::is_none")]
440    pub repository_homepage_url: Option<String>,
441    #[serde(skip_serializing_if = "Option::is_none")]
442    pub repository_download_url: Option<String>,
443    #[serde(skip_serializing_if = "Option::is_none")]
444    pub api_data_url: Option<String>,
445    #[serde(skip_serializing_if = "Option::is_none")]
446    pub datasource_id: Option<DatasourceId>,
447    #[serde(skip_serializing_if = "Option::is_none")]
448    pub purl: Option<String>,
449}
450
451/// Author, maintainer, or contributor information.
452///
453/// Represents a person or organization associated with a package.
454#[derive(Serialize, Deserialize, Debug, Clone)]
455pub struct Party {
456    #[serde(skip_serializing_if = "Option::is_none")]
457    pub r#type: Option<String>,
458    #[serde(skip_serializing_if = "Option::is_none")]
459    pub role: Option<String>,
460    #[serde(skip_serializing_if = "Option::is_none")]
461    pub name: Option<String>,
462    #[serde(skip_serializing_if = "Option::is_none")]
463    pub email: Option<String>,
464    #[serde(skip_serializing_if = "Option::is_none")]
465    pub url: Option<String>,
466    #[serde(skip_serializing_if = "Option::is_none")]
467    pub organization: Option<String>,
468    #[serde(skip_serializing_if = "Option::is_none")]
469    pub organization_url: Option<String>,
470    #[serde(skip_serializing_if = "Option::is_none")]
471    pub timezone: Option<String>,
472}
473
474/// Reference to a file within a package archive with checksums.
475///
476/// Used in SBOM generation to track files within distribution archives.
477#[derive(Serialize, Deserialize, Debug, Clone)]
478pub struct FileReference {
479    pub path: String,
480    #[serde(skip_serializing_if = "Option::is_none")]
481    pub size: Option<u64>,
482    #[serde(skip_serializing_if = "Option::is_none")]
483    pub sha1: Option<String>,
484    #[serde(skip_serializing_if = "Option::is_none")]
485    pub md5: Option<String>,
486    #[serde(skip_serializing_if = "Option::is_none")]
487    pub sha256: Option<String>,
488    #[serde(skip_serializing_if = "Option::is_none")]
489    pub sha512: Option<String>,
490    #[serde(skip_serializing_if = "Option::is_none")]
491    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
492}
493
494/// Top-level assembled package, created by merging one or more `PackageData`
495/// objects from related manifest/lockfiles (e.g., package.json + package-lock.json).
496///
497/// Compatible with ScanCode Toolkit output format. The key differences from
498/// `PackageData` are:
499/// - `package_uid`: unique identifier (PURL with UUID qualifier)
500/// - `datafile_paths`: list of all contributing files
501/// - `datasource_ids`: list of all contributing parsers
502/// - Excludes `dependencies` and `file_references` (hoisted to top-level)
503#[derive(Serialize, Deserialize, Debug, Clone)]
504pub struct Package {
505    #[serde(rename = "type")]
506    pub package_type: Option<PackageType>,
507    #[serde(skip_serializing_if = "Option::is_none")]
508    pub namespace: Option<String>,
509    #[serde(skip_serializing_if = "Option::is_none")]
510    pub name: Option<String>,
511    #[serde(skip_serializing_if = "Option::is_none")]
512    pub version: Option<String>,
513    #[serde(skip_serializing_if = "Option::is_none")]
514    pub qualifiers: Option<std::collections::HashMap<String, String>>,
515    #[serde(skip_serializing_if = "Option::is_none")]
516    pub subpath: Option<String>,
517    #[serde(skip_serializing_if = "Option::is_none")]
518    pub primary_language: Option<String>,
519    #[serde(skip_serializing_if = "Option::is_none")]
520    pub description: Option<String>,
521    #[serde(skip_serializing_if = "Option::is_none")]
522    pub release_date: Option<String>,
523    pub parties: Vec<Party>,
524    #[serde(skip_serializing_if = "Vec::is_empty", default)]
525    pub keywords: Vec<String>,
526    #[serde(skip_serializing_if = "Option::is_none")]
527    pub homepage_url: Option<String>,
528    #[serde(skip_serializing_if = "Option::is_none")]
529    pub download_url: Option<String>,
530    #[serde(skip_serializing_if = "Option::is_none")]
531    pub size: Option<u64>,
532    #[serde(skip_serializing_if = "Option::is_none")]
533    pub sha1: Option<String>,
534    #[serde(skip_serializing_if = "Option::is_none")]
535    pub md5: Option<String>,
536    #[serde(skip_serializing_if = "Option::is_none")]
537    pub sha256: Option<String>,
538    #[serde(skip_serializing_if = "Option::is_none")]
539    pub sha512: Option<String>,
540    #[serde(skip_serializing_if = "Option::is_none")]
541    pub bug_tracking_url: Option<String>,
542    #[serde(skip_serializing_if = "Option::is_none")]
543    pub code_view_url: Option<String>,
544    #[serde(skip_serializing_if = "Option::is_none")]
545    pub vcs_url: Option<String>,
546    #[serde(skip_serializing_if = "Option::is_none")]
547    pub copyright: Option<String>,
548    #[serde(skip_serializing_if = "Option::is_none")]
549    pub holder: Option<String>,
550    #[serde(skip_serializing_if = "Option::is_none")]
551    pub declared_license_expression: Option<String>,
552    #[serde(skip_serializing_if = "Option::is_none")]
553    pub declared_license_expression_spdx: Option<String>,
554    #[serde(skip_serializing_if = "Vec::is_empty", default)]
555    pub license_detections: Vec<LicenseDetection>,
556    #[serde(skip_serializing_if = "Option::is_none")]
557    pub other_license_expression: Option<String>,
558    #[serde(skip_serializing_if = "Option::is_none")]
559    pub other_license_expression_spdx: Option<String>,
560    #[serde(skip_serializing_if = "Vec::is_empty", default)]
561    pub other_license_detections: Vec<LicenseDetection>,
562    #[serde(skip_serializing_if = "Option::is_none")]
563    pub extracted_license_statement: Option<String>,
564    #[serde(skip_serializing_if = "Option::is_none")]
565    pub notice_text: Option<String>,
566    #[serde(skip_serializing_if = "Vec::is_empty", default)]
567    pub source_packages: Vec<String>,
568    #[serde(skip_serializing_if = "is_false", default)]
569    pub is_private: bool,
570    #[serde(skip_serializing_if = "is_false", default)]
571    pub is_virtual: bool,
572    #[serde(skip_serializing_if = "Option::is_none")]
573    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
574    #[serde(skip_serializing_if = "Option::is_none")]
575    pub repository_homepage_url: Option<String>,
576    #[serde(skip_serializing_if = "Option::is_none")]
577    pub repository_download_url: Option<String>,
578    #[serde(skip_serializing_if = "Option::is_none")]
579    pub api_data_url: Option<String>,
580    #[serde(skip_serializing_if = "Option::is_none")]
581    pub purl: Option<String>,
582    /// Unique identifier for this package instance (PURL with UUID qualifier).
583    pub package_uid: String,
584    /// Paths to all datafiles that contributed to this package.
585    pub datafile_paths: Vec<String>,
586    /// Datasource identifiers for all parsers that contributed to this package.
587    pub datasource_ids: Vec<DatasourceId>,
588}
589
590impl Package {
591    /// Create a `Package` from a `PackageData` and its source file path.
592    ///
593    /// Generates a unique `package_uid` by appending a UUID qualifier to the PURL.
594    /// If the `PackageData` has no PURL, the package_uid will be an empty string.
595    pub fn from_package_data(package_data: &PackageData, datafile_path: String) -> Self {
596        let package_uid = package_data
597            .purl
598            .as_ref()
599            .map(|p| build_package_uid(p))
600            .unwrap_or_default();
601
602        Package {
603            package_type: package_data.package_type,
604            namespace: package_data.namespace.clone(),
605            name: package_data.name.clone(),
606            version: package_data.version.clone(),
607            qualifiers: package_data.qualifiers.clone(),
608            subpath: package_data.subpath.clone(),
609            primary_language: package_data.primary_language.clone(),
610            description: package_data.description.clone(),
611            release_date: package_data.release_date.clone(),
612            parties: package_data.parties.clone(),
613            keywords: package_data.keywords.clone(),
614            homepage_url: package_data.homepage_url.clone(),
615            download_url: package_data.download_url.clone(),
616            size: package_data.size,
617            sha1: package_data.sha1.clone(),
618            md5: package_data.md5.clone(),
619            sha256: package_data.sha256.clone(),
620            sha512: package_data.sha512.clone(),
621            bug_tracking_url: package_data.bug_tracking_url.clone(),
622            code_view_url: package_data.code_view_url.clone(),
623            vcs_url: package_data.vcs_url.clone(),
624            copyright: package_data.copyright.clone(),
625            holder: package_data.holder.clone(),
626            declared_license_expression: package_data.declared_license_expression.clone(),
627            declared_license_expression_spdx: package_data.declared_license_expression_spdx.clone(),
628            license_detections: package_data.license_detections.clone(),
629            other_license_expression: package_data.other_license_expression.clone(),
630            other_license_expression_spdx: package_data.other_license_expression_spdx.clone(),
631            other_license_detections: package_data.other_license_detections.clone(),
632            extracted_license_statement: package_data.extracted_license_statement.clone(),
633            notice_text: package_data.notice_text.clone(),
634            source_packages: package_data.source_packages.clone(),
635            is_private: package_data.is_private,
636            is_virtual: package_data.is_virtual,
637            extra_data: package_data.extra_data.clone(),
638            repository_homepage_url: package_data.repository_homepage_url.clone(),
639            repository_download_url: package_data.repository_download_url.clone(),
640            api_data_url: package_data.api_data_url.clone(),
641            purl: package_data.purl.clone(),
642            package_uid,
643            datafile_paths: vec![datafile_path],
644            datasource_ids: if let Some(dsid) = package_data.datasource_id {
645                vec![dsid]
646            } else {
647                vec![]
648            },
649        }
650    }
651
652    /// Update this package with data from another `PackageData`.
653    ///
654    /// Merges data from a related file (e.g., lockfile) into this package.
655    /// Existing non-empty values are preserved; empty fields are filled from
656    /// the new data. Lists (parties, license_detections) are merged.
657    pub fn update(&mut self, package_data: &PackageData, datafile_path: String) {
658        if let Some(dsid) = package_data.datasource_id {
659            self.datasource_ids.push(dsid);
660        }
661        self.datafile_paths.push(datafile_path);
662
663        macro_rules! fill_if_empty {
664            ($field:ident) => {
665                if self.$field.is_none() {
666                    self.$field = package_data.$field.clone();
667                }
668            };
669        }
670
671        fill_if_empty!(package_type);
672        fill_if_empty!(name);
673        fill_if_empty!(namespace);
674        fill_if_empty!(version);
675        fill_if_empty!(qualifiers);
676        fill_if_empty!(subpath);
677        fill_if_empty!(primary_language);
678        fill_if_empty!(description);
679        fill_if_empty!(release_date);
680        fill_if_empty!(homepage_url);
681        fill_if_empty!(download_url);
682        fill_if_empty!(size);
683        fill_if_empty!(sha1);
684        fill_if_empty!(md5);
685        fill_if_empty!(sha256);
686        fill_if_empty!(sha512);
687        fill_if_empty!(bug_tracking_url);
688        fill_if_empty!(code_view_url);
689        fill_if_empty!(vcs_url);
690        fill_if_empty!(copyright);
691        fill_if_empty!(holder);
692        fill_if_empty!(declared_license_expression);
693        fill_if_empty!(declared_license_expression_spdx);
694        fill_if_empty!(other_license_expression);
695        fill_if_empty!(other_license_expression_spdx);
696        fill_if_empty!(extracted_license_statement);
697        fill_if_empty!(notice_text);
698        match (&mut self.extra_data, &package_data.extra_data) {
699            (None, Some(extra_data)) => {
700                self.extra_data = Some(extra_data.clone());
701            }
702            (Some(existing), Some(incoming)) => {
703                for (key, value) in incoming {
704                    existing.entry(key.clone()).or_insert_with(|| value.clone());
705                }
706            }
707            _ => {}
708        }
709        fill_if_empty!(repository_homepage_url);
710        fill_if_empty!(repository_download_url);
711        fill_if_empty!(api_data_url);
712
713        for party in &package_data.parties {
714            if let Some(existing) = self.parties.iter_mut().find(|p| {
715                p.role == party.role
716                    && ((p.name.is_some() && p.name == party.name)
717                        || (p.email.is_some() && p.email == party.email))
718            }) {
719                if existing.name.is_none() {
720                    existing.name = party.name.clone();
721                }
722                if existing.email.is_none() {
723                    existing.email = party.email.clone();
724                }
725            } else {
726                self.parties.push(party.clone());
727            }
728        }
729
730        for keyword in &package_data.keywords {
731            if !self.keywords.contains(keyword) {
732                self.keywords.push(keyword.clone());
733            }
734        }
735
736        for detection in &package_data.license_detections {
737            self.license_detections.push(detection.clone());
738        }
739
740        for detection in &package_data.other_license_detections {
741            self.other_license_detections.push(detection.clone());
742        }
743
744        for source_pkg in &package_data.source_packages {
745            if !self.source_packages.contains(source_pkg) {
746                self.source_packages.push(source_pkg.clone());
747            }
748        }
749
750        self.refresh_identity();
751    }
752
753    fn refresh_identity(&mut self) {
754        let Some(next_purl) = self.build_current_purl() else {
755            return;
756        };
757
758        if self.purl.as_deref() != Some(next_purl.as_str()) || self.package_uid.is_empty() {
759            self.package_uid = build_package_uid(&next_purl);
760        }
761
762        self.purl = Some(next_purl);
763    }
764
765    fn build_current_purl(&self) -> Option<String> {
766        if let (Some(package_type), Some(name)) = (
767            self.package_type.as_ref(),
768            self.name
769                .as_deref()
770                .filter(|value| !value.trim().is_empty()),
771        ) {
772            let purl_type = match package_type {
773                PackageType::Deno => "generic",
774                _ => package_type.as_str(),
775            };
776
777            let mut purl = PackageUrl::new(purl_type, name).ok()?;
778
779            if let Some(namespace) = self
780                .namespace
781                .as_deref()
782                .filter(|value| !value.trim().is_empty())
783            {
784                purl.with_namespace(namespace).ok()?;
785            }
786
787            if let Some(version) = self
788                .version
789                .as_deref()
790                .filter(|value| !value.trim().is_empty())
791            {
792                purl.with_version(version).ok()?;
793            }
794
795            if let Some(qualifiers) = &self.qualifiers {
796                for (key, value) in qualifiers {
797                    purl.add_qualifier(key.as_str(), value.as_str()).ok()?;
798                }
799            }
800
801            if let Some(subpath) = self
802                .subpath
803                .as_deref()
804                .filter(|value| !value.trim().is_empty())
805            {
806                purl.with_subpath(subpath).ok()?;
807            }
808
809            return Some(purl.to_string());
810        }
811
812        let existing_purl = self.purl.as_deref()?;
813        let mut purl = PackageUrl::from_str(existing_purl).ok()?;
814
815        if let Some(version) = self
816            .version
817            .as_deref()
818            .filter(|value| !value.trim().is_empty())
819        {
820            purl.with_version(version).ok()?;
821        } else {
822            purl.without_version();
823        }
824
825        Some(purl.to_string())
826    }
827}
828
/// Top-level dependency instance, created during package assembly.
///
/// Extends the file-level `Dependency` with traceability fields that link
/// each dependency to its owning package and source datafile.
///
/// Optional fields marked `skip_serializing_if = "Option::is_none"` are left
/// out of the serialized output when unset; `purl` and `scope` are always
/// written, even when `None`.
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct TopLevelDependency {
    /// PURL of the dependency, copied from the file-level `Dependency`.
    pub purl: Option<String>,
    /// Copied from the file-level `Dependency`; presumably the requirement
    /// text as written in the manifest — confirm against the parsers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extracted_requirement: Option<String>,
    /// Copied from the file-level `Dependency`; presumably the dependency
    /// group/scope name used by the ecosystem — confirm against the parsers.
    pub scope: Option<String>,
    /// Optional flag carried over unchanged from the file-level `Dependency`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_runtime: Option<bool>,
    /// Optional flag carried over unchanged from the file-level `Dependency`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_optional: Option<bool>,
    /// Optional flag carried over unchanged from the file-level `Dependency`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_pinned: Option<bool>,
    /// Optional flag carried over unchanged from the file-level `Dependency`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub is_direct: Option<bool>,
    /// Resolved package details cloned from the file-level `Dependency`, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub resolved_package: Option<Box<ResolvedPackage>>,
    /// Free-form JSON key/value data cloned from the file-level `Dependency`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub extra_data: Option<std::collections::HashMap<String, serde_json::Value>>,
    /// Unique identifier for this dependency instance (PURL with UUID qualifier).
    pub dependency_uid: String,
    /// The `package_uid` of the package this dependency belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub for_package_uid: Option<String>,
    /// Path to the datafile where this dependency was declared.
    pub datafile_path: String,
    /// Datasource identifier for the parser that extracted this dependency.
    pub datasource_id: DatasourceId,
    /// Namespace for the dependency (e.g., distribution name for RPM packages).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub namespace: Option<String>,
}
864
865impl TopLevelDependency {
866    /// Create a `TopLevelDependency` from a file-level `Dependency`.
867    pub fn from_dependency(
868        dep: &Dependency,
869        datafile_path: String,
870        datasource_id: DatasourceId,
871        for_package_uid: Option<String>,
872    ) -> Self {
873        let dependency_uid = dep
874            .purl
875            .as_ref()
876            .map(|p| build_package_uid(p))
877            .unwrap_or_default();
878
879        TopLevelDependency {
880            purl: dep.purl.clone(),
881            extracted_requirement: dep.extracted_requirement.clone(),
882            scope: dep.scope.clone(),
883            is_runtime: dep.is_runtime,
884            is_optional: dep.is_optional,
885            is_pinned: dep.is_pinned,
886            is_direct: dep.is_direct,
887            resolved_package: dep.resolved_package.clone(),
888            extra_data: dep.extra_data.clone(),
889            dependency_uid,
890            for_package_uid,
891            datafile_path,
892            datasource_id,
893            namespace: None,
894        }
895    }
896}
897
898/// Generate a unique package identifier by appending a UUID v4 qualifier to a PURL.
899///
900/// The format matches Python ScanCode: `pkg:type/name@version?uuid=<uuid-v4>`
901pub fn build_package_uid(purl: &str) -> String {
902    let uuid = Uuid::new_v4();
903    if purl.contains('?') {
904        format!("{}&uuid={}", purl, uuid)
905    } else {
906        format!("{}?uuid={}", purl, uuid)
907    }
908}
909
/// An email address reported in scan output (element type of `FileInfo::emails`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputEmail {
    /// The email address text.
    pub email: String,
    /// First line of the occurrence — presumably 1-based; confirm against the scanner.
    pub start_line: usize,
    /// Last line of the occurrence — presumably inclusive; confirm against the scanner.
    pub end_line: usize,
}
916
/// A URL reported in scan output (element type of `FileInfo::urls`).
#[derive(Serialize, Deserialize, Debug, Clone)]
pub struct OutputURL {
    /// The URL text.
    pub url: String,
    /// First line of the occurrence — presumably 1-based; confirm against the scanner.
    pub start_line: usize,
    /// Last line of the occurrence — presumably inclusive; confirm against the scanner.
    pub end_line: usize,
}
923
/// Kind of filesystem entry described by a `FileInfo`.
///
/// Serde support is implemented by hand rather than derived: the variants are
/// written and read as the lowercase strings `"file"` and `"directory"`.
#[derive(Debug, Clone, PartialEq)]
pub enum FileType {
    /// Serialized as `"file"`.
    File,
    /// Serialized as `"directory"`.
    Directory,
}
929
930impl Serialize for FileType {
931    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
932    where
933        S: serde::Serializer,
934    {
935        let value = match self {
936            FileType::File => "file",
937            FileType::Directory => "directory",
938        };
939        serializer.serialize_str(value)
940    }
941}
942
943impl<'de> Deserialize<'de> for FileType {
944    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
945    where
946        D: serde::Deserializer<'de>,
947    {
948        let value = String::deserialize(deserializer)?;
949        match value.as_str() {
950            "file" => Ok(FileType::File),
951            "directory" => Ok(FileType::Directory),
952            _ => Err(serde::de::Error::custom("invalid file type")),
953        }
954    }
955}