Skip to main content

provenant/output_schema/
file_info.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use serde::{Deserialize, Serialize, Serializer};
5use serde_json::Map;
6
7use super::author::OutputAuthor;
8use super::copyright::OutputCopyright;
9use super::email::OutputEmail;
10use super::holder::OutputHolder;
11use super::license_detection::OutputLicenseDetection;
12use super::license_match::OutputMatch;
13use super::license_policy_entry::OutputLicensePolicyEntry;
14use super::package_data::OutputPackageData;
15use super::serde_helpers::insert_json;
16use super::tallies::OutputTallies;
17use super::url::OutputURL;
18
19#[derive(Debug, Clone, Deserialize)]
20pub struct OutputFileInfo {
21    #[serde(default)]
22    pub name: String,
23    #[serde(default)]
24    pub base_name: String,
25    #[serde(default)]
26    pub extension: String,
27    pub path: String,
28    #[serde(rename = "type")]
29    pub file_type: crate::models::FileType,
30    pub mime_type: Option<String>,
31    pub file_type_label: Option<String>,
32    #[serde(default)]
33    pub size: u64,
34    pub date: Option<String>,
35    pub sha1: Option<String>,
36    pub md5: Option<String>,
37    pub sha256: Option<String>,
38    pub sha1_git: Option<String>,
39    pub programming_language: Option<String>,
40    #[serde(default)]
41    pub package_data: Vec<OutputPackageData>,
42    #[serde(rename = "detected_license_expression_spdx")]
43    pub license_expression: Option<String>,
44    #[serde(default)]
45    pub license_detections: Vec<OutputLicenseDetection>,
46    #[serde(default, skip_serializing_if = "Vec::is_empty")]
47    pub license_clues: Vec<OutputMatch>,
48    pub percentage_of_license_text: Option<f64>,
49    #[serde(default)]
50    pub copyrights: Vec<OutputCopyright>,
51    #[serde(default)]
52    pub holders: Vec<OutputHolder>,
53    #[serde(default)]
54    pub authors: Vec<OutputAuthor>,
55    #[serde(default, skip_serializing_if = "Vec::is_empty")]
56    pub emails: Vec<OutputEmail>,
57    #[serde(default)]
58    pub urls: Vec<OutputURL>,
59    #[serde(default)]
60    pub for_packages: Vec<String>,
61    #[serde(default)]
62    pub scan_errors: Vec<String>,
63    pub license_policy: Option<Vec<OutputLicensePolicyEntry>>,
64    pub is_generated: Option<bool>,
65    pub is_binary: Option<bool>,
66    pub is_text: Option<bool>,
67    pub is_archive: Option<bool>,
68    pub is_media: Option<bool>,
69    pub is_source: Option<bool>,
70    pub is_script: Option<bool>,
71    pub files_count: Option<usize>,
72    pub dirs_count: Option<usize>,
73    pub size_count: Option<u64>,
74    pub source_count: Option<usize>,
75    #[serde(default, skip_serializing_if = "is_false")]
76    pub is_legal: bool,
77    #[serde(default, skip_serializing_if = "is_false")]
78    pub is_manifest: bool,
79    #[serde(default, skip_serializing_if = "is_false")]
80    pub is_readme: bool,
81    #[serde(default, skip_serializing_if = "is_false")]
82    pub is_top_level: bool,
83    #[serde(default, skip_serializing_if = "is_false")]
84    pub is_key_file: bool,
85    #[serde(default, skip_serializing_if = "is_false")]
86    pub is_community: bool,
87    #[serde(default, skip_serializing_if = "Vec::is_empty")]
88    pub facets: Vec<String>,
89    pub tallies: Option<OutputTallies>,
90}
91
92impl OutputFileInfo {
93    pub(crate) fn should_serialize_info_surface(&self) -> bool {
94        self.date.is_some()
95            || self.sha1.is_some()
96            || self.md5.is_some()
97            || self.sha256.is_some()
98            || self.sha1_git.is_some()
99            || self.mime_type.is_some()
100            || self.file_type_label.is_some()
101            || self.programming_language.is_some()
102            || self.is_binary.is_some()
103            || self.is_text.is_some()
104            || self.is_archive.is_some()
105            || self.is_media.is_some()
106            || self.is_source.is_some()
107            || self.is_script.is_some()
108            || self.files_count.is_some()
109            || self.dirs_count.is_some()
110            || self.size_count.is_some()
111    }
112
113    pub(crate) fn should_serialize_license_surface(&self) -> bool {
114        self.license_expression.is_some()
115            || !self.license_detections.is_empty()
116            || !self.license_clues.is_empty()
117            || self.percentage_of_license_text.is_some()
118    }
119
120    pub(crate) fn detected_license_expression_spdx(&self) -> Option<String> {
121        {
122            let expressions: Vec<String> = self
123                .license_detections
124                .iter()
125                .filter(|detection| !detection.license_expression_spdx.is_empty())
126                .map(|detection| detection.license_expression_spdx.clone())
127                .collect();
128            crate::utils::spdx::select_primary_license_expression(expressions.clone()).or_else(
129                || {
130                    crate::utils::spdx::combine_license_expressions_preserving_structure(
131                        expressions,
132                    )
133                },
134            )
135        }
136        .or_else(|| {
137            let expressions: Vec<String> = self
138                .package_data
139                .iter()
140                .flat_map(|package_data| package_data.license_detections.iter())
141                .filter(|detection| !detection.license_expression_spdx.is_empty())
142                .map(|detection| detection.license_expression_spdx.clone())
143                .collect();
144            crate::utils::spdx::select_primary_license_expression(expressions.clone()).or_else(
145                || {
146                    crate::utils::spdx::combine_license_expressions_preserving_structure(
147                        expressions,
148                    )
149                },
150            )
151        })
152        .or_else(|| {
153            self.license_expression
154                .clone()
155                .filter(|expression| !expression.is_empty())
156        })
157    }
158}
159
160impl Serialize for OutputFileInfo {
161    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
162    where
163        S: Serializer,
164    {
165        let mut map = Map::new();
166        insert_json(&mut map, "path", &self.path)?;
167        insert_json(&mut map, "type", &self.file_type)?;
168        insert_json(&mut map, "name", &self.name)?;
169        insert_json(&mut map, "base_name", &self.base_name)?;
170        insert_json(&mut map, "extension", &self.extension)?;
171        insert_json(&mut map, "size", self.size)?;
172
173        if self.should_serialize_info_surface() {
174            insert_json(&mut map, "date", &self.date)?;
175            insert_json(&mut map, "sha1", self.sha1.as_ref())?;
176            insert_json(&mut map, "md5", self.md5.as_ref())?;
177            insert_json(&mut map, "sha256", self.sha256.as_ref())?;
178            insert_json(&mut map, "sha1_git", self.sha1_git.as_ref())?;
179            insert_json(&mut map, "mime_type", &self.mime_type)?;
180            insert_json(&mut map, "file_type", &self.file_type_label)?;
181            insert_json(&mut map, "programming_language", &self.programming_language)?;
182            insert_json(&mut map, "is_binary", self.is_binary)?;
183            insert_json(&mut map, "is_text", self.is_text)?;
184            insert_json(&mut map, "is_archive", self.is_archive)?;
185            insert_json(&mut map, "is_media", self.is_media)?;
186            insert_json(&mut map, "is_source", self.is_source)?;
187            insert_json(&mut map, "is_script", self.is_script)?;
188            insert_json(&mut map, "files_count", self.files_count)?;
189            insert_json(&mut map, "dirs_count", self.dirs_count)?;
190            insert_json(&mut map, "size_count", self.size_count)?;
191        }
192
193        insert_json(&mut map, "package_data", &self.package_data)?;
194        insert_json(
195            &mut map,
196            "detected_license_expression_spdx",
197            self.detected_license_expression_spdx(),
198        )?;
199        insert_json(&mut map, "license_detections", &self.license_detections)?;
200        if self.should_serialize_license_surface() {
201            insert_json(&mut map, "license_clues", &self.license_clues)?;
202        }
203        if self.percentage_of_license_text.is_some() {
204            insert_json(
205                &mut map,
206                "percentage_of_license_text",
207                self.percentage_of_license_text,
208            )?;
209        }
210        insert_json(&mut map, "copyrights", &self.copyrights)?;
211        insert_json(&mut map, "holders", &self.holders)?;
212        insert_json(&mut map, "authors", &self.authors)?;
213        if !self.emails.is_empty() {
214            insert_json(&mut map, "emails", &self.emails)?;
215        }
216        insert_json(&mut map, "urls", &self.urls)?;
217        insert_json(&mut map, "for_packages", &self.for_packages)?;
218        insert_json(&mut map, "scan_errors", &self.scan_errors)?;
219        if self.license_policy.is_some() {
220            insert_json(&mut map, "license_policy", &self.license_policy)?;
221        }
222        if self.is_generated.is_some() {
223            insert_json(&mut map, "is_generated", self.is_generated)?;
224        }
225        if self.source_count.is_some() {
226            insert_json(&mut map, "source_count", self.source_count)?;
227        }
228        if self.is_legal {
229            insert_json(&mut map, "is_legal", self.is_legal)?;
230        }
231        if self.is_manifest {
232            insert_json(&mut map, "is_manifest", self.is_manifest)?;
233        }
234        if self.is_readme {
235            insert_json(&mut map, "is_readme", self.is_readme)?;
236        }
237        if self.is_top_level {
238            insert_json(&mut map, "is_top_level", self.is_top_level)?;
239        }
240        if self.is_key_file {
241            insert_json(&mut map, "is_key_file", self.is_key_file)?;
242        }
243        if self.is_community {
244            insert_json(&mut map, "is_community", self.is_community)?;
245        }
246        if !self.facets.is_empty() {
247            insert_json(&mut map, "facets", &self.facets)?;
248        }
249        if self.tallies.is_some() {
250            insert_json(&mut map, "tallies", &self.tallies)?;
251        }
252
253        map.serialize(serializer)
254    }
255}
256
257impl From<&crate::models::FileInfo> for OutputFileInfo {
258    fn from(value: &crate::models::FileInfo) -> Self {
259        Self::from_with_compat_mode(value, crate::cli::CompatibilityMode::Native)
260    }
261}
262
263impl OutputFileInfo {
264    pub fn from_with_compat_mode(
265        value: &crate::models::FileInfo,
266        mode: crate::cli::CompatibilityMode,
267    ) -> Self {
268        Self {
269            name: value.name.clone(),
270            base_name: value.base_name.clone(),
271            extension: value.extension.clone(),
272            path: value.path.clone(),
273            file_type: value.file_type.clone(),
274            mime_type: value.mime_type.clone(),
275            file_type_label: value.file_type_label.clone(),
276            size: value.size,
277            date: value.date.clone(),
278            sha1: value.sha1.as_ref().map(|d| d.as_hex()),
279            md5: value.md5.as_ref().map(|d| d.as_hex()),
280            sha256: value.sha256.as_ref().map(|d| d.as_hex()),
281            sha1_git: value.sha1_git.as_ref().map(|d| d.as_hex()),
282            programming_language: value.programming_language.clone(),
283            package_data: value
284                .package_data
285                .iter()
286                .map(OutputPackageData::from)
287                .collect(),
288            license_expression: value.license_expression.clone(),
289            license_detections: value
290                .license_detections
291                .iter()
292                .map(OutputLicenseDetection::from)
293                .collect(),
294            license_clues: value.license_clues.iter().map(OutputMatch::from).collect(),
295            percentage_of_license_text: value.percentage_of_license_text,
296            copyrights: value
297                .copyrights
298                .iter()
299                .map(|copyright| OutputCopyright::from_with_compat_mode(copyright, mode))
300                .collect(),
301            holders: value.holders.iter().map(OutputHolder::from).collect(),
302            authors: value.authors.iter().map(OutputAuthor::from).collect(),
303            emails: value.emails.iter().map(OutputEmail::from).collect(),
304            urls: value.urls.iter().map(OutputURL::from).collect(),
305            for_packages: value
306                .for_packages
307                .iter()
308                .map(|uid| uid.to_string())
309                .collect(),
310            scan_errors: value.scan_errors.clone(),
311            license_policy: value
312                .license_policy
313                .as_ref()
314                .map(|v| v.iter().map(OutputLicensePolicyEntry::from).collect()),
315            is_generated: value.is_generated,
316            is_binary: value.is_binary,
317            is_text: value.is_text,
318            is_archive: value.is_archive,
319            is_media: value.is_media,
320            is_source: value.is_source,
321            is_script: value.is_script,
322            files_count: value.files_count,
323            dirs_count: value.dirs_count,
324            size_count: value.size_count,
325            source_count: value.source_count,
326            is_legal: value.is_legal,
327            is_manifest: value.is_manifest,
328            is_readme: value.is_readme,
329            is_top_level: value.is_top_level,
330            is_key_file: value.is_key_file,
331            is_community: value.is_community,
332            facets: value.facets.clone(),
333            tallies: value.tallies.as_ref().map(OutputTallies::from),
334        }
335    }
336}
337
338impl TryFrom<&OutputFileInfo> for crate::models::FileInfo {
339    type Error = String;
340    fn try_from(value: &OutputFileInfo) -> Result<Self, Self::Error> {
341        let mut package_data = Vec::with_capacity(value.package_data.len());
342        for p in &value.package_data {
343            package_data.push(crate::models::PackageData::try_from(p)?);
344        }
345        let mut license_detections = Vec::with_capacity(value.license_detections.len());
346        for d in &value.license_detections {
347            license_detections.push(crate::models::LicenseDetection::try_from(d)?);
348        }
349        let mut license_clues = Vec::with_capacity(value.license_clues.len());
350        for m in &value.license_clues {
351            license_clues.push(crate::models::Match::try_from(m)?);
352        }
353        let mut copyrights = Vec::with_capacity(value.copyrights.len());
354        for c in &value.copyrights {
355            copyrights.push(crate::models::Copyright::try_from(c)?);
356        }
357        let mut holders = Vec::with_capacity(value.holders.len());
358        for h in &value.holders {
359            holders.push(crate::models::Holder::try_from(h)?);
360        }
361        let mut authors = Vec::with_capacity(value.authors.len());
362        for a in &value.authors {
363            authors.push(crate::models::Author::try_from(a)?);
364        }
365        let mut emails = Vec::with_capacity(value.emails.len());
366        for e in &value.emails {
367            emails.push(crate::models::OutputEmail::try_from(e)?);
368        }
369        let mut urls = Vec::with_capacity(value.urls.len());
370        for u in &value.urls {
371            urls.push(crate::models::OutputURL::try_from(u)?);
372        }
373        let license_policy = value
374            .license_policy
375            .as_ref()
376            .map(|v| {
377                v.iter()
378                    .map(crate::models::LicensePolicyEntry::try_from)
379                    .collect::<Result<Vec<_>, _>>()
380            })
381            .transpose()?;
382        Ok(Self {
383            name: value.name.clone(),
384            base_name: value.base_name.clone(),
385            extension: value.extension.clone(),
386            path: value.path.clone(),
387            file_type: value.file_type.clone(),
388            mime_type: value.mime_type.clone(),
389            file_type_label: value.file_type_label.clone(),
390            size: value.size,
391            date: value.date.clone(),
392            sha1: value
393                .sha1
394                .as_ref()
395                .map(|s| crate::models::Sha1Digest::from_hex(s))
396                .transpose()
397                .map_err(|e| format!("invalid sha1: {}", e))?,
398            md5: value
399                .md5
400                .as_ref()
401                .map(|s| crate::models::Md5Digest::from_hex(s))
402                .transpose()
403                .map_err(|e| format!("invalid md5: {}", e))?,
404            sha256: value
405                .sha256
406                .as_ref()
407                .map(|s| crate::models::Sha256Digest::from_hex(s))
408                .transpose()
409                .map_err(|e| format!("invalid sha256: {}", e))?,
410            sha1_git: value
411                .sha1_git
412                .as_ref()
413                .map(|s| crate::models::GitSha1::from_hex(s))
414                .transpose()
415                .map_err(|e| format!("invalid sha1_git: {}", e))?,
416            programming_language: value.programming_language.clone(),
417            package_data,
418            license_expression: value.license_expression.clone(),
419            license_detections,
420            license_clues,
421            percentage_of_license_text: value.percentage_of_license_text,
422            copyrights,
423            holders,
424            authors,
425            emails,
426            urls,
427            for_packages: value
428                .for_packages
429                .iter()
430                .map(|s| crate::models::PackageUid::from_raw(s.clone()))
431                .collect(),
432            scan_errors: value.scan_errors.clone(),
433            scan_diagnostics: crate::models::diagnostics_from_legacy_scan_errors(
434                &value.scan_errors,
435            ),
436            license_policy,
437            is_generated: value.is_generated,
438            is_binary: value.is_binary,
439            is_text: value.is_text,
440            is_archive: value.is_archive,
441            is_media: value.is_media,
442            is_source: value.is_source,
443            is_script: value.is_script,
444            files_count: value.files_count,
445            dirs_count: value.dirs_count,
446            size_count: value.size_count,
447            source_count: value.source_count,
448            is_legal: value.is_legal,
449            is_manifest: value.is_manifest,
450            is_readme: value.is_readme,
451            is_top_level: value.is_top_level,
452            is_key_file: value.is_key_file,
453            is_community: value.is_community,
454            facets: value.facets.clone(),
455            tallies: value
456                .tallies
457                .as_ref()
458                .map(crate::models::Tallies::try_from)
459                .transpose()?,
460        })
461    }
462}