Skip to main content

provenant/output_schema/
file_info.rs

1use serde::{Deserialize, Serialize, Serializer};
2use serde_json::Map;
3
4use super::author::OutputAuthor;
5use super::copyright::OutputCopyright;
6use super::email::OutputEmail;
7use super::holder::OutputHolder;
8use super::license_detection::OutputLicenseDetection;
9use super::license_match::OutputMatch;
10use super::license_policy_entry::OutputLicensePolicyEntry;
11use super::package_data::OutputPackageData;
12use super::serde_helpers::insert_json;
13use super::tallies::OutputTallies;
14use super::url::OutputURL;
15
16#[derive(Debug, Clone, Deserialize)]
17pub struct OutputFileInfo {
18    #[serde(default)]
19    pub name: String,
20    #[serde(default)]
21    pub base_name: String,
22    #[serde(default)]
23    pub extension: String,
24    pub path: String,
25    #[serde(rename = "type")]
26    pub file_type: crate::models::FileType,
27    pub mime_type: Option<String>,
28    pub file_type_label: Option<String>,
29    #[serde(default)]
30    pub size: u64,
31    pub date: Option<String>,
32    pub sha1: Option<String>,
33    pub md5: Option<String>,
34    pub sha256: Option<String>,
35    pub sha1_git: Option<String>,
36    pub programming_language: Option<String>,
37    #[serde(default)]
38    pub package_data: Vec<OutputPackageData>,
39    #[serde(rename = "detected_license_expression_spdx")]
40    pub license_expression: Option<String>,
41    #[serde(default)]
42    pub license_detections: Vec<OutputLicenseDetection>,
43    #[serde(default, skip_serializing_if = "Vec::is_empty")]
44    pub license_clues: Vec<OutputMatch>,
45    pub percentage_of_license_text: Option<f64>,
46    #[serde(default)]
47    pub copyrights: Vec<OutputCopyright>,
48    #[serde(default)]
49    pub holders: Vec<OutputHolder>,
50    #[serde(default)]
51    pub authors: Vec<OutputAuthor>,
52    #[serde(default, skip_serializing_if = "Vec::is_empty")]
53    pub emails: Vec<OutputEmail>,
54    #[serde(default)]
55    pub urls: Vec<OutputURL>,
56    #[serde(default)]
57    pub for_packages: Vec<String>,
58    #[serde(default)]
59    pub scan_errors: Vec<String>,
60    pub license_policy: Option<Vec<OutputLicensePolicyEntry>>,
61    pub is_generated: Option<bool>,
62    pub is_binary: Option<bool>,
63    pub is_text: Option<bool>,
64    pub is_archive: Option<bool>,
65    pub is_media: Option<bool>,
66    pub is_source: Option<bool>,
67    pub is_script: Option<bool>,
68    pub files_count: Option<usize>,
69    pub dirs_count: Option<usize>,
70    pub size_count: Option<u64>,
71    pub source_count: Option<usize>,
72    #[serde(default, skip_serializing_if = "is_false")]
73    pub is_legal: bool,
74    #[serde(default, skip_serializing_if = "is_false")]
75    pub is_manifest: bool,
76    #[serde(default, skip_serializing_if = "is_false")]
77    pub is_readme: bool,
78    #[serde(default, skip_serializing_if = "is_false")]
79    pub is_top_level: bool,
80    #[serde(default, skip_serializing_if = "is_false")]
81    pub is_key_file: bool,
82    #[serde(default, skip_serializing_if = "is_false")]
83    pub is_community: bool,
84    #[serde(default, skip_serializing_if = "Vec::is_empty")]
85    pub facets: Vec<String>,
86    pub tallies: Option<OutputTallies>,
87}
88
89impl OutputFileInfo {
90    pub(crate) fn should_serialize_info_surface(&self) -> bool {
91        self.date.is_some()
92            || self.sha1.is_some()
93            || self.md5.is_some()
94            || self.sha256.is_some()
95            || self.sha1_git.is_some()
96            || self.mime_type.is_some()
97            || self.file_type_label.is_some()
98            || self.programming_language.is_some()
99            || self.is_binary.is_some()
100            || self.is_text.is_some()
101            || self.is_archive.is_some()
102            || self.is_media.is_some()
103            || self.is_source.is_some()
104            || self.is_script.is_some()
105            || self.files_count.is_some()
106            || self.dirs_count.is_some()
107            || self.size_count.is_some()
108    }
109
110    pub(crate) fn should_serialize_license_surface(&self) -> bool {
111        self.license_expression.is_some()
112            || !self.license_detections.is_empty()
113            || !self.license_clues.is_empty()
114            || self.percentage_of_license_text.is_some()
115    }
116
117    pub(crate) fn detected_license_expression_spdx(&self) -> Option<String> {
118        crate::utils::spdx::combine_license_expressions(
119            self.license_detections
120                .iter()
121                .filter(|detection| !detection.license_expression_spdx.is_empty())
122                .map(|detection| detection.license_expression_spdx.clone()),
123        )
124        .or_else(|| {
125            crate::utils::spdx::combine_license_expressions(
126                self.package_data
127                    .iter()
128                    .flat_map(|package_data| package_data.license_detections.iter())
129                    .filter(|detection| !detection.license_expression_spdx.is_empty())
130                    .map(|detection| detection.license_expression_spdx.clone()),
131            )
132        })
133        .or_else(|| {
134            self.license_expression
135                .clone()
136                .filter(|expression| !expression.is_empty())
137        })
138    }
139}
140
141impl Serialize for OutputFileInfo {
142    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
143    where
144        S: Serializer,
145    {
146        let mut map = Map::new();
147        insert_json(&mut map, "path", &self.path)?;
148        insert_json(&mut map, "type", &self.file_type)?;
149        insert_json(&mut map, "name", &self.name)?;
150        insert_json(&mut map, "base_name", &self.base_name)?;
151        insert_json(&mut map, "extension", &self.extension)?;
152        insert_json(&mut map, "size", self.size)?;
153
154        if self.should_serialize_info_surface() {
155            insert_json(&mut map, "date", &self.date)?;
156            insert_json(&mut map, "sha1", self.sha1.as_ref())?;
157            insert_json(&mut map, "md5", self.md5.as_ref())?;
158            insert_json(&mut map, "sha256", self.sha256.as_ref())?;
159            insert_json(&mut map, "sha1_git", self.sha1_git.as_ref())?;
160            insert_json(&mut map, "mime_type", &self.mime_type)?;
161            insert_json(&mut map, "file_type", &self.file_type_label)?;
162            insert_json(&mut map, "programming_language", &self.programming_language)?;
163            insert_json(&mut map, "is_binary", self.is_binary)?;
164            insert_json(&mut map, "is_text", self.is_text)?;
165            insert_json(&mut map, "is_archive", self.is_archive)?;
166            insert_json(&mut map, "is_media", self.is_media)?;
167            insert_json(&mut map, "is_source", self.is_source)?;
168            insert_json(&mut map, "is_script", self.is_script)?;
169            insert_json(&mut map, "files_count", self.files_count)?;
170            insert_json(&mut map, "dirs_count", self.dirs_count)?;
171            insert_json(&mut map, "size_count", self.size_count)?;
172        }
173
174        insert_json(&mut map, "package_data", &self.package_data)?;
175        insert_json(
176            &mut map,
177            "detected_license_expression_spdx",
178            self.detected_license_expression_spdx(),
179        )?;
180        insert_json(&mut map, "license_detections", &self.license_detections)?;
181        if self.should_serialize_license_surface() {
182            insert_json(&mut map, "license_clues", &self.license_clues)?;
183        }
184        if self.percentage_of_license_text.is_some() {
185            insert_json(
186                &mut map,
187                "percentage_of_license_text",
188                self.percentage_of_license_text,
189            )?;
190        }
191        insert_json(&mut map, "copyrights", &self.copyrights)?;
192        insert_json(&mut map, "holders", &self.holders)?;
193        insert_json(&mut map, "authors", &self.authors)?;
194        if !self.emails.is_empty() {
195            insert_json(&mut map, "emails", &self.emails)?;
196        }
197        insert_json(&mut map, "urls", &self.urls)?;
198        insert_json(&mut map, "for_packages", &self.for_packages)?;
199        insert_json(&mut map, "scan_errors", &self.scan_errors)?;
200        if self.license_policy.is_some() {
201            insert_json(&mut map, "license_policy", &self.license_policy)?;
202        }
203        if self.is_generated.is_some() {
204            insert_json(&mut map, "is_generated", self.is_generated)?;
205        }
206        if self.source_count.is_some() {
207            insert_json(&mut map, "source_count", self.source_count)?;
208        }
209        if self.is_legal {
210            insert_json(&mut map, "is_legal", self.is_legal)?;
211        }
212        if self.is_manifest {
213            insert_json(&mut map, "is_manifest", self.is_manifest)?;
214        }
215        if self.is_readme {
216            insert_json(&mut map, "is_readme", self.is_readme)?;
217        }
218        if self.is_top_level {
219            insert_json(&mut map, "is_top_level", self.is_top_level)?;
220        }
221        if self.is_key_file {
222            insert_json(&mut map, "is_key_file", self.is_key_file)?;
223        }
224        if self.is_community {
225            insert_json(&mut map, "is_community", self.is_community)?;
226        }
227        if !self.facets.is_empty() {
228            insert_json(&mut map, "facets", &self.facets)?;
229        }
230        if self.tallies.is_some() {
231            insert_json(&mut map, "tallies", &self.tallies)?;
232        }
233
234        map.serialize(serializer)
235    }
236}
237
238impl From<&crate::models::FileInfo> for OutputFileInfo {
239    fn from(value: &crate::models::FileInfo) -> Self {
240        Self {
241            name: value.name.clone(),
242            base_name: value.base_name.clone(),
243            extension: value.extension.clone(),
244            path: value.path.clone(),
245            file_type: value.file_type.clone(),
246            mime_type: value.mime_type.clone(),
247            file_type_label: value.file_type_label.clone(),
248            size: value.size,
249            date: value.date.clone(),
250            sha1: value.sha1.as_ref().map(|d| d.as_hex()),
251            md5: value.md5.as_ref().map(|d| d.as_hex()),
252            sha256: value.sha256.as_ref().map(|d| d.as_hex()),
253            sha1_git: value.sha1_git.as_ref().map(|d| d.as_hex()),
254            programming_language: value.programming_language.clone(),
255            package_data: value
256                .package_data
257                .iter()
258                .map(OutputPackageData::from)
259                .collect(),
260            license_expression: value.license_expression.clone(),
261            license_detections: value
262                .license_detections
263                .iter()
264                .map(OutputLicenseDetection::from)
265                .collect(),
266            license_clues: value.license_clues.iter().map(OutputMatch::from).collect(),
267            percentage_of_license_text: value.percentage_of_license_text,
268            copyrights: value.copyrights.iter().map(OutputCopyright::from).collect(),
269            holders: value.holders.iter().map(OutputHolder::from).collect(),
270            authors: value.authors.iter().map(OutputAuthor::from).collect(),
271            emails: value.emails.iter().map(OutputEmail::from).collect(),
272            urls: value.urls.iter().map(OutputURL::from).collect(),
273            for_packages: value
274                .for_packages
275                .iter()
276                .map(|uid| uid.to_string())
277                .collect(),
278            scan_errors: value.scan_errors.clone(),
279            license_policy: value
280                .license_policy
281                .as_ref()
282                .map(|v| v.iter().map(OutputLicensePolicyEntry::from).collect()),
283            is_generated: value.is_generated,
284            is_binary: value.is_binary,
285            is_text: value.is_text,
286            is_archive: value.is_archive,
287            is_media: value.is_media,
288            is_source: value.is_source,
289            is_script: value.is_script,
290            files_count: value.files_count,
291            dirs_count: value.dirs_count,
292            size_count: value.size_count,
293            source_count: value.source_count,
294            is_legal: value.is_legal,
295            is_manifest: value.is_manifest,
296            is_readme: value.is_readme,
297            is_top_level: value.is_top_level,
298            is_key_file: value.is_key_file,
299            is_community: value.is_community,
300            facets: value.facets.clone(),
301            tallies: value.tallies.as_ref().map(OutputTallies::from),
302        }
303    }
304}
305
306impl TryFrom<&OutputFileInfo> for crate::models::FileInfo {
307    type Error = String;
308    fn try_from(value: &OutputFileInfo) -> Result<Self, Self::Error> {
309        let mut package_data = Vec::with_capacity(value.package_data.len());
310        for p in &value.package_data {
311            package_data.push(crate::models::PackageData::try_from(p)?);
312        }
313        let mut license_detections = Vec::with_capacity(value.license_detections.len());
314        for d in &value.license_detections {
315            license_detections.push(crate::models::LicenseDetection::try_from(d)?);
316        }
317        let mut license_clues = Vec::with_capacity(value.license_clues.len());
318        for m in &value.license_clues {
319            license_clues.push(crate::models::Match::try_from(m)?);
320        }
321        let mut copyrights = Vec::with_capacity(value.copyrights.len());
322        for c in &value.copyrights {
323            copyrights.push(crate::models::Copyright::try_from(c)?);
324        }
325        let mut holders = Vec::with_capacity(value.holders.len());
326        for h in &value.holders {
327            holders.push(crate::models::Holder::try_from(h)?);
328        }
329        let mut authors = Vec::with_capacity(value.authors.len());
330        for a in &value.authors {
331            authors.push(crate::models::Author::try_from(a)?);
332        }
333        let mut emails = Vec::with_capacity(value.emails.len());
334        for e in &value.emails {
335            emails.push(crate::models::OutputEmail::try_from(e)?);
336        }
337        let mut urls = Vec::with_capacity(value.urls.len());
338        for u in &value.urls {
339            urls.push(crate::models::OutputURL::try_from(u)?);
340        }
341        let license_policy = value
342            .license_policy
343            .as_ref()
344            .map(|v| {
345                v.iter()
346                    .map(crate::models::LicensePolicyEntry::try_from)
347                    .collect::<Result<Vec<_>, _>>()
348            })
349            .transpose()?;
350        Ok(Self {
351            name: value.name.clone(),
352            base_name: value.base_name.clone(),
353            extension: value.extension.clone(),
354            path: value.path.clone(),
355            file_type: value.file_type.clone(),
356            mime_type: value.mime_type.clone(),
357            file_type_label: value.file_type_label.clone(),
358            size: value.size,
359            date: value.date.clone(),
360            sha1: value
361                .sha1
362                .as_ref()
363                .map(|s| crate::models::Sha1Digest::from_hex(s))
364                .transpose()
365                .map_err(|e| format!("invalid sha1: {}", e))?,
366            md5: value
367                .md5
368                .as_ref()
369                .map(|s| crate::models::Md5Digest::from_hex(s))
370                .transpose()
371                .map_err(|e| format!("invalid md5: {}", e))?,
372            sha256: value
373                .sha256
374                .as_ref()
375                .map(|s| crate::models::Sha256Digest::from_hex(s))
376                .transpose()
377                .map_err(|e| format!("invalid sha256: {}", e))?,
378            sha1_git: value
379                .sha1_git
380                .as_ref()
381                .map(|s| crate::models::GitSha1::from_hex(s))
382                .transpose()
383                .map_err(|e| format!("invalid sha1_git: {}", e))?,
384            programming_language: value.programming_language.clone(),
385            package_data,
386            license_expression: value.license_expression.clone(),
387            license_detections,
388            license_clues,
389            percentage_of_license_text: value.percentage_of_license_text,
390            copyrights,
391            holders,
392            authors,
393            emails,
394            urls,
395            for_packages: value
396                .for_packages
397                .iter()
398                .map(|s| crate::models::PackageUid::from_raw(s.clone()))
399                .collect(),
400            scan_errors: value.scan_errors.clone(),
401            license_policy,
402            is_generated: value.is_generated,
403            is_binary: value.is_binary,
404            is_text: value.is_text,
405            is_archive: value.is_archive,
406            is_media: value.is_media,
407            is_source: value.is_source,
408            is_script: value.is_script,
409            files_count: value.files_count,
410            dirs_count: value.dirs_count,
411            size_count: value.size_count,
412            source_count: value.source_count,
413            is_legal: value.is_legal,
414            is_manifest: value.is_manifest,
415            is_readme: value.is_readme,
416            is_top_level: value.is_top_level,
417            is_key_file: value.is_key_file,
418            is_community: value.is_community,
419            facets: value.facets.clone(),
420            tallies: value
421                .tallies
422                .as_ref()
423                .map(crate::models::Tallies::try_from)
424                .transpose()?,
425        })
426    }
427}