// spdx_rs/models/spdx_document.rs

// SPDX-FileCopyrightText: 2021 HH Partners
//
// SPDX-License-Identifier: MIT
4
5use std::collections::HashSet;
6
7use log::info;
8use serde::{Deserialize, Serialize};
9use uuid::Uuid;
10
11use super::{
12    Algorithm, Annotation, DocumentCreationInformation, FileInformation,
13    OtherLicensingInformationDetected, PackageInformation, Relationship, Snippet,
14};
15
16/// A representation of an [SPDX Document]
17///
18/// This is the main struct of this crate. The struct implements [`Serialize`] and [`Deserialize`]
19/// to allow it to be serialized into and deserialized from any data format supported by [Serde].
20///
21/// # SPDX specification version
22///
23/// The crate has been developed around SPDX version 2.2.1. Fields deprecated in 2.2.1, like
24/// [review information] are not supported. The plan is to support newer versions as they are
25/// released.
26///
27/// # Data formats
28///
29/// The crate has been developed for usage with JSON SPDX documents. The naming of the fields should
30/// conform to the spec for at least JSON. Other formats, like YAML may work, but no guarantees are
31/// made.
32///
33/// The crate also allows for deserializing the struct from SPDX documents in [tag-value format]
34/// with [`crate::parsers::spdx_from_tag_value`].
35///
36/// [SPDX Document]: https://spdx.github.io/spdx-spec/composition-of-an-SPDX-document/
37/// [Serde]: https://serde.rs
38/// [review information]: https://spdx.github.io/spdx-spec/review-information-deprecated/
39/// [tag-value format]: https://spdx.github.io/spdx-spec/conformance/
40#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
41#[serde(rename_all = "camelCase", deny_unknown_fields)]
42pub struct SPDX {
43    /// <https://spdx.github.io/spdx-spec/2-document-creation-information/>
44    #[serde(flatten)]
45    pub document_creation_information: DocumentCreationInformation,
46
47    /// <https://spdx.github.io/spdx-spec/3-package-information/>
48    #[serde(rename = "packages")]
49    #[serde(default)]
50    pub package_information: Vec<PackageInformation>,
51
52    /// <https://spdx.github.io/spdx-spec/6-other-licensing-information-detected/>
53    #[serde(rename = "hasExtractedLicensingInfos")]
54    #[serde(default)]
55    pub other_licensing_information_detected: Vec<OtherLicensingInformationDetected>,
56
57    /// <https://spdx.github.io/spdx-spec/4-file-information/>
58    #[serde(rename = "files")]
59    #[serde(default)]
60    pub file_information: Vec<FileInformation>,
61
62    /// <https://spdx.github.io/spdx-spec/5-snippet-information/>
63    #[serde(rename = "snippets")]
64    #[serde(default)]
65    pub snippet_information: Vec<Snippet>,
66
67    /// <https://spdx.github.io/spdx-spec/7-relationships-between-SPDX-elements/>
68    #[serde(default)]
69    pub relationships: Vec<Relationship>,
70
71    /// <https://spdx.github.io/spdx-spec/8-annotations/>
72    #[serde(default)]
73    pub annotations: Vec<Annotation>,
74
75    /// Counter for creating SPDXRefs. Is not part of the spec, so don't serialize.
76    #[serde(skip)]
77    pub spdx_ref_counter: i32,
78}
79
80impl SPDX {
81    /// Create new SPDX struct.
82    pub fn new(name: &str) -> Self {
83        info!("Creating SPDX.");
84
85        Self {
86            document_creation_information: DocumentCreationInformation {
87                document_name: name.to_string(),
88                spdx_document_namespace: format!(
89                    "http://spdx.org/spdxdocs/{}-{}",
90                    name,
91                    Uuid::new_v4()
92                ),
93                ..DocumentCreationInformation::default()
94            },
95            package_information: Vec::new(),
96            other_licensing_information_detected: Vec::new(),
97            file_information: Vec::new(),
98            relationships: Vec::new(),
99            spdx_ref_counter: 0,
100            annotations: Vec::new(),
101            snippet_information: Vec::new(),
102        }
103    }
104
105    /// Get unique hashes for all files the SPDX.
106    pub fn get_unique_hashes(&self, algorithm: Algorithm) -> HashSet<String> {
107        info!("Getting unique hashes for files in SPDX.");
108
109        let mut unique_hashes: HashSet<String> = HashSet::new();
110
111        for file_information in &self.file_information {
112            if let Some(checksum) = file_information.checksum(algorithm) {
113                unique_hashes.insert(checksum.to_string());
114            }
115        }
116
117        unique_hashes
118    }
119
120    /// Find related files of the package with the provided id.
121    pub fn get_files_for_package(
122        &self,
123        package_spdx_id: &str,
124    ) -> Vec<(&FileInformation, &Relationship)> {
125        info!("Finding related files for package {}.", &package_spdx_id);
126
127        let relationships = self
128            .relationships
129            .iter()
130            .filter(|relationship| relationship.spdx_element_id == package_spdx_id);
131
132        let mut result: Vec<(&FileInformation, &Relationship)> = Vec::new();
133
134        for relationship in relationships {
135            let file = self
136                .file_information
137                .iter()
138                .find(|file| file.file_spdx_identifier == relationship.related_spdx_element);
139            if let Some(file) = file {
140                result.push((file, relationship));
141            };
142        }
143
144        result
145    }
146
147    /// Get all license identifiers from the SPDX.
148    ///
149    /// # Errors
150    ///
151    /// Returns [`SpdxError`] if parsing of the expressions fails.
152    pub fn get_license_ids(&self) -> HashSet<String> {
153        info!("Getting all license identifiers from SPDX.");
154
155        let mut license_ids = HashSet::new();
156
157        for file in &self.file_information {
158            if let Some(concluded_license) = &file.concluded_license {
159                for license in concluded_license.identifiers() {
160                    if license != "NOASSERTION" && license != "NONE" {
161                        license_ids.insert(license.clone());
162                    }
163                }
164            }
165        }
166
167        license_ids
168    }
169
170    /// Get all relationships where the given SPDX ID is the SPDX element id.
171    pub fn relationships_for_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> {
172        self.relationships
173            .iter()
174            .filter(|relationship| relationship.spdx_element_id == spdx_id)
175            .collect()
176    }
177
178    /// Get all relationships where the given SPDX ID is the related SPDX element id.
179    pub fn relationships_for_related_spdx_id(&self, spdx_id: &str) -> Vec<&Relationship> {
180        self.relationships
181            .iter()
182            .filter(|relationship| relationship.related_spdx_element == spdx_id)
183            .collect()
184    }
185}
186
#[cfg(test)]
mod test {
    use std::{fs::read_to_string, iter::FromIterator};

    use spdx_expression::SpdxExpression;

    use crate::models::RelationshipType;

    use super::*;

    /// Path of the SPDX 2.2 JSON example document that every test in this module reads.
    const EXAMPLE_PATH: &str = "tests/data/SPDXJSONExample-v2.2.spdx.json";

    /// Read and deserialize the example document, panicking if either step fails.
    fn load_example() -> SPDX {
        let json = read_to_string(EXAMPLE_PATH).unwrap();
        serde_json::from_str(&json).unwrap()
    }

    /// Build a comment-less relationship from `element` to `related`.
    fn relationship(
        element: &str,
        related: &str,
        relationship_type: RelationshipType,
    ) -> Relationship {
        Relationship {
            spdx_element_id: element.into(),
            related_spdx_element: related.into(),
            relationship_type,
            comment: None,
        }
    }

    /// The example document deserializes and carries the expected document name.
    #[test]
    fn deserialize_simple_spdx() {
        let document = load_example();

        assert_eq!(
            document.document_creation_information.document_name,
            String::from("SPDX-Tools-v2.0")
        );
    }

    /// The example package has exactly one related file, found through a `Contains` relationship.
    #[test]
    fn find_related_files_for_package() {
        let document = load_example();

        let related_files = document.get_files_for_package("SPDXRef-Package");

        assert_eq!(related_files.len(), 1);

        let (file, file_relationship) = related_files
            .iter()
            .find(|(file, _)| file.file_name == *"./lib-source/jena-2.6.3-sources.jar")
            .expect("Should always be found");

        assert_eq!(file.file_spdx_identifier, "SPDXRef-JenaLib");
        assert_eq!(
            file_relationship.relationship_type,
            RelationshipType::Contains
        );

        assert_eq!(
            file.concluded_license,
            Some(SpdxExpression::parse("LicenseRef-1").unwrap())
        );
    }

    /// All license identifiers used by the files of the example document are reported.
    #[test]
    fn get_all_licenses_from_spdx() {
        let document = load_example();

        let actual = document.get_license_ids();

        let expected: HashSet<String> = [
            "Apache-2.0",
            "LicenseRef-1",
            "LGPL-2.0-only",
            "LicenseRef-2",
        ]
        .iter()
        .map(|id| id.to_string())
        .collect();

        assert_eq!(expected, actual);
    }

    /// Relationships are found by their SPDX element id, in document order.
    #[test]
    fn get_relationships_for_spdx_id() {
        let document = load_example();

        let relationships = document.relationships_for_spdx_id("SPDXRef-Package");

        let owned = [
            relationship(
                "SPDXRef-Package",
                "SPDXRef-Saxon",
                RelationshipType::DynamicLink,
            ),
            relationship(
                "SPDXRef-Package",
                "SPDXRef-JenaLib",
                RelationshipType::Contains,
            ),
        ];
        let expected_relationships: Vec<&Relationship> = owned.iter().collect();

        assert_eq!(relationships, expected_relationships);
    }

    /// Relationships are found by their related SPDX element id, in document order.
    #[test]
    fn get_relationships_for_related_spdx_id() {
        let document = load_example();

        let relationships = document.relationships_for_related_spdx_id("SPDXRef-Package");

        let owned = [
            relationship(
                "SPDXRef-DOCUMENT",
                "SPDXRef-Package",
                RelationshipType::Contains,
            ),
            relationship(
                "SPDXRef-DOCUMENT",
                "SPDXRef-Package",
                RelationshipType::Describes,
            ),
            relationship(
                "SPDXRef-JenaLib",
                "SPDXRef-Package",
                RelationshipType::Contains,
            ),
        ];
        let expected_relationships: Vec<&Relationship> = owned.iter().collect();

        assert_eq!(relationships, expected_relationships);
    }

    /// The SHA1 checksums of the files in the example document are collected without duplicates.
    #[test]
    fn get_unique_hashes_for_files() {
        let document = load_example();

        let hashes = document.get_unique_hashes(Algorithm::SHA1);

        let expected: HashSet<String> = HashSet::from_iter([
            "2fd4e1c67a2d28fced849ee1bb76e7391b93eb12".to_string(),
            "c2b4e1c67a2d28fced849ee1bb76e7391b93f125".to_string(),
            "3ab4e1c67a2d28fced849ee1bb76e7391b93f125".to_string(),
            "d6a770ba38583ed4bb4525bd96e50461655d2758".to_string(),
        ]);

        assert_eq!(hashes, expected);
    }
}