Skip to main content

provenant/license_detection/embedded/
index.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use super::schema::{EmbeddedArtifactMetadata, EmbeddedLoaderSnapshot, SCHEMA_VERSION};
5use crate::license_detection::index::LicenseIndex;
6use crate::license_detection::index::build_index_from_loaded;
7
8#[derive(Debug, Clone)]
9#[allow(dead_code)]
10pub struct LoadedEmbeddedLicenseIndex {
11    pub index: LicenseIndex,
12    pub metadata: EmbeddedArtifactMetadata,
13}
14
15#[derive(Debug, Clone, thiserror::Error)]
16#[error("License loader artifact error: {0}")]
17pub struct SerializationError(pub String);
18
19pub fn load_loader_snapshot_from_bytes(
20    bytes: &[u8],
21) -> Result<EmbeddedLoaderSnapshot, SerializationError> {
22    if bytes.is_empty() {
23        return Err(SerializationError(
24            "Embedded license index artifact is empty".to_string(),
25        ));
26    }
27
28    let decompressed = zstd::decode_all(bytes).map_err(|e| {
29        SerializationError(format!("Failed to decompress embedded artifact: {}", e))
30    })?;
31
32    let snapshot: EmbeddedLoaderSnapshot = postcard::from_bytes(&decompressed).map_err(|e| {
33        SerializationError(format!("Failed to deserialize embedded artifact: {}", e))
34    })?;
35
36    if snapshot.schema_version != SCHEMA_VERSION {
37        return Err(SerializationError(format!(
38            "Embedded artifact schema version mismatch: expected {}, got {}",
39            SCHEMA_VERSION, snapshot.schema_version
40        )));
41    }
42
43    Ok(snapshot)
44}
45
46#[allow(dead_code)]
47pub fn load_embedded_license_index_from_bytes(
48    bytes: &[u8],
49) -> Result<LoadedEmbeddedLicenseIndex, SerializationError> {
50    let snapshot = load_loader_snapshot_from_bytes(bytes)?;
51    let index = build_index_from_loaded(snapshot.rules, snapshot.licenses, false);
52
53    Ok(LoadedEmbeddedLicenseIndex {
54        index,
55        metadata: snapshot.metadata,
56    })
57}
58
59pub fn load_embedded_artifact_metadata_from_bytes(
60    bytes: &[u8],
61) -> Result<EmbeddedArtifactMetadata, SerializationError> {
62    Ok(load_loader_snapshot_from_bytes(bytes)?.metadata)
63}
64
65#[cfg(test)]
66mod tests {
67    use super::*;
68    use crate::license_detection::models::{LoadedLicense, LoadedRule};
69
70    fn create_test_metadata() -> EmbeddedArtifactMetadata {
71        EmbeddedArtifactMetadata {
72            spdx_license_list_version: "3.27".to_string(),
73            license_index_provenance: crate::models::LicenseIndexProvenance {
74                source: "embedded-artifact".to_string(),
75                dataset_fingerprint: "test".to_string(),
76                ignored_rules: vec![],
77                ignored_licenses: vec![],
78                ignored_rules_due_to_licenses: vec![],
79                added_rules: vec![],
80                replaced_rules: vec![],
81                added_licenses: vec![],
82                replaced_licenses: vec![],
83            },
84        }
85    }
86
87    fn serialize_loader_snapshot_to_bytes(
88        rules: Vec<LoadedRule>,
89        licenses: Vec<LoadedLicense>,
90    ) -> Result<Vec<u8>, SerializationError> {
91        let snapshot = EmbeddedLoaderSnapshot {
92            schema_version: SCHEMA_VERSION,
93            metadata: create_test_metadata(),
94            rules,
95            licenses,
96        };
97
98        let postcard_bytes = postcard::to_allocvec(&snapshot).map_err(|e| {
99            SerializationError(format!("Failed to serialize embedded artifact: {}", e))
100        })?;
101
102        zstd::encode_all(&postcard_bytes[..], 0)
103            .map_err(|e| SerializationError(format!("Failed to compress embedded artifact: {}", e)))
104    }
105
106    fn create_test_loaded_rule() -> LoadedRule {
107        LoadedRule {
108            identifier: "test.RULE".to_string(),
109            license_expression: "mit".to_string(),
110            text: "MIT License text".to_string(),
111            rule_kind: crate::license_detection::models::RuleKind::Text,
112            is_false_positive: false,
113            is_required_phrase: false,
114            skip_for_required_phrase_generation: false,
115            relevance: Some(100),
116            minimum_coverage: None,
117            has_stored_minimum_coverage: false,
118            is_continuous: false,
119            referenced_filenames: None,
120            ignorable_urls: None,
121            ignorable_emails: None,
122            ignorable_copyrights: None,
123            ignorable_holders: None,
124            ignorable_authors: None,
125            language: None,
126            notes: None,
127            is_deprecated: false,
128            replaced_by: vec![],
129        }
130    }
131
132    fn create_test_loaded_license() -> LoadedLicense {
133        LoadedLicense {
134            key: "mit".to_string(),
135            short_name: Some("MIT".to_string()),
136            name: "MIT License".to_string(),
137            language: Some("en".to_string()),
138            spdx_license_key: Some("MIT".to_string()),
139            other_spdx_license_keys: vec![],
140            category: Some("Permissive".to_string()),
141            owner: None,
142            homepage_url: None,
143            text: "MIT License text".to_string(),
144            reference_urls: vec![],
145            osi_license_key: None,
146            text_urls: vec![],
147            osi_url: None,
148            faq_url: None,
149            other_urls: vec![],
150            notes: None,
151            is_deprecated: false,
152            is_exception: false,
153            is_unknown: false,
154            is_generic: false,
155            replaced_by: vec![],
156            minimum_coverage: None,
157            standard_notice: None,
158            ignorable_copyrights: None,
159            ignorable_holders: None,
160            ignorable_authors: None,
161            ignorable_urls: None,
162            ignorable_emails: None,
163        }
164    }
165
166    #[test]
167    fn test_load_license_index_from_bytes_roundtrip() {
168        let bytes = serialize_loader_snapshot_to_bytes(
169            vec![create_test_loaded_rule()],
170            vec![create_test_loaded_license()],
171        )
172        .expect("Should serialize");
173
174        let index = load_embedded_license_index_from_bytes(&bytes)
175            .expect("Should deserialize")
176            .index;
177
178        assert_eq!(index.licenses_by_key.len(), 1);
179        assert!(
180            index
181                .rules_by_rid
182                .iter()
183                .any(|rule| rule.identifier == "test.RULE"),
184            "runtime index should retain the serialized rule"
185        );
186        assert!(
187            index
188                .rules_by_rid
189                .iter()
190                .any(|rule| rule.identifier == "mit.LICENSE"),
191            "runtime index should synthesize a license-derived rule"
192        );
193    }
194
195    #[test]
196    fn test_load_embedded_artifact_metadata_from_bytes_roundtrip() {
197        let bytes = serialize_loader_snapshot_to_bytes(
198            vec![create_test_loaded_rule()],
199            vec![create_test_loaded_license()],
200        )
201        .expect("Should serialize");
202
203        let metadata = load_embedded_artifact_metadata_from_bytes(&bytes)
204            .expect("Should deserialize metadata");
205
206        assert_eq!(metadata.spdx_license_list_version, "3.27");
207        assert_eq!(
208            metadata.license_index_provenance.source,
209            "embedded-artifact"
210        );
211    }
212
213    #[test]
214    fn test_load_license_index_from_bytes_rejects_empty() {
215        let error = load_embedded_license_index_from_bytes(&[]).unwrap_err();
216        assert!(error.to_string().contains("artifact is empty"));
217    }
218}