Skip to main content

provenant/license_detection/embedded/
index.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
4use super::schema::{EmbeddedArtifactMetadata, EmbeddedLoaderSnapshot, SCHEMA_VERSION};
5use crate::license_detection::index::LicenseIndex;
6use crate::license_detection::index::build_index_from_loaded;
7
/// A runtime license index paired with the metadata of the embedded artifact
/// it was decoded from.
///
/// Returned by [`load_embedded_license_index_from_bytes`].
#[derive(Debug, Clone)]
// NOTE(review): no non-test user of this type is visible in this file, which is
// presumably why dead-code warnings are silenced — confirm before removing.
#[allow(dead_code)]
pub struct LoadedEmbeddedLicenseIndex {
    /// Index produced by `build_index_from_loaded` from the snapshot's rules and licenses.
    pub index: LicenseIndex,
    /// Artifact metadata taken unchanged from the decoded snapshot.
    pub metadata: EmbeddedArtifactMetadata,
}
14
/// Error produced while decoding (or, in tests, encoding) an embedded license
/// loader artifact.
///
/// The wrapped `String` is the human-readable description of the failure.
#[derive(Debug, Clone)]
pub struct SerializationError(pub String);

impl std::fmt::Display for SerializationError {
    /// Writes the wrapped message behind the fixed
    /// `License loader artifact error: ` prefix.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self(detail) = self;
        f.write_fmt(format_args!("License loader artifact error: {}", detail))
    }
}

// No overrides: the default `std::error::Error` methods are used as-is.
impl std::error::Error for SerializationError {}
25
26pub fn load_loader_snapshot_from_bytes(
27    bytes: &[u8],
28) -> Result<EmbeddedLoaderSnapshot, SerializationError> {
29    if bytes.is_empty() {
30        return Err(SerializationError(
31            "Embedded license index artifact is empty".to_string(),
32        ));
33    }
34
35    let decompressed = zstd::decode_all(bytes).map_err(|e| {
36        SerializationError(format!("Failed to decompress embedded artifact: {}", e))
37    })?;
38
39    let snapshot: EmbeddedLoaderSnapshot = postcard::from_bytes(&decompressed).map_err(|e| {
40        SerializationError(format!("Failed to deserialize embedded artifact: {}", e))
41    })?;
42
43    if snapshot.schema_version != SCHEMA_VERSION {
44        return Err(SerializationError(format!(
45            "Embedded artifact schema version mismatch: expected {}, got {}",
46            SCHEMA_VERSION, snapshot.schema_version
47        )));
48    }
49
50    Ok(snapshot)
51}
52
53#[allow(dead_code)]
54pub fn load_embedded_license_index_from_bytes(
55    bytes: &[u8],
56) -> Result<LoadedEmbeddedLicenseIndex, SerializationError> {
57    let snapshot = load_loader_snapshot_from_bytes(bytes)?;
58    let index = build_index_from_loaded(snapshot.rules, snapshot.licenses, false);
59
60    Ok(LoadedEmbeddedLicenseIndex {
61        index,
62        metadata: snapshot.metadata,
63    })
64}
65
66pub fn load_embedded_artifact_metadata_from_bytes(
67    bytes: &[u8],
68) -> Result<EmbeddedArtifactMetadata, SerializationError> {
69    Ok(load_loader_snapshot_from_bytes(bytes)?.metadata)
70}
71
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::{LoadedLicense, LoadedRule};

    /// Builds the fixed artifact metadata used by every fixture snapshot.
    fn create_test_metadata() -> EmbeddedArtifactMetadata {
        let license_index_provenance = crate::models::LicenseIndexProvenance {
            source: String::from("embedded-artifact"),
            dataset_fingerprint: String::from("test"),
            ignored_rules: Vec::new(),
            ignored_licenses: Vec::new(),
            ignored_rules_due_to_licenses: Vec::new(),
            added_rules: Vec::new(),
            replaced_rules: Vec::new(),
            added_licenses: Vec::new(),
            replaced_licenses: Vec::new(),
        };

        EmbeddedArtifactMetadata {
            spdx_license_list_version: String::from("3.27"),
            license_index_provenance,
        }
    }

    /// Encodes the given rules and licenses the way the loader expects to read
    /// them: a postcard-serialized snapshot, compressed with zstd (level `0`).
    fn serialize_loader_snapshot_to_bytes(
        rules: Vec<LoadedRule>,
        licenses: Vec<LoadedLicense>,
    ) -> Result<Vec<u8>, SerializationError> {
        let snapshot = EmbeddedLoaderSnapshot {
            schema_version: SCHEMA_VERSION,
            metadata: create_test_metadata(),
            rules,
            licenses,
        };

        let encoded = match postcard::to_allocvec(&snapshot) {
            Ok(encoded) => encoded,
            Err(cause) => {
                return Err(SerializationError(format!(
                    "Failed to serialize embedded artifact: {}",
                    cause
                )));
            }
        };

        match zstd::encode_all(encoded.as_slice(), 0) {
            Ok(compressed) => Ok(compressed),
            Err(cause) => Err(SerializationError(format!(
                "Failed to compress embedded artifact: {}",
                cause
            ))),
        }
    }

    /// Minimal text rule fixture with the identifier `test.RULE`.
    fn create_test_loaded_rule() -> LoadedRule {
        LoadedRule {
            identifier: String::from("test.RULE"),
            license_expression: String::from("mit"),
            text: String::from("MIT License text"),
            rule_kind: crate::license_detection::models::RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            skip_for_required_phrase_generation: false,
            relevance: Some(100),
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            is_deprecated: false,
            replaced_by: Vec::new(),
        }
    }

    /// Minimal license fixture with the key `mit`.
    fn create_test_loaded_license() -> LoadedLicense {
        LoadedLicense {
            key: String::from("mit"),
            short_name: Some(String::from("MIT")),
            name: String::from("MIT License"),
            language: Some(String::from("en")),
            spdx_license_key: Some(String::from("MIT")),
            other_spdx_license_keys: Vec::new(),
            category: Some(String::from("Permissive")),
            owner: None,
            homepage_url: None,
            text: String::from("MIT License text"),
            reference_urls: Vec::new(),
            osi_license_key: None,
            text_urls: Vec::new(),
            osi_url: None,
            faq_url: None,
            other_urls: Vec::new(),
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: Vec::new(),
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    /// A serialized rule + license pair must come back as a usable index that
    /// contains both the original rule and a rule derived from the license.
    #[test]
    fn test_load_license_index_from_bytes_roundtrip() {
        let artifact = serialize_loader_snapshot_to_bytes(
            vec![create_test_loaded_rule()],
            vec![create_test_loaded_license()],
        )
        .expect("Should serialize");

        let loaded =
            load_embedded_license_index_from_bytes(&artifact).expect("Should deserialize");
        let index = loaded.index;

        assert_eq!(index.licenses_by_key.len(), 1);

        // Shared lookup: does any rule in the index carry this identifier?
        let has_rule = |wanted: &'static str| {
            index
                .rules_by_rid
                .iter()
                .any(|rule| rule.identifier == wanted)
        };

        assert!(
            has_rule("test.RULE"),
            "runtime index should retain the serialized rule"
        );
        assert!(
            has_rule("mit.LICENSE"),
            "runtime index should synthesize a license-derived rule"
        );
    }

    /// Metadata written into the artifact must be read back unchanged.
    #[test]
    fn test_load_embedded_artifact_metadata_from_bytes_roundtrip() {
        let artifact = serialize_loader_snapshot_to_bytes(
            vec![create_test_loaded_rule()],
            vec![create_test_loaded_license()],
        )
        .expect("Should serialize");

        let metadata = load_embedded_artifact_metadata_from_bytes(&artifact)
            .expect("Should deserialize metadata");
        let provenance = &metadata.license_index_provenance;

        assert_eq!(metadata.spdx_license_list_version, "3.27");
        assert_eq!(provenance.source, "embedded-artifact");
    }

    /// An empty byte slice is rejected with the dedicated "empty" message.
    #[test]
    fn test_load_license_index_from_bytes_rejects_empty() {
        let outcome = load_embedded_license_index_from_bytes(&[]);
        let message = outcome.unwrap_err().to_string();
        assert!(message.contains("artifact is empty"));
    }
}