Skip to main content

provenant/license_detection/models/
loaded_license.rs

// SPDX-FileCopyrightText: Provenant contributors
// SPDX-License-Identifier: Apache-2.0
3
//! Loader-stage license type.
//!
//! This module defines `LoadedLicense`, which represents a parsed and normalized
//! license file (.LICENSE) before it is converted to a runtime `License`.
//!
//! Loader-stage responsibilities include:
//! - Key derivation from filename
//! - Name fallback chain resolution
//! - URL merging from multiple source fields
//! - Text trimming and normalization
//! - Deprecation metadata preservation (without filtering)
15
16use serde::{Deserialize, Serialize};
17
18/// Loader-stage representation of a license.
19///
20/// This struct contains parsed and normalized data from a .LICENSE file.
21/// It is serialized at build time and deserialized at runtime, then converted
22/// to a runtime `License` during the build stage.
23#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
24pub struct LoadedLicense {
25    pub key: String,
26    pub short_name: Option<String>,
27    pub name: String,
28    pub language: Option<String>,
29    pub spdx_license_key: Option<String>,
30    pub other_spdx_license_keys: Vec<String>,
31    pub category: Option<String>,
32    pub owner: Option<String>,
33    pub homepage_url: Option<String>,
34    pub text: String,
35    pub reference_urls: Vec<String>,
36    pub osi_license_key: Option<String>,
37    pub text_urls: Vec<String>,
38    pub osi_url: Option<String>,
39    pub faq_url: Option<String>,
40    pub other_urls: Vec<String>,
41    pub notes: Option<String>,
42    pub is_deprecated: bool,
43    pub is_exception: bool,
44    pub is_unknown: bool,
45    pub is_generic: bool,
46    pub replaced_by: Vec<String>,
47    pub minimum_coverage: Option<u8>,
48    pub standard_notice: Option<String>,
49    pub ignorable_copyrights: Option<Vec<String>>,
50    pub ignorable_holders: Option<Vec<String>>,
51    pub ignorable_authors: Option<Vec<String>>,
52    pub ignorable_urls: Option<Vec<String>>,
53    pub ignorable_emails: Option<Vec<String>>,
54}
55
56/// Loader-stage normalization functions for license data.
57impl LoadedLicense {
58    /// Derive key from filename.
59    ///
60    /// Returns the file stem (filename without extension) as the key.
61    /// This should match the `key` field in the frontmatter.
62    pub fn derive_key(path: &std::path::Path) -> Result<String, LicenseKeyError> {
63        path.file_stem()
64            .and_then(|s| s.to_str())
65            .map(|s| s.to_string())
66            .ok_or(LicenseKeyError::CannotExtractKey)
67    }
68
69    /// Validate that the frontmatter key matches the filename key.
70    pub fn validate_key_match(
71        filename_key: &str,
72        frontmatter_key: Option<&str>,
73    ) -> Result<(), LicenseKeyError> {
74        match frontmatter_key {
75            Some(fm_key) if fm_key != filename_key => Err(LicenseKeyError::KeyMismatch {
76                filename: filename_key.to_string(),
77                frontmatter: fm_key.to_string(),
78            }),
79            _ => Ok(()),
80        }
81    }
82
83    /// Derive name using the fallback chain.
84    ///
85    /// Priority order:
86    /// 1. `name` field
87    /// 2. `short_name` field
88    /// 3. `key` as fallback
89    pub fn derive_name(name: Option<&str>, short_name: Option<&str>, key: &str) -> String {
90        name.map(|s| s.trim().to_string())
91            .filter(|s| !s.is_empty())
92            .or_else(|| {
93                short_name
94                    .map(|s| s.trim().to_string())
95                    .filter(|s| !s.is_empty())
96            })
97            .unwrap_or_else(|| key.to_string())
98    }
99
100    /// Merge reference URLs from multiple source fields.
101    ///
102    /// Collects URLs in this order:
103    /// 1. text_urls
104    /// 2. other_urls
105    /// 3. osi_url
106    /// 4. faq_url
107    /// 5. homepage_url
108    pub fn merge_reference_urls(
109        text_urls: Option<&[String]>,
110        other_urls: Option<&[String]>,
111        osi_url: Option<&str>,
112        faq_url: Option<&str>,
113        homepage_url: Option<&str>,
114    ) -> Vec<String> {
115        let mut urls = Vec::new();
116
117        if let Some(u) = text_urls {
118            urls.extend(u.iter().cloned());
119        }
120        if let Some(u) = other_urls {
121            urls.extend(u.iter().cloned());
122        }
123        if let Some(u) = osi_url {
124            let u = u.trim();
125            if !u.is_empty() {
126                urls.push(u.to_string());
127            }
128        }
129        if let Some(u) = faq_url {
130            let u = u.trim();
131            if !u.is_empty() {
132                urls.push(u.to_string());
133            }
134        }
135        if let Some(u) = homepage_url {
136            let u = u.trim();
137            if !u.is_empty() {
138                urls.push(u.to_string());
139            }
140        }
141
142        urls
143    }
144
145    /// Normalize optional string field.
146    ///
147    /// Returns `None` for empty strings, `Some(trimmed)` otherwise.
148    pub fn normalize_optional_string(s: Option<&str>) -> Option<String> {
149        s.map(|s| s.trim().to_string()).filter(|s| !s.is_empty())
150    }
151
152    /// Normalize optional string list.
153    ///
154    /// Returns `None` for empty lists, `Some(list)` with trimmed strings otherwise.
155    pub fn normalize_optional_list(list: Option<&[String]>) -> Option<Vec<String>> {
156        list.map(|l| {
157            l.iter()
158                .map(|s| s.trim().to_string())
159                .filter(|s| !s.is_empty())
160                .collect::<Vec<_>>()
161        })
162        .filter(|l: &Vec<String>| !l.is_empty())
163    }
164
165    /// Validate that a non-deprecated, non-unknown, non-generic license has text content.
166    pub fn validate_text_content(
167        text: &str,
168        is_deprecated: bool,
169        is_unknown: bool,
170        is_generic: bool,
171    ) -> Result<(), LicenseTextError> {
172        if text.trim().is_empty() && !is_deprecated && !is_unknown && !is_generic {
173            Err(LicenseTextError::EmptyText)
174        } else {
175            Ok(())
176        }
177    }
178}
179
/// Error type for license key validation failures.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseKeyError {
    /// The path has no file stem, or the stem is not valid UTF-8.
    CannotExtractKey,
    /// The frontmatter key differs from the key derived from the file name.
    KeyMismatch {
        /// Key derived from the file name.
        filename: String,
        /// Key declared in the frontmatter.
        frontmatter: String,
    },
}
189
190impl std::fmt::Display for LicenseKeyError {
191    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
192        match self {
193            Self::CannotExtractKey => write!(f, "cannot extract key from license file path"),
194            Self::KeyMismatch {
195                filename,
196                frontmatter,
197            } => {
198                write!(
199                    f,
200                    "license key mismatch: filename '{}' vs frontmatter '{}'",
201                    filename, frontmatter
202                )
203            }
204        }
205    }
206}
207
208impl std::error::Error for LicenseKeyError {}
209
/// Error type for license text validation failures.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseTextError {
    /// The license text is empty or whitespace-only and the license is not
    /// deprecated, unknown, or generic.
    EmptyText,
}
215
216impl std::fmt::Display for LicenseTextError {
217    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
218        match self {
219            Self::EmptyText => write!(
220                f,
221                "license file has empty text content and is not deprecated/unknown/generic"
222            ),
223        }
224    }
225}
226
227impl std::error::Error for LicenseTextError {}
228
/// Unit tests for the loader-stage helpers and the serde round trip.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    #[test]
    fn test_derive_key() {
        assert_eq!(
            LoadedLicense::derive_key(Path::new("licenses/mit.LICENSE")),
            Ok("mit".to_string())
        );
        assert_eq!(
            LoadedLicense::derive_key(Path::new("/path/to/apache-2.0.LICENSE")),
            Ok("apache-2.0".to_string())
        );
        assert_eq!(
            LoadedLicense::derive_key(Path::new("no-extension")),
            Ok("no-extension".to_string())
        );
        // The root path has no file name, hence no stem.
        assert_eq!(
            LoadedLicense::derive_key(Path::new("/")),
            Err(LicenseKeyError::CannotExtractKey)
        );
    }

    #[test]
    fn test_validate_key_match() {
        assert!(LoadedLicense::validate_key_match("mit", Some("mit")).is_ok());
        assert!(LoadedLicense::validate_key_match("mit", None).is_ok());
        assert_eq!(
            LoadedLicense::validate_key_match("mit", Some("apache")),
            Err(LicenseKeyError::KeyMismatch {
                filename: "mit".to_string(),
                frontmatter: "apache".to_string()
            })
        );
    }

    #[test]
    fn test_derive_name() {
        assert_eq!(
            LoadedLicense::derive_name(Some("MIT License"), None, "mit"),
            "MIT License"
        );
        assert_eq!(LoadedLicense::derive_name(None, Some("MIT"), "mit"), "MIT");
        assert_eq!(
            LoadedLicense::derive_name(Some("  MIT License  "), None, "mit"),
            "MIT License"
        );
        assert_eq!(LoadedLicense::derive_name(None, None, "mit"), "mit");
        assert_eq!(
            LoadedLicense::derive_name(Some(""), Some("Short"), "key"),
            "Short"
        );
        assert_eq!(LoadedLicense::derive_name(Some("   "), None, "key"), "key");
    }

    #[test]
    fn test_merge_reference_urls() {
        let text_urls = vec!["https://example.com/text".to_string()];
        let other_urls = vec!["https://example.com/other".to_string()];

        let urls = LoadedLicense::merge_reference_urls(
            Some(&text_urls),
            Some(&other_urls),
            Some("https://opensource.org/licenses/MIT"),
            Some("https://example.com/faq"),
            Some("https://example.com/home"),
        );
        assert_eq!(
            urls,
            vec![
                "https://example.com/text",
                "https://example.com/other",
                "https://opensource.org/licenses/MIT",
                "https://example.com/faq",
                "https://example.com/home",
            ]
        );
    }

    #[test]
    fn test_merge_reference_urls_empty() {
        let urls = LoadedLicense::merge_reference_urls(None, None, None, None, None);
        assert!(urls.is_empty());
    }

    #[test]
    fn test_merge_reference_urls_trims_whitespace() {
        let urls = LoadedLicense::merge_reference_urls(
            None,
            None,
            Some("  https://example.com  "),
            None,
            None,
        );
        assert_eq!(urls, vec!["https://example.com"]);
    }

    #[test]
    fn test_merge_reference_urls_skips_blank_scalars() {
        let urls =
            LoadedLicense::merge_reference_urls(None, None, Some("   "), Some(""), Some("\t"));
        assert!(urls.is_empty());
    }

    #[test]
    fn test_normalize_optional_string() {
        assert_eq!(LoadedLicense::normalize_optional_string(None), None);
        assert_eq!(LoadedLicense::normalize_optional_string(Some("")), None);
        assert_eq!(LoadedLicense::normalize_optional_string(Some("   ")), None);
        assert_eq!(
            LoadedLicense::normalize_optional_string(Some("hello")),
            Some("hello".to_string())
        );
        assert_eq!(
            LoadedLicense::normalize_optional_string(Some("  hello  ")),
            Some("hello".to_string())
        );
    }

    #[test]
    fn test_normalize_optional_list() {
        assert_eq!(LoadedLicense::normalize_optional_list(None), None);
        assert_eq!(LoadedLicense::normalize_optional_list(Some(&[])), None);
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&["a".to_string(), "b".to_string()])),
            Some(vec!["a".to_string(), "b".to_string()])
        );
    }

    #[test]
    fn test_normalize_optional_list_trims_and_drops_blanks() {
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&[
                "  a  ".to_string(),
                "   ".to_string(),
                "b".to_string()
            ])),
            Some(vec!["a".to_string(), "b".to_string()])
        );
        // A list holding only blank entries collapses to `None`.
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&[String::new(), "  ".to_string()])),
            None
        );
    }

    #[test]
    fn test_validate_text_content() {
        assert!(LoadedLicense::validate_text_content("some text", false, false, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", true, false, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", false, true, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", false, false, true).is_ok());
        assert_eq!(
            LoadedLicense::validate_text_content("", false, false, false),
            Err(LicenseTextError::EmptyText)
        );
        assert_eq!(
            LoadedLicense::validate_text_content("   ", false, false, false),
            Err(LicenseTextError::EmptyText)
        );
    }

    #[test]
    fn test_error_display_messages() {
        assert_eq!(
            LicenseKeyError::CannotExtractKey.to_string(),
            "cannot extract key from license file path"
        );
        assert_eq!(
            LicenseKeyError::KeyMismatch {
                filename: "mit".to_string(),
                frontmatter: "apache".to_string()
            }
            .to_string(),
            "license key mismatch: filename 'mit' vs frontmatter 'apache'"
        );
        assert_eq!(
            LicenseTextError::EmptyText.to_string(),
            "license file has empty text content and is not deprecated/unknown/generic"
        );
    }

    #[test]
    fn test_serde_roundtrip() {
        let license = LoadedLicense {
            key: "mit".to_string(),
            short_name: Some("MIT".to_string()),
            name: "MIT License".to_string(),
            language: Some("en".to_string()),
            spdx_license_key: Some("MIT".to_string()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: Some("Open Source Initiative".to_string()),
            homepage_url: Some("https://opensource.org/licenses/MIT".to_string()),
            text: "MIT License text".to_string(),
            reference_urls: vec!["https://opensource.org/licenses/MIT".to_string()],
            osi_license_key: Some("MIT".to_string()),
            text_urls: vec!["https://opensource.org/licenses/MIT".to_string()],
            osi_url: Some("https://opensource.org/licenses/MIT".to_string()),
            faq_url: None,
            other_urls: vec![],
            notes: Some("Test note".to_string()),
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        };

        let json = serde_json::to_string(&license).unwrap();
        let deserialized: LoadedLicense = serde_json::from_str(&json).unwrap();
        assert_eq!(license, deserialized);
    }
}