Skip to main content

provenant/license_detection/models/
loaded_license.rs

1//! Loader-stage license type.
2//!
3//! This module defines `LoadedLicense`, which represents a parsed and normalized
4//! license file (.LICENSE) before it is converted to a runtime `License`.
5//!
6//! Loader-stage responsibilities include:
7//! - Key derivation from filename
8//! - Name fallback chain resolution
9//! - URL merging from multiple source fields
10//! - Text trimming and normalization
11//! - Deprecation metadata preservation (without filtering)
12
13use serde::{Deserialize, Serialize};
14
15/// Loader-stage representation of a license.
16///
17/// This struct contains parsed and normalized data from a .LICENSE file.
18/// It is serialized at build time and deserialized at runtime, then converted
19/// to a runtime `License` during the build stage.
20#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
21pub struct LoadedLicense {
22    pub key: String,
23    pub short_name: Option<String>,
24    pub name: String,
25    pub language: Option<String>,
26    pub spdx_license_key: Option<String>,
27    pub other_spdx_license_keys: Vec<String>,
28    pub category: Option<String>,
29    pub owner: Option<String>,
30    pub homepage_url: Option<String>,
31    pub text: String,
32    pub reference_urls: Vec<String>,
33    pub osi_license_key: Option<String>,
34    pub text_urls: Vec<String>,
35    pub osi_url: Option<String>,
36    pub faq_url: Option<String>,
37    pub other_urls: Vec<String>,
38    pub notes: Option<String>,
39    pub is_deprecated: bool,
40    pub is_exception: bool,
41    pub is_unknown: bool,
42    pub is_generic: bool,
43    pub replaced_by: Vec<String>,
44    pub minimum_coverage: Option<u8>,
45    pub standard_notice: Option<String>,
46    pub ignorable_copyrights: Option<Vec<String>>,
47    pub ignorable_holders: Option<Vec<String>>,
48    pub ignorable_authors: Option<Vec<String>>,
49    pub ignorable_urls: Option<Vec<String>>,
50    pub ignorable_emails: Option<Vec<String>>,
51}
52
53/// Loader-stage normalization functions for license data.
54impl LoadedLicense {
55    /// Derive key from filename.
56    ///
57    /// Returns the file stem (filename without extension) as the key.
58    /// This should match the `key` field in the frontmatter.
59    pub fn derive_key(path: &std::path::Path) -> Result<String, LicenseKeyError> {
60        path.file_stem()
61            .and_then(|s| s.to_str())
62            .map(|s| s.to_string())
63            .ok_or(LicenseKeyError::CannotExtractKey)
64    }
65
66    /// Validate that the frontmatter key matches the filename key.
67    pub fn validate_key_match(
68        filename_key: &str,
69        frontmatter_key: Option<&str>,
70    ) -> Result<(), LicenseKeyError> {
71        match frontmatter_key {
72            Some(fm_key) if fm_key != filename_key => Err(LicenseKeyError::KeyMismatch {
73                filename: filename_key.to_string(),
74                frontmatter: fm_key.to_string(),
75            }),
76            _ => Ok(()),
77        }
78    }
79
80    /// Derive name using the fallback chain.
81    ///
82    /// Priority order:
83    /// 1. `name` field
84    /// 2. `short_name` field
85    /// 3. `key` as fallback
86    pub fn derive_name(name: Option<&str>, short_name: Option<&str>, key: &str) -> String {
87        name.map(|s| s.trim().to_string())
88            .filter(|s| !s.is_empty())
89            .or_else(|| {
90                short_name
91                    .map(|s| s.trim().to_string())
92                    .filter(|s| !s.is_empty())
93            })
94            .unwrap_or_else(|| key.to_string())
95    }
96
97    /// Merge reference URLs from multiple source fields.
98    ///
99    /// Collects URLs in this order:
100    /// 1. text_urls
101    /// 2. other_urls
102    /// 3. osi_url
103    /// 4. faq_url
104    /// 5. homepage_url
105    pub fn merge_reference_urls(
106        text_urls: Option<&[String]>,
107        other_urls: Option<&[String]>,
108        osi_url: Option<&str>,
109        faq_url: Option<&str>,
110        homepage_url: Option<&str>,
111    ) -> Vec<String> {
112        let mut urls = Vec::new();
113
114        if let Some(u) = text_urls {
115            urls.extend(u.iter().cloned());
116        }
117        if let Some(u) = other_urls {
118            urls.extend(u.iter().cloned());
119        }
120        if let Some(u) = osi_url {
121            let u = u.trim();
122            if !u.is_empty() {
123                urls.push(u.to_string());
124            }
125        }
126        if let Some(u) = faq_url {
127            let u = u.trim();
128            if !u.is_empty() {
129                urls.push(u.to_string());
130            }
131        }
132        if let Some(u) = homepage_url {
133            let u = u.trim();
134            if !u.is_empty() {
135                urls.push(u.to_string());
136            }
137        }
138
139        urls
140    }
141
142    /// Normalize optional string field.
143    ///
144    /// Returns `None` for empty strings, `Some(trimmed)` otherwise.
145    pub fn normalize_optional_string(s: Option<&str>) -> Option<String> {
146        s.map(|s| s.trim().to_string()).filter(|s| !s.is_empty())
147    }
148
149    /// Normalize optional string list.
150    ///
151    /// Returns `None` for empty lists, `Some(list)` with trimmed strings otherwise.
152    pub fn normalize_optional_list(list: Option<&[String]>) -> Option<Vec<String>> {
153        list.map(|l| {
154            l.iter()
155                .map(|s| s.trim().to_string())
156                .filter(|s| !s.is_empty())
157                .collect::<Vec<_>>()
158        })
159        .filter(|l: &Vec<String>| !l.is_empty())
160    }
161
162    /// Validate that a non-deprecated, non-unknown, non-generic license has text content.
163    pub fn validate_text_content(
164        text: &str,
165        is_deprecated: bool,
166        is_unknown: bool,
167        is_generic: bool,
168    ) -> Result<(), LicenseTextError> {
169        if text.trim().is_empty() && !is_deprecated && !is_unknown && !is_generic {
170            Err(LicenseTextError::EmptyText)
171        } else {
172            Ok(())
173        }
174    }
175}
176
/// Failures raised while deriving or validating a license key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseKeyError {
    /// No key could be taken from the license file path.
    CannotExtractKey,
    /// The key from the filename and the key declared in the frontmatter differ.
    KeyMismatch {
        /// Key derived from the filename.
        filename: String,
        /// Key declared in the frontmatter.
        frontmatter: String,
    },
}

impl std::fmt::Display for LicenseKeyError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::KeyMismatch {
                filename: from_file,
                frontmatter: declared,
            } => write!(
                f,
                "license key mismatch: filename '{}' vs frontmatter '{}'",
                from_file, declared
            ),
            Self::CannotExtractKey => f.write_str("cannot extract key from license file path"),
        }
    }
}

impl std::error::Error for LicenseKeyError {}
206
/// Failures raised while validating license text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseTextError {
    /// The text is empty and the license is not deprecated, unknown, or generic.
    EmptyText,
}

impl std::fmt::Display for LicenseTextError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Kept as an exhaustive match so a new variant forces a new message.
        let message = match self {
            Self::EmptyText => {
                "license file has empty text content and is not deprecated/unknown/generic"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for LicenseTextError {}
225
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Shorthand for building owned strings in fixtures.
    fn owned(s: &str) -> String {
        s.to_string()
    }

    /// The key is the file stem; a path without one is rejected.
    #[test]
    fn test_derive_key() {
        let expectations: [(&str, Result<String, LicenseKeyError>); 4] = [
            ("licenses/mit.LICENSE", Ok(owned("mit"))),
            ("/path/to/apache-2.0.LICENSE", Ok(owned("apache-2.0"))),
            ("no-extension", Ok(owned("no-extension"))),
            ("/", Err(LicenseKeyError::CannotExtractKey)),
        ];

        for (raw_path, expected) in expectations {
            let derived = LoadedLicense::derive_key(&PathBuf::from(raw_path));
            assert_eq!(derived, expected);
        }
    }

    /// Matching or absent frontmatter keys pass; a differing key is reported.
    #[test]
    fn test_validate_key_match() {
        assert_eq!(LoadedLicense::validate_key_match("mit", Some("mit")), Ok(()));
        assert_eq!(LoadedLicense::validate_key_match("mit", None), Ok(()));

        let mismatch = LoadedLicense::validate_key_match("mit", Some("apache"));
        assert_eq!(
            mismatch,
            Err(LicenseKeyError::KeyMismatch {
                filename: owned("mit"),
                frontmatter: owned("apache"),
            })
        );
    }

    /// Name resolution follows name → short_name → key, ignoring blank values.
    #[test]
    fn test_derive_name() {
        // (name, short_name, key, expected)
        let cases: [(Option<&str>, Option<&str>, &str, &str); 6] = [
            (Some("MIT License"), None, "mit", "MIT License"),
            (None, Some("MIT"), "mit", "MIT"),
            (Some("  MIT License  "), None, "mit", "MIT License"),
            (None, None, "mit", "mit"),
            (Some(""), Some("Short"), "key", "Short"),
            (Some("   "), None, "key", "key"),
        ];

        for (name, short_name, key, expected) in cases {
            assert_eq!(LoadedLicense::derive_name(name, short_name, key), expected);
        }
    }

    /// All five sources contribute, in the documented order.
    #[test]
    fn test_merge_reference_urls() {
        let text_urls = vec![owned("https://example.com/text")];
        let other_urls = vec![owned("https://example.com/other")];

        let merged = LoadedLicense::merge_reference_urls(
            Some(&text_urls),
            Some(&other_urls),
            Some("https://opensource.org/licenses/MIT"),
            Some("https://example.com/faq"),
            Some("https://example.com/home"),
        );

        let expected = [
            "https://example.com/text",
            "https://example.com/other",
            "https://opensource.org/licenses/MIT",
            "https://example.com/faq",
            "https://example.com/home",
        ];
        assert_eq!(merged, expected);
    }

    /// No inputs yields no URLs.
    #[test]
    fn test_merge_reference_urls_empty() {
        let merged = LoadedLicense::merge_reference_urls(None, None, None, None, None);
        assert_eq!(merged.len(), 0);
    }

    /// Single-valued URLs are trimmed before being collected.
    #[test]
    fn test_merge_reference_urls_trims_whitespace() {
        let padded = Some("  https://example.com  ");
        let merged = LoadedLicense::merge_reference_urls(None, None, padded, None, None);
        assert_eq!(merged, vec!["https://example.com"]);
    }

    /// Blank strings collapse to `None`; everything else is trimmed.
    #[test]
    fn test_normalize_optional_string() {
        // (input, expected)
        let cases: [(Option<&str>, Option<&str>); 5] = [
            (None, None),
            (Some(""), None),
            (Some("   "), None),
            (Some("hello"), Some("hello")),
            (Some("  hello  "), Some("hello")),
        ];

        for (input, expected) in cases {
            assert_eq!(
                LoadedLicense::normalize_optional_string(input),
                expected.map(String::from)
            );
        }
    }

    /// Missing or empty lists collapse to `None`; populated lists are kept.
    #[test]
    fn test_normalize_optional_list() {
        assert_eq!(LoadedLicense::normalize_optional_list(None), None);

        let no_entries: Vec<String> = Vec::new();
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&no_entries[..])),
            None
        );

        let entries = vec![owned("a"), owned("b")];
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&entries[..])),
            Some(vec![owned("a"), owned("b")])
        );
    }

    /// Empty text is only an error when no exemption flag is set.
    #[test]
    fn test_validate_text_content() {
        // (text, is_deprecated, is_unknown, is_generic) combinations that pass.
        let accepted = [
            ("some text", false, false, false),
            ("", true, false, false),
            ("", false, true, false),
            ("", false, false, true),
        ];
        for (text, is_deprecated, is_unknown, is_generic) in accepted {
            let outcome =
                LoadedLicense::validate_text_content(text, is_deprecated, is_unknown, is_generic);
            assert!(outcome.is_ok());
        }

        // Blank text with no exemption flag is rejected.
        for blank in ["", "   "] {
            assert_eq!(
                LoadedLicense::validate_text_content(blank, false, false, false),
                Err(LicenseTextError::EmptyText)
            );
        }
    }

    /// A fully populated value survives a JSON round trip unchanged.
    #[test]
    fn test_serde_roundtrip() {
        let mit_url = "https://opensource.org/licenses/MIT";

        let original = LoadedLicense {
            key: owned("mit"),
            short_name: Some(owned("MIT")),
            name: owned("MIT License"),
            language: Some(owned("en")),
            spdx_license_key: Some(owned("MIT")),
            other_spdx_license_keys: Vec::new(),
            category: Some(owned("Permissive")),
            owner: Some(owned("Open Source Initiative")),
            homepage_url: Some(owned(mit_url)),
            text: owned("MIT License text"),
            reference_urls: vec![owned(mit_url)],
            osi_license_key: Some(owned("MIT")),
            text_urls: vec![owned(mit_url)],
            osi_url: Some(owned(mit_url)),
            faq_url: None,
            other_urls: Vec::new(),
            notes: Some(owned("Test note")),
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: Vec::new(),
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LoadedLicense = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}