// provenant/license_detection/models/loaded_license.rs

1//! Loader-stage license type.
2//!
3//! This module defines `LoadedLicense`, which represents a parsed and normalized
4//! license file (.LICENSE) before it is converted to a runtime `License`.
5//!
6//! Loader-stage responsibilities include:
7//! - Key derivation from filename
8//! - Name fallback chain resolution
9//! - URL merging from multiple source fields
10//! - Text trimming and normalization
11//! - Deprecation metadata preservation (without filtering)
12
13use serde::{Deserialize, Serialize};
14
/// Loader-stage representation of a license.
///
/// Holds the parsed and normalized data of a single `.LICENSE` file.
/// It is serialized at build time and deserialized at runtime, then converted
/// to a runtime `License` during the build stage.
///
/// NOTE(review): "build stage" in the sentence above presumably means the
/// runtime step that builds `License` values, not compile time — confirm.
///
/// NOTE(review): if the build-time serialization format is positional,
/// reordering or retyping the fields below would change the encoded layout —
/// confirm the format before touching field order.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LoadedLicense {
    /// Unique lowercase ASCII identifier derived from the filename.
    ///
    /// NOTE(review): the type is a plain `String`; lowercase/ASCII is a
    /// convention of the input files, not something this struct enforces.
    pub key: String,

    /// Full name of the license.
    ///
    /// Always present; presumably resolved through the `name` → `short_name`
    /// → `key` fallback chain implemented by `derive_name` — TODO confirm
    /// against the loader.
    pub name: String,

    /// SPDX license identifier if available.
    pub spdx_license_key: Option<String>,

    /// Alternative SPDX license identifiers (aliases).
    pub other_spdx_license_keys: Vec<String>,

    /// License category (e.g., "Permissive", "Copyleft").
    pub category: Option<String>,

    /// Full license text, trimmed and normalized.
    pub text: String,

    /// Reference URLs for this license, merged from source URL fields.
    pub reference_urls: Vec<String>,

    /// Free text notes.
    pub notes: Option<String>,

    /// Whether this license is deprecated.
    ///
    /// Deprecated licenses are kept at this stage; the module documentation
    /// states that deprecation metadata is preserved without filtering.
    pub is_deprecated: bool,

    /// List of license keys that replace this deprecated license.
    pub replaced_by: Vec<String>,

    /// Minimum match coverage percentage (0-100) if specified.
    ///
    /// The `u8` type admits values up to 255, so the 0-100 range is a
    /// convention that must be upheld by whoever populates this field.
    pub minimum_coverage: Option<u8>,

    /// Copyrights that should be ignored when found in this license text.
    pub ignorable_copyrights: Option<Vec<String>>,

    /// Holder names that should be ignored when found in this license text.
    pub ignorable_holders: Option<Vec<String>>,

    /// Author names that should be ignored when found in this license text.
    pub ignorable_authors: Option<Vec<String>>,

    /// URLs that should be ignored when found in this license text.
    pub ignorable_urls: Option<Vec<String>>,

    /// Emails that should be ignored when found in this license text.
    pub ignorable_emails: Option<Vec<String>>,
}
70
71/// Loader-stage normalization functions for license data.
72impl LoadedLicense {
73    /// Derive key from filename.
74    ///
75    /// Returns the file stem (filename without extension) as the key.
76    /// This should match the `key` field in the frontmatter.
77    pub fn derive_key(path: &std::path::Path) -> Result<String, LicenseKeyError> {
78        path.file_stem()
79            .and_then(|s| s.to_str())
80            .map(|s| s.to_string())
81            .ok_or(LicenseKeyError::CannotExtractKey)
82    }
83
84    /// Validate that the frontmatter key matches the filename key.
85    pub fn validate_key_match(
86        filename_key: &str,
87        frontmatter_key: Option<&str>,
88    ) -> Result<(), LicenseKeyError> {
89        match frontmatter_key {
90            Some(fm_key) if fm_key != filename_key => Err(LicenseKeyError::KeyMismatch {
91                filename: filename_key.to_string(),
92                frontmatter: fm_key.to_string(),
93            }),
94            _ => Ok(()),
95        }
96    }
97
98    /// Derive name using the fallback chain.
99    ///
100    /// Priority order:
101    /// 1. `name` field
102    /// 2. `short_name` field
103    /// 3. `key` as fallback
104    pub fn derive_name(name: Option<&str>, short_name: Option<&str>, key: &str) -> String {
105        name.map(|s| s.trim().to_string())
106            .filter(|s| !s.is_empty())
107            .or_else(|| {
108                short_name
109                    .map(|s| s.trim().to_string())
110                    .filter(|s| !s.is_empty())
111            })
112            .unwrap_or_else(|| key.to_string())
113    }
114
115    /// Merge reference URLs from multiple source fields.
116    ///
117    /// Collects URLs in this order:
118    /// 1. text_urls
119    /// 2. other_urls
120    /// 3. osi_url
121    /// 4. faq_url
122    /// 5. homepage_url
123    pub fn merge_reference_urls(
124        text_urls: Option<&[String]>,
125        other_urls: Option<&[String]>,
126        osi_url: Option<&str>,
127        faq_url: Option<&str>,
128        homepage_url: Option<&str>,
129    ) -> Vec<String> {
130        let mut urls = Vec::new();
131
132        if let Some(u) = text_urls {
133            urls.extend(u.iter().cloned());
134        }
135        if let Some(u) = other_urls {
136            urls.extend(u.iter().cloned());
137        }
138        if let Some(u) = osi_url {
139            let u = u.trim();
140            if !u.is_empty() {
141                urls.push(u.to_string());
142            }
143        }
144        if let Some(u) = faq_url {
145            let u = u.trim();
146            if !u.is_empty() {
147                urls.push(u.to_string());
148            }
149        }
150        if let Some(u) = homepage_url {
151            let u = u.trim();
152            if !u.is_empty() {
153                urls.push(u.to_string());
154            }
155        }
156
157        urls
158    }
159
160    /// Normalize optional string field.
161    ///
162    /// Returns `None` for empty strings, `Some(trimmed)` otherwise.
163    pub fn normalize_optional_string(s: Option<&str>) -> Option<String> {
164        s.map(|s| s.trim().to_string()).filter(|s| !s.is_empty())
165    }
166
167    /// Normalize optional string list.
168    ///
169    /// Returns `None` for empty lists, `Some(list)` with trimmed strings otherwise.
170    pub fn normalize_optional_list(list: Option<&[String]>) -> Option<Vec<String>> {
171        list.map(|l| {
172            l.iter()
173                .map(|s| s.trim().to_string())
174                .filter(|s| !s.is_empty())
175                .collect::<Vec<_>>()
176        })
177        .filter(|l: &Vec<String>| !l.is_empty())
178    }
179
180    /// Validate that a non-deprecated, non-unknown, non-generic license has text content.
181    pub fn validate_text_content(
182        text: &str,
183        is_deprecated: bool,
184        is_unknown: bool,
185        is_generic: bool,
186    ) -> Result<(), LicenseTextError> {
187        if text.trim().is_empty() && !is_deprecated && !is_unknown && !is_generic {
188            Err(LicenseTextError::EmptyText)
189        } else {
190            Ok(())
191        }
192    }
193}
194
/// Failures that can occur while deriving or validating a license key.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseKeyError {
    /// No key could be extracted from the license file path.
    CannotExtractKey,
    /// The key declared in the frontmatter differs from the filename key.
    KeyMismatch {
        /// Key derived from the filename.
        filename: String,
        /// Key declared in the frontmatter.
        frontmatter: String,
    },
}

impl std::fmt::Display for LicenseKeyError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::KeyMismatch {
                filename,
                frontmatter,
            } => write!(
                f,
                "license key mismatch: filename '{}' vs frontmatter '{}'",
                filename, frontmatter
            ),
            Self::CannotExtractKey => f.write_str("cannot extract key from license file path"),
        }
    }
}

impl std::error::Error for LicenseKeyError {}
224
/// Failures that can occur while validating the text of a license.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseTextError {
    /// The license has no text although it is not deprecated, unknown or generic.
    EmptyText,
}

impl std::fmt::Display for LicenseTextError {
    /// Writes a short, human-readable description of the failure.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::EmptyText => {
                "license file has empty text content and is not deprecated/unknown/generic"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for LicenseTextError {}
243
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Builds an owned `Vec<String>` from string literals.
    fn strings(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// The key is the file stem; a path without a file name is rejected.
    #[test]
    fn test_derive_key() {
        let cases: [(&str, Result<String, LicenseKeyError>); 4] = [
            ("licenses/mit.LICENSE", Ok("mit".to_string())),
            ("/path/to/apache-2.0.LICENSE", Ok("apache-2.0".to_string())),
            ("no-extension", Ok("no-extension".to_string())),
            ("/", Err(LicenseKeyError::CannotExtractKey)),
        ];

        for (path, expected) in &cases {
            let derived = LoadedLicense::derive_key(Path::new(path));
            assert_eq!(&derived, expected, "path: {}", path);
        }
    }

    /// Matching or absent frontmatter keys pass; differing keys are reported.
    #[test]
    fn test_validate_key_match() {
        assert_eq!(LoadedLicense::validate_key_match("mit", Some("mit")), Ok(()));
        assert_eq!(LoadedLicense::validate_key_match("mit", None), Ok(()));

        let expected_mismatch = LicenseKeyError::KeyMismatch {
            filename: "mit".to_string(),
            frontmatter: "apache".to_string(),
        };
        assert_eq!(
            LoadedLicense::validate_key_match("mit", Some("apache")),
            Err(expected_mismatch)
        );
    }

    /// Name resolution falls back from `name` to `short_name` to `key`.
    #[test]
    fn test_derive_name() {
        // (name, short_name, key, expected)
        let cases: [(Option<&str>, Option<&str>, &str, &str); 6] = [
            (Some("MIT License"), None, "mit", "MIT License"),
            (None, Some("MIT"), "mit", "MIT"),
            (Some("  MIT License  "), None, "mit", "MIT License"),
            (None, None, "mit", "mit"),
            (Some(""), Some("Short"), "key", "Short"),
            (Some("   "), None, "key", "key"),
        ];

        for &(name, short_name, key, expected) in &cases {
            assert_eq!(LoadedLicense::derive_name(name, short_name, key), expected);
        }
    }

    /// All five sources contribute, in the documented order.
    #[test]
    fn test_merge_reference_urls() {
        let text_urls = strings(&["https://example.com/text"]);
        let other_urls = strings(&["https://example.com/other"]);

        let merged = LoadedLicense::merge_reference_urls(
            Some(text_urls.as_slice()),
            Some(other_urls.as_slice()),
            Some("https://opensource.org/licenses/MIT"),
            Some("https://example.com/faq"),
            Some("https://example.com/home"),
        );

        assert_eq!(
            merged,
            vec![
                "https://example.com/text",
                "https://example.com/other",
                "https://opensource.org/licenses/MIT",
                "https://example.com/faq",
                "https://example.com/home",
            ]
        );
    }

    /// No sources means no URLs.
    #[test]
    fn test_merge_reference_urls_empty() {
        let merged = LoadedLicense::merge_reference_urls(None, None, None, None, None);
        assert_eq!(merged, Vec::<String>::new());
    }

    /// Single-value URLs are trimmed before being stored.
    #[test]
    fn test_merge_reference_urls_trims_whitespace() {
        let merged = LoadedLicense::merge_reference_urls(
            None,
            None,
            Some("  https://example.com  "),
            None,
            None,
        );
        assert_eq!(merged, vec!["https://example.com"]);
    }

    /// Blank strings collapse to `None`; everything else is trimmed.
    #[test]
    fn test_normalize_optional_string() {
        // (input, expected)
        let cases: [(Option<&str>, Option<&str>); 5] = [
            (None, None),
            (Some(""), None),
            (Some("   "), None),
            (Some("hello"), Some("hello")),
            (Some("  hello  "), Some("hello")),
        ];

        for &(input, expected) in &cases {
            let normalized = LoadedLicense::normalize_optional_string(input);
            assert_eq!(normalized.as_deref(), expected);
        }
    }

    /// Absent and empty lists collapse to `None`; populated lists survive.
    #[test]
    fn test_normalize_optional_list() {
        assert_eq!(LoadedLicense::normalize_optional_list(None), None);
        assert_eq!(LoadedLicense::normalize_optional_list(Some(&[])), None);

        let entries = strings(&["a", "b"]);
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(entries.as_slice())),
            Some(strings(&["a", "b"]))
        );
    }

    /// Empty text is only an error when no exemption flag is set.
    #[test]
    fn test_validate_text_content() {
        // (text, is_deprecated, is_unknown, is_generic, expected)
        let cases: [(&str, bool, bool, bool, Result<(), LicenseTextError>); 6] = [
            ("some text", false, false, false, Ok(())),
            ("", true, false, false, Ok(())),
            ("", false, true, false, Ok(())),
            ("", false, false, true, Ok(())),
            ("", false, false, false, Err(LicenseTextError::EmptyText)),
            ("   ", false, false, false, Err(LicenseTextError::EmptyText)),
        ];

        for (text, deprecated, unknown, generic, expected) in &cases {
            let outcome =
                LoadedLicense::validate_text_content(text, *deprecated, *unknown, *generic);
            assert_eq!(&outcome, expected, "text: {:?}", text);
        }
    }

    /// A value survives a JSON serialize/deserialize cycle unchanged.
    #[test]
    fn test_serde_roundtrip() {
        let original = LoadedLicense {
            key: String::from("mit"),
            name: String::from("MIT License"),
            spdx_license_key: Some(String::from("MIT")),
            other_spdx_license_keys: Vec::new(),
            category: Some(String::from("Permissive")),
            text: String::from("MIT License text"),
            reference_urls: strings(&["https://opensource.org/licenses/MIT"]),
            notes: Some(String::from("Test note")),
            is_deprecated: false,
            replaced_by: Vec::new(),
            minimum_coverage: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        let decoded: LoadedLicense = serde_json::from_str(&encoded).unwrap();
        assert_eq!(original, decoded);
    }
}