Skip to main content

provenant/license_detection/models/
loaded_license.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Loader-stage license type.
5//!
6//! This module defines `LoadedLicense`, which represents a parsed and normalized
7//! license file (.LICENSE) before it is converted to a runtime `License`.
8//!
9//! Loader-stage responsibilities include:
10//! - Key derivation from filename
11//! - Name fallback chain resolution
12//! - URL merging from multiple source fields
13//! - Text trimming and normalization
14//! - Deprecation metadata preservation (without filtering)
15
16use serde::{Deserialize, Serialize};
17
18/// Loader-stage representation of a license.
19///
20/// This struct contains parsed and normalized data from a .LICENSE file.
21/// It is serialized at build time and deserialized at runtime, then converted
22/// to a runtime `License` during the build stage.
23#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
24pub struct LoadedLicense {
25    pub key: String,
26    pub short_name: Option<String>,
27    pub name: String,
28    pub language: Option<String>,
29    pub spdx_license_key: Option<String>,
30    pub other_spdx_license_keys: Vec<String>,
31    pub category: Option<String>,
32    pub owner: Option<String>,
33    pub homepage_url: Option<String>,
34    pub text: String,
35    pub reference_urls: Vec<String>,
36    pub osi_license_key: Option<String>,
37    pub text_urls: Vec<String>,
38    pub osi_url: Option<String>,
39    pub faq_url: Option<String>,
40    pub other_urls: Vec<String>,
41    pub notes: Option<String>,
42    pub is_deprecated: bool,
43    pub is_exception: bool,
44    pub is_unknown: bool,
45    pub is_generic: bool,
46    pub replaced_by: Vec<String>,
47    pub minimum_coverage: Option<u8>,
48    pub standard_notice: Option<String>,
49    pub ignorable_copyrights: Option<Vec<String>>,
50    pub ignorable_holders: Option<Vec<String>>,
51    pub ignorable_authors: Option<Vec<String>>,
52    pub ignorable_urls: Option<Vec<String>>,
53    pub ignorable_emails: Option<Vec<String>>,
54}
55
56/// Loader-stage normalization functions for license data.
57impl LoadedLicense {
58    /// Derive key from filename.
59    ///
60    /// Returns the file stem (filename without extension) as the key.
61    /// This should match the `key` field in the frontmatter.
62    pub fn derive_key(path: &std::path::Path) -> Result<String, LicenseKeyError> {
63        path.file_stem()
64            .and_then(|s| s.to_str())
65            .map(|s| s.to_string())
66            .ok_or(LicenseKeyError::CannotExtractKey)
67    }
68
69    /// Validate that the frontmatter key matches the filename key.
70    pub fn validate_key_match(
71        filename_key: &str,
72        frontmatter_key: Option<&str>,
73    ) -> Result<(), LicenseKeyError> {
74        match frontmatter_key {
75            Some(fm_key) if fm_key != filename_key => Err(LicenseKeyError::KeyMismatch {
76                filename: filename_key.to_string(),
77                frontmatter: fm_key.to_string(),
78            }),
79            _ => Ok(()),
80        }
81    }
82
83    /// Derive name using the fallback chain.
84    ///
85    /// Priority order:
86    /// 1. `name` field
87    /// 2. `short_name` field
88    /// 3. `key` as fallback
89    pub fn derive_name(name: Option<&str>, short_name: Option<&str>, key: &str) -> String {
90        name.map(|s| s.trim().to_string())
91            .filter(|s| !s.is_empty())
92            .or_else(|| {
93                short_name
94                    .map(|s| s.trim().to_string())
95                    .filter(|s| !s.is_empty())
96            })
97            .unwrap_or_else(|| key.to_string())
98    }
99
100    /// Merge reference URLs from multiple source fields.
101    ///
102    /// Collects URLs in this order:
103    /// 1. text_urls
104    /// 2. other_urls
105    /// 3. osi_url
106    /// 4. faq_url
107    /// 5. homepage_url
108    pub fn merge_reference_urls(
109        text_urls: Option<&[String]>,
110        other_urls: Option<&[String]>,
111        osi_url: Option<&str>,
112        faq_url: Option<&str>,
113        homepage_url: Option<&str>,
114    ) -> Vec<String> {
115        let mut urls = Vec::new();
116
117        if let Some(u) = text_urls {
118            urls.extend(u.iter().cloned());
119        }
120        if let Some(u) = other_urls {
121            urls.extend(u.iter().cloned());
122        }
123        if let Some(u) = osi_url {
124            let u = u.trim();
125            if !u.is_empty() {
126                urls.push(u.to_string());
127            }
128        }
129        if let Some(u) = faq_url {
130            let u = u.trim();
131            if !u.is_empty() {
132                urls.push(u.to_string());
133            }
134        }
135        if let Some(u) = homepage_url {
136            let u = u.trim();
137            if !u.is_empty() {
138                urls.push(u.to_string());
139            }
140        }
141
142        urls
143    }
144
145    /// Normalize optional string field.
146    ///
147    /// Returns `None` for empty strings, `Some(trimmed)` otherwise.
148    pub fn normalize_optional_string(s: Option<&str>) -> Option<String> {
149        s.map(|s| s.trim().to_string()).filter(|s| !s.is_empty())
150    }
151
152    /// Normalize optional string list.
153    ///
154    /// Returns `None` for empty lists, `Some(list)` with trimmed strings otherwise.
155    pub fn normalize_optional_list(list: Option<&[String]>) -> Option<Vec<String>> {
156        list.map(|l| {
157            l.iter()
158                .map(|s| s.trim().to_string())
159                .filter(|s| !s.is_empty())
160                .collect::<Vec<_>>()
161        })
162        .filter(|l: &Vec<String>| !l.is_empty())
163    }
164
165    /// Validate that a non-deprecated, non-unknown, non-generic license has text content.
166    pub fn validate_text_content(
167        text: &str,
168        is_deprecated: bool,
169        is_unknown: bool,
170        is_generic: bool,
171    ) -> Result<(), LicenseTextError> {
172        if text.trim().is_empty() && !is_deprecated && !is_unknown && !is_generic {
173            Err(LicenseTextError::EmptyText)
174        } else {
175            Ok(())
176        }
177    }
178}
179
180/// Error type for license key validation failures.
181#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
182pub enum LicenseKeyError {
183    #[error("cannot extract key from license file path")]
184    CannotExtractKey,
185    #[error("license key mismatch: filename '{filename}' vs frontmatter '{frontmatter}'")]
186    KeyMismatch {
187        filename: String,
188        frontmatter: String,
189    },
190}
191
192/// Error type for license text validation failures.
193#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
194pub enum LicenseTextError {
195    #[error("license file has empty text content and is not deprecated/unknown/generic")]
196    EmptyText,
197}
198
#[cfg(test)]
mod tests {
    //! Unit tests for the loader-stage helpers and the serde round trip.

    use super::*;
    use std::path::Path;

    #[test]
    fn test_derive_key() {
        assert_eq!(
            LoadedLicense::derive_key(Path::new("licenses/mit.LICENSE")),
            Ok("mit".to_string())
        );
        assert_eq!(
            LoadedLicense::derive_key(Path::new("/path/to/apache-2.0.LICENSE")),
            Ok("apache-2.0".to_string())
        );
        assert_eq!(
            LoadedLicense::derive_key(Path::new("no-extension")),
            Ok("no-extension".to_string())
        );
        // Paths without a file stem cannot produce a key.
        assert_eq!(
            LoadedLicense::derive_key(Path::new("/")),
            Err(LicenseKeyError::CannotExtractKey)
        );
        assert_eq!(
            LoadedLicense::derive_key(Path::new("")),
            Err(LicenseKeyError::CannotExtractKey)
        );
    }

    #[test]
    fn test_validate_key_match() {
        assert!(LoadedLicense::validate_key_match("mit", Some("mit")).is_ok());
        assert!(LoadedLicense::validate_key_match("mit", None).is_ok());
        assert_eq!(
            LoadedLicense::validate_key_match("mit", Some("apache")),
            Err(LicenseKeyError::KeyMismatch {
                filename: "mit".to_string(),
                frontmatter: "apache".to_string()
            })
        );
    }

    #[test]
    fn test_derive_name() {
        assert_eq!(
            LoadedLicense::derive_name(Some("MIT License"), None, "mit"),
            "MIT License"
        );
        assert_eq!(LoadedLicense::derive_name(None, Some("MIT"), "mit"), "MIT");
        assert_eq!(
            LoadedLicense::derive_name(Some("  MIT License  "), None, "mit"),
            "MIT License"
        );
        assert_eq!(LoadedLicense::derive_name(None, None, "mit"), "mit");
        assert_eq!(
            LoadedLicense::derive_name(Some(""), Some("Short"), "key"),
            "Short"
        );
        assert_eq!(LoadedLicense::derive_name(Some("   "), None, "key"), "key");
        // A whitespace-only name falls through to the (trimmed) short name.
        assert_eq!(
            LoadedLicense::derive_name(Some("   "), Some("  Short  "), "key"),
            "Short"
        );
    }

    #[test]
    fn test_merge_reference_urls() {
        let text_urls = vec!["https://example.com/text".to_string()];
        let other_urls = vec!["https://example.com/other".to_string()];

        let urls = LoadedLicense::merge_reference_urls(
            Some(&text_urls),
            Some(&other_urls),
            Some("https://opensource.org/licenses/MIT"),
            Some("https://example.com/faq"),
            Some("https://example.com/home"),
        );
        assert_eq!(
            urls,
            vec![
                "https://example.com/text",
                "https://example.com/other",
                "https://opensource.org/licenses/MIT",
                "https://example.com/faq",
                "https://example.com/home",
            ]
        );
    }

    #[test]
    fn test_merge_reference_urls_empty() {
        let urls = LoadedLicense::merge_reference_urls(None, None, None, None, None);
        assert!(urls.is_empty());
    }

    #[test]
    fn test_merge_reference_urls_trims_whitespace() {
        let urls = LoadedLicense::merge_reference_urls(
            None,
            None,
            Some("  https://example.com  "),
            None,
            None,
        );
        assert_eq!(urls, vec!["https://example.com"]);
    }

    #[test]
    fn test_merge_reference_urls_skips_blank_single_values() {
        let urls = LoadedLicense::merge_reference_urls(
            None,
            None,
            Some("   "),
            Some(""),
            Some("https://example.com/home"),
        );
        assert_eq!(urls, vec!["https://example.com/home"]);
    }

    #[test]
    fn test_normalize_optional_string() {
        assert_eq!(LoadedLicense::normalize_optional_string(None), None);
        assert_eq!(LoadedLicense::normalize_optional_string(Some("")), None);
        assert_eq!(LoadedLicense::normalize_optional_string(Some("   ")), None);
        assert_eq!(
            LoadedLicense::normalize_optional_string(Some("hello")),
            Some("hello".to_string())
        );
        assert_eq!(
            LoadedLicense::normalize_optional_string(Some("  hello  ")),
            Some("hello".to_string())
        );
    }

    #[test]
    fn test_normalize_optional_list() {
        assert_eq!(LoadedLicense::normalize_optional_list(None), None);
        assert_eq!(LoadedLicense::normalize_optional_list(Some(&[])), None);
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&["a".to_string(), "b".to_string()])),
            Some(vec!["a".to_string(), "b".to_string()])
        );
        // Entries are trimmed and blank ones are dropped.
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&[
                "  a  ".to_string(),
                "   ".to_string(),
                "b".to_string()
            ])),
            Some(vec!["a".to_string(), "b".to_string()])
        );
        // A list that is empty after filtering collapses to `None`.
        assert_eq!(
            LoadedLicense::normalize_optional_list(Some(&[String::new(), "  ".to_string()])),
            None
        );
    }

    #[test]
    fn test_validate_text_content() {
        assert!(LoadedLicense::validate_text_content("some text", false, false, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", true, false, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", false, true, false).is_ok());
        assert!(LoadedLicense::validate_text_content("", false, false, true).is_ok());
        assert_eq!(
            LoadedLicense::validate_text_content("", false, false, false),
            Err(LicenseTextError::EmptyText)
        );
        assert_eq!(
            LoadedLicense::validate_text_content("   ", false, false, false),
            Err(LicenseTextError::EmptyText)
        );
    }

    #[test]
    fn test_serde_roundtrip() {
        let license = LoadedLicense {
            key: "mit".to_string(),
            short_name: Some("MIT".to_string()),
            name: "MIT License".to_string(),
            language: Some("en".to_string()),
            spdx_license_key: Some("MIT".to_string()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: Some("Open Source Initiative".to_string()),
            homepage_url: Some("https://opensource.org/licenses/MIT".to_string()),
            text: "MIT License text".to_string(),
            reference_urls: vec!["https://opensource.org/licenses/MIT".to_string()],
            osi_license_key: Some("MIT".to_string()),
            text_urls: vec!["https://opensource.org/licenses/MIT".to_string()],
            osi_url: Some("https://opensource.org/licenses/MIT".to_string()),
            faq_url: None,
            other_urls: vec![],
            notes: Some("Test note".to_string()),
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        };

        let json = serde_json::to_string(&license).unwrap();
        let deserialized: LoadedLicense = serde_json::from_str(&json).unwrap();
        assert_eq!(license, deserialized);
    }
}