Skip to main content

provenant/license_detection/
build_policy.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::sync::LazyLock;
6
7use anyhow::{Result, anyhow};
8use serde::Deserialize;
9
10use crate::license_detection::expression::parse_expression;
11use crate::license_detection::models::{LoadedLicense, LoadedRule, RuleKind};
12use crate::license_detection::rules::{parse_license_str_to_loaded, parse_rule_str_to_loaded};
13use crate::models::LicenseIndexProvenance;
14
/// Repository-relative path of the bundled index build policy file.
///
/// NOTE(review): presumably the same file whose text is embedded below in
/// `DEFAULT_INDEX_BUILD_POLICY_TEXT` — confirm the two paths stay in sync.
pub const DEFAULT_INDEX_BUILD_POLICY_PATH: &str =
    "resources/license_detection/index_build_policy.toml";
/// Repository-relative overlay directory; quoted in the error messages produced
/// when a bundled overlay rule or license fails to parse.
pub const DEFAULT_INDEX_BUILD_OVERLAY_ROOT: &str = "resources/license_detection/overlay";
/// Source label for an embedded license index.
///
/// NOTE(review): not referenced in this file; presumably passed as the `source`
/// argument of `AppliedIndexBuildPolicy::to_license_index_provenance` — verify
/// against callers.
pub const EMBEDDED_LICENSE_INDEX_SOURCE: &str = "embedded-artifact";

/// Text of the bundled policy TOML, embedded at compile time via `include_str!`.
const DEFAULT_INDEX_BUILD_POLICY_TEXT: &str =
    include_str!("../../resources/license_detection/index_build_policy.toml");
22
/// One overlay file compiled into the binary.
///
/// Slices of these entries are provided by the generated manifest that this
/// file includes from `OUT_DIR`.
pub(crate) struct BundledOverlayFile {
    /// Identifier used as the lookup key into the overlay reason registry and
    /// passed to the rule/license parser together with `contents`.
    pub identifier: &'static str,
    /// Raw overlay file text handed to the rule/license parser.
    pub contents: &'static str,
}
27
/// Manifest of bundled overlay files, pulled in from a generated source file.
///
/// The included file lives in `OUT_DIR` (presumably written by the crate's
/// build script — confirm) and is expected to define
/// `BUNDLED_LICENSE_OVERLAY_FILES` and `BUNDLED_RULE_OVERLAY_FILES`, which the
/// parent module imports.
mod bundled_overlay_manifest {
    // NOTE(review): the generated code presumably names `BundledOverlayFile`
    // unqualified, hence this import — confirm against the generator.
    use super::BundledOverlayFile;

    include!(concat!(env!("OUT_DIR"), "/bundled_license_overlays.rs"));
}
33
34use bundled_overlay_manifest::{BUNDLED_LICENSE_OVERLAY_FILES, BUNDLED_RULE_OVERLAY_FILES};
35
36static DEFAULT_INDEX_BUILD_POLICY: LazyLock<IndexBuildPolicy> = LazyLock::new(|| {
37    toml::from_str(DEFAULT_INDEX_BUILD_POLICY_TEXT)
38        .expect("Failed to parse bundled license index build policy")
39});
40
/// Declarative policy applied to upstream rules and licenses while building
/// the license index.
///
/// The bundled default instance is deserialized from TOML. Every field falls
/// back to its `Default` value when absent from the input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct IndexBuildPolicy {
    /// Identifiers of upstream rules to drop. Entries are trimmed and blank
    /// entries are skipped before matching.
    #[serde(default)]
    pub ignored_rules: Vec<String>,
    /// Keys of upstream licenses to drop. Keys are compared after trimming and
    /// lowercasing; blank entries are skipped.
    #[serde(default)]
    pub ignored_licenses: Vec<String>,
    /// Reason text registered for each bundled overlay file; validated only on
    /// the bundled default policy path.
    #[serde(default)]
    pub overlay_reasons: OverlayReasons,
}
50
/// Registry of reasons for bundled overlays, keyed by overlay identifier.
///
/// Validation requires every bundled overlay to have a non-blank reason and
/// every reason to refer to an existing bundled overlay.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct OverlayReasons {
    /// Reasons for bundled rule overlays (overlay identifier -> reason text).
    #[serde(default)]
    pub rules: BTreeMap<String, String>,
    /// Reasons for bundled license overlays (overlay identifier -> reason text).
    #[serde(default)]
    pub licenses: BTreeMap<String, String>,
}
58
59impl IndexBuildPolicy {
60    /// Returns true when no behavior-changing ignore policy is present.
61    ///
62    /// `overlay_reasons` is intentionally excluded because it is enforced only for
63    /// the bundled default policy path and does not affect non-default overlay
64    /// application semantics.
65    pub fn is_empty(&self) -> bool {
66        self.ignored_rules.is_empty() && self.ignored_licenses.is_empty()
67    }
68
69    fn ignored_rule_set(&self) -> HashSet<String> {
70        self.ignored_rules
71            .iter()
72            .map(|identifier| identifier.trim())
73            .filter(|identifier| !identifier.is_empty())
74            .map(ToOwned::to_owned)
75            .collect()
76    }
77
78    fn ignored_license_set(&self) -> HashSet<String> {
79        self.ignored_licenses
80            .iter()
81            .map(|key| normalize_license_key(key))
82            .filter(|key| !key.is_empty())
83            .collect()
84    }
85}
86
/// Report of what an index build policy actually changed.
///
/// `apply_index_build_policy` sorts and de-duplicates every list before
/// returning a populated report.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AppliedIndexBuildPolicy {
    /// Identifiers of rules dropped because the policy ignores them directly.
    pub ignored_rules: Vec<String>,
    /// Keys (as loaded, not normalized) of licenses dropped by the policy.
    pub ignored_licenses: Vec<String>,
    /// Identifiers of rules dropped because their license expression
    /// references an ignored license.
    pub ignored_rules_due_to_licenses: Vec<String>,
    /// Identifiers of overlay rules that had no upstream counterpart.
    pub added_rules: Vec<String>,
    /// Identifiers of overlay rules that replaced an upstream rule.
    pub replaced_rules: Vec<String>,
    /// Keys of overlay licenses that had no upstream counterpart.
    pub added_licenses: Vec<String>,
    /// Keys of overlay licenses that replaced an upstream license.
    pub replaced_licenses: Vec<String>,
}
97
98impl AppliedIndexBuildPolicy {
99    pub fn is_empty(&self) -> bool {
100        self.ignored_rules.is_empty()
101            && self.ignored_licenses.is_empty()
102            && self.ignored_rules_due_to_licenses.is_empty()
103            && self.added_rules.is_empty()
104            && self.replaced_rules.is_empty()
105            && self.added_licenses.is_empty()
106            && self.replaced_licenses.is_empty()
107    }
108
109    fn sort_and_dedup(&mut self) {
110        for values in [
111            &mut self.ignored_rules,
112            &mut self.ignored_licenses,
113            &mut self.ignored_rules_due_to_licenses,
114            &mut self.added_rules,
115            &mut self.replaced_rules,
116            &mut self.added_licenses,
117            &mut self.replaced_licenses,
118        ] {
119            values.sort();
120            values.dedup();
121        }
122    }
123
124    pub fn to_license_index_provenance(
125        &self,
126        source: &str,
127        dataset_fingerprint: String,
128    ) -> LicenseIndexProvenance {
129        LicenseIndexProvenance {
130            source: source.to_string(),
131            dataset_fingerprint,
132            ignored_rules: self.ignored_rules.clone(),
133            ignored_licenses: self.ignored_licenses.clone(),
134            ignored_rules_due_to_licenses: self.ignored_rules_due_to_licenses.clone(),
135            added_rules: self.added_rules.clone(),
136            replaced_rules: self.replaced_rules.clone(),
137            added_licenses: self.added_licenses.clone(),
138            replaced_licenses: self.replaced_licenses.clone(),
139        }
140    }
141}
142
143pub fn default_index_build_policy() -> &'static IndexBuildPolicy {
144    &DEFAULT_INDEX_BUILD_POLICY
145}
146
147pub fn apply_default_index_build_policy(
148    loaded_rules: Vec<LoadedRule>,
149    loaded_licenses: Vec<LoadedLicense>,
150) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
151    validate_bundled_overlay_reasons(
152        default_index_build_policy(),
153        BUNDLED_RULE_OVERLAY_FILES,
154        BUNDLED_LICENSE_OVERLAY_FILES,
155    )?;
156    let overlay_rules = load_default_overlay_rules()?;
157    let overlay_licenses = load_default_overlay_licenses()?;
158    let (loaded_rules, loaded_licenses, report) = apply_index_build_policy(
159        loaded_rules,
160        loaded_licenses,
161        default_index_build_policy(),
162        &overlay_rules,
163        &overlay_licenses,
164    )?;
165    Ok((loaded_rules, loaded_licenses, report))
166}
167
168pub fn apply_index_build_policy(
169    loaded_rules: Vec<LoadedRule>,
170    loaded_licenses: Vec<LoadedLicense>,
171    policy: &IndexBuildPolicy,
172    overlay_rules: &[LoadedRule],
173    overlay_licenses: &[LoadedLicense],
174) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
175    if policy.is_empty() && overlay_rules.is_empty() && overlay_licenses.is_empty() {
176        return Ok((
177            loaded_rules,
178            loaded_licenses,
179            AppliedIndexBuildPolicy::default(),
180        ));
181    }
182
183    let ignored_rule_identifiers = policy.ignored_rule_set();
184    let ignored_license_keys = policy.ignored_license_set();
185    let mut report = AppliedIndexBuildPolicy::default();
186
187    let mut filtered_licenses: Vec<_> = loaded_licenses
188        .into_iter()
189        .filter_map(|license| {
190            if ignored_license_keys.contains(&normalize_license_key(&license.key)) {
191                report.ignored_licenses.push(license.key.clone());
192                None
193            } else {
194                Some(license)
195            }
196        })
197        .collect();
198
199    let mut filtered_rules: Vec<_> = loaded_rules
200        .into_iter()
201        .filter_map(|rule| {
202            if ignored_rule_identifiers.contains(rule.identifier.as_str()) {
203                report.ignored_rules.push(rule.identifier.clone());
204                return None;
205            }
206
207            if rule_references_ignored_license(&rule, &ignored_license_keys) {
208                report
209                    .ignored_rules_due_to_licenses
210                    .push(rule.identifier.clone());
211                return None;
212            }
213
214            Some(rule)
215        })
216        .collect();
217
218    ensure_all_ignored_entries_exist(&ignored_rule_identifiers, &ignored_license_keys, &report)?;
219
220    apply_license_overlays(
221        &mut filtered_licenses,
222        overlay_licenses,
223        &ignored_license_keys,
224        &mut report,
225    )?;
226    apply_rule_overlays(
227        &mut filtered_rules,
228        overlay_rules,
229        &ignored_rule_identifiers,
230        &ignored_license_keys,
231        &filtered_licenses,
232        &mut report,
233    )?;
234
235    report.sort_and_dedup();
236
237    Ok((filtered_rules, filtered_licenses, report))
238}
239
240fn validate_bundled_overlay_reasons(
241    policy: &IndexBuildPolicy,
242    rule_overlays: &[BundledOverlayFile],
243    license_overlays: &[BundledOverlayFile],
244) -> Result<()> {
245    validate_overlay_reason_entries("rule", &policy.overlay_reasons.rules, rule_overlays)?;
246    validate_overlay_reason_entries(
247        "license",
248        &policy.overlay_reasons.licenses,
249        license_overlays,
250    )?;
251    Ok(())
252}
253
254fn validate_overlay_reason_entries(
255    overlay_kind: &str,
256    reasons: &BTreeMap<String, String>,
257    overlays: &[BundledOverlayFile],
258) -> Result<()> {
259    let overlay_identifiers = overlays
260        .iter()
261        .map(|overlay| overlay.identifier)
262        .collect::<HashSet<_>>();
263
264    let blank_reason_entries = overlays
265        .iter()
266        .filter_map(|overlay| match reasons.get(overlay.identifier) {
267            Some(reason) if reason.trim().is_empty() => Some(overlay.identifier.to_string()),
268            _ => None,
269        })
270        .collect::<Vec<_>>();
271    if !blank_reason_entries.is_empty() {
272        return Err(anyhow!(
273            "blank overlay reasons for bundled {} overlays: {}",
274            overlay_kind,
275            blank_reason_entries.join(", ")
276        ));
277    }
278
279    let missing_reason_entries = overlays
280        .iter()
281        .filter(|overlay| !reasons.contains_key(overlay.identifier))
282        .map(|overlay| overlay.identifier.to_string())
283        .collect::<Vec<_>>();
284    if !missing_reason_entries.is_empty() {
285        return Err(anyhow!(
286            "missing overlay reasons for bundled {} overlays: {}",
287            overlay_kind,
288            missing_reason_entries.join(", ")
289        ));
290    }
291
292    let stale_reason_entries = reasons
293        .keys()
294        .filter(|identifier| !overlay_identifiers.contains(identifier.as_str()))
295        .cloned()
296        .collect::<Vec<_>>();
297    if !stale_reason_entries.is_empty() {
298        return Err(anyhow!(
299            "overlay reasons reference missing bundled {} overlays: {}",
300            overlay_kind,
301            stale_reason_entries.join(", ")
302        ));
303    }
304
305    Ok(())
306}
307
308fn load_default_overlay_rules() -> Result<Vec<LoadedRule>> {
309    BUNDLED_RULE_OVERLAY_FILES
310        .iter()
311        .map(|overlay| {
312            parse_rule_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
313                anyhow!(
314                    "Failed to parse bundled overlay rule {} from {}: {}",
315                    overlay.identifier,
316                    DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
317                    error
318                )
319            })
320        })
321        .collect()
322}
323
324fn load_default_overlay_licenses() -> Result<Vec<LoadedLicense>> {
325    BUNDLED_LICENSE_OVERLAY_FILES
326        .iter()
327        .map(|overlay| {
328            parse_license_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
329                anyhow!(
330                    "Failed to parse bundled overlay license {} from {}: {}",
331                    overlay.identifier,
332                    DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
333                    error
334                )
335            })
336        })
337        .collect()
338}
339
340fn ensure_all_ignored_entries_exist(
341    ignored_rule_identifiers: &HashSet<String>,
342    ignored_license_keys: &HashSet<String>,
343    report: &AppliedIndexBuildPolicy,
344) -> Result<()> {
345    let applied_ignored_rules = report.ignored_rules.iter().cloned().collect::<HashSet<_>>();
346    let missing_rules = ignored_rule_identifiers
347        .difference(&applied_ignored_rules)
348        .cloned()
349        .collect::<Vec<_>>();
350
351    let applied_ignored_licenses = report
352        .ignored_licenses
353        .iter()
354        .map(|key| normalize_license_key(key))
355        .collect::<HashSet<_>>();
356    let missing_licenses = ignored_license_keys
357        .difference(&applied_ignored_licenses)
358        .cloned()
359        .collect::<Vec<_>>();
360
361    if missing_rules.is_empty() && missing_licenses.is_empty() {
362        Ok(())
363    } else {
364        let mut problems = Vec::new();
365        if !missing_rules.is_empty() {
366            problems.push(format!(
367                "ignored rule identifiers not found upstream: {}",
368                missing_rules.join(", ")
369            ));
370        }
371        if !missing_licenses.is_empty() {
372            problems.push(format!(
373                "ignored license keys not found upstream: {}",
374                missing_licenses.join(", ")
375            ));
376        }
377        Err(anyhow!(
378            "stale index-build policy entries detected; remove or update them: {}",
379            problems.join("; ")
380        ))
381    }
382}
383
384fn apply_license_overlays(
385    licenses: &mut Vec<LoadedLicense>,
386    overlays: &[LoadedLicense],
387    ignored_license_keys: &HashSet<String>,
388    report: &mut AppliedIndexBuildPolicy,
389) -> Result<()> {
390    let mut indices = build_license_index_map(licenses)?;
391    let mut seen_overlay_keys = HashSet::new();
392
393    for overlay in overlays {
394        let key = normalize_license_key(&overlay.key);
395
396        if !seen_overlay_keys.insert(key.clone()) {
397            return Err(anyhow!(
398                "bundled overlay contains duplicate license key '{}'",
399                overlay.key
400            ));
401        }
402
403        if ignored_license_keys.contains(&key) {
404            return Err(anyhow!(
405                "overlay license '{}' conflicts with ignored_licenses",
406                overlay.key
407            ));
408        }
409
410        if let Some(index) = indices.get(&key).copied() {
411            if licenses[index] == *overlay {
412                return Err(anyhow!(
413                    "overlay license '{}' is now identical to upstream; remove the local overlay file",
414                    overlay.key
415                ));
416            }
417            report.replaced_licenses.push(overlay.key.clone());
418            licenses[index] = overlay.clone();
419        } else {
420            report.added_licenses.push(overlay.key.clone());
421            licenses.push(overlay.clone());
422            indices.insert(key, licenses.len() - 1);
423        }
424    }
425
426    Ok(())
427}
428
429fn apply_rule_overlays(
430    rules: &mut Vec<LoadedRule>,
431    overlays: &[LoadedRule],
432    ignored_rule_identifiers: &HashSet<String>,
433    ignored_license_keys: &HashSet<String>,
434    licenses: &[LoadedLicense],
435    report: &mut AppliedIndexBuildPolicy,
436) -> Result<()> {
437    let mut indices = build_rule_index_map(rules)?;
438    let mut seen_overlay_identifiers = HashSet::new();
439    let available_license_keys = licenses
440        .iter()
441        .map(|license| normalize_license_key(&license.key))
442        .collect::<HashSet<_>>();
443
444    for overlay in overlays {
445        let identifier = overlay.identifier.clone();
446
447        if !seen_overlay_identifiers.insert(identifier.clone()) {
448            return Err(anyhow!(
449                "bundled overlay contains duplicate rule identifier '{}'",
450                identifier
451            ));
452        }
453
454        if ignored_rule_identifiers.contains(identifier.as_str()) {
455            return Err(anyhow!(
456                "overlay rule '{}' conflicts with ignored_rules",
457                identifier
458            ));
459        }
460
461        if rule_references_ignored_license(overlay, ignored_license_keys) {
462            return Err(anyhow!(
463                "overlay rule '{}' references an ignored license key",
464                identifier
465            ));
466        }
467
468        ensure_rule_references_known_licenses(overlay, &available_license_keys)?;
469
470        if let Some(index) = indices.get(identifier.as_str()).copied() {
471            if rules[index] == *overlay {
472                return Err(anyhow!(
473                    "overlay rule '{}' is now identical to upstream; remove the local overlay file",
474                    identifier
475                ));
476            }
477            report.replaced_rules.push(identifier.clone());
478            rules[index] = overlay.clone();
479        } else {
480            report.added_rules.push(identifier.clone());
481            rules.push(overlay.clone());
482            indices.insert(identifier, rules.len() - 1);
483        }
484    }
485
486    Ok(())
487}
488
489fn build_rule_index_map(rules: &[LoadedRule]) -> Result<HashMap<String, usize>> {
490    let mut indices = HashMap::new();
491    for (index, rule) in rules.iter().enumerate() {
492        if indices.insert(rule.identifier.clone(), index).is_some() {
493            return Err(anyhow!(
494                "cannot apply overlay because duplicate rule identifier '{}' is already present",
495                rule.identifier
496            ));
497        }
498    }
499    Ok(indices)
500}
501
502fn build_license_index_map(licenses: &[LoadedLicense]) -> Result<HashMap<String, usize>> {
503    let mut indices = HashMap::new();
504    for (index, license) in licenses.iter().enumerate() {
505        let normalized_key = normalize_license_key(&license.key);
506        if indices.insert(normalized_key, index).is_some() {
507            return Err(anyhow!(
508                "cannot apply overlay because duplicate license key '{}' is already present",
509                license.key
510            ));
511        }
512    }
513    Ok(indices)
514}
515
516fn ensure_rule_references_known_licenses(
517    rule: &LoadedRule,
518    available_license_keys: &HashSet<String>,
519) -> Result<()> {
520    if rule.rule_kind == RuleKind::None && rule.is_false_positive {
521        return Ok(());
522    }
523
524    let expression = parse_expression(&rule.license_expression).map_err(|error| {
525        anyhow!(
526            "overlay rule '{}' has an invalid license expression '{}': {}",
527            rule.identifier,
528            rule.license_expression,
529            error
530        )
531    })?;
532
533    let missing_keys = expression
534        .license_keys()
535        .into_iter()
536        .map(|key| normalize_license_key(&key))
537        .filter(|key| !available_license_keys.contains(key))
538        .collect::<Vec<_>>();
539
540    if missing_keys.is_empty() {
541        Ok(())
542    } else {
543        Err(anyhow!(
544            "overlay rule '{}' references unknown license keys: {}",
545            rule.identifier,
546            missing_keys.join(", ")
547        ))
548    }
549}
550
/// Normalizes a license key for comparison: surrounding whitespace is removed
/// and the remainder is lowercased.
fn normalize_license_key(key: &str) -> String {
    let trimmed = key.trim();
    str::to_lowercase(trimmed)
}
554
555fn rule_references_ignored_license(
556    rule: &LoadedRule,
557    ignored_license_keys: &HashSet<String>,
558) -> bool {
559    if ignored_license_keys.is_empty() {
560        return false;
561    }
562
563    let normalized_expression = normalize_license_key(&rule.license_expression);
564    if ignored_license_keys.contains(&normalized_expression) {
565        return true;
566    }
567
568    if rule.rule_kind == RuleKind::None && rule.is_false_positive {
569        return false;
570    }
571
572    parse_expression(&rule.license_expression)
573        .map(|expression| {
574            expression
575                .license_keys()
576                .into_iter()
577                .map(|key| normalize_license_key(&key))
578                .any(|key| ignored_license_keys.contains(&key))
579        })
580        .unwrap_or(false)
581}
582
// Unit tests for policy application (ignore lists and overlays) and for the
// bundled overlay reason registry validation.
#[cfg(test)]
mod tests {
    use super::*;

    // Builds a text rule fixture with the given identifier and license
    // expression; its text is "<identifier> text" and all optional fields are
    // unset.
    fn create_loaded_rule(identifier: &str, expression: &str) -> LoadedRule {
        LoadedRule {
            identifier: identifier.to_string(),
            license_expression: expression.to_string(),
            text: format!("{identifier} text"),
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            skip_for_required_phrase_generation: false,
            relevance: Some(100),
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            is_deprecated: false,
            replaced_by: vec![],
        }
    }

    // Builds a license fixture for `key`; name and text are derived from the
    // key, and the short name / SPDX key are the uppercased key.
    fn create_loaded_license(key: &str) -> LoadedLicense {
        LoadedLicense {
            key: key.to_string(),
            short_name: Some(key.to_uppercase()),
            name: format!("{key} license"),
            language: Some("en".to_string()),
            spdx_license_key: Some(key.to_uppercase()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: None,
            homepage_url: None,
            text: format!("{key} text"),
            reference_urls: vec![],
            osi_license_key: None,
            text_urls: vec![],
            osi_url: None,
            faq_url: None,
            other_urls: vec![],
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    // A directly ignored rule, an ignored license, and a rule whose expression
    // mentions the ignored license are all removed and reported separately.
    #[test]
    fn test_apply_index_build_policy_filters_direct_and_dependent_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["direct.RULE".to_string()],
            ignored_licenses: vec!["apache-2.0".to_string()],
            overlay_reasons: OverlayReasons::default(),
        };

        let rules = vec![
            create_loaded_rule("keep.RULE", "mit"),
            create_loaded_rule("direct.RULE", "mit"),
            create_loaded_rule("dependent.RULE", "mit OR apache-2.0"),
        ];
        let licenses = vec![
            create_loaded_license("mit"),
            create_loaded_license("apache-2.0"),
        ];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &[], &[])
                .expect("policy application");

        assert_eq!(
            filtered_rules
                .iter()
                .map(|rule| rule.identifier.as_str())
                .collect::<Vec<_>>(),
            vec!["keep.RULE"]
        );
        assert_eq!(
            filtered_licenses
                .iter()
                .map(|license| license.key.as_str())
                .collect::<Vec<_>>(),
            vec!["mit"]
        );
        assert_eq!(report.ignored_rules, vec!["direct.RULE".to_string()]);
        assert_eq!(report.ignored_licenses, vec!["apache-2.0".to_string()]);
        assert_eq!(
            report.ignored_rules_due_to_licenses,
            vec!["dependent.RULE".to_string()]
        );
    }

    // An ignore entry that matches nothing upstream is reported as stale.
    #[test]
    fn test_apply_index_build_policy_fails_for_stale_ignored_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["missing.RULE".to_string()],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons::default(),
        };

        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![create_loaded_license("mit")],
            &policy,
            &[],
            &[],
        )
        .expect_err("missing ignored rule should fail");

        assert!(
            error
                .to_string()
                .contains("ignored rule identifiers not found upstream: missing.RULE")
        );
    }

    // Overlay entries with no upstream counterpart are appended and reported
    // as additions.
    #[test]
    fn test_apply_index_build_policy_infers_add_from_new_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![create_loaded_rule("custom-rule.RULE", "mit")];
        let overlay_licenses = vec![create_loaded_license("custom-license")];
        let rules = vec![create_loaded_rule("keep.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert!(
            filtered_rules
                .iter()
                .any(|rule| rule.identifier == "custom-rule.RULE")
        );
        assert!(
            filtered_licenses
                .iter()
                .any(|license| license.key == "custom-license")
        );
        assert_eq!(report.added_rules, vec!["custom-rule.RULE".to_string()]);
        assert_eq!(report.added_licenses, vec!["custom-license".to_string()]);
    }

    // Overlay entries that collide with an upstream identifier/key replace the
    // upstream entry in place and are reported as replacements.
    #[test]
    fn test_apply_index_build_policy_infers_replace_from_colliding_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![LoadedRule {
            text: "updated rule text".to_string(),
            ..create_loaded_rule("replace.RULE", "mit")
        }];
        let overlay_licenses = vec![LoadedLicense {
            name: "MIT Updated".to_string(),
            text: "updated license text".to_string(),
            ..create_loaded_license("mit")
        }];
        let rules = vec![create_loaded_rule("replace.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert_eq!(filtered_rules[0].text, "updated rule text");
        assert_eq!(filtered_licenses[0].name, "MIT Updated");
        assert_eq!(report.replaced_rules, vec!["replace.RULE".to_string()]);
        assert_eq!(report.replaced_licenses, vec!["mit".to_string()]);
    }

    // A rule overlay equal to its upstream rule is rejected as redundant.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_rule_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_rule = create_loaded_rule("replace.RULE", "mit");
        let error = apply_index_build_policy(
            vec![base_rule.clone()],
            vec![create_loaded_license("mit")],
            &policy,
            &[base_rule],
            &[],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay rule 'replace.RULE' is now identical to upstream")
        );
    }

    // A license overlay equal to its upstream license is rejected as redundant.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_license_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_license = create_loaded_license("mit");
        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![base_license.clone()],
            &policy,
            &[],
            &[base_license],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay license 'mit' is now identical to upstream")
        );
    }

    // Every overlay has a non-blank reason and every reason has an overlay.
    #[test]
    fn test_validate_bundled_overlay_reasons_accepts_complete_reason_registry() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::from([(
                    "custom.RULE".to_string(),
                    "Matches a documented custom rule variant.".to_string(),
                )]),
                licenses: BTreeMap::from([(
                    "custom.LICENSE".to_string(),
                    "Carries a documented local license correction.".to_string(),
                )]),
            },
        };

        let rule_overlays = [BundledOverlayFile {
            identifier: "custom.RULE",
            contents: "---\nlicense_expression: mit\nis_license_text: yes\n---\ntext",
        }];
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "---\nkey: custom\nname: Custom\n---\ntext",
        }];

        validate_bundled_overlay_reasons(&policy, &rule_overlays, &license_overlays)
            .expect("reason validation should pass");
    }

    // Guards the checked-in policy file against drifting out of sync with the
    // overlay files bundled at build time.
    #[test]
    fn test_validate_bundled_overlay_reasons_accepts_checked_in_default_registry() {
        validate_bundled_overlay_reasons(
            default_index_build_policy(),
            BUNDLED_RULE_OVERLAY_FILES,
            BUNDLED_LICENSE_OVERLAY_FILES,
        )
        .expect("checked-in overlay reason registry should stay in sync");
    }

    // A rule overlay with no reason entry is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_missing_rule_reason() {
        let policy = IndexBuildPolicy::default();
        let rule_overlays = [BundledOverlayFile {
            identifier: "custom.RULE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &rule_overlays, &[])
            .expect_err("missing rule reason should fail");

        assert!(
            error
                .to_string()
                .contains("missing overlay reasons for bundled rule overlays: custom.RULE")
        );
    }

    // A whitespace-only reason counts as blank and is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_blank_license_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::new(),
                licenses: BTreeMap::from([("custom.LICENSE".to_string(), "   ".to_string())]),
            },
        };
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &[], &license_overlays)
            .expect_err("blank license reason should fail");

        assert!(
            error
                .to_string()
                .contains("blank overlay reasons for bundled license overlays: custom.LICENSE")
        );
    }

    // A license overlay with no reason entry is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_missing_license_reason() {
        let policy = IndexBuildPolicy::default();
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &[], &license_overlays)
            .expect_err("missing license reason should fail");

        assert!(
            error
                .to_string()
                .contains("missing overlay reasons for bundled license overlays: custom.LICENSE")
        );
    }

    // A rule reason whose overlay no longer exists is rejected as stale.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_stale_rule_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::from([(
                    "removed.RULE".to_string(),
                    "Old rationale that should have been deleted.".to_string(),
                )]),
                licenses: BTreeMap::new(),
            },
        };

        let error = validate_bundled_overlay_reasons(&policy, &[], &[])
            .expect_err("stale rule reason should fail");

        assert!(
            error
                .to_string()
                .contains("overlay reasons reference missing bundled rule overlays: removed.RULE")
        );
    }

    // A license reason whose overlay no longer exists is rejected as stale.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_stale_license_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::new(),
                licenses: BTreeMap::from([(
                    "removed.LICENSE".to_string(),
                    "Old rationale that should have been deleted.".to_string(),
                )]),
            },
        };

        let error = validate_bundled_overlay_reasons(&policy, &[], &[])
            .expect_err("stale license reason should fail");

        assert!(error.to_string().contains(
            "overlay reasons reference missing bundled license overlays: removed.LICENSE"
        ));
    }
}
952}