Skip to main content

provenant/license_detection/
build_policy.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{BTreeMap, HashMap, HashSet};
5use std::sync::LazyLock;
6
7use anyhow::{Result, anyhow};
8use serde::Deserialize;
9
10use crate::license_detection::expression::parse_expression;
11use crate::license_detection::models::{LoadedLicense, LoadedRule, RuleKind};
12use crate::license_detection::rules::{parse_license_str_to_loaded, parse_rule_str_to_loaded};
13use crate::models::LicenseIndexProvenance;
14
/// Repository-relative path of the bundled index build policy file.
///
/// Within this file it is only used to label the panic raised when the embedded
/// policy text fails to parse.
pub const DEFAULT_INDEX_BUILD_POLICY_PATH: &str =
    "resources/license_detection/index_build_policy.toml";
/// Overlay directory named in the errors raised when a bundled overlay rule or
/// license fails to parse.
pub const DEFAULT_INDEX_BUILD_OVERLAY_ROOT: &str = "resources/license_detection/overlay";
/// NOTE(review): presumably the `source` label recorded in provenance for an
/// embedded index; it is not referenced in this file, so confirm against callers.
pub const EMBEDDED_LICENSE_INDEX_SOURCE: &str = "embedded-artifact";

/// Contents of the bundled policy file, embedded at compile time via `include_str!`.
const DEFAULT_INDEX_BUILD_POLICY_TEXT: &str =
    include_str!("../../resources/license_detection/index_build_policy.toml");
22
/// One overlay file compiled into the binary: its identifier plus its raw text.
pub(crate) struct BundledOverlayFile {
    /// Identifier handed to the rule/license parsers and used as the lookup key
    /// into the policy's overlay reasons.
    pub identifier: &'static str,
    /// Raw file text handed to the rule/license parsers.
    pub contents: &'static str,
}
27
/// Wraps the `bundled_license_overlays.rs` source found under `OUT_DIR`
/// (presumably emitted by the build script — confirm in `build.rs`).
///
/// The enclosing file imports `BUNDLED_LICENSE_OVERLAY_FILES` and
/// `BUNDLED_RULE_OVERLAY_FILES` from this module.
mod bundled_overlay_manifest {
    // Brought into scope here, presumably for the included source to reference.
    use super::BundledOverlayFile;

    include!(concat!(env!("OUT_DIR"), "/bundled_license_overlays.rs"));
}
33
34use bundled_overlay_manifest::{BUNDLED_LICENSE_OVERLAY_FILES, BUNDLED_RULE_OVERLAY_FILES};
35
36static DEFAULT_INDEX_BUILD_POLICY: LazyLock<IndexBuildPolicy> = LazyLock::new(|| {
37    toml::from_str(DEFAULT_INDEX_BUILD_POLICY_TEXT).unwrap_or_else(|error| {
38        panic!(
39            "Failed to parse bundled license index build policy at {}: {}",
40            DEFAULT_INDEX_BUILD_POLICY_PATH, error
41        )
42    })
43});
44
/// Deserializable policy describing which upstream rules and licenses to drop
/// while building the license index, plus the rationale registry for overlays.
///
/// Every field falls back to its `Default` when absent from the input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct IndexBuildPolicy {
    /// Rule identifiers to remove. Entries are trimmed and blank entries are
    /// skipped before matching.
    #[serde(default)]
    pub ignored_rules: Vec<String>,
    /// License keys to remove. Entries are trimmed and lowercased before
    /// matching, and blank entries are skipped.
    #[serde(default)]
    pub ignored_licenses: Vec<String>,
    /// Rationale text per overlay identifier; validated against the bundled
    /// overlay files when the default policy is applied.
    #[serde(default)]
    pub overlay_reasons: OverlayReasons,
}
54
/// Registry mapping overlay identifiers to the reason each overlay exists.
///
/// Both maps fall back to empty when absent from the input.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct OverlayReasons {
    /// Reason text keyed by rule overlay identifier.
    #[serde(default)]
    pub rules: BTreeMap<String, String>,
    /// Reason text keyed by license overlay identifier.
    #[serde(default)]
    pub licenses: BTreeMap<String, String>,
}
62
63impl IndexBuildPolicy {
64    /// Returns true when no behavior-changing ignore policy is present.
65    ///
66    /// `overlay_reasons` is intentionally excluded because it is enforced only for
67    /// the bundled default policy path and does not affect non-default overlay
68    /// application semantics.
69    pub fn is_empty(&self) -> bool {
70        self.ignored_rules.is_empty() && self.ignored_licenses.is_empty()
71    }
72
73    fn ignored_rule_set(&self) -> HashSet<String> {
74        self.ignored_rules
75            .iter()
76            .map(|identifier| identifier.trim())
77            .filter(|identifier| !identifier.is_empty())
78            .map(ToOwned::to_owned)
79            .collect()
80    }
81
82    fn ignored_license_set(&self) -> HashSet<String> {
83        self.ignored_licenses
84            .iter()
85            .map(|key| normalize_license_key(key))
86            .filter(|key| !key.is_empty())
87            .collect()
88    }
89}
90
/// Report of what applying an [`IndexBuildPolicy`] and its overlays changed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AppliedIndexBuildPolicy {
    /// Identifiers of rules removed because the policy lists them directly.
    pub ignored_rules: Vec<String>,
    /// Keys of licenses removed because the policy lists them.
    pub ignored_licenses: Vec<String>,
    /// Identifiers of rules removed because they reference an ignored license.
    pub ignored_rules_due_to_licenses: Vec<String>,
    /// Identifiers of overlay rules that did not previously exist.
    pub added_rules: Vec<String>,
    /// Identifiers of existing rules that an overlay replaced.
    pub replaced_rules: Vec<String>,
    /// Keys of overlay licenses that did not previously exist.
    pub added_licenses: Vec<String>,
    /// Keys of existing licenses that an overlay replaced.
    pub replaced_licenses: Vec<String>,
}
101
102impl AppliedIndexBuildPolicy {
103    pub fn is_empty(&self) -> bool {
104        self.ignored_rules.is_empty()
105            && self.ignored_licenses.is_empty()
106            && self.ignored_rules_due_to_licenses.is_empty()
107            && self.added_rules.is_empty()
108            && self.replaced_rules.is_empty()
109            && self.added_licenses.is_empty()
110            && self.replaced_licenses.is_empty()
111    }
112
113    fn sort_and_dedup(&mut self) {
114        for values in [
115            &mut self.ignored_rules,
116            &mut self.ignored_licenses,
117            &mut self.ignored_rules_due_to_licenses,
118            &mut self.added_rules,
119            &mut self.replaced_rules,
120            &mut self.added_licenses,
121            &mut self.replaced_licenses,
122        ] {
123            values.sort();
124            values.dedup();
125        }
126    }
127
128    pub fn to_license_index_provenance(
129        &self,
130        source: &str,
131        dataset_fingerprint: String,
132    ) -> LicenseIndexProvenance {
133        LicenseIndexProvenance {
134            source: source.to_string(),
135            dataset_fingerprint,
136            ignored_rules: self.ignored_rules.clone(),
137            ignored_licenses: self.ignored_licenses.clone(),
138            ignored_rules_due_to_licenses: self.ignored_rules_due_to_licenses.clone(),
139            added_rules: self.added_rules.clone(),
140            replaced_rules: self.replaced_rules.clone(),
141            added_licenses: self.added_licenses.clone(),
142            replaced_licenses: self.replaced_licenses.clone(),
143        }
144    }
145}
146
147pub fn default_index_build_policy() -> &'static IndexBuildPolicy {
148    &DEFAULT_INDEX_BUILD_POLICY
149}
150
151pub fn apply_default_index_build_policy(
152    loaded_rules: Vec<LoadedRule>,
153    loaded_licenses: Vec<LoadedLicense>,
154) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
155    validate_bundled_overlay_reasons(
156        default_index_build_policy(),
157        BUNDLED_RULE_OVERLAY_FILES,
158        BUNDLED_LICENSE_OVERLAY_FILES,
159    )?;
160    let overlay_rules = load_default_overlay_rules()?;
161    let overlay_licenses = load_default_overlay_licenses()?;
162    let (loaded_rules, loaded_licenses, report) = apply_index_build_policy(
163        loaded_rules,
164        loaded_licenses,
165        default_index_build_policy(),
166        &overlay_rules,
167        &overlay_licenses,
168    )?;
169    Ok((loaded_rules, loaded_licenses, report))
170}
171
172pub fn apply_index_build_policy(
173    loaded_rules: Vec<LoadedRule>,
174    loaded_licenses: Vec<LoadedLicense>,
175    policy: &IndexBuildPolicy,
176    overlay_rules: &[LoadedRule],
177    overlay_licenses: &[LoadedLicense],
178) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
179    if policy.is_empty() && overlay_rules.is_empty() && overlay_licenses.is_empty() {
180        return Ok((
181            loaded_rules,
182            loaded_licenses,
183            AppliedIndexBuildPolicy::default(),
184        ));
185    }
186
187    let ignored_rule_identifiers = policy.ignored_rule_set();
188    let ignored_license_keys = policy.ignored_license_set();
189    let mut report = AppliedIndexBuildPolicy::default();
190
191    let mut filtered_licenses: Vec<_> = loaded_licenses
192        .into_iter()
193        .filter_map(|license| {
194            if ignored_license_keys.contains(&normalize_license_key(&license.key)) {
195                report.ignored_licenses.push(license.key.clone());
196                None
197            } else {
198                Some(license)
199            }
200        })
201        .collect();
202
203    let mut filtered_rules: Vec<_> = loaded_rules
204        .into_iter()
205        .filter_map(|rule| {
206            if ignored_rule_identifiers.contains(rule.identifier.as_str()) {
207                report.ignored_rules.push(rule.identifier.clone());
208                return None;
209            }
210
211            if rule_references_ignored_license(&rule, &ignored_license_keys) {
212                report
213                    .ignored_rules_due_to_licenses
214                    .push(rule.identifier.clone());
215                return None;
216            }
217
218            Some(rule)
219        })
220        .collect();
221
222    ensure_all_ignored_entries_exist(&ignored_rule_identifiers, &ignored_license_keys, &report)?;
223
224    apply_license_overlays(
225        &mut filtered_licenses,
226        overlay_licenses,
227        &ignored_license_keys,
228        &mut report,
229    )?;
230    apply_rule_overlays(
231        &mut filtered_rules,
232        overlay_rules,
233        &ignored_rule_identifiers,
234        &ignored_license_keys,
235        &filtered_licenses,
236        &mut report,
237    )?;
238
239    report.sort_and_dedup();
240
241    Ok((filtered_rules, filtered_licenses, report))
242}
243
244fn validate_bundled_overlay_reasons(
245    policy: &IndexBuildPolicy,
246    rule_overlays: &[BundledOverlayFile],
247    license_overlays: &[BundledOverlayFile],
248) -> Result<()> {
249    validate_overlay_reason_entries("rule", &policy.overlay_reasons.rules, rule_overlays)?;
250    validate_overlay_reason_entries(
251        "license",
252        &policy.overlay_reasons.licenses,
253        license_overlays,
254    )?;
255    Ok(())
256}
257
258fn validate_overlay_reason_entries(
259    overlay_kind: &str,
260    reasons: &BTreeMap<String, String>,
261    overlays: &[BundledOverlayFile],
262) -> Result<()> {
263    let overlay_identifiers = overlays
264        .iter()
265        .map(|overlay| overlay.identifier)
266        .collect::<HashSet<_>>();
267
268    let blank_reason_entries = overlays
269        .iter()
270        .filter_map(|overlay| match reasons.get(overlay.identifier) {
271            Some(reason) if reason.trim().is_empty() => Some(overlay.identifier.to_string()),
272            _ => None,
273        })
274        .collect::<Vec<_>>();
275    if !blank_reason_entries.is_empty() {
276        return Err(anyhow!(
277            "blank overlay reasons for bundled {} overlays: {}",
278            overlay_kind,
279            blank_reason_entries.join(", ")
280        ));
281    }
282
283    let missing_reason_entries = overlays
284        .iter()
285        .filter(|overlay| !reasons.contains_key(overlay.identifier))
286        .map(|overlay| overlay.identifier.to_string())
287        .collect::<Vec<_>>();
288    if !missing_reason_entries.is_empty() {
289        return Err(anyhow!(
290            "missing overlay reasons for bundled {} overlays: {}",
291            overlay_kind,
292            missing_reason_entries.join(", ")
293        ));
294    }
295
296    let stale_reason_entries = reasons
297        .keys()
298        .filter(|identifier| !overlay_identifiers.contains(identifier.as_str()))
299        .cloned()
300        .collect::<Vec<_>>();
301    if !stale_reason_entries.is_empty() {
302        return Err(anyhow!(
303            "overlay reasons reference missing bundled {} overlays: {}",
304            overlay_kind,
305            stale_reason_entries.join(", ")
306        ));
307    }
308
309    Ok(())
310}
311
312fn load_default_overlay_rules() -> Result<Vec<LoadedRule>> {
313    BUNDLED_RULE_OVERLAY_FILES
314        .iter()
315        .map(|overlay| {
316            parse_rule_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
317                anyhow!(
318                    "Failed to parse bundled overlay rule {} from {}: {}",
319                    overlay.identifier,
320                    DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
321                    error
322                )
323            })
324        })
325        .collect()
326}
327
328fn load_default_overlay_licenses() -> Result<Vec<LoadedLicense>> {
329    BUNDLED_LICENSE_OVERLAY_FILES
330        .iter()
331        .map(|overlay| {
332            parse_license_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
333                anyhow!(
334                    "Failed to parse bundled overlay license {} from {}: {}",
335                    overlay.identifier,
336                    DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
337                    error
338                )
339            })
340        })
341        .collect()
342}
343
344fn ensure_all_ignored_entries_exist(
345    ignored_rule_identifiers: &HashSet<String>,
346    ignored_license_keys: &HashSet<String>,
347    report: &AppliedIndexBuildPolicy,
348) -> Result<()> {
349    let applied_ignored_rules = report.ignored_rules.iter().cloned().collect::<HashSet<_>>();
350    let missing_rules = ignored_rule_identifiers
351        .difference(&applied_ignored_rules)
352        .cloned()
353        .collect::<Vec<_>>();
354
355    let applied_ignored_licenses = report
356        .ignored_licenses
357        .iter()
358        .map(|key| normalize_license_key(key))
359        .collect::<HashSet<_>>();
360    let missing_licenses = ignored_license_keys
361        .difference(&applied_ignored_licenses)
362        .cloned()
363        .collect::<Vec<_>>();
364
365    if missing_rules.is_empty() && missing_licenses.is_empty() {
366        Ok(())
367    } else {
368        let mut problems = Vec::new();
369        if !missing_rules.is_empty() {
370            problems.push(format!(
371                "ignored rule identifiers not found upstream: {}",
372                missing_rules.join(", ")
373            ));
374        }
375        if !missing_licenses.is_empty() {
376            problems.push(format!(
377                "ignored license keys not found upstream: {}",
378                missing_licenses.join(", ")
379            ));
380        }
381        Err(anyhow!(
382            "stale index-build policy entries detected; remove or update them: {}",
383            problems.join("; ")
384        ))
385    }
386}
387
388fn apply_license_overlays(
389    licenses: &mut Vec<LoadedLicense>,
390    overlays: &[LoadedLicense],
391    ignored_license_keys: &HashSet<String>,
392    report: &mut AppliedIndexBuildPolicy,
393) -> Result<()> {
394    let mut indices = build_license_index_map(licenses)?;
395    let mut seen_overlay_keys = HashSet::new();
396
397    for overlay in overlays {
398        let key = normalize_license_key(&overlay.key);
399
400        if !seen_overlay_keys.insert(key.clone()) {
401            return Err(anyhow!(
402                "bundled overlay contains duplicate license key '{}'",
403                overlay.key
404            ));
405        }
406
407        if ignored_license_keys.contains(&key) {
408            return Err(anyhow!(
409                "overlay license '{}' conflicts with ignored_licenses",
410                overlay.key
411            ));
412        }
413
414        if let Some(index) = indices.get(&key).copied() {
415            if licenses[index] == *overlay {
416                return Err(anyhow!(
417                    "overlay license '{}' is now identical to upstream; remove the local overlay file",
418                    overlay.key
419                ));
420            }
421            report.replaced_licenses.push(overlay.key.clone());
422            licenses[index] = overlay.clone();
423        } else {
424            report.added_licenses.push(overlay.key.clone());
425            licenses.push(overlay.clone());
426            indices.insert(key, licenses.len() - 1);
427        }
428    }
429
430    Ok(())
431}
432
433fn apply_rule_overlays(
434    rules: &mut Vec<LoadedRule>,
435    overlays: &[LoadedRule],
436    ignored_rule_identifiers: &HashSet<String>,
437    ignored_license_keys: &HashSet<String>,
438    licenses: &[LoadedLicense],
439    report: &mut AppliedIndexBuildPolicy,
440) -> Result<()> {
441    let mut indices = build_rule_index_map(rules)?;
442    let mut seen_overlay_identifiers = HashSet::new();
443    let available_license_keys = licenses
444        .iter()
445        .map(|license| normalize_license_key(&license.key))
446        .collect::<HashSet<_>>();
447
448    for overlay in overlays {
449        let identifier = overlay.identifier.clone();
450
451        if !seen_overlay_identifiers.insert(identifier.clone()) {
452            return Err(anyhow!(
453                "bundled overlay contains duplicate rule identifier '{}'",
454                identifier
455            ));
456        }
457
458        if ignored_rule_identifiers.contains(identifier.as_str()) {
459            return Err(anyhow!(
460                "overlay rule '{}' conflicts with ignored_rules",
461                identifier
462            ));
463        }
464
465        if rule_references_ignored_license(overlay, ignored_license_keys) {
466            return Err(anyhow!(
467                "overlay rule '{}' references an ignored license key",
468                identifier
469            ));
470        }
471
472        ensure_rule_references_known_licenses(overlay, &available_license_keys)?;
473
474        if let Some(index) = indices.get(identifier.as_str()).copied() {
475            if rules[index] == *overlay {
476                return Err(anyhow!(
477                    "overlay rule '{}' is now identical to upstream; remove the local overlay file",
478                    identifier
479                ));
480            }
481            report.replaced_rules.push(identifier.clone());
482            rules[index] = overlay.clone();
483        } else {
484            report.added_rules.push(identifier.clone());
485            rules.push(overlay.clone());
486            indices.insert(identifier, rules.len() - 1);
487        }
488    }
489
490    Ok(())
491}
492
493fn build_rule_index_map(rules: &[LoadedRule]) -> Result<HashMap<String, usize>> {
494    let mut indices = HashMap::new();
495    for (index, rule) in rules.iter().enumerate() {
496        if indices.insert(rule.identifier.clone(), index).is_some() {
497            return Err(anyhow!(
498                "cannot apply overlay because duplicate rule identifier '{}' is already present",
499                rule.identifier
500            ));
501        }
502    }
503    Ok(indices)
504}
505
506fn build_license_index_map(licenses: &[LoadedLicense]) -> Result<HashMap<String, usize>> {
507    let mut indices = HashMap::new();
508    for (index, license) in licenses.iter().enumerate() {
509        let normalized_key = normalize_license_key(&license.key);
510        if indices.insert(normalized_key, index).is_some() {
511            return Err(anyhow!(
512                "cannot apply overlay because duplicate license key '{}' is already present",
513                license.key
514            ));
515        }
516    }
517    Ok(indices)
518}
519
520fn ensure_rule_references_known_licenses(
521    rule: &LoadedRule,
522    available_license_keys: &HashSet<String>,
523) -> Result<()> {
524    if rule.rule_kind == RuleKind::None && rule.is_false_positive {
525        return Ok(());
526    }
527
528    let expression = parse_expression(&rule.license_expression).map_err(|error| {
529        anyhow!(
530            "overlay rule '{}' has an invalid license expression '{}': {}",
531            rule.identifier,
532            rule.license_expression,
533            error
534        )
535    })?;
536
537    let missing_keys = expression
538        .license_keys()
539        .into_iter()
540        .map(|key| normalize_license_key(&key))
541        .filter(|key| !available_license_keys.contains(key))
542        .collect::<Vec<_>>();
543
544    if missing_keys.is_empty() {
545        Ok(())
546    } else {
547        Err(anyhow!(
548            "overlay rule '{}' references unknown license keys: {}",
549            rule.identifier,
550            missing_keys.join(", ")
551        ))
552    }
553}
554
/// Canonical form of a license key for comparisons: surrounding whitespace
/// removed, then lowercased.
fn normalize_license_key(key: &str) -> String {
    let trimmed = key.trim();
    trimmed.to_lowercase()
}
558
559fn rule_references_ignored_license(
560    rule: &LoadedRule,
561    ignored_license_keys: &HashSet<String>,
562) -> bool {
563    if ignored_license_keys.is_empty() {
564        return false;
565    }
566
567    let normalized_expression = normalize_license_key(&rule.license_expression);
568    if ignored_license_keys.contains(&normalized_expression) {
569        return true;
570    }
571
572    if rule.rule_kind == RuleKind::None && rule.is_false_positive {
573        return false;
574    }
575
576    parse_expression(&rule.license_expression)
577        .map(|expression| {
578            expression
579                .license_keys()
580                .into_iter()
581                .map(|key| normalize_license_key(&key))
582                .any(|key| ignored_license_keys.contains(&key))
583        })
584        .unwrap_or(false)
585}
586
// Unit tests for policy application and for the bundled overlay reason checks.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `RuleKind::Text` rule with the given identifier and license
    /// expression; the text is derived from the identifier and relevance is 100.
    fn create_loaded_rule(identifier: &str, expression: &str) -> LoadedRule {
        LoadedRule {
            identifier: identifier.to_string(),
            license_expression: expression.to_string(),
            text: format!("{identifier} text"),
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            skip_for_required_phrase_generation: false,
            relevance: Some(100),
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            is_deprecated: false,
            replaced_by: vec![],
        }
    }

    /// Builds a license whose name, text, short name and SPDX key are all
    /// derived from `key`.
    fn create_loaded_license(key: &str) -> LoadedLicense {
        LoadedLicense {
            key: key.to_string(),
            short_name: Some(key.to_uppercase()),
            name: format!("{key} license"),
            language: Some("en".to_string()),
            spdx_license_key: Some(key.to_uppercase()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: None,
            homepage_url: None,
            text: format!("{key} text"),
            reference_urls: vec![],
            osi_license_key: None,
            text_urls: vec![],
            osi_url: None,
            faq_url: None,
            other_urls: vec![],
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    // A directly ignored rule, an ignored license, and a rule that merely
    // references the ignored license are all removed and reported separately.
    #[test]
    fn test_apply_index_build_policy_filters_direct_and_dependent_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["direct.RULE".to_string()],
            ignored_licenses: vec!["apache-2.0".to_string()],
            overlay_reasons: OverlayReasons::default(),
        };

        let rules = vec![
            create_loaded_rule("keep.RULE", "mit"),
            create_loaded_rule("direct.RULE", "mit"),
            create_loaded_rule("dependent.RULE", "mit OR apache-2.0"),
        ];
        let licenses = vec![
            create_loaded_license("mit"),
            create_loaded_license("apache-2.0"),
        ];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &[], &[])
                .expect("policy application");

        assert_eq!(
            filtered_rules
                .iter()
                .map(|rule| rule.identifier.as_str())
                .collect::<Vec<_>>(),
            vec!["keep.RULE"]
        );
        assert_eq!(
            filtered_licenses
                .iter()
                .map(|license| license.key.as_str())
                .collect::<Vec<_>>(),
            vec!["mit"]
        );
        assert_eq!(report.ignored_rules, vec!["direct.RULE".to_string()]);
        assert_eq!(report.ignored_licenses, vec!["apache-2.0".to_string()]);
        assert_eq!(
            report.ignored_rules_due_to_licenses,
            vec!["dependent.RULE".to_string()]
        );
    }

    // An ignore entry that matches no upstream rule must be rejected as stale.
    #[test]
    fn test_apply_index_build_policy_fails_for_stale_ignored_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["missing.RULE".to_string()],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons::default(),
        };

        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![create_loaded_license("mit")],
            &policy,
            &[],
            &[],
        )
        .expect_err("missing ignored rule should fail");

        assert!(
            error
                .to_string()
                .contains("ignored rule identifiers not found upstream: missing.RULE")
        );
    }

    // Overlay entries with no upstream counterpart are appended and reported as added.
    #[test]
    fn test_apply_index_build_policy_infers_add_from_new_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![create_loaded_rule("custom-rule.RULE", "mit")];
        let overlay_licenses = vec![create_loaded_license("custom-license")];
        let rules = vec![create_loaded_rule("keep.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert!(
            filtered_rules
                .iter()
                .any(|rule| rule.identifier == "custom-rule.RULE")
        );
        assert!(
            filtered_licenses
                .iter()
                .any(|license| license.key == "custom-license")
        );
        assert_eq!(report.added_rules, vec!["custom-rule.RULE".to_string()]);
        assert_eq!(report.added_licenses, vec!["custom-license".to_string()]);
    }

    // Overlay entries that collide with upstream entries replace them in place.
    #[test]
    fn test_apply_index_build_policy_infers_replace_from_colliding_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![LoadedRule {
            text: "updated rule text".to_string(),
            ..create_loaded_rule("replace.RULE", "mit")
        }];
        let overlay_licenses = vec![LoadedLicense {
            name: "MIT Updated".to_string(),
            text: "updated license text".to_string(),
            ..create_loaded_license("mit")
        }];
        let rules = vec![create_loaded_rule("replace.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert_eq!(filtered_rules[0].text, "updated rule text");
        assert_eq!(filtered_licenses[0].name, "MIT Updated");
        assert_eq!(report.replaced_rules, vec!["replace.RULE".to_string()]);
        assert_eq!(report.replaced_licenses, vec!["mit".to_string()]);
    }

    // A rule overlay that equals the upstream rule is an error, not a no-op.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_rule_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_rule = create_loaded_rule("replace.RULE", "mit");
        let error = apply_index_build_policy(
            vec![base_rule.clone()],
            vec![create_loaded_license("mit")],
            &policy,
            &[base_rule],
            &[],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay rule 'replace.RULE' is now identical to upstream")
        );
    }

    // A license overlay that equals the upstream license is an error, not a no-op.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_license_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_license = create_loaded_license("mit");
        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![base_license.clone()],
            &policy,
            &[],
            &[base_license],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay license 'mit' is now identical to upstream")
        );
    }

    // One non-blank reason per overlay, and no extra reasons, passes validation.
    #[test]
    fn test_validate_bundled_overlay_reasons_accepts_complete_reason_registry() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::from([(
                    "custom.RULE".to_string(),
                    "Matches a documented custom rule variant.".to_string(),
                )]),
                licenses: BTreeMap::from([(
                    "custom.LICENSE".to_string(),
                    "Carries a documented local license correction.".to_string(),
                )]),
            },
        };

        let rule_overlays = [BundledOverlayFile {
            identifier: "custom.RULE",
            contents: "---\nlicense_expression: mit\nis_license_text: yes\n---\ntext",
        }];
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "---\nkey: custom\nname: Custom\n---\ntext",
        }];

        validate_bundled_overlay_reasons(&policy, &rule_overlays, &license_overlays)
            .expect("reason validation should pass");
    }

    // Guards the checked-in policy file against drifting from the bundled overlays.
    #[test]
    fn test_validate_bundled_overlay_reasons_accepts_checked_in_default_registry() {
        validate_bundled_overlay_reasons(
            default_index_build_policy(),
            BUNDLED_RULE_OVERLAY_FILES,
            BUNDLED_LICENSE_OVERLAY_FILES,
        )
        .expect("checked-in overlay reason registry should stay in sync");
    }

    // A rule overlay without any reason entry is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_missing_rule_reason() {
        let policy = IndexBuildPolicy::default();
        let rule_overlays = [BundledOverlayFile {
            identifier: "custom.RULE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &rule_overlays, &[])
            .expect_err("missing rule reason should fail");

        assert!(
            error
                .to_string()
                .contains("missing overlay reasons for bundled rule overlays: custom.RULE")
        );
    }

    // A whitespace-only reason counts as blank and is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_blank_license_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::new(),
                licenses: BTreeMap::from([("custom.LICENSE".to_string(), "   ".to_string())]),
            },
        };
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &[], &license_overlays)
            .expect_err("blank license reason should fail");

        assert!(
            error
                .to_string()
                .contains("blank overlay reasons for bundled license overlays: custom.LICENSE")
        );
    }

    // A license overlay without any reason entry is rejected.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_missing_license_reason() {
        let policy = IndexBuildPolicy::default();
        let license_overlays = [BundledOverlayFile {
            identifier: "custom.LICENSE",
            contents: "",
        }];

        let error = validate_bundled_overlay_reasons(&policy, &[], &license_overlays)
            .expect_err("missing license reason should fail");

        assert!(
            error
                .to_string()
                .contains("missing overlay reasons for bundled license overlays: custom.LICENSE")
        );
    }

    // A rule reason whose overlay file no longer exists is rejected as stale.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_stale_rule_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::from([(
                    "removed.RULE".to_string(),
                    "Old rationale that should have been deleted.".to_string(),
                )]),
                licenses: BTreeMap::new(),
            },
        };

        let error = validate_bundled_overlay_reasons(&policy, &[], &[])
            .expect_err("stale rule reason should fail");

        assert!(
            error
                .to_string()
                .contains("overlay reasons reference missing bundled rule overlays: removed.RULE")
        );
    }

    // A license reason whose overlay file no longer exists is rejected as stale.
    #[test]
    fn test_validate_bundled_overlay_reasons_rejects_stale_license_reason() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec![],
            ignored_licenses: vec![],
            overlay_reasons: OverlayReasons {
                rules: BTreeMap::new(),
                licenses: BTreeMap::from([(
                    "removed.LICENSE".to_string(),
                    "Old rationale that should have been deleted.".to_string(),
                )]),
            },
        };

        let error = validate_bundled_overlay_reasons(&policy, &[], &[])
            .expect_err("stale license reason should fail");

        assert!(error.to_string().contains(
            "overlay reasons reference missing bundled license overlays: removed.LICENSE"
        ));
    }
}