1use std::collections::{HashMap, HashSet};
2use std::sync::LazyLock;
3
4use anyhow::{Result, anyhow};
5use serde::Deserialize;
6
7use crate::license_detection::expression::parse_expression;
8use crate::license_detection::models::{LoadedLicense, LoadedRule, RuleKind};
9use crate::license_detection::rules::{parse_license_str_to_loaded, parse_rule_str_to_loaded};
10use crate::models::LicenseIndexProvenance;
11
/// Path of the bundled index-build policy file, as reported in the panic
/// message emitted when the embedded policy text fails to parse.
pub const DEFAULT_INDEX_BUILD_POLICY_PATH: &str =
    "resources/license_detection/index_build_policy.toml";
/// Overlay root directory named in errors about bundled overlay files that
/// fail to parse.
pub const DEFAULT_INDEX_BUILD_OVERLAY_ROOT: &str = "resources/license_detection/overlay";
/// Source label for an embedded license index artifact.
// NOTE(review): not referenced in this file; presumably passed by callers as the
// `source` argument of `to_license_index_provenance` — confirm.
pub const EMBEDDED_LICENSE_INDEX_SOURCE: &str = "embedded-artifact";

/// Text of the bundled policy file, embedded into the binary at compile time.
const DEFAULT_INDEX_BUILD_POLICY_TEXT: &str =
    include_str!("../../resources/license_detection/index_build_policy.toml");
19
/// One overlay file whose identifier and contents are compiled into the binary.
pub(crate) struct BundledOverlayFile {
    /// Identifier passed as the first argument to the overlay parsers and
    /// echoed in their parse-error messages.
    pub identifier: &'static str,
    /// Raw file text passed to the overlay parsers.
    pub contents: &'static str,
}
24
/// Wraps the generated overlay manifest in its own module.
///
/// The included `bundled_license_overlays.rs` is read from `OUT_DIR`, so it is
/// presumably written by the crate's build script (not visible here — confirm).
/// It must define `BUNDLED_LICENSE_OVERLAY_FILES` and
/// `BUNDLED_RULE_OVERLAY_FILES`, which the parent module imports.
mod bundled_overlay_manifest {
    // Brought into scope for the generated code, which is expected to build
    // `BundledOverlayFile` values.
    use super::BundledOverlayFile;

    include!(concat!(env!("OUT_DIR"), "/bundled_license_overlays.rs"));
}
30
31use bundled_overlay_manifest::{BUNDLED_LICENSE_OVERLAY_FILES, BUNDLED_RULE_OVERLAY_FILES};
32
33static DEFAULT_INDEX_BUILD_POLICY: LazyLock<IndexBuildPolicy> = LazyLock::new(|| {
34 toml::from_str(DEFAULT_INDEX_BUILD_POLICY_TEXT).unwrap_or_else(|error| {
35 panic!(
36 "Failed to parse bundled license index build policy at {}: {}",
37 DEFAULT_INDEX_BUILD_POLICY_PATH, error
38 )
39 })
40});
41
/// Declarative policy listing the upstream rules and licenses to drop while
/// building the license index.
///
/// Both lists are optional when deserializing; a missing list becomes empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct IndexBuildPolicy {
    /// Rule identifiers to drop. Entries are trimmed and blank entries are
    /// skipped before being compared exactly against rule identifiers.
    #[serde(default)]
    pub ignored_rules: Vec<String>,
    /// License keys to drop. Entries are trimmed and lowercased before
    /// comparison; blank entries are skipped.
    #[serde(default)]
    pub ignored_licenses: Vec<String>,
}
49
50impl IndexBuildPolicy {
51 pub fn is_empty(&self) -> bool {
52 self.ignored_rules.is_empty() && self.ignored_licenses.is_empty()
53 }
54
55 fn ignored_rule_set(&self) -> HashSet<String> {
56 self.ignored_rules
57 .iter()
58 .map(|identifier| identifier.trim())
59 .filter(|identifier| !identifier.is_empty())
60 .map(ToOwned::to_owned)
61 .collect()
62 }
63
64 fn ignored_license_set(&self) -> HashSet<String> {
65 self.ignored_licenses
66 .iter()
67 .map(|key| normalize_license_key(key))
68 .filter(|key| !key.is_empty())
69 .collect()
70 }
71}
72
/// Report of what `apply_index_build_policy` actually changed.
///
/// Every list is sorted and de-duplicated before the report is returned.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AppliedIndexBuildPolicy {
    /// Identifiers of upstream rules dropped because the policy lists them.
    pub ignored_rules: Vec<String>,
    /// Keys (as spelled upstream) of licenses dropped because the policy lists them.
    pub ignored_licenses: Vec<String>,
    /// Identifiers of upstream rules dropped because their license expression
    /// references an ignored license.
    pub ignored_rules_due_to_licenses: Vec<String>,
    /// Identifiers of overlay rules that had no upstream counterpart.
    pub added_rules: Vec<String>,
    /// Identifiers of overlay rules that replaced an upstream rule.
    pub replaced_rules: Vec<String>,
    /// Keys of overlay licenses that had no upstream counterpart.
    pub added_licenses: Vec<String>,
    /// Keys of overlay licenses that replaced an upstream license.
    pub replaced_licenses: Vec<String>,
}
83
84impl AppliedIndexBuildPolicy {
85 pub fn is_empty(&self) -> bool {
86 self.ignored_rules.is_empty()
87 && self.ignored_licenses.is_empty()
88 && self.ignored_rules_due_to_licenses.is_empty()
89 && self.added_rules.is_empty()
90 && self.replaced_rules.is_empty()
91 && self.added_licenses.is_empty()
92 && self.replaced_licenses.is_empty()
93 }
94
95 fn sort_and_dedup(&mut self) {
96 for values in [
97 &mut self.ignored_rules,
98 &mut self.ignored_licenses,
99 &mut self.ignored_rules_due_to_licenses,
100 &mut self.added_rules,
101 &mut self.replaced_rules,
102 &mut self.added_licenses,
103 &mut self.replaced_licenses,
104 ] {
105 values.sort();
106 values.dedup();
107 }
108 }
109
110 pub fn to_license_index_provenance(
111 &self,
112 source: &str,
113 dataset_fingerprint: String,
114 ) -> LicenseIndexProvenance {
115 LicenseIndexProvenance {
116 source: source.to_string(),
117 dataset_fingerprint,
118 ignored_rules: self.ignored_rules.clone(),
119 ignored_licenses: self.ignored_licenses.clone(),
120 ignored_rules_due_to_licenses: self.ignored_rules_due_to_licenses.clone(),
121 added_rules: self.added_rules.clone(),
122 replaced_rules: self.replaced_rules.clone(),
123 added_licenses: self.added_licenses.clone(),
124 replaced_licenses: self.replaced_licenses.clone(),
125 }
126 }
127}
128
129pub fn default_index_build_policy() -> &'static IndexBuildPolicy {
130 &DEFAULT_INDEX_BUILD_POLICY
131}
132
133pub fn apply_default_index_build_policy(
134 loaded_rules: Vec<LoadedRule>,
135 loaded_licenses: Vec<LoadedLicense>,
136) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
137 let overlay_rules = load_default_overlay_rules()?;
138 let overlay_licenses = load_default_overlay_licenses()?;
139 let (loaded_rules, loaded_licenses, report) = apply_index_build_policy(
140 loaded_rules,
141 loaded_licenses,
142 default_index_build_policy(),
143 &overlay_rules,
144 &overlay_licenses,
145 )?;
146 Ok((loaded_rules, loaded_licenses, report))
147}
148
149pub fn apply_index_build_policy(
150 loaded_rules: Vec<LoadedRule>,
151 loaded_licenses: Vec<LoadedLicense>,
152 policy: &IndexBuildPolicy,
153 overlay_rules: &[LoadedRule],
154 overlay_licenses: &[LoadedLicense],
155) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
156 if policy.is_empty() && overlay_rules.is_empty() && overlay_licenses.is_empty() {
157 return Ok((
158 loaded_rules,
159 loaded_licenses,
160 AppliedIndexBuildPolicy::default(),
161 ));
162 }
163
164 let ignored_rule_identifiers = policy.ignored_rule_set();
165 let ignored_license_keys = policy.ignored_license_set();
166 let mut report = AppliedIndexBuildPolicy::default();
167
168 let mut filtered_licenses: Vec<_> = loaded_licenses
169 .into_iter()
170 .filter_map(|license| {
171 if ignored_license_keys.contains(&normalize_license_key(&license.key)) {
172 report.ignored_licenses.push(license.key.clone());
173 None
174 } else {
175 Some(license)
176 }
177 })
178 .collect();
179
180 let mut filtered_rules: Vec<_> = loaded_rules
181 .into_iter()
182 .filter_map(|rule| {
183 if ignored_rule_identifiers.contains(rule.identifier.as_str()) {
184 report.ignored_rules.push(rule.identifier.clone());
185 return None;
186 }
187
188 if rule_references_ignored_license(&rule, &ignored_license_keys) {
189 report
190 .ignored_rules_due_to_licenses
191 .push(rule.identifier.clone());
192 return None;
193 }
194
195 Some(rule)
196 })
197 .collect();
198
199 ensure_all_ignored_entries_exist(&ignored_rule_identifiers, &ignored_license_keys, &report)?;
200
201 apply_license_overlays(
202 &mut filtered_licenses,
203 overlay_licenses,
204 &ignored_license_keys,
205 &mut report,
206 )?;
207 apply_rule_overlays(
208 &mut filtered_rules,
209 overlay_rules,
210 &ignored_rule_identifiers,
211 &ignored_license_keys,
212 &filtered_licenses,
213 &mut report,
214 )?;
215
216 report.sort_and_dedup();
217
218 Ok((filtered_rules, filtered_licenses, report))
219}
220
221fn load_default_overlay_rules() -> Result<Vec<LoadedRule>> {
222 BUNDLED_RULE_OVERLAY_FILES
223 .iter()
224 .map(|overlay| {
225 parse_rule_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
226 anyhow!(
227 "Failed to parse bundled overlay rule {} from {}: {}",
228 overlay.identifier,
229 DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
230 error
231 )
232 })
233 })
234 .collect()
235}
236
237fn load_default_overlay_licenses() -> Result<Vec<LoadedLicense>> {
238 BUNDLED_LICENSE_OVERLAY_FILES
239 .iter()
240 .map(|overlay| {
241 parse_license_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
242 anyhow!(
243 "Failed to parse bundled overlay license {} from {}: {}",
244 overlay.identifier,
245 DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
246 error
247 )
248 })
249 })
250 .collect()
251}
252
253fn ensure_all_ignored_entries_exist(
254 ignored_rule_identifiers: &HashSet<String>,
255 ignored_license_keys: &HashSet<String>,
256 report: &AppliedIndexBuildPolicy,
257) -> Result<()> {
258 let applied_ignored_rules = report.ignored_rules.iter().cloned().collect::<HashSet<_>>();
259 let missing_rules = ignored_rule_identifiers
260 .difference(&applied_ignored_rules)
261 .cloned()
262 .collect::<Vec<_>>();
263
264 let applied_ignored_licenses = report
265 .ignored_licenses
266 .iter()
267 .map(|key| normalize_license_key(key))
268 .collect::<HashSet<_>>();
269 let missing_licenses = ignored_license_keys
270 .difference(&applied_ignored_licenses)
271 .cloned()
272 .collect::<Vec<_>>();
273
274 if missing_rules.is_empty() && missing_licenses.is_empty() {
275 Ok(())
276 } else {
277 let mut problems = Vec::new();
278 if !missing_rules.is_empty() {
279 problems.push(format!(
280 "ignored rule identifiers not found upstream: {}",
281 missing_rules.join(", ")
282 ));
283 }
284 if !missing_licenses.is_empty() {
285 problems.push(format!(
286 "ignored license keys not found upstream: {}",
287 missing_licenses.join(", ")
288 ));
289 }
290 Err(anyhow!(
291 "stale index-build policy entries detected; remove or update them: {}",
292 problems.join("; ")
293 ))
294 }
295}
296
297fn apply_license_overlays(
298 licenses: &mut Vec<LoadedLicense>,
299 overlays: &[LoadedLicense],
300 ignored_license_keys: &HashSet<String>,
301 report: &mut AppliedIndexBuildPolicy,
302) -> Result<()> {
303 let mut indices = build_license_index_map(licenses)?;
304 let mut seen_overlay_keys = HashSet::new();
305
306 for overlay in overlays {
307 let key = normalize_license_key(&overlay.key);
308
309 if !seen_overlay_keys.insert(key.clone()) {
310 return Err(anyhow!(
311 "bundled overlay contains duplicate license key '{}'",
312 overlay.key
313 ));
314 }
315
316 if ignored_license_keys.contains(&key) {
317 return Err(anyhow!(
318 "overlay license '{}' conflicts with ignored_licenses",
319 overlay.key
320 ));
321 }
322
323 if let Some(index) = indices.get(&key).copied() {
324 if licenses[index] == *overlay {
325 return Err(anyhow!(
326 "overlay license '{}' is now identical to upstream; remove the local overlay file",
327 overlay.key
328 ));
329 }
330 report.replaced_licenses.push(overlay.key.clone());
331 licenses[index] = overlay.clone();
332 } else {
333 report.added_licenses.push(overlay.key.clone());
334 licenses.push(overlay.clone());
335 indices.insert(key, licenses.len() - 1);
336 }
337 }
338
339 Ok(())
340}
341
342fn apply_rule_overlays(
343 rules: &mut Vec<LoadedRule>,
344 overlays: &[LoadedRule],
345 ignored_rule_identifiers: &HashSet<String>,
346 ignored_license_keys: &HashSet<String>,
347 licenses: &[LoadedLicense],
348 report: &mut AppliedIndexBuildPolicy,
349) -> Result<()> {
350 let mut indices = build_rule_index_map(rules)?;
351 let mut seen_overlay_identifiers = HashSet::new();
352 let available_license_keys = licenses
353 .iter()
354 .map(|license| normalize_license_key(&license.key))
355 .collect::<HashSet<_>>();
356
357 for overlay in overlays {
358 let identifier = overlay.identifier.clone();
359
360 if !seen_overlay_identifiers.insert(identifier.clone()) {
361 return Err(anyhow!(
362 "bundled overlay contains duplicate rule identifier '{}'",
363 identifier
364 ));
365 }
366
367 if ignored_rule_identifiers.contains(identifier.as_str()) {
368 return Err(anyhow!(
369 "overlay rule '{}' conflicts with ignored_rules",
370 identifier
371 ));
372 }
373
374 if rule_references_ignored_license(overlay, ignored_license_keys) {
375 return Err(anyhow!(
376 "overlay rule '{}' references an ignored license key",
377 identifier
378 ));
379 }
380
381 ensure_rule_references_known_licenses(overlay, &available_license_keys)?;
382
383 if let Some(index) = indices.get(identifier.as_str()).copied() {
384 if rules[index] == *overlay {
385 return Err(anyhow!(
386 "overlay rule '{}' is now identical to upstream; remove the local overlay file",
387 identifier
388 ));
389 }
390 report.replaced_rules.push(identifier.clone());
391 rules[index] = overlay.clone();
392 } else {
393 report.added_rules.push(identifier.clone());
394 rules.push(overlay.clone());
395 indices.insert(identifier, rules.len() - 1);
396 }
397 }
398
399 Ok(())
400}
401
402fn build_rule_index_map(rules: &[LoadedRule]) -> Result<HashMap<String, usize>> {
403 let mut indices = HashMap::new();
404 for (index, rule) in rules.iter().enumerate() {
405 if indices.insert(rule.identifier.clone(), index).is_some() {
406 return Err(anyhow!(
407 "cannot apply overlay because duplicate rule identifier '{}' is already present",
408 rule.identifier
409 ));
410 }
411 }
412 Ok(indices)
413}
414
415fn build_license_index_map(licenses: &[LoadedLicense]) -> Result<HashMap<String, usize>> {
416 let mut indices = HashMap::new();
417 for (index, license) in licenses.iter().enumerate() {
418 let normalized_key = normalize_license_key(&license.key);
419 if indices.insert(normalized_key, index).is_some() {
420 return Err(anyhow!(
421 "cannot apply overlay because duplicate license key '{}' is already present",
422 license.key
423 ));
424 }
425 }
426 Ok(indices)
427}
428
429fn ensure_rule_references_known_licenses(
430 rule: &LoadedRule,
431 available_license_keys: &HashSet<String>,
432) -> Result<()> {
433 if rule.rule_kind == RuleKind::None && rule.is_false_positive {
434 return Ok(());
435 }
436
437 let expression = parse_expression(&rule.license_expression).map_err(|error| {
438 anyhow!(
439 "overlay rule '{}' has an invalid license expression '{}': {}",
440 rule.identifier,
441 rule.license_expression,
442 error
443 )
444 })?;
445
446 let missing_keys = expression
447 .license_keys()
448 .into_iter()
449 .map(|key| normalize_license_key(&key))
450 .filter(|key| !available_license_keys.contains(key))
451 .collect::<Vec<_>>();
452
453 if missing_keys.is_empty() {
454 Ok(())
455 } else {
456 Err(anyhow!(
457 "overlay rule '{}' references unknown license keys: {}",
458 rule.identifier,
459 missing_keys.join(", ")
460 ))
461 }
462}
463
/// Canonical form of a license key: surrounding whitespace removed, then
/// lowercased.
fn normalize_license_key(key: &str) -> String {
    let trimmed = key.trim();
    trimmed.to_lowercase()
}
467
468fn rule_references_ignored_license(
469 rule: &LoadedRule,
470 ignored_license_keys: &HashSet<String>,
471) -> bool {
472 if ignored_license_keys.is_empty() {
473 return false;
474 }
475
476 let normalized_expression = normalize_license_key(&rule.license_expression);
477 if ignored_license_keys.contains(&normalized_expression) {
478 return true;
479 }
480
481 if rule.rule_kind == RuleKind::None && rule.is_false_positive {
482 return false;
483 }
484
485 parse_expression(&rule.license_expression)
486 .map(|expression| {
487 expression
488 .license_keys()
489 .into_iter()
490 .map(|key| normalize_license_key(&key))
491 .any(|key| ignored_license_keys.contains(&key))
492 })
493 .unwrap_or(false)
494}
495
// Unit tests for policy filtering, stale-entry detection, and overlay handling.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a text rule named `identifier` with the given license
    /// expression; the rule text is derived from the identifier and every
    /// optional field is left unset.
    fn create_loaded_rule(identifier: &str, expression: &str) -> LoadedRule {
        LoadedRule {
            identifier: identifier.to_string(),
            license_expression: expression.to_string(),
            text: format!("{identifier} text"),
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            skip_for_required_phrase_generation: false,
            relevance: Some(100),
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            is_deprecated: false,
            replaced_by: vec![],
        }
    }

    /// Builds a license keyed `key`; its name, text, short name, and SPDX key
    /// are derived from the key.
    fn create_loaded_license(key: &str) -> LoadedLicense {
        LoadedLicense {
            key: key.to_string(),
            short_name: Some(key.to_uppercase()),
            name: format!("{key} license"),
            language: Some("en".to_string()),
            spdx_license_key: Some(key.to_uppercase()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: None,
            homepage_url: None,
            text: format!("{key} text"),
            reference_urls: vec![],
            osi_license_key: None,
            text_urls: vec![],
            osi_url: None,
            faq_url: None,
            other_urls: vec![],
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    /// A rule ignored by identifier and a rule referencing an ignored license
    /// are both dropped, and each is reported under its own reason.
    #[test]
    fn test_apply_index_build_policy_filters_direct_and_dependent_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["direct.RULE".to_string()],
            ignored_licenses: vec!["apache-2.0".to_string()],
        };

        let rules = vec![
            create_loaded_rule("keep.RULE", "mit"),
            create_loaded_rule("direct.RULE", "mit"),
            // Dropped only because its expression mentions the ignored license.
            create_loaded_rule("dependent.RULE", "mit OR apache-2.0"),
        ];
        let licenses = vec![
            create_loaded_license("mit"),
            create_loaded_license("apache-2.0"),
        ];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &[], &[])
                .expect("policy application");

        assert_eq!(
            filtered_rules
                .iter()
                .map(|rule| rule.identifier.as_str())
                .collect::<Vec<_>>(),
            vec!["keep.RULE"]
        );
        assert_eq!(
            filtered_licenses
                .iter()
                .map(|license| license.key.as_str())
                .collect::<Vec<_>>(),
            vec!["mit"]
        );
        assert_eq!(report.ignored_rules, vec!["direct.RULE".to_string()]);
        assert_eq!(report.ignored_licenses, vec!["apache-2.0".to_string()]);
        assert_eq!(
            report.ignored_rules_due_to_licenses,
            vec!["dependent.RULE".to_string()]
        );
    }

    /// An ignored rule identifier that matches nothing upstream is an error.
    #[test]
    fn test_apply_index_build_policy_fails_for_stale_ignored_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["missing.RULE".to_string()],
            ignored_licenses: vec![],
        };

        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![create_loaded_license("mit")],
            &policy,
            &[],
            &[],
        )
        .expect_err("missing ignored rule should fail");

        assert!(
            error
                .to_string()
                .contains("ignored rule identifiers not found upstream: missing.RULE")
        );
    }

    /// Overlay entries with no upstream counterpart are appended and reported
    /// as added.
    #[test]
    fn test_apply_index_build_policy_infers_add_from_new_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![create_loaded_rule("custom-rule.RULE", "mit")];
        let overlay_licenses = vec![create_loaded_license("custom-license")];
        let rules = vec![create_loaded_rule("keep.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert!(
            filtered_rules
                .iter()
                .any(|rule| rule.identifier == "custom-rule.RULE")
        );
        assert!(
            filtered_licenses
                .iter()
                .any(|license| license.key == "custom-license")
        );
        assert_eq!(report.added_rules, vec!["custom-rule.RULE".to_string()]);
        assert_eq!(report.added_licenses, vec!["custom-license".to_string()]);
    }

    /// Overlay entries that collide with an upstream identifier or key replace
    /// the upstream entry in place and are reported as replaced.
    #[test]
    fn test_apply_index_build_policy_infers_replace_from_colliding_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        // Same identifier/key as upstream but different content, so the
        // overlays are not rejected as redundant.
        let overlay_rules = vec![LoadedRule {
            text: "updated rule text".to_string(),
            ..create_loaded_rule("replace.RULE", "mit")
        }];
        let overlay_licenses = vec![LoadedLicense {
            name: "MIT Updated".to_string(),
            text: "updated license text".to_string(),
            ..create_loaded_license("mit")
        }];
        let rules = vec![create_loaded_rule("replace.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert_eq!(filtered_rules[0].text, "updated rule text");
        assert_eq!(filtered_licenses[0].name, "MIT Updated");
        assert_eq!(report.replaced_rules, vec!["replace.RULE".to_string()]);
        assert_eq!(report.replaced_licenses, vec!["mit".to_string()]);
    }

    /// A rule overlay identical to its upstream rule is rejected.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_rule_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_rule = create_loaded_rule("replace.RULE", "mit");
        let error = apply_index_build_policy(
            vec![base_rule.clone()],
            vec![create_loaded_license("mit")],
            &policy,
            &[base_rule],
            &[],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay rule 'replace.RULE' is now identical to upstream")
        );
    }

    /// A license overlay identical to its upstream license is rejected.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_license_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_license = create_loaded_license("mit");
        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![base_license.clone()],
            &policy,
            &[],
            &[base_license],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay license 'mit' is now identical to upstream")
        );
    }
}