1use std::collections::{HashMap, HashSet};
5use std::sync::LazyLock;
6
7use anyhow::{Result, anyhow};
8use serde::Deserialize;
9
10use crate::license_detection::expression::parse_expression;
11use crate::license_detection::models::{LoadedLicense, LoadedRule, RuleKind};
12use crate::license_detection::rules::{parse_license_str_to_loaded, parse_rule_str_to_loaded};
13use crate::models::LicenseIndexProvenance;
14
/// Relative path of the bundled index build policy file.
///
/// Used in the panic message when the bundled policy fails to parse.
pub const DEFAULT_INDEX_BUILD_POLICY_PATH: &str =
    "resources/license_detection/index_build_policy.toml";
/// Directory named in error messages when a bundled overlay file fails to parse.
pub const DEFAULT_INDEX_BUILD_OVERLAY_ROOT: &str = "resources/license_detection/overlay";
/// Source label for a license index.
///
/// NOTE(review): not referenced in this file; presumably passed as the
/// `source` of a provenance record by callers — confirm.
pub const EMBEDDED_LICENSE_INDEX_SOURCE: &str = "embedded-artifact";

/// Contents of the policy file, embedded into the binary at compile time.
const DEFAULT_INDEX_BUILD_POLICY_TEXT: &str =
    include_str!("../../resources/license_detection/index_build_policy.toml");
22
/// One overlay file embedded into the binary.
///
/// Values of this type are presumably defined by the build-generated manifest
/// included in `bundled_overlay_manifest` — confirm against the build script.
pub(crate) struct BundledOverlayFile {
    /// Identifier handed to the rule/license parser and quoted in parse errors.
    pub identifier: &'static str,
    /// Raw file contents handed to the parser.
    pub contents: &'static str,
}
27
/// Wraps the build-generated overlay manifest in its own module.
///
/// The included file lives in `OUT_DIR`, so it is presumably emitted by the
/// crate's build script (not visible here). It has to define
/// `BUNDLED_LICENSE_OVERLAY_FILES` and `BUNDLED_RULE_OVERLAY_FILES`, which are
/// imported right after this module.
mod bundled_overlay_manifest {
    // Brought into scope for the generated code — TODO confirm it names this
    // type unqualified.
    use super::BundledOverlayFile;

    include!(concat!(env!("OUT_DIR"), "/bundled_license_overlays.rs"));
}
33
34use bundled_overlay_manifest::{BUNDLED_LICENSE_OVERLAY_FILES, BUNDLED_RULE_OVERLAY_FILES};
35
36static DEFAULT_INDEX_BUILD_POLICY: LazyLock<IndexBuildPolicy> = LazyLock::new(|| {
37 toml::from_str(DEFAULT_INDEX_BUILD_POLICY_TEXT).unwrap_or_else(|error| {
38 panic!(
39 "Failed to parse bundled license index build policy at {}: {}",
40 DEFAULT_INDEX_BUILD_POLICY_PATH, error
41 )
42 })
43});
44
/// Declarative policy listing upstream entries to drop while building the
/// license index.
///
/// Deserialized from TOML; either list may be omitted, in which case it
/// defaults to empty.
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize)]
pub struct IndexBuildPolicy {
    /// Rule identifiers to exclude (surrounding whitespace is trimmed before
    /// matching).
    #[serde(default)]
    pub ignored_rules: Vec<String>,
    /// License keys to exclude (trimmed and lower-cased before matching).
    #[serde(default)]
    pub ignored_licenses: Vec<String>,
}
52
53impl IndexBuildPolicy {
54 pub fn is_empty(&self) -> bool {
55 self.ignored_rules.is_empty() && self.ignored_licenses.is_empty()
56 }
57
58 fn ignored_rule_set(&self) -> HashSet<String> {
59 self.ignored_rules
60 .iter()
61 .map(|identifier| identifier.trim())
62 .filter(|identifier| !identifier.is_empty())
63 .map(ToOwned::to_owned)
64 .collect()
65 }
66
67 fn ignored_license_set(&self) -> HashSet<String> {
68 self.ignored_licenses
69 .iter()
70 .map(|key| normalize_license_key(key))
71 .filter(|key| !key.is_empty())
72 .collect()
73 }
74}
75
/// Report of what applying a policy and its overlays actually changed.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AppliedIndexBuildPolicy {
    /// Identifiers of upstream rules dropped because the policy lists them.
    pub ignored_rules: Vec<String>,
    /// Keys of upstream licenses dropped because the policy lists them.
    pub ignored_licenses: Vec<String>,
    /// Identifiers of upstream rules dropped because their license expression
    /// references an ignored license.
    pub ignored_rules_due_to_licenses: Vec<String>,
    /// Identifiers of overlay rules that had no upstream counterpart.
    pub added_rules: Vec<String>,
    /// Identifiers of upstream rules replaced by an overlay rule.
    pub replaced_rules: Vec<String>,
    /// Keys of overlay licenses that had no upstream counterpart.
    pub added_licenses: Vec<String>,
    /// Keys of upstream licenses replaced by an overlay license.
    pub replaced_licenses: Vec<String>,
}
86
87impl AppliedIndexBuildPolicy {
88 pub fn is_empty(&self) -> bool {
89 self.ignored_rules.is_empty()
90 && self.ignored_licenses.is_empty()
91 && self.ignored_rules_due_to_licenses.is_empty()
92 && self.added_rules.is_empty()
93 && self.replaced_rules.is_empty()
94 && self.added_licenses.is_empty()
95 && self.replaced_licenses.is_empty()
96 }
97
98 fn sort_and_dedup(&mut self) {
99 for values in [
100 &mut self.ignored_rules,
101 &mut self.ignored_licenses,
102 &mut self.ignored_rules_due_to_licenses,
103 &mut self.added_rules,
104 &mut self.replaced_rules,
105 &mut self.added_licenses,
106 &mut self.replaced_licenses,
107 ] {
108 values.sort();
109 values.dedup();
110 }
111 }
112
113 pub fn to_license_index_provenance(
114 &self,
115 source: &str,
116 dataset_fingerprint: String,
117 ) -> LicenseIndexProvenance {
118 LicenseIndexProvenance {
119 source: source.to_string(),
120 dataset_fingerprint,
121 ignored_rules: self.ignored_rules.clone(),
122 ignored_licenses: self.ignored_licenses.clone(),
123 ignored_rules_due_to_licenses: self.ignored_rules_due_to_licenses.clone(),
124 added_rules: self.added_rules.clone(),
125 replaced_rules: self.replaced_rules.clone(),
126 added_licenses: self.added_licenses.clone(),
127 replaced_licenses: self.replaced_licenses.clone(),
128 }
129 }
130}
131
132pub fn default_index_build_policy() -> &'static IndexBuildPolicy {
133 &DEFAULT_INDEX_BUILD_POLICY
134}
135
136pub fn apply_default_index_build_policy(
137 loaded_rules: Vec<LoadedRule>,
138 loaded_licenses: Vec<LoadedLicense>,
139) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
140 let overlay_rules = load_default_overlay_rules()?;
141 let overlay_licenses = load_default_overlay_licenses()?;
142 let (loaded_rules, loaded_licenses, report) = apply_index_build_policy(
143 loaded_rules,
144 loaded_licenses,
145 default_index_build_policy(),
146 &overlay_rules,
147 &overlay_licenses,
148 )?;
149 Ok((loaded_rules, loaded_licenses, report))
150}
151
152pub fn apply_index_build_policy(
153 loaded_rules: Vec<LoadedRule>,
154 loaded_licenses: Vec<LoadedLicense>,
155 policy: &IndexBuildPolicy,
156 overlay_rules: &[LoadedRule],
157 overlay_licenses: &[LoadedLicense],
158) -> Result<(Vec<LoadedRule>, Vec<LoadedLicense>, AppliedIndexBuildPolicy)> {
159 if policy.is_empty() && overlay_rules.is_empty() && overlay_licenses.is_empty() {
160 return Ok((
161 loaded_rules,
162 loaded_licenses,
163 AppliedIndexBuildPolicy::default(),
164 ));
165 }
166
167 let ignored_rule_identifiers = policy.ignored_rule_set();
168 let ignored_license_keys = policy.ignored_license_set();
169 let mut report = AppliedIndexBuildPolicy::default();
170
171 let mut filtered_licenses: Vec<_> = loaded_licenses
172 .into_iter()
173 .filter_map(|license| {
174 if ignored_license_keys.contains(&normalize_license_key(&license.key)) {
175 report.ignored_licenses.push(license.key.clone());
176 None
177 } else {
178 Some(license)
179 }
180 })
181 .collect();
182
183 let mut filtered_rules: Vec<_> = loaded_rules
184 .into_iter()
185 .filter_map(|rule| {
186 if ignored_rule_identifiers.contains(rule.identifier.as_str()) {
187 report.ignored_rules.push(rule.identifier.clone());
188 return None;
189 }
190
191 if rule_references_ignored_license(&rule, &ignored_license_keys) {
192 report
193 .ignored_rules_due_to_licenses
194 .push(rule.identifier.clone());
195 return None;
196 }
197
198 Some(rule)
199 })
200 .collect();
201
202 ensure_all_ignored_entries_exist(&ignored_rule_identifiers, &ignored_license_keys, &report)?;
203
204 apply_license_overlays(
205 &mut filtered_licenses,
206 overlay_licenses,
207 &ignored_license_keys,
208 &mut report,
209 )?;
210 apply_rule_overlays(
211 &mut filtered_rules,
212 overlay_rules,
213 &ignored_rule_identifiers,
214 &ignored_license_keys,
215 &filtered_licenses,
216 &mut report,
217 )?;
218
219 report.sort_and_dedup();
220
221 Ok((filtered_rules, filtered_licenses, report))
222}
223
224fn load_default_overlay_rules() -> Result<Vec<LoadedRule>> {
225 BUNDLED_RULE_OVERLAY_FILES
226 .iter()
227 .map(|overlay| {
228 parse_rule_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
229 anyhow!(
230 "Failed to parse bundled overlay rule {} from {}: {}",
231 overlay.identifier,
232 DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
233 error
234 )
235 })
236 })
237 .collect()
238}
239
240fn load_default_overlay_licenses() -> Result<Vec<LoadedLicense>> {
241 BUNDLED_LICENSE_OVERLAY_FILES
242 .iter()
243 .map(|overlay| {
244 parse_license_str_to_loaded(overlay.identifier, overlay.contents).map_err(|error| {
245 anyhow!(
246 "Failed to parse bundled overlay license {} from {}: {}",
247 overlay.identifier,
248 DEFAULT_INDEX_BUILD_OVERLAY_ROOT,
249 error
250 )
251 })
252 })
253 .collect()
254}
255
256fn ensure_all_ignored_entries_exist(
257 ignored_rule_identifiers: &HashSet<String>,
258 ignored_license_keys: &HashSet<String>,
259 report: &AppliedIndexBuildPolicy,
260) -> Result<()> {
261 let applied_ignored_rules = report.ignored_rules.iter().cloned().collect::<HashSet<_>>();
262 let missing_rules = ignored_rule_identifiers
263 .difference(&applied_ignored_rules)
264 .cloned()
265 .collect::<Vec<_>>();
266
267 let applied_ignored_licenses = report
268 .ignored_licenses
269 .iter()
270 .map(|key| normalize_license_key(key))
271 .collect::<HashSet<_>>();
272 let missing_licenses = ignored_license_keys
273 .difference(&applied_ignored_licenses)
274 .cloned()
275 .collect::<Vec<_>>();
276
277 if missing_rules.is_empty() && missing_licenses.is_empty() {
278 Ok(())
279 } else {
280 let mut problems = Vec::new();
281 if !missing_rules.is_empty() {
282 problems.push(format!(
283 "ignored rule identifiers not found upstream: {}",
284 missing_rules.join(", ")
285 ));
286 }
287 if !missing_licenses.is_empty() {
288 problems.push(format!(
289 "ignored license keys not found upstream: {}",
290 missing_licenses.join(", ")
291 ));
292 }
293 Err(anyhow!(
294 "stale index-build policy entries detected; remove or update them: {}",
295 problems.join("; ")
296 ))
297 }
298}
299
300fn apply_license_overlays(
301 licenses: &mut Vec<LoadedLicense>,
302 overlays: &[LoadedLicense],
303 ignored_license_keys: &HashSet<String>,
304 report: &mut AppliedIndexBuildPolicy,
305) -> Result<()> {
306 let mut indices = build_license_index_map(licenses)?;
307 let mut seen_overlay_keys = HashSet::new();
308
309 for overlay in overlays {
310 let key = normalize_license_key(&overlay.key);
311
312 if !seen_overlay_keys.insert(key.clone()) {
313 return Err(anyhow!(
314 "bundled overlay contains duplicate license key '{}'",
315 overlay.key
316 ));
317 }
318
319 if ignored_license_keys.contains(&key) {
320 return Err(anyhow!(
321 "overlay license '{}' conflicts with ignored_licenses",
322 overlay.key
323 ));
324 }
325
326 if let Some(index) = indices.get(&key).copied() {
327 if licenses[index] == *overlay {
328 return Err(anyhow!(
329 "overlay license '{}' is now identical to upstream; remove the local overlay file",
330 overlay.key
331 ));
332 }
333 report.replaced_licenses.push(overlay.key.clone());
334 licenses[index] = overlay.clone();
335 } else {
336 report.added_licenses.push(overlay.key.clone());
337 licenses.push(overlay.clone());
338 indices.insert(key, licenses.len() - 1);
339 }
340 }
341
342 Ok(())
343}
344
345fn apply_rule_overlays(
346 rules: &mut Vec<LoadedRule>,
347 overlays: &[LoadedRule],
348 ignored_rule_identifiers: &HashSet<String>,
349 ignored_license_keys: &HashSet<String>,
350 licenses: &[LoadedLicense],
351 report: &mut AppliedIndexBuildPolicy,
352) -> Result<()> {
353 let mut indices = build_rule_index_map(rules)?;
354 let mut seen_overlay_identifiers = HashSet::new();
355 let available_license_keys = licenses
356 .iter()
357 .map(|license| normalize_license_key(&license.key))
358 .collect::<HashSet<_>>();
359
360 for overlay in overlays {
361 let identifier = overlay.identifier.clone();
362
363 if !seen_overlay_identifiers.insert(identifier.clone()) {
364 return Err(anyhow!(
365 "bundled overlay contains duplicate rule identifier '{}'",
366 identifier
367 ));
368 }
369
370 if ignored_rule_identifiers.contains(identifier.as_str()) {
371 return Err(anyhow!(
372 "overlay rule '{}' conflicts with ignored_rules",
373 identifier
374 ));
375 }
376
377 if rule_references_ignored_license(overlay, ignored_license_keys) {
378 return Err(anyhow!(
379 "overlay rule '{}' references an ignored license key",
380 identifier
381 ));
382 }
383
384 ensure_rule_references_known_licenses(overlay, &available_license_keys)?;
385
386 if let Some(index) = indices.get(identifier.as_str()).copied() {
387 if rules[index] == *overlay {
388 return Err(anyhow!(
389 "overlay rule '{}' is now identical to upstream; remove the local overlay file",
390 identifier
391 ));
392 }
393 report.replaced_rules.push(identifier.clone());
394 rules[index] = overlay.clone();
395 } else {
396 report.added_rules.push(identifier.clone());
397 rules.push(overlay.clone());
398 indices.insert(identifier, rules.len() - 1);
399 }
400 }
401
402 Ok(())
403}
404
405fn build_rule_index_map(rules: &[LoadedRule]) -> Result<HashMap<String, usize>> {
406 let mut indices = HashMap::new();
407 for (index, rule) in rules.iter().enumerate() {
408 if indices.insert(rule.identifier.clone(), index).is_some() {
409 return Err(anyhow!(
410 "cannot apply overlay because duplicate rule identifier '{}' is already present",
411 rule.identifier
412 ));
413 }
414 }
415 Ok(indices)
416}
417
418fn build_license_index_map(licenses: &[LoadedLicense]) -> Result<HashMap<String, usize>> {
419 let mut indices = HashMap::new();
420 for (index, license) in licenses.iter().enumerate() {
421 let normalized_key = normalize_license_key(&license.key);
422 if indices.insert(normalized_key, index).is_some() {
423 return Err(anyhow!(
424 "cannot apply overlay because duplicate license key '{}' is already present",
425 license.key
426 ));
427 }
428 }
429 Ok(indices)
430}
431
432fn ensure_rule_references_known_licenses(
433 rule: &LoadedRule,
434 available_license_keys: &HashSet<String>,
435) -> Result<()> {
436 if rule.rule_kind == RuleKind::None && rule.is_false_positive {
437 return Ok(());
438 }
439
440 let expression = parse_expression(&rule.license_expression).map_err(|error| {
441 anyhow!(
442 "overlay rule '{}' has an invalid license expression '{}': {}",
443 rule.identifier,
444 rule.license_expression,
445 error
446 )
447 })?;
448
449 let missing_keys = expression
450 .license_keys()
451 .into_iter()
452 .map(|key| normalize_license_key(&key))
453 .filter(|key| !available_license_keys.contains(key))
454 .collect::<Vec<_>>();
455
456 if missing_keys.is_empty() {
457 Ok(())
458 } else {
459 Err(anyhow!(
460 "overlay rule '{}' references unknown license keys: {}",
461 rule.identifier,
462 missing_keys.join(", ")
463 ))
464 }
465}
466
/// Canonical form of a license key used for comparisons: surrounding
/// whitespace removed, then lower-cased.
fn normalize_license_key(key: &str) -> String {
    let trimmed = key.trim();
    trimmed.to_lowercase()
}
470
471fn rule_references_ignored_license(
472 rule: &LoadedRule,
473 ignored_license_keys: &HashSet<String>,
474) -> bool {
475 if ignored_license_keys.is_empty() {
476 return false;
477 }
478
479 let normalized_expression = normalize_license_key(&rule.license_expression);
480 if ignored_license_keys.contains(&normalized_expression) {
481 return true;
482 }
483
484 if rule.rule_kind == RuleKind::None && rule.is_false_positive {
485 return false;
486 }
487
488 parse_expression(&rule.license_expression)
489 .map(|expression| {
490 expression
491 .license_keys()
492 .into_iter()
493 .map(|key| normalize_license_key(&key))
494 .any(|key| ignored_license_keys.contains(&key))
495 })
496 .unwrap_or(false)
497}
498
/// Unit tests for policy filtering and overlay application.
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `RuleKind::Text` rule with the given identifier and license
    /// expression; the text is derived from the identifier and every optional
    /// field is left unset.
    fn create_loaded_rule(identifier: &str, expression: &str) -> LoadedRule {
        LoadedRule {
            identifier: identifier.to_string(),
            license_expression: expression.to_string(),
            text: format!("{identifier} text"),
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            skip_for_required_phrase_generation: false,
            relevance: Some(100),
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            is_deprecated: false,
            replaced_by: vec![],
        }
    }

    /// Builds a license whose name, text, short name, and SPDX key are all
    /// derived from `key`; every other optional field is left unset.
    fn create_loaded_license(key: &str) -> LoadedLicense {
        LoadedLicense {
            key: key.to_string(),
            short_name: Some(key.to_uppercase()),
            name: format!("{key} license"),
            language: Some("en".to_string()),
            spdx_license_key: Some(key.to_uppercase()),
            other_spdx_license_keys: vec![],
            category: Some("Permissive".to_string()),
            owner: None,
            homepage_url: None,
            text: format!("{key} text"),
            reference_urls: vec![],
            osi_license_key: None,
            text_urls: vec![],
            osi_url: None,
            faq_url: None,
            other_urls: vec![],
            notes: None,
            is_deprecated: false,
            is_exception: false,
            is_unknown: false,
            is_generic: false,
            replaced_by: vec![],
            minimum_coverage: None,
            standard_notice: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            ignorable_urls: None,
            ignorable_emails: None,
        }
    }

    /// A directly ignored rule, an ignored license, and a rule whose
    /// expression references that license are all removed and reported.
    #[test]
    fn test_apply_index_build_policy_filters_direct_and_dependent_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["direct.RULE".to_string()],
            ignored_licenses: vec!["apache-2.0".to_string()],
        };

        let rules = vec![
            create_loaded_rule("keep.RULE", "mit"),
            create_loaded_rule("direct.RULE", "mit"),
            create_loaded_rule("dependent.RULE", "mit OR apache-2.0"),
        ];
        let licenses = vec![
            create_loaded_license("mit"),
            create_loaded_license("apache-2.0"),
        ];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &[], &[])
                .expect("policy application");

        assert_eq!(
            filtered_rules
                .iter()
                .map(|rule| rule.identifier.as_str())
                .collect::<Vec<_>>(),
            vec!["keep.RULE"]
        );
        assert_eq!(
            filtered_licenses
                .iter()
                .map(|license| license.key.as_str())
                .collect::<Vec<_>>(),
            vec!["mit"]
        );
        assert_eq!(report.ignored_rules, vec!["direct.RULE".to_string()]);
        assert_eq!(report.ignored_licenses, vec!["apache-2.0".to_string()]);
        assert_eq!(
            report.ignored_rules_due_to_licenses,
            vec!["dependent.RULE".to_string()]
        );
    }

    /// A policy entry that matches no upstream rule is reported as stale.
    #[test]
    fn test_apply_index_build_policy_fails_for_stale_ignored_entries() {
        let policy = IndexBuildPolicy {
            ignored_rules: vec!["missing.RULE".to_string()],
            ignored_licenses: vec![],
        };

        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![create_loaded_license("mit")],
            &policy,
            &[],
            &[],
        )
        .expect_err("missing ignored rule should fail");

        assert!(
            error
                .to_string()
                .contains("ignored rule identifiers not found upstream: missing.RULE")
        );
    }

    /// Overlay entries with no upstream counterpart are appended and reported
    /// as added.
    #[test]
    fn test_apply_index_build_policy_infers_add_from_new_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![create_loaded_rule("custom-rule.RULE", "mit")];
        let overlay_licenses = vec![create_loaded_license("custom-license")];
        let rules = vec![create_loaded_rule("keep.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert!(
            filtered_rules
                .iter()
                .any(|rule| rule.identifier == "custom-rule.RULE")
        );
        assert!(
            filtered_licenses
                .iter()
                .any(|license| license.key == "custom-license")
        );
        assert_eq!(report.added_rules, vec!["custom-rule.RULE".to_string()]);
        assert_eq!(report.added_licenses, vec!["custom-license".to_string()]);
    }

    /// Overlay entries that collide with an upstream identifier or key replace
    /// the upstream entry in place and are reported as replaced.
    #[test]
    fn test_apply_index_build_policy_infers_replace_from_colliding_overlay_entries() {
        let policy = IndexBuildPolicy::default();
        let overlay_rules = vec![LoadedRule {
            text: "updated rule text".to_string(),
            ..create_loaded_rule("replace.RULE", "mit")
        }];
        let overlay_licenses = vec![LoadedLicense {
            name: "MIT Updated".to_string(),
            text: "updated license text".to_string(),
            ..create_loaded_license("mit")
        }];
        let rules = vec![create_loaded_rule("replace.RULE", "mit")];
        let licenses = vec![create_loaded_license("mit")];

        let (filtered_rules, filtered_licenses, report) =
            apply_index_build_policy(rules, licenses, &policy, &overlay_rules, &overlay_licenses)
                .expect("policy application");

        assert_eq!(filtered_rules[0].text, "updated rule text");
        assert_eq!(filtered_licenses[0].name, "MIT Updated");
        assert_eq!(report.replaced_rules, vec!["replace.RULE".to_string()]);
        assert_eq!(report.replaced_licenses, vec!["mit".to_string()]);
    }

    /// An overlay rule identical to its upstream counterpart is rejected.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_rule_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_rule = create_loaded_rule("replace.RULE", "mit");
        let error = apply_index_build_policy(
            vec![base_rule.clone()],
            vec![create_loaded_license("mit")],
            &policy,
            &[base_rule],
            &[],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay rule 'replace.RULE' is now identical to upstream")
        );
    }

    /// An overlay license identical to its upstream counterpart is rejected.
    #[test]
    fn test_apply_index_build_policy_rejects_redundant_license_overlay() {
        let policy = IndexBuildPolicy::default();
        let base_license = create_loaded_license("mit");
        let error = apply_index_build_policy(
            vec![create_loaded_rule("keep.RULE", "mit")],
            vec![base_license.clone()],
            &policy,
            &[],
            &[base_license],
        )
        .expect_err("redundant overlay should fail");

        assert!(
            error
                .to_string()
                .contains("overlay license 'mit' is now identical to upstream")
        );
    }
}