1use crate::license_detection::index::{loaded_license_to_license, loaded_rule_to_rule};
12use crate::license_detection::models::{License, LoadedLicense, LoadedRule, Rule};
13use anyhow::{Context, Result, anyhow};
14use log::warn;
15use once_cell::sync::Lazy;
16use regex::Regex;
17use serde::{Deserialize, Deserializer, Serialize};
18use std::collections::HashSet;
19use std::fs;
20use std::path::Path;
21
/// Matches a frontmatter delimiter line: three or more `-` characters followed
/// only by optional whitespace.
///
/// `(?m)` makes `^` and `$` anchor at line boundaries, so a delimiter is
/// recognised on any line of the input. The regex is compiled once, on first use.
static FM_BOUNDARY: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^-{3,}\s*$").expect("Invalid frontmatter regex"));
24
25fn deserialize_yes_no_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
26where
27 D: Deserializer<'de>,
28{
29 #[derive(Deserialize, Serialize)]
30 #[serde(untagged)]
31 enum YesNoOrBool {
32 String(String),
33 Bool(bool),
34 }
35
36 match YesNoOrBool::deserialize(deserializer)? {
37 YesNoOrBool::Bool(b) => Ok(Some(b)),
38 YesNoOrBool::String(s) => {
39 let lower = s.to_lowercase();
40 if lower == "yes" || lower == "true" || lower == "1" {
41 Ok(Some(true))
42 } else if lower == "no" || lower == "false" || lower == "0" {
43 Ok(Some(false))
44 } else {
45 Ok(None)
46 }
47 }
48 }
49}
50
/// Extension trait for narrowing a parsed number to a `u8`.
trait ParseNumber {
    /// Returns the value as a `u8`, or `None` when it cannot be represented as one.
    fn as_u8(&self) -> Option<u8>;
}
54
55impl ParseNumber for yaml_serde::Number {
56 fn as_u8(&self) -> Option<u8> {
57 self.as_i64()
58 .and_then(|n| {
59 if n >= 0 && n <= u8::MAX as i64 {
60 Some(n as u8)
61 } else {
62 None
63 }
64 })
65 .or_else(|| {
66 self.as_f64().and_then(|f| {
67 if f >= 0.0 && f <= u8::MAX as f64 {
68 Some(f as u8)
69 } else {
70 None
71 }
72 })
73 })
74 }
75}
76
/// Raw YAML frontmatter of a `.LICENSE` file, as deserialized by
/// `parse_license_to_loaded`.
///
/// Every field is optional: `#[serde(default)]` turns a missing key into `None`.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct LicenseFrontmatter {
    // Checked against the key derived from the file path (`validate_key_match`).
    #[serde(default)]
    key: Option<String>,

    #[serde(default)]
    short_name: Option<String>,

    #[serde(default)]
    name: Option<String>,

    #[serde(default)]
    category: Option<String>,

    #[serde(default)]
    owner: Option<String>,

    #[serde(default)]
    homepage_url: Option<String>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    spdx_license_key: Option<String>,

    #[serde(default)]
    other_spdx_license_keys: Option<Vec<String>>,

    #[serde(default)]
    osi_license_key: Option<String>,

    // The URL fields below are also merged into `reference_urls` by the loader.
    #[serde(default)]
    text_urls: Option<Vec<String>>,

    #[serde(default)]
    osi_url: Option<String>,

    #[serde(default)]
    faq_url: Option<String>,

    #[serde(default)]
    other_urls: Option<Vec<String>>,

    // Boolean flags go through `deserialize_yes_no_bool`, so `yes` / `no`
    // spellings are accepted in addition to native booleans.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_exception: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_unknown: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_generic: Option<bool>,

    // Kept as a generic number here; the loader narrows it with `as_u8`.
    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default)]
    standard_notice: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,
}
158
/// Result of splitting a rule file into its frontmatter and text sections.
struct ParsedRuleFile {
    /// The text between the first and second frontmatter delimiters, unparsed.
    yaml_content: String,
    /// Everything after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
    /// `true` when the frontmatter mapping contains a `minimum_coverage` key.
    has_stored_minimum_coverage: bool,
}
165
/// Result of splitting a license file into its frontmatter and text sections.
struct ParsedLicenseFile {
    /// The text between the first and second frontmatter delimiters, unparsed.
    yaml_content: String,
    /// Everything after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
}
171
172fn parse_file_content(content: &str, path: &Path) -> Result<ParsedRuleFile> {
177 if content.len() < 6 {
178 return Err(anyhow!("File content too short: {}", path.display()));
179 }
180
181 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
182
183 if parts.len() < 3 {
184 let trimmed = content.trim();
185 if trimmed.is_empty() {
186 return Err(anyhow!(
187 "File is empty or has no content: {}",
188 path.display()
189 ));
190 }
191 return Err(anyhow!("File missing delimiter '---': {}", path.display()));
192 }
193
194 let yaml_content = parts
195 .get(1)
196 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
197 .to_string();
198 let text_content = parts
199 .get(2)
200 .ok_or_else(|| {
201 anyhow!(
202 "Missing text content after frontmatter in {}",
203 path.display()
204 )
205 })?
206 .trim_start_matches('\n')
207 .trim()
208 .to_string();
209
210 let frontmatter_value: yaml_serde::Value =
211 yaml_serde::from_str(&yaml_content).map_err(|e| {
212 anyhow!(
213 "Failed to parse frontmatter YAML in {}: {}\nContent was:\n{}",
214 path.display(),
215 e,
216 yaml_content
217 )
218 })?;
219
220 let has_stored_minimum_coverage = frontmatter_value.as_mapping().is_some_and(|mapping| {
221 mapping.contains_key(yaml_serde::Value::String("minimum_coverage".to_string()))
222 });
223
224 Ok(ParsedRuleFile {
225 yaml_content,
226 text_content,
227 has_stored_minimum_coverage,
228 })
229}
230
/// Raw YAML frontmatter of a `.RULE` file, as deserialized by
/// `parse_rule_to_loaded`.
///
/// Every field is optional: `#[serde(default)]` turns a missing key into `None`.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RuleFrontmatter {
    #[serde(default)]
    license_expression: Option<String>,

    // The six `is_license_*` flags are combined into one rule kind by
    // `LoadedRule::derive_rule_kind`. Like every boolean flag here they go
    // through `deserialize_yes_no_bool`, so `yes` / `no` spellings are accepted.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_text: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_notice: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_reference: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_tag: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_intro: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_clue: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_false_positive: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_required_phrase: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    skip_for_required_phrase_generation: Option<bool>,

    // Kept as generic numbers here; the loader narrows both with `as_u8`.
    #[serde(default)]
    relevance: Option<yaml_serde::Number>,

    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_continuous: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    referenced_filenames: Option<Vec<String>>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    language: Option<String>,
}
303
304pub fn parse_rule_to_loaded(path: &Path) -> Result<LoadedRule> {
316 let content = fs::read_to_string(path)
317 .with_context(|| format!("Failed to read rule file: {}", path.display()))?;
318
319 let identifier = LoadedRule::derive_identifier(
320 path.file_name()
321 .and_then(|s| s.to_str())
322 .unwrap_or("unknown.RULE"),
323 );
324
325 let parsed = parse_file_content(&content, path)?;
326
327 if parsed.text_content.is_empty() {
328 return Err(anyhow!(
329 "Rule file has empty text content: {}",
330 path.display()
331 ));
332 }
333
334 let fm: RuleFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
335 anyhow!(
336 "Failed to parse rule frontmatter YAML in {}: {}\nContent was:\n{}",
337 path.display(),
338 e,
339 parsed.yaml_content
340 )
341 })?;
342
343 let is_false_positive = fm.is_false_positive.unwrap_or(false);
344
345 let rule_kind = LoadedRule::derive_rule_kind(
346 fm.is_license_text.unwrap_or(false),
347 fm.is_license_notice.unwrap_or(false),
348 fm.is_license_reference.unwrap_or(false),
349 fm.is_license_tag.unwrap_or(false),
350 fm.is_license_intro.unwrap_or(false),
351 fm.is_license_clue.unwrap_or(false),
352 )
353 .map_err(|e| {
354 anyhow!(
355 "Rule file has invalid rule-kind flags: {}: {}",
356 path.display(),
357 e
358 )
359 })?;
360
361 LoadedRule::validate_rule_kind_flags(rule_kind, is_false_positive)
362 .map_err(|e| anyhow!("Rule file has invalid flags: {}: {}", path.display(), e))?;
363
364 let license_expression = LoadedRule::normalize_license_expression(
365 fm.license_expression.as_deref(),
366 is_false_positive,
367 )
368 .map_err(|e| {
369 anyhow!(
370 "Rule file has invalid license_expression: {}: {}",
371 path.display(),
372 e
373 )
374 })?;
375
376 let relevance = fm.relevance.and_then(|n| n.as_u8());
377
378 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
379
380 Ok(LoadedRule {
381 identifier,
382 license_expression,
383 text: parsed.text_content,
384 rule_kind,
385 is_false_positive,
386 is_required_phrase: fm.is_required_phrase.unwrap_or(false),
387 skip_for_required_phrase_generation: fm
388 .skip_for_required_phrase_generation
389 .unwrap_or(false),
390 relevance,
391 minimum_coverage,
392 has_stored_minimum_coverage: parsed.has_stored_minimum_coverage,
393 is_continuous: fm.is_continuous.unwrap_or(false),
394 referenced_filenames: LoadedRule::normalize_optional_list(
395 fm.referenced_filenames.as_deref(),
396 ),
397 ignorable_urls: LoadedRule::normalize_optional_list(fm.ignorable_urls.as_deref()),
398 ignorable_emails: LoadedRule::normalize_optional_list(fm.ignorable_emails.as_deref()),
399 ignorable_copyrights: LoadedRule::normalize_optional_list(
400 fm.ignorable_copyrights.as_deref(),
401 ),
402 ignorable_holders: LoadedRule::normalize_optional_list(fm.ignorable_holders.as_deref()),
403 ignorable_authors: LoadedRule::normalize_optional_list(fm.ignorable_authors.as_deref()),
404 language: LoadedRule::normalize_optional_string(fm.language.as_deref()),
405 notes: LoadedRule::normalize_optional_string(fm.notes.as_deref()),
406 is_deprecated: fm.is_deprecated.unwrap_or(false),
407 replaced_by: fm.replaced_by.unwrap_or_default(),
408 })
409}
410
411pub fn parse_license_to_loaded(path: &Path) -> Result<LoadedLicense> {
423 let content = fs::read_to_string(path)
424 .with_context(|| format!("Failed to read license file: {}", path.display()))?;
425
426 let key = LoadedLicense::derive_key(path)?;
427
428 let parsed = parse_license_file_content(&content, path)?;
429
430 let fm: LicenseFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
431 anyhow!(
432 "Failed to parse license frontmatter YAML in {}: {}\nContent was:\n{}",
433 path.display(),
434 e,
435 parsed.yaml_content
436 )
437 })?;
438
439 LoadedLicense::validate_key_match(&key, fm.key.as_deref())
440 .map_err(|e| anyhow!("License file has key mismatch: {}: {}", path.display(), e))?;
441
442 let is_deprecated = fm.is_deprecated.unwrap_or(false);
443 let is_unknown = fm.is_unknown.unwrap_or(false);
444 let is_generic = fm.is_generic.unwrap_or(false);
445
446 LoadedLicense::validate_text_content(
447 &parsed.text_content,
448 is_deprecated,
449 is_unknown,
450 is_generic,
451 )
452 .map_err(|e| {
453 anyhow!(
454 "License file has invalid content: {}: {}",
455 path.display(),
456 e
457 )
458 })?;
459
460 let name = LoadedLicense::derive_name(fm.name.as_deref(), fm.short_name.as_deref(), &key);
461
462 let reference_urls = LoadedLicense::merge_reference_urls(
463 fm.text_urls.as_deref(),
464 fm.other_urls.as_deref(),
465 fm.osi_url.as_deref(),
466 fm.faq_url.as_deref(),
467 fm.homepage_url.as_deref(),
468 );
469
470 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
471
472 Ok(LoadedLicense {
473 key,
474 short_name: LoadedLicense::normalize_optional_string(fm.short_name.as_deref()),
475 name,
476 language: Some("en".to_string()),
477 spdx_license_key: LoadedLicense::normalize_optional_string(fm.spdx_license_key.as_deref()),
478 other_spdx_license_keys: fm.other_spdx_license_keys.unwrap_or_default(),
479 category: LoadedLicense::normalize_optional_string(fm.category.as_deref()),
480 owner: LoadedLicense::normalize_optional_string(fm.owner.as_deref()),
481 homepage_url: LoadedLicense::normalize_optional_string(fm.homepage_url.as_deref()),
482 text: parsed.text_content,
483 reference_urls,
484 osi_license_key: LoadedLicense::normalize_optional_string(fm.osi_license_key.as_deref()),
485 text_urls: LoadedLicense::normalize_optional_list(fm.text_urls.as_deref())
486 .unwrap_or_default(),
487 osi_url: LoadedLicense::normalize_optional_string(fm.osi_url.as_deref()),
488 faq_url: LoadedLicense::normalize_optional_string(fm.faq_url.as_deref()),
489 other_urls: LoadedLicense::normalize_optional_list(fm.other_urls.as_deref())
490 .unwrap_or_default(),
491 notes: LoadedLicense::normalize_optional_string(fm.notes.as_deref()),
492 is_deprecated,
493 is_exception: fm.is_exception.unwrap_or(false),
494 is_unknown,
495 is_generic,
496 replaced_by: fm.replaced_by.unwrap_or_default(),
497 minimum_coverage,
498 standard_notice: LoadedLicense::normalize_optional_string(fm.standard_notice.as_deref()),
499 ignorable_copyrights: LoadedLicense::normalize_optional_list(
500 fm.ignorable_copyrights.as_deref(),
501 ),
502 ignorable_holders: LoadedLicense::normalize_optional_list(fm.ignorable_holders.as_deref()),
503 ignorable_authors: LoadedLicense::normalize_optional_list(fm.ignorable_authors.as_deref()),
504 ignorable_urls: LoadedLicense::normalize_optional_list(fm.ignorable_urls.as_deref()),
505 ignorable_emails: LoadedLicense::normalize_optional_list(fm.ignorable_emails.as_deref()),
506 })
507}
508
509fn parse_license_file_content(content: &str, path: &Path) -> Result<ParsedLicenseFile> {
513 if content.len() < 6 {
514 return Err(anyhow!(
515 "License file content too short: {}",
516 path.display()
517 ));
518 }
519
520 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
521
522 if parts.len() < 3 {
523 let trimmed = content.trim();
524 if trimmed.is_empty() {
525 return Err(anyhow!(
526 "License file is empty or has no content: {}",
527 path.display()
528 ));
529 }
530 return Err(anyhow!(
531 "License file missing delimiter '---': {}",
532 path.display()
533 ));
534 }
535
536 let yaml_content = parts
537 .get(1)
538 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
539 .to_string();
540 let text_content = parts
541 .get(2)
542 .ok_or_else(|| {
543 anyhow!(
544 "Missing text content after frontmatter in {}",
545 path.display()
546 )
547 })?
548 .trim_start_matches('\n')
549 .trim()
550 .to_string();
551
552 Ok(ParsedLicenseFile {
553 yaml_content,
554 text_content,
555 })
556}
557
558pub fn load_loaded_rules_from_directory(dir: &Path) -> Result<Vec<LoadedRule>> {
570 let mut rules = Vec::new();
571
572 let entries = fs::read_dir(dir)
573 .with_context(|| format!("Failed to read rules directory: {}", dir.display()))?;
574
575 for entry in entries {
576 let entry = entry
577 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
578 let path = entry.path();
579
580 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("RULE") {
581 match parse_rule_to_loaded(&path) {
582 Ok(rule) => rules.push(rule),
583 Err(e) => {
584 warn!("Failed to parse rule file {}: {}", path.display(), e);
585 }
586 }
587 }
588 }
589
590 Ok(rules)
591}
592
593pub fn load_loaded_licenses_from_directory(dir: &Path) -> Result<Vec<LoadedLicense>> {
605 let mut licenses = Vec::new();
606
607 let entries = fs::read_dir(dir)
608 .with_context(|| format!("Failed to read licenses directory: {}", dir.display()))?;
609
610 for entry in entries {
611 let entry = entry
612 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
613 let path = entry.path();
614
615 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("LICENSE") {
616 match parse_license_to_loaded(&path) {
617 Ok(license) => licenses.push(license),
618 Err(e) => {
619 warn!("Failed to parse license file {}: {}", path.display(), e);
620 }
621 }
622 }
623 }
624
625 Ok(licenses)
626}
627
628#[allow(dead_code)]
640fn validate_rules(rules: &[Rule]) {
641 let mut seen_texts: HashSet<&str> = HashSet::new();
642 let mut duplicate_count = 0;
643
644 for rule in rules {
645 if !seen_texts.insert(&rule.text) {
646 warn!(
647 "Duplicate rule text found for license_expression: {}",
648 rule.license_expression
649 );
650 duplicate_count += 1;
651 }
652
653 if !rule.is_false_positive && rule.license_expression.trim().is_empty() {
654 warn!("Rule has empty license_expression but is not marked as false_positive");
655 }
656 }
657
658 if duplicate_count > 0 {
659 warn!(
660 "Found {} duplicate rule text(s) during rule validation",
661 duplicate_count
662 );
663 }
664}
665
666#[allow(dead_code)]
675pub fn load_rules_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<Rule>> {
676 let loaded = load_loaded_rules_from_directory(dir)?;
677 let rules: Vec<Rule> = loaded
678 .into_iter()
679 .filter(|r| with_deprecated || !r.is_deprecated)
680 .map(loaded_rule_to_rule)
681 .collect();
682 validate_rules(&rules);
683 Ok(rules)
684}
685
686#[allow(dead_code)]
695pub fn load_licenses_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<License>> {
696 let loaded = load_loaded_licenses_from_directory(dir)?;
697 let licenses: Vec<License> = loaded
698 .into_iter()
699 .filter(|l| with_deprecated || !l.is_deprecated)
700 .map(loaded_license_to_license)
701 .collect();
702 Ok(licenses)
703}
704
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use std::fs;
    use tempfile::tempdir;

    const MIT_LICENSE_FILE: &str = r#"---
key: mit
short_name: MIT License
name: MIT License
category: Permissive
spdx_license_key: MIT
---
MIT License text here"#;

    const MIT_RULE_FILE: &str = r#"---
license_expression: mit
is_license_reference: yes
relevance: 90
referenced_filenames:
 - MIT.txt
---
MIT.txt"#;

    const ACTIVE_LICENSE_FILE: &str = r#"---
key: active
name: Active License
---
Active license text"#;

    const DEPRECATED_LICENSE_FILE: &str = r#"---
key: deprecated
name: Deprecated License
is_deprecated: yes
---
Deprecated license text"#;

    const ACTIVE_RULE_FILE: &str = r#"---
license_expression: active
is_license_notice: yes
---
Active rule text"#;

    const DEPRECATED_RULE_FILE: &str = r#"---
license_expression: deprecated
is_license_notice: yes
is_deprecated: yes
---
Deprecated rule text"#;

    /// Parses a `.RULE` file and converts it straight to a runtime [`Rule`].
    pub fn parse_rule_file(path: &Path) -> Result<Rule> {
        parse_rule_to_loaded(path).map(loaded_rule_to_rule)
    }

    /// Writes `content` to `file_name` inside `dir` and returns the full path.
    fn write_fixture(dir: &Path, file_name: &str, content: &str) -> std::path::PathBuf {
        let target = dir.join(file_name);
        fs::write(&target, content).unwrap();
        target
    }

    /// Builds a minimal license-text [`Rule`]; every field not given as an
    /// argument is empty, zero, `None` or `false`, except `relevance` (100).
    fn text_rule(identifier: &str, license_expression: &str, text: &str) -> Rule {
        Rule {
            identifier: identifier.to_string(),
            license_expression: license_expression.to_string(),
            text: text.to_string(),
            tokens: vec![],
            rule_kind: crate::license_detection::models::RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            is_from_license: false,
            relevance: 100,
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            required_phrase_spans: vec![],
            stopwords_by_pos: HashMap::new(),
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            length_unique: 0,
            high_length_unique: 0,
            high_length: 0,
            min_matched_length: 0,
            min_high_matched_length: 0,
            min_matched_length_unique: 0,
            min_high_matched_length_unique: 0,
            is_small: false,
            is_tiny: false,
            starts_with_license: false,
            ends_with_license: false,
            is_deprecated: false,
            spdx_license_key: None,
            other_spdx_license_keys: vec![],
        }
    }

    #[test]
    fn test_parse_number_as_u8() {
        let in_range: yaml_serde::Number = yaml_serde::from_str("100").unwrap();
        assert_eq!(in_range.as_u8(), Some(100));

        let too_large: yaml_serde::Number = yaml_serde::from_str("500").unwrap();
        assert_eq!(too_large.as_u8(), None);

        let fractional: yaml_serde::Number = yaml_serde::from_str("90.5").unwrap();
        assert_eq!(fractional.as_u8(), Some(90));
    }

    #[test]
    fn test_parse_simple_license_file() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_FILE);

        let license = parse_license_to_loaded(&license_path)
            .map(loaded_license_to_license)
            .unwrap();
        assert_eq!(license.key, "mit");
        assert_eq!(license.name, "MIT License");
        assert!(license.text.contains("MIT License text"));
    }

    #[test]
    fn test_parse_simple_rule_file() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_RULE_FILE);

        let rule = parse_rule_file(&rule_path).unwrap();
        assert_eq!(rule.license_expression, "mit");
        assert_eq!(rule.text, "MIT.txt");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 90);
    }

    #[test]
    fn test_deserialize_yes_no_bool() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(
            dir.path(),
            "test.RULE",
            r#"---
license_expression: mit
is_license_notice: yes
is_license_tag: no
---
MIT License"#,
        );

        let rule = parse_rule_file(&rule_path).unwrap();
        assert!(rule.is_license_notice());
        assert!(!rule.is_license_tag());
    }

    #[test]
    fn test_load_licenses_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test.LICENSE",
            r#"---
key: test
name: Test License
spdx_license_key: TEST
category: Permissive
---
Test license text here"#,
        );

        let licenses = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses.len(), 1);

        let only = &licenses[0];
        assert_eq!(only.key, "test");
        assert_eq!(only.name, "Test License");
        assert_eq!(only.spdx_license_key, Some("TEST".to_string()));
        assert!(!only.text.is_empty());
    }

    #[test]
    fn test_load_rules_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test_1.RULE",
            r#"---
license_expression: test
is_license_reference: yes
relevance: 85
referenced_filenames:
 - TEST.txt
---
TEST.txt"#,
        );

        let rules = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules.len(), 1);

        let only = &rules[0];
        assert_eq!(only.license_expression, "test");
        assert!(only.is_license_reference());
        assert_eq!(only.relevance, 85);
    }

    // The three `validate_rules` tests below only check that validation runs
    // to completion; the function reports problems solely through the log.

    #[test]
    fn test_validate_rules_detects_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "MIT License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_accepts_false_positive_without_expression() {
        let rules = vec![Rule {
            rule_kind: crate::license_detection::models::RuleKind::None,
            is_false_positive: true,
            notes: Some("False positive for common pattern".to_string()),
            ..text_rule("fp.RULE", "", "Some text")
        }];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_no_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "Apache License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_load_licenses_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE_FILE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE_FILE);

        let licenses_without = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses_without.len(), 1);
        assert_eq!(licenses_without[0].key, "active");

        let licenses_with = load_licenses_from_directory(dir.path(), true).unwrap();
        assert_eq!(licenses_with.len(), 2);
    }

    #[test]
    fn test_load_rules_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE_FILE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE_FILE);

        let rules_without = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules_without.len(), 1);
        assert_eq!(rules_without[0].license_expression, "active");

        let rules_with = load_rules_from_directory(dir.path(), true).unwrap();
        assert_eq!(rules_with.len(), 2);
    }

    #[test]
    fn test_parse_rule_to_loaded() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_RULE_FILE);

        let loaded = parse_rule_to_loaded(&rule_path).unwrap();
        assert_eq!(loaded.identifier, "mit_1.RULE");
        assert_eq!(loaded.license_expression, "mit");
        assert_eq!(loaded.text, "MIT.txt");
        assert_eq!(
            loaded.rule_kind,
            crate::license_detection::models::RuleKind::Reference
        );
        assert_eq!(loaded.relevance, Some(90));
        assert_eq!(
            loaded.referenced_filenames,
            Some(vec!["MIT.txt".to_string()])
        );
        assert!(!loaded.is_deprecated);
    }

    #[test]
    fn test_parse_license_to_loaded() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_FILE);

        let loaded = parse_license_to_loaded(&license_path).unwrap();
        assert_eq!(loaded.key, "mit");
        assert_eq!(loaded.name, "MIT License");
        assert!(loaded.text.contains("MIT License text"));
        assert_eq!(loaded.spdx_license_key, Some("MIT".to_string()));
    }

    #[test]
    fn test_load_loaded_rules_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE_FILE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE_FILE);

        let loaded_rules = load_loaded_rules_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_rules.len(), 2);

        let active = loaded_rules
            .iter()
            .find(|r| r.license_expression == "active")
            .unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_rules
            .iter()
            .find(|r| r.license_expression == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }

    #[test]
    fn test_load_loaded_licenses_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE_FILE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE_FILE);

        let loaded_licenses = load_loaded_licenses_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_licenses.len(), 2);

        let active = loaded_licenses.iter().find(|l| l.key == "active").unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_licenses
            .iter()
            .find(|l| l.key == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }
}
1264}