1use crate::license_detection::index::{loaded_license_to_license, loaded_rule_to_rule};
12use crate::license_detection::models::{License, LoadedLicense, LoadedRule, Rule};
13use anyhow::{Context, Result, anyhow};
14use log::warn;
15use once_cell::sync::Lazy;
16use regex::Regex;
17use serde::{Deserialize, Deserializer, Serialize};
18use std::collections::HashSet;
19use std::fs;
20use std::path::Path;
21
/// Matches a frontmatter delimiter line: three or more `-` characters,
/// optionally followed by whitespace, with `^`/`$` anchored per line (`(?m)`).
///
/// Compiled lazily on first use; the `expect` can only fire if this
/// hard-coded pattern is itself invalid.
static FM_BOUNDARY: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"(?m)^-{3,}\s*$").expect("Invalid frontmatter regex"));
24
25fn deserialize_yes_no_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
26where
27 D: Deserializer<'de>,
28{
29 #[derive(Deserialize, Serialize)]
30 #[serde(untagged)]
31 enum YesNoOrBool {
32 String(String),
33 Bool(bool),
34 }
35
36 match YesNoOrBool::deserialize(deserializer)? {
37 YesNoOrBool::Bool(b) => Ok(Some(b)),
38 YesNoOrBool::String(s) => {
39 let lower = s.to_lowercase();
40 if lower == "yes" || lower == "true" || lower == "1" {
41 Ok(Some(true))
42 } else if lower == "no" || lower == "false" || lower == "0" {
43 Ok(Some(false))
44 } else {
45 Ok(None)
46 }
47 }
48 }
49}
50
/// Narrowing conversion of a numeric value to `u8`.
trait ParseNumber {
    /// Returns the value as a `u8`, or `None` when the implementation
    /// considers it outside the `u8` range.
    fn as_u8(&self) -> Option<u8>;
}
54
55impl ParseNumber for serde_yaml::Number {
56 fn as_u8(&self) -> Option<u8> {
57 self.as_i64()
58 .and_then(|n| {
59 if n >= 0 && n <= u8::MAX as i64 {
60 Some(n as u8)
61 } else {
62 None
63 }
64 })
65 .or_else(|| {
66 self.as_f64().and_then(|f| {
67 if f >= 0.0 && f <= u8::MAX as f64 {
68 Some(f as u8)
69 } else {
70 None
71 }
72 })
73 })
74 }
75}
76
/// Raw YAML frontmatter of a `.LICENSE` file, as deserialized by
/// `parse_license_to_loaded`.
///
/// Every field is optional; an absent key deserializes to `None`. Flag fields
/// go through `deserialize_yes_no_bool` rather than plain `bool` parsing.
///
/// Several fields (`owner`, `osi_license_key`, `is_exception`,
/// `standard_notice`) are deserialized but not read by
/// `parse_license_to_loaded`, which is presumably why `dead_code` is allowed.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct LicenseFrontmatter {
    /// Checked against the key derived from the file path
    /// (`LoadedLicense::validate_key_match`).
    #[serde(default)]
    key: Option<String>,

    /// Passed to `LoadedLicense::derive_name` together with `name`.
    #[serde(default)]
    short_name: Option<String>,

    #[serde(default)]
    name: Option<String>,

    #[serde(default)]
    category: Option<String>,

    #[serde(default)]
    owner: Option<String>,

    // The URL fields below (`homepage_url`, `text_urls`, `osi_url`, `faq_url`,
    // `other_urls`) are all handed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    homepage_url: Option<String>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    spdx_license_key: Option<String>,

    /// Missing value becomes an empty list when loading.
    #[serde(default)]
    other_spdx_license_keys: Option<Vec<String>>,

    #[serde(default)]
    osi_license_key: Option<String>,

    #[serde(default)]
    text_urls: Option<Vec<String>>,

    #[serde(default)]
    osi_url: Option<String>,

    #[serde(default)]
    faq_url: Option<String>,

    #[serde(default)]
    other_urls: Option<Vec<String>>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    /// Missing value becomes an empty list when loading.
    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_exception: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_unknown: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_generic: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8`
    /// when loading.
    #[serde(default)]
    minimum_coverage: Option<serde_yaml::Number>,

    #[serde(default)]
    standard_notice: Option<String>,

    // The `ignorable_*` lists are each passed through
    // `LoadedLicense::normalize_optional_list` when loading.
    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,
}
158
/// Result of splitting a rule file in `parse_file_content`.
struct ParsedRuleFile {
    /// Text between the first and second frontmatter delimiters, unparsed.
    yaml_content: String,
    /// Text after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
    /// Whether the frontmatter mapping contains a `minimum_coverage` key.
    has_stored_minimum_coverage: bool,
}
165
/// Result of splitting a license file in `parse_license_file_content`.
struct ParsedLicenseFile {
    /// Text between the first and second frontmatter delimiters, unparsed.
    yaml_content: String,
    /// Text after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
}
171
172fn parse_file_content(content: &str, path: &Path) -> Result<ParsedRuleFile> {
177 if content.len() < 6 {
178 return Err(anyhow!("File content too short: {}", path.display()));
179 }
180
181 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
182
183 if parts.len() < 3 {
184 let trimmed = content.trim();
185 if trimmed.is_empty() {
186 return Err(anyhow!(
187 "File is empty or has no content: {}",
188 path.display()
189 ));
190 }
191 return Err(anyhow!("File missing delimiter '---': {}", path.display()));
192 }
193
194 let yaml_content = parts
195 .get(1)
196 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
197 .to_string();
198 let text_content = parts
199 .get(2)
200 .ok_or_else(|| {
201 anyhow!(
202 "Missing text content after frontmatter in {}",
203 path.display()
204 )
205 })?
206 .trim_start_matches('\n')
207 .trim()
208 .to_string();
209
210 let frontmatter_value: serde_yaml::Value =
211 serde_yaml::from_str(&yaml_content).map_err(|e| {
212 anyhow!(
213 "Failed to parse frontmatter YAML in {}: {}\nContent was:\n{}",
214 path.display(),
215 e,
216 yaml_content
217 )
218 })?;
219
220 let has_stored_minimum_coverage = frontmatter_value.as_mapping().is_some_and(|mapping| {
221 mapping.contains_key(serde_yaml::Value::String("minimum_coverage".to_string()))
222 });
223
224 Ok(ParsedRuleFile {
225 yaml_content,
226 text_content,
227 has_stored_minimum_coverage,
228 })
229}
230
/// Raw YAML frontmatter of a `.RULE` file, as deserialized by
/// `parse_rule_to_loaded`.
///
/// Every field is optional; an absent key deserializes to `None`. Flag fields
/// go through `deserialize_yes_no_bool` rather than plain `bool` parsing.
///
/// `skip_for_required_phrase_generation` and `replaced_by` are deserialized
/// but not read by `parse_rule_to_loaded`, which is presumably why `dead_code`
/// is allowed.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RuleFrontmatter {
    /// Normalized with `LoadedRule::normalize_license_expression`, which also
    /// receives the false-positive flag.
    #[serde(default)]
    license_expression: Option<String>,

    // The six `is_license_*` flags below are combined into a single rule kind
    // by `LoadedRule::derive_rule_kind`; a missing flag counts as `false`.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_text: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_notice: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_reference: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_tag: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_intro: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_clue: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_false_positive: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_required_phrase: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    skip_for_required_phrase_generation: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8`
    /// when loading.
    #[serde(default)]
    relevance: Option<serde_yaml::Number>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8`
    /// when loading.
    #[serde(default)]
    minimum_coverage: Option<serde_yaml::Number>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_continuous: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    referenced_filenames: Option<Vec<String>>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    // The `ignorable_*` lists are each passed through
    // `LoadedRule::normalize_optional_list` when loading.
    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    language: Option<String>,
}
303
304pub fn parse_rule_to_loaded(path: &Path) -> Result<LoadedRule> {
316 let content = fs::read_to_string(path)
317 .with_context(|| format!("Failed to read rule file: {}", path.display()))?;
318
319 let identifier = LoadedRule::derive_identifier(
320 path.file_name()
321 .and_then(|s| s.to_str())
322 .unwrap_or("unknown.RULE"),
323 );
324
325 let parsed = parse_file_content(&content, path)?;
326
327 if parsed.text_content.is_empty() {
328 return Err(anyhow!(
329 "Rule file has empty text content: {}",
330 path.display()
331 ));
332 }
333
334 let fm: RuleFrontmatter = serde_yaml::from_str(&parsed.yaml_content).map_err(|e| {
335 anyhow!(
336 "Failed to parse rule frontmatter YAML in {}: {}\nContent was:\n{}",
337 path.display(),
338 e,
339 parsed.yaml_content
340 )
341 })?;
342
343 let is_false_positive = fm.is_false_positive.unwrap_or(false);
344
345 let rule_kind = LoadedRule::derive_rule_kind(
346 fm.is_license_text.unwrap_or(false),
347 fm.is_license_notice.unwrap_or(false),
348 fm.is_license_reference.unwrap_or(false),
349 fm.is_license_tag.unwrap_or(false),
350 fm.is_license_intro.unwrap_or(false),
351 fm.is_license_clue.unwrap_or(false),
352 )
353 .map_err(|e| {
354 anyhow!(
355 "Rule file has invalid rule-kind flags: {}: {}",
356 path.display(),
357 e
358 )
359 })?;
360
361 LoadedRule::validate_rule_kind_flags(rule_kind, is_false_positive)
362 .map_err(|e| anyhow!("Rule file has invalid flags: {}: {}", path.display(), e))?;
363
364 let license_expression = LoadedRule::normalize_license_expression(
365 fm.license_expression.as_deref(),
366 is_false_positive,
367 )
368 .map_err(|e| {
369 anyhow!(
370 "Rule file has invalid license_expression: {}: {}",
371 path.display(),
372 e
373 )
374 })?;
375
376 let relevance = fm.relevance.and_then(|n| n.as_u8());
377
378 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
379
380 Ok(LoadedRule {
381 identifier,
382 license_expression,
383 text: parsed.text_content,
384 rule_kind,
385 is_false_positive,
386 is_required_phrase: fm.is_required_phrase.unwrap_or(false),
387 relevance,
388 minimum_coverage,
389 has_stored_minimum_coverage: parsed.has_stored_minimum_coverage,
390 is_continuous: fm.is_continuous.unwrap_or(false),
391 referenced_filenames: LoadedRule::normalize_optional_list(
392 fm.referenced_filenames.as_deref(),
393 ),
394 ignorable_urls: LoadedRule::normalize_optional_list(fm.ignorable_urls.as_deref()),
395 ignorable_emails: LoadedRule::normalize_optional_list(fm.ignorable_emails.as_deref()),
396 ignorable_copyrights: LoadedRule::normalize_optional_list(
397 fm.ignorable_copyrights.as_deref(),
398 ),
399 ignorable_holders: LoadedRule::normalize_optional_list(fm.ignorable_holders.as_deref()),
400 ignorable_authors: LoadedRule::normalize_optional_list(fm.ignorable_authors.as_deref()),
401 language: LoadedRule::normalize_optional_string(fm.language.as_deref()),
402 notes: LoadedRule::normalize_optional_string(fm.notes.as_deref()),
403 is_deprecated: fm.is_deprecated.unwrap_or(false),
404 })
405}
406
407pub fn parse_license_to_loaded(path: &Path) -> Result<LoadedLicense> {
419 let content = fs::read_to_string(path)
420 .with_context(|| format!("Failed to read license file: {}", path.display()))?;
421
422 let key = LoadedLicense::derive_key(path)?;
423
424 let parsed = parse_license_file_content(&content, path)?;
425
426 let fm: LicenseFrontmatter = serde_yaml::from_str(&parsed.yaml_content).map_err(|e| {
427 anyhow!(
428 "Failed to parse license frontmatter YAML in {}: {}\nContent was:\n{}",
429 path.display(),
430 e,
431 parsed.yaml_content
432 )
433 })?;
434
435 LoadedLicense::validate_key_match(&key, fm.key.as_deref())
436 .map_err(|e| anyhow!("License file has key mismatch: {}: {}", path.display(), e))?;
437
438 let is_deprecated = fm.is_deprecated.unwrap_or(false);
439 let is_unknown = fm.is_unknown.unwrap_or(false);
440 let is_generic = fm.is_generic.unwrap_or(false);
441
442 LoadedLicense::validate_text_content(
443 &parsed.text_content,
444 is_deprecated,
445 is_unknown,
446 is_generic,
447 )
448 .map_err(|e| {
449 anyhow!(
450 "License file has invalid content: {}: {}",
451 path.display(),
452 e
453 )
454 })?;
455
456 let name = LoadedLicense::derive_name(fm.name.as_deref(), fm.short_name.as_deref(), &key);
457
458 let reference_urls = LoadedLicense::merge_reference_urls(
459 fm.text_urls.as_deref(),
460 fm.other_urls.as_deref(),
461 fm.osi_url.as_deref(),
462 fm.faq_url.as_deref(),
463 fm.homepage_url.as_deref(),
464 );
465
466 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
467
468 Ok(LoadedLicense {
469 key,
470 name,
471 spdx_license_key: LoadedLicense::normalize_optional_string(fm.spdx_license_key.as_deref()),
472 other_spdx_license_keys: fm.other_spdx_license_keys.unwrap_or_default(),
473 category: LoadedLicense::normalize_optional_string(fm.category.as_deref()),
474 text: parsed.text_content,
475 reference_urls,
476 notes: LoadedLicense::normalize_optional_string(fm.notes.as_deref()),
477 is_deprecated,
478 replaced_by: fm.replaced_by.unwrap_or_default(),
479 minimum_coverage,
480 ignorable_copyrights: LoadedLicense::normalize_optional_list(
481 fm.ignorable_copyrights.as_deref(),
482 ),
483 ignorable_holders: LoadedLicense::normalize_optional_list(fm.ignorable_holders.as_deref()),
484 ignorable_authors: LoadedLicense::normalize_optional_list(fm.ignorable_authors.as_deref()),
485 ignorable_urls: LoadedLicense::normalize_optional_list(fm.ignorable_urls.as_deref()),
486 ignorable_emails: LoadedLicense::normalize_optional_list(fm.ignorable_emails.as_deref()),
487 })
488}
489
490fn parse_license_file_content(content: &str, path: &Path) -> Result<ParsedLicenseFile> {
494 if content.len() < 6 {
495 return Err(anyhow!(
496 "License file content too short: {}",
497 path.display()
498 ));
499 }
500
501 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
502
503 if parts.len() < 3 {
504 let trimmed = content.trim();
505 if trimmed.is_empty() {
506 return Err(anyhow!(
507 "License file is empty or has no content: {}",
508 path.display()
509 ));
510 }
511 return Err(anyhow!(
512 "License file missing delimiter '---': {}",
513 path.display()
514 ));
515 }
516
517 let yaml_content = parts
518 .get(1)
519 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
520 .to_string();
521 let text_content = parts
522 .get(2)
523 .ok_or_else(|| {
524 anyhow!(
525 "Missing text content after frontmatter in {}",
526 path.display()
527 )
528 })?
529 .trim_start_matches('\n')
530 .trim()
531 .to_string();
532
533 Ok(ParsedLicenseFile {
534 yaml_content,
535 text_content,
536 })
537}
538
539pub fn load_loaded_rules_from_directory(dir: &Path) -> Result<Vec<LoadedRule>> {
551 let mut rules = Vec::new();
552
553 let entries = fs::read_dir(dir)
554 .with_context(|| format!("Failed to read rules directory: {}", dir.display()))?;
555
556 for entry in entries {
557 let entry = entry
558 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
559 let path = entry.path();
560
561 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("RULE") {
562 match parse_rule_to_loaded(&path) {
563 Ok(rule) => rules.push(rule),
564 Err(e) => {
565 eprintln!(
566 "Warning: Failed to parse rule file {}: {}",
567 path.display(),
568 e
569 );
570 }
571 }
572 }
573 }
574
575 Ok(rules)
576}
577
578pub fn load_loaded_licenses_from_directory(dir: &Path) -> Result<Vec<LoadedLicense>> {
590 let mut licenses = Vec::new();
591
592 let entries = fs::read_dir(dir)
593 .with_context(|| format!("Failed to read licenses directory: {}", dir.display()))?;
594
595 for entry in entries {
596 let entry = entry
597 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
598 let path = entry.path();
599
600 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("LICENSE") {
601 match parse_license_to_loaded(&path) {
602 Ok(license) => licenses.push(license),
603 Err(e) => {
604 eprintln!(
605 "Warning: Failed to parse license file {}: {}",
606 path.display(),
607 e
608 );
609 }
610 }
611 }
612 }
613
614 Ok(licenses)
615}
616
617#[allow(dead_code)]
629fn validate_rules(rules: &[Rule]) {
630 let mut seen_texts: HashSet<&str> = HashSet::new();
631 let mut duplicate_count = 0;
632
633 for rule in rules {
634 if !seen_texts.insert(&rule.text) {
635 warn!(
636 "Duplicate rule text found for license_expression: {}",
637 rule.license_expression
638 );
639 duplicate_count += 1;
640 }
641
642 if !rule.is_false_positive && rule.license_expression.trim().is_empty() {
643 warn!("Rule has empty license_expression but is not marked as false_positive");
644 }
645 }
646
647 if duplicate_count > 0 {
648 warn!(
649 "Found {} duplicate rule text(s) during rule validation",
650 duplicate_count
651 );
652 }
653}
654
655#[allow(dead_code)]
664pub fn load_rules_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<Rule>> {
665 let loaded = load_loaded_rules_from_directory(dir)?;
666 let rules: Vec<Rule> = loaded
667 .into_iter()
668 .filter(|r| with_deprecated || !r.is_deprecated)
669 .map(loaded_rule_to_rule)
670 .collect();
671 validate_rules(&rules);
672 Ok(rules)
673}
674
675#[allow(dead_code)]
684pub fn load_licenses_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<License>> {
685 let loaded = load_loaded_licenses_from_directory(dir)?;
686 let licenses: Vec<License> = loaded
687 .into_iter()
688 .filter(|l| with_deprecated || !l.is_deprecated)
689 .map(loaded_license_to_license)
690 .collect();
691 Ok(licenses)
692}
693
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::RuleKind;
    use std::collections::HashMap;
    use std::fs;
    use tempfile::tempdir;

    /// License fixture shared by the single-file license parsing tests.
    const MIT_LICENSE: &str = r#"---
key: mit
short_name: MIT License
name: MIT License
category: Permissive
spdx_license_key: MIT
---
MIT License text here"#;

    /// Reference-rule fixture shared by the single-file rule parsing tests.
    const MIT_REFERENCE_RULE: &str = r#"---
license_expression: mit
is_license_reference: yes
relevance: 90
referenced_filenames:
  - MIT.txt
---
MIT.txt"#;

    const ACTIVE_LICENSE: &str = r#"---
key: active
name: Active License
---
Active license text"#;

    const DEPRECATED_LICENSE: &str = r#"---
key: deprecated
name: Deprecated License
is_deprecated: yes
---
Deprecated license text"#;

    const ACTIVE_RULE: &str = r#"---
license_expression: active
is_license_notice: yes
---
Active rule text"#;

    const DEPRECATED_RULE: &str = r#"---
license_expression: deprecated
is_license_notice: yes
is_deprecated: yes
---
Deprecated rule text"#;

    /// Parses a rule file and converts it straight to a runtime `Rule`.
    pub fn parse_rule_file(path: &Path) -> Result<Rule> {
        let loaded = parse_rule_to_loaded(path)?;
        Ok(loaded_rule_to_rule(loaded))
    }

    /// Writes `content` to `dir/name` and returns the full path.
    fn write_fixture(dir: &Path, name: &str, content: &str) -> std::path::PathBuf {
        let path = dir.join(name);
        fs::write(&path, content).unwrap();
        path
    }

    /// Builds a text-kind `Rule` with the given identifying fields; every
    /// other field gets the neutral value the validation tests rely on.
    fn make_rule(identifier: &str, license_expression: &str, text: &str) -> Rule {
        Rule {
            identifier: identifier.to_string(),
            license_expression: license_expression.to_string(),
            text: text.to_string(),
            tokens: vec![],
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            is_from_license: false,
            relevance: 100,
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            required_phrase_spans: vec![],
            stopwords_by_pos: HashMap::new(),
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            length_unique: 0,
            high_length_unique: 0,
            high_length: 0,
            min_matched_length: 0,
            min_high_matched_length: 0,
            min_matched_length_unique: 0,
            min_high_matched_length_unique: 0,
            is_small: false,
            is_tiny: false,
            starts_with_license: false,
            ends_with_license: false,
            is_deprecated: false,
            spdx_license_key: None,
            other_spdx_license_keys: vec![],
        }
    }

    #[test]
    fn test_parse_number_as_u8() {
        let num_int: serde_yaml::Number = serde_yaml::from_str("100").unwrap();
        assert_eq!(num_int.as_u8(), Some(100));

        let num_out_of_range: serde_yaml::Number = serde_yaml::from_str("500").unwrap();
        assert_eq!(num_out_of_range.as_u8(), None);

        let num_float: serde_yaml::Number = serde_yaml::from_str("90.5").unwrap();
        assert_eq!(num_float.as_u8(), Some(90));
    }

    #[test]
    fn test_parse_simple_license_file() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE);

        let license = parse_license_to_loaded(&license_path)
            .map(loaded_license_to_license)
            .unwrap();
        assert_eq!(license.key, "mit");
        assert_eq!(license.name, "MIT License");
        assert!(license.text.contains("MIT License text"));
    }

    #[test]
    fn test_parse_simple_rule_file() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_REFERENCE_RULE);

        let rule = parse_rule_file(&rule_path).unwrap();
        assert_eq!(rule.license_expression, "mit");
        assert_eq!(rule.text, "MIT.txt");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 90);
    }

    #[test]
    fn test_deserialize_yes_no_bool() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(
            dir.path(),
            "test.RULE",
            r#"---
license_expression: mit
is_license_notice: yes
is_license_tag: no
---
MIT License"#,
        );

        let rule = parse_rule_file(&rule_path).unwrap();
        assert!(rule.is_license_notice());
        assert!(!rule.is_license_tag());
    }

    #[test]
    fn test_load_licenses_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test.LICENSE",
            r#"---
key: test
name: Test License
spdx_license_key: TEST
category: Permissive
---
Test license text here"#,
        );

        let licenses = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses.len(), 1);

        let license = &licenses[0];
        assert_eq!(license.key, "test");
        assert_eq!(license.name, "Test License");
        assert_eq!(license.spdx_license_key, Some("TEST".to_string()));
        assert!(!license.text.is_empty());
    }

    #[test]
    fn test_load_rules_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test_1.RULE",
            r#"---
license_expression: test
is_license_reference: yes
relevance: 85
referenced_filenames:
  - TEST.txt
---
TEST.txt"#,
        );

        let rules = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules.len(), 1);

        let rule = &rules[0];
        assert_eq!(rule.license_expression, "test");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 85);
    }

    #[test]
    fn test_validate_rules_detects_duplicates() {
        // Same text under two different expressions: must only warn, not panic.
        let rules = vec![
            make_rule("mit.LICENSE", "mit", "MIT License"),
            make_rule("apache-2.0.LICENSE", "apache-2.0", "MIT License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_accepts_false_positive_without_expression() {
        let rules = vec![Rule {
            rule_kind: RuleKind::None,
            is_false_positive: true,
            notes: Some("False positive for common pattern".to_string()),
            ..make_rule("fp.RULE", "", "Some text")
        }];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_no_duplicates() {
        let rules = vec![
            make_rule("mit.LICENSE", "mit", "MIT License"),
            make_rule("apache-2.0.LICENSE", "apache-2.0", "Apache License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_load_licenses_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE);

        let licenses_without = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses_without.len(), 1);
        assert_eq!(licenses_without[0].key, "active");

        let licenses_with = load_licenses_from_directory(dir.path(), true).unwrap();
        assert_eq!(licenses_with.len(), 2);
    }

    #[test]
    fn test_load_rules_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE);

        let rules_without = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules_without.len(), 1);
        assert_eq!(rules_without[0].license_expression, "active");

        let rules_with = load_rules_from_directory(dir.path(), true).unwrap();
        assert_eq!(rules_with.len(), 2);
    }

    #[test]
    fn test_parse_rule_to_loaded() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_REFERENCE_RULE);

        let loaded = parse_rule_to_loaded(&rule_path).unwrap();
        assert_eq!(loaded.identifier, "mit_1.RULE");
        assert_eq!(loaded.license_expression, "mit");
        assert_eq!(loaded.text, "MIT.txt");
        assert_eq!(loaded.rule_kind, RuleKind::Reference);
        assert_eq!(loaded.relevance, Some(90));
        assert_eq!(
            loaded.referenced_filenames,
            Some(vec!["MIT.txt".to_string()])
        );
        assert!(!loaded.is_deprecated);
    }

    #[test]
    fn test_parse_license_to_loaded() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE);

        let loaded = parse_license_to_loaded(&license_path).unwrap();
        assert_eq!(loaded.key, "mit");
        assert_eq!(loaded.name, "MIT License");
        assert!(loaded.text.contains("MIT License text"));
        assert_eq!(loaded.spdx_license_key, Some("MIT".to_string()));
    }

    #[test]
    fn test_load_loaded_rules_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE);

        let loaded_rules = load_loaded_rules_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_rules.len(), 2);

        let active = loaded_rules
            .iter()
            .find(|r| r.license_expression == "active")
            .unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_rules
            .iter()
            .find(|r| r.license_expression == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }

    #[test]
    fn test_load_loaded_licenses_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE);

        let loaded_licenses = load_loaded_licenses_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_licenses.len(), 2);

        let active = loaded_licenses.iter().find(|l| l.key == "active").unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_licenses
            .iter()
            .find(|l| l.key == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }
}