1use crate::license_detection::index::{loaded_license_to_license, loaded_rule_to_rule};
12use crate::license_detection::models::{License, LoadedLicense, LoadedRule, Rule};
13use anyhow::{Context, Result, anyhow};
14use log::warn;
15use regex::Regex;
16use serde::{Deserialize, Deserializer, Serialize};
17use std::collections::HashSet;
18use std::fs;
19use std::path::Path;
20use std::sync::LazyLock;
21
/// Regex matching a frontmatter boundary line: three or more `-` characters at the
/// start of a line, followed only by optional whitespace up to a line end
/// (`(?m)` enables multi-line anchors).
///
/// Compiled lazily on first use; the `expect` can only fire if the literal pattern
/// itself is invalid.
static FM_BOUNDARY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^-{3,}\s*$").expect("Invalid frontmatter regex"));
24
25fn deserialize_yes_no_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
26where
27 D: Deserializer<'de>,
28{
29 #[derive(Deserialize, Serialize)]
30 #[serde(untagged)]
31 enum YesNoOrBool {
32 String(String),
33 Bool(bool),
34 }
35
36 match YesNoOrBool::deserialize(deserializer)? {
37 YesNoOrBool::Bool(b) => Ok(Some(b)),
38 YesNoOrBool::String(s) => {
39 let lower = s.to_lowercase();
40 if lower == "yes" || lower == "true" || lower == "1" {
41 Ok(Some(true))
42 } else if lower == "no" || lower == "false" || lower == "0" {
43 Ok(Some(false))
44 } else {
45 Ok(None)
46 }
47 }
48 }
49}
50
51trait ParseNumber {
52 fn as_u8(&self) -> Option<u8>;
53}
54
55impl ParseNumber for yaml_serde::Number {
56 fn as_u8(&self) -> Option<u8> {
57 self.as_i64()
58 .and_then(|n| u8::try_from(n).ok())
59 .or_else(|| {
60 self.as_f64().and_then(|f| {
61 if f >= 0.0 && f <= f64::from(u8::MAX) {
62 #[allow(clippy::cast_sign_loss)]
64 Some(f as u8)
65 } else {
66 None
67 }
68 })
69 })
70 }
71}
72
/// Raw YAML frontmatter of a `.LICENSE` file, as deserialized by serde.
///
/// Every field is optional (`#[serde(default)]`), so a missing key deserializes to
/// `None`. `parse_license_source_to_loaded` turns this into a `LoadedLicense`.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct LicenseFrontmatter {
    /// License key; checked against the key derived from the file name.
    #[serde(default)]
    key: Option<String>,

    /// Short display name; also passed to `LoadedLicense::derive_name`.
    #[serde(default)]
    short_name: Option<String>,

    /// Full display name; passed to `LoadedLicense::derive_name`.
    #[serde(default)]
    name: Option<String>,

    #[serde(default)]
    category: Option<String>,

    #[serde(default)]
    owner: Option<String>,

    /// Also passed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    homepage_url: Option<String>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    spdx_license_key: Option<String>,

    #[serde(default)]
    other_spdx_license_keys: Option<Vec<String>>,

    #[serde(default)]
    osi_license_key: Option<String>,

    /// Also passed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    text_urls: Option<Vec<String>>,

    /// Also passed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    osi_url: Option<String>,

    /// Also passed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    faq_url: Option<String>,

    /// Also passed to `LoadedLicense::merge_reference_urls`.
    #[serde(default)]
    other_urls: Option<Vec<String>>,

    /// Boolean-like flag (see `deserialize_yes_no_bool`); treated as `false` when absent.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    /// Boolean-like flag (see `deserialize_yes_no_bool`); treated as `false` when absent.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_exception: Option<bool>,

    /// Boolean-like flag (see `deserialize_yes_no_bool`); treated as `false` when absent.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_unknown: Option<bool>,

    /// Boolean-like flag (see `deserialize_yes_no_bool`); treated as `false` when absent.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_generic: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8` by the loader.
    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default)]
    standard_notice: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,
}
154
/// Result of splitting a rule file on its frontmatter boundaries
/// (produced by `parse_file_content`).
struct ParsedRuleFile {
    /// Raw YAML text found between the first and second boundary lines.
    yaml_content: String,
    /// Everything after the second boundary line, with surrounding whitespace trimmed.
    text_content: String,
    /// `true` when the frontmatter is a mapping that contains a `minimum_coverage` key.
    has_stored_minimum_coverage: bool,
}
161
/// Result of splitting a license file on its frontmatter boundaries
/// (produced by `parse_license_file_content`).
struct ParsedLicenseFile {
    /// Raw YAML text found between the first and second boundary lines.
    yaml_content: String,
    /// Everything after the second boundary line, with surrounding whitespace trimmed.
    text_content: String,
}
167
168fn parse_file_content(content: &str, path: &Path) -> Result<ParsedRuleFile> {
173 if content.len() < 6 {
174 return Err(anyhow!("File content too short: {}", path.display()));
175 }
176
177 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
178
179 if parts.len() < 3 {
180 let trimmed = content.trim();
181 if trimmed.is_empty() {
182 return Err(anyhow!(
183 "File is empty or has no content: {}",
184 path.display()
185 ));
186 }
187 return Err(anyhow!("File missing delimiter '---': {}", path.display()));
188 }
189
190 let yaml_content = parts
191 .get(1)
192 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
193 .to_string();
194 let text_content = parts
195 .get(2)
196 .ok_or_else(|| {
197 anyhow!(
198 "Missing text content after frontmatter in {}",
199 path.display()
200 )
201 })?
202 .trim_start_matches('\n')
203 .trim()
204 .to_string();
205
206 let frontmatter_value: yaml_serde::Value =
207 yaml_serde::from_str(&yaml_content).map_err(|e| {
208 anyhow!(
209 "Failed to parse frontmatter YAML in {}: {}\nContent was:\n{}",
210 path.display(),
211 e,
212 yaml_content
213 )
214 })?;
215
216 let has_stored_minimum_coverage = frontmatter_value.as_mapping().is_some_and(|mapping| {
217 mapping.contains_key(yaml_serde::Value::String("minimum_coverage".to_string()))
218 });
219
220 Ok(ParsedRuleFile {
221 yaml_content,
222 text_content,
223 has_stored_minimum_coverage,
224 })
225}
226
/// Raw YAML frontmatter of a `.RULE` file, as deserialized by serde.
///
/// Every field is optional (`#[serde(default)]`), so a missing key deserializes to
/// `None`. Boolean-like flags go through `deserialize_yes_no_bool`.
/// `parse_rule_source_to_loaded` turns this into a `LoadedRule`.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RuleFrontmatter {
    /// Passed to `LoadedRule::normalize_license_expression` by the loader.
    #[serde(default)]
    license_expression: Option<String>,

    // The six `is_license_*` flags below are combined into a single rule kind by
    // `LoadedRule::derive_rule_kind`; each counts as `false` when absent.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_text: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_notice: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_reference: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_tag: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_intro: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_clue: Option<bool>,

    /// Treated as `false` when absent; also passed to the rule-kind and
    /// license-expression validation helpers.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_false_positive: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_required_phrase: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    skip_for_required_phrase_generation: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8` by the loader.
    #[serde(default)]
    relevance: Option<yaml_serde::Number>,

    /// Kept as a raw YAML number; narrowed to `u8` with `ParseNumber::as_u8` by the loader.
    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_continuous: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    referenced_filenames: Option<Vec<String>>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    language: Option<String>,
}
299
300fn parse_rule_source_to_loaded(
301 identifier: &str,
302 content: &str,
303 source_path: &Path,
304) -> Result<LoadedRule> {
305 let identifier = LoadedRule::derive_identifier(
306 source_path
307 .file_name()
308 .and_then(|s| s.to_str())
309 .unwrap_or(identifier),
310 );
311
312 let parsed = parse_file_content(content, source_path)?;
313
314 if parsed.text_content.is_empty() {
315 return Err(anyhow!(
316 "Rule file has empty text content: {}",
317 source_path.display()
318 ));
319 }
320
321 let fm: RuleFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
322 anyhow!(
323 "Failed to parse rule frontmatter YAML in {}: {}\nContent was:\n{}",
324 source_path.display(),
325 e,
326 parsed.yaml_content
327 )
328 })?;
329
330 let is_false_positive = fm.is_false_positive.unwrap_or(false);
331
332 let rule_kind = LoadedRule::derive_rule_kind(
333 fm.is_license_text.unwrap_or(false),
334 fm.is_license_notice.unwrap_or(false),
335 fm.is_license_reference.unwrap_or(false),
336 fm.is_license_tag.unwrap_or(false),
337 fm.is_license_intro.unwrap_or(false),
338 fm.is_license_clue.unwrap_or(false),
339 )
340 .map_err(|e| {
341 anyhow!(
342 "Rule file has invalid rule-kind flags: {}: {}",
343 source_path.display(),
344 e
345 )
346 })?;
347
348 LoadedRule::validate_rule_kind_flags(rule_kind, is_false_positive).map_err(|e| {
349 anyhow!(
350 "Rule file has invalid flags: {}: {}",
351 source_path.display(),
352 e
353 )
354 })?;
355
356 let license_expression = LoadedRule::normalize_license_expression(
357 fm.license_expression.as_deref(),
358 is_false_positive,
359 )
360 .map_err(|e| {
361 anyhow!(
362 "Rule file has invalid license_expression: {}: {}",
363 source_path.display(),
364 e
365 )
366 })?;
367
368 let relevance = fm.relevance.and_then(|n| n.as_u8());
369
370 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
371
372 Ok(LoadedRule {
373 identifier,
374 license_expression,
375 text: parsed.text_content,
376 rule_kind,
377 is_false_positive,
378 is_required_phrase: fm.is_required_phrase.unwrap_or(false),
379 skip_for_required_phrase_generation: fm
380 .skip_for_required_phrase_generation
381 .unwrap_or(false),
382 relevance,
383 minimum_coverage,
384 has_stored_minimum_coverage: parsed.has_stored_minimum_coverage,
385 is_continuous: fm.is_continuous.unwrap_or(false),
386 referenced_filenames: LoadedRule::normalize_optional_list(
387 fm.referenced_filenames.as_deref(),
388 ),
389 ignorable_urls: LoadedRule::normalize_optional_list(fm.ignorable_urls.as_deref()),
390 ignorable_emails: LoadedRule::normalize_optional_list(fm.ignorable_emails.as_deref()),
391 ignorable_copyrights: LoadedRule::normalize_optional_list(
392 fm.ignorable_copyrights.as_deref(),
393 ),
394 ignorable_holders: LoadedRule::normalize_optional_list(fm.ignorable_holders.as_deref()),
395 ignorable_authors: LoadedRule::normalize_optional_list(fm.ignorable_authors.as_deref()),
396 language: LoadedRule::normalize_optional_string(fm.language.as_deref()),
397 notes: LoadedRule::normalize_optional_string(fm.notes.as_deref()),
398 is_deprecated: fm.is_deprecated.unwrap_or(false),
399 replaced_by: fm.replaced_by.unwrap_or_default(),
400 })
401}
402
403pub fn parse_rule_to_loaded(path: &Path) -> Result<LoadedRule> {
408 let content = fs::read_to_string(path)
409 .with_context(|| format!("Failed to read rule file: {}", path.display()))?;
410 parse_rule_source_to_loaded(
411 path.file_name()
412 .and_then(|s| s.to_str())
413 .unwrap_or("unknown.RULE"),
414 &content,
415 path,
416 )
417}
418
419pub fn parse_rule_str_to_loaded(identifier: &str, content: &str) -> Result<LoadedRule> {
421 let synthetic_path = Path::new(identifier);
422 parse_rule_source_to_loaded(identifier, content, synthetic_path)
423}
424
425fn parse_license_source_to_loaded(
426 filename: &str,
427 content: &str,
428 source_path: &Path,
429) -> Result<LoadedLicense> {
430 let key = LoadedLicense::derive_key(Path::new(filename))?;
431
432 let parsed = parse_license_file_content(content, source_path)?;
433
434 let fm: LicenseFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
435 anyhow!(
436 "Failed to parse license frontmatter YAML in {}: {}\nContent was:\n{}",
437 source_path.display(),
438 e,
439 parsed.yaml_content
440 )
441 })?;
442
443 LoadedLicense::validate_key_match(&key, fm.key.as_deref()).map_err(|e| {
444 anyhow!(
445 "License file has key mismatch: {}: {}",
446 source_path.display(),
447 e
448 )
449 })?;
450
451 let is_deprecated = fm.is_deprecated.unwrap_or(false);
452 let is_unknown = fm.is_unknown.unwrap_or(false);
453 let is_generic = fm.is_generic.unwrap_or(false);
454
455 LoadedLicense::validate_text_content(
456 &parsed.text_content,
457 is_deprecated,
458 is_unknown,
459 is_generic,
460 )
461 .map_err(|e| {
462 anyhow!(
463 "License file has invalid content: {}: {}",
464 source_path.display(),
465 e
466 )
467 })?;
468
469 let name = LoadedLicense::derive_name(fm.name.as_deref(), fm.short_name.as_deref(), &key);
470
471 let reference_urls = LoadedLicense::merge_reference_urls(
472 fm.text_urls.as_deref(),
473 fm.other_urls.as_deref(),
474 fm.osi_url.as_deref(),
475 fm.faq_url.as_deref(),
476 fm.homepage_url.as_deref(),
477 );
478
479 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
480
481 Ok(LoadedLicense {
482 key,
483 short_name: LoadedLicense::normalize_optional_string(fm.short_name.as_deref()),
484 name,
485 language: Some("en".to_string()),
486 spdx_license_key: LoadedLicense::normalize_optional_string(fm.spdx_license_key.as_deref()),
487 other_spdx_license_keys: fm.other_spdx_license_keys.unwrap_or_default(),
488 category: LoadedLicense::normalize_optional_string(fm.category.as_deref()),
489 owner: LoadedLicense::normalize_optional_string(fm.owner.as_deref()),
490 homepage_url: LoadedLicense::normalize_optional_string(fm.homepage_url.as_deref()),
491 text: parsed.text_content,
492 reference_urls,
493 osi_license_key: LoadedLicense::normalize_optional_string(fm.osi_license_key.as_deref()),
494 text_urls: LoadedLicense::normalize_optional_list(fm.text_urls.as_deref())
495 .unwrap_or_default(),
496 osi_url: LoadedLicense::normalize_optional_string(fm.osi_url.as_deref()),
497 faq_url: LoadedLicense::normalize_optional_string(fm.faq_url.as_deref()),
498 other_urls: LoadedLicense::normalize_optional_list(fm.other_urls.as_deref())
499 .unwrap_or_default(),
500 notes: LoadedLicense::normalize_optional_string(fm.notes.as_deref()),
501 is_deprecated,
502 is_exception: fm.is_exception.unwrap_or(false),
503 is_unknown,
504 is_generic,
505 replaced_by: fm.replaced_by.unwrap_or_default(),
506 minimum_coverage,
507 standard_notice: LoadedLicense::normalize_optional_string(fm.standard_notice.as_deref()),
508 ignorable_copyrights: LoadedLicense::normalize_optional_list(
509 fm.ignorable_copyrights.as_deref(),
510 ),
511 ignorable_holders: LoadedLicense::normalize_optional_list(fm.ignorable_holders.as_deref()),
512 ignorable_authors: LoadedLicense::normalize_optional_list(fm.ignorable_authors.as_deref()),
513 ignorable_urls: LoadedLicense::normalize_optional_list(fm.ignorable_urls.as_deref()),
514 ignorable_emails: LoadedLicense::normalize_optional_list(fm.ignorable_emails.as_deref()),
515 })
516}
517
518pub fn parse_license_to_loaded(path: &Path) -> Result<LoadedLicense> {
523 let content = fs::read_to_string(path)
524 .with_context(|| format!("Failed to read license file: {}", path.display()))?;
525 parse_license_source_to_loaded(
526 path.file_name()
527 .and_then(|s| s.to_str())
528 .unwrap_or("unknown.LICENSE"),
529 &content,
530 path,
531 )
532}
533
534pub fn parse_license_str_to_loaded(filename: &str, content: &str) -> Result<LoadedLicense> {
536 let synthetic_path = Path::new(filename);
537 parse_license_source_to_loaded(filename, content, synthetic_path)
538}
539
540fn parse_license_file_content(content: &str, path: &Path) -> Result<ParsedLicenseFile> {
544 if content.len() < 6 {
545 return Err(anyhow!(
546 "License file content too short: {}",
547 path.display()
548 ));
549 }
550
551 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
552
553 if parts.len() < 3 {
554 let trimmed = content.trim();
555 if trimmed.is_empty() {
556 return Err(anyhow!(
557 "License file is empty or has no content: {}",
558 path.display()
559 ));
560 }
561 return Err(anyhow!(
562 "License file missing delimiter '---': {}",
563 path.display()
564 ));
565 }
566
567 let yaml_content = parts
568 .get(1)
569 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
570 .to_string();
571 let text_content = parts
572 .get(2)
573 .ok_or_else(|| {
574 anyhow!(
575 "Missing text content after frontmatter in {}",
576 path.display()
577 )
578 })?
579 .trim_start_matches('\n')
580 .trim()
581 .to_string();
582
583 Ok(ParsedLicenseFile {
584 yaml_content,
585 text_content,
586 })
587}
588
589pub fn load_loaded_rules_from_directory(dir: &Path) -> Result<Vec<LoadedRule>> {
601 let mut rules = Vec::new();
602
603 let entries = fs::read_dir(dir)
604 .with_context(|| format!("Failed to read rules directory: {}", dir.display()))?;
605
606 for entry in entries {
607 let entry = entry
608 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
609 let path = entry.path();
610
611 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("RULE") {
612 match parse_rule_to_loaded(&path) {
613 Ok(rule) => rules.push(rule),
614 Err(e) => {
615 warn!("Failed to parse rule file {}: {}", path.display(), e);
616 }
617 }
618 }
619 }
620
621 Ok(rules)
622}
623
624pub fn load_loaded_licenses_from_directory(dir: &Path) -> Result<Vec<LoadedLicense>> {
636 let mut licenses = Vec::new();
637
638 let entries = fs::read_dir(dir)
639 .with_context(|| format!("Failed to read licenses directory: {}", dir.display()))?;
640
641 for entry in entries {
642 let entry = entry
643 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
644 let path = entry.path();
645
646 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("LICENSE") {
647 match parse_license_to_loaded(&path) {
648 Ok(license) => licenses.push(license),
649 Err(e) => {
650 warn!("Failed to parse license file {}: {}", path.display(), e);
651 }
652 }
653 }
654 }
655
656 Ok(licenses)
657}
658
659#[allow(dead_code)]
671fn validate_rules(rules: &[Rule]) {
672 let mut seen_texts: HashSet<&str> = HashSet::new();
673 let mut duplicate_count = 0;
674
675 for rule in rules {
676 if !seen_texts.insert(&rule.text) {
677 warn!(
678 "Duplicate rule text found for license_expression: {}",
679 rule.license_expression
680 );
681 duplicate_count += 1;
682 }
683
684 if !rule.is_false_positive && rule.license_expression.trim().is_empty() {
685 warn!("Rule has empty license_expression but is not marked as false_positive");
686 }
687 }
688
689 if duplicate_count > 0 {
690 warn!(
691 "Found {} duplicate rule text(s) during rule validation",
692 duplicate_count
693 );
694 }
695}
696
697#[allow(dead_code)]
706pub fn load_rules_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<Rule>> {
707 let loaded = load_loaded_rules_from_directory(dir)?;
708 let rules: Vec<Rule> = loaded
709 .into_iter()
710 .filter(|r| with_deprecated || !r.is_deprecated)
711 .map(loaded_rule_to_rule)
712 .collect();
713 validate_rules(&rules);
714 Ok(rules)
715}
716
717#[allow(dead_code)]
726pub fn load_licenses_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<License>> {
727 let loaded = load_loaded_licenses_from_directory(dir)?;
728 let licenses: Vec<License> = loaded
729 .into_iter()
730 .filter(|l| with_deprecated || !l.is_deprecated)
731 .map(loaded_license_to_license)
732 .collect();
733 Ok(licenses)
734}
735
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::RuleKind;
    use std::collections::HashMap;
    use std::fs;
    use tempfile::tempdir;

    /// License fixture shared by the MIT license parsing tests.
    const MIT_LICENSE_SOURCE: &str = r#"---
key: mit
short_name: MIT License
name: MIT License
category: Permissive
spdx_license_key: MIT
---
MIT License text here"#;

    /// Rule fixture shared by the MIT rule parsing tests.
    const MIT_RULE_SOURCE: &str = r#"---
license_expression: mit
is_license_reference: yes
relevance: 90
referenced_filenames:
  - MIT.txt
---
MIT.txt"#;

    const ACTIVE_LICENSE_SOURCE: &str = r#"---
key: active
name: Active License
---
Active license text"#;

    const DEPRECATED_LICENSE_SOURCE: &str = r#"---
key: deprecated
name: Deprecated License
is_deprecated: yes
---
Deprecated license text"#;

    const ACTIVE_RULE_SOURCE: &str = r#"---
license_expression: active
is_license_notice: yes
---
Active rule text"#;

    const DEPRECATED_RULE_SOURCE: &str = r#"---
license_expression: deprecated
is_license_notice: yes
is_deprecated: yes
---
Deprecated rule text"#;

    /// Parses a rule file and converts it to a `Rule`.
    pub fn parse_rule_file(path: &Path) -> Result<Rule> {
        let loaded = parse_rule_to_loaded(path)?;
        Ok(loaded_rule_to_rule(loaded))
    }

    /// Writes `content` to `file_name` inside `dir` and returns the full path.
    fn write_fixture(dir: &Path, file_name: &str, content: &str) -> std::path::PathBuf {
        let path = dir.join(file_name);
        fs::write(&path, content).unwrap();
        path
    }

    /// Builds a `RuleKind::Text` rule with the given identity and neutral values
    /// for every other field; tests override single fields with struct-update syntax.
    fn text_rule(identifier: &str, license_expression: &str, text: &str) -> Rule {
        Rule {
            identifier: identifier.to_string(),
            license_expression: license_expression.to_string(),
            text: text.to_string(),
            tokens: vec![],
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            is_from_license: false,
            relevance: 100,
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            required_phrase_spans: vec![],
            stopwords_by_pos: HashMap::new(),
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            length_unique: 0,
            high_length_unique: 0,
            high_length: 0,
            min_matched_length: 0,
            min_high_matched_length: 0,
            min_matched_length_unique: 0,
            min_high_matched_length_unique: 0,
            is_small: false,
            is_tiny: false,
            starts_with_license: false,
            ends_with_license: false,
            is_deprecated: false,
            spdx_license_key: None,
            other_spdx_license_keys: vec![],
        }
    }

    #[test]
    fn test_parse_number_as_u8() {
        let num_int: yaml_serde::Number = yaml_serde::from_str("100").unwrap();
        assert_eq!(num_int.as_u8(), Some(100));

        let num_out_of_range: yaml_serde::Number = yaml_serde::from_str("500").unwrap();
        assert_eq!(num_out_of_range.as_u8(), None);

        let num_float: yaml_serde::Number = yaml_serde::from_str("90.5").unwrap();
        assert_eq!(num_float.as_u8(), Some(90));
    }

    #[test]
    fn test_parse_simple_license_file() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_SOURCE);

        let license = parse_license_to_loaded(&license_path)
            .map(loaded_license_to_license)
            .unwrap();
        assert_eq!(license.key, "mit");
        assert_eq!(license.name, "MIT License");
        assert!(license.text.contains("MIT License text"));
    }

    #[test]
    fn test_parse_simple_rule_file() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_RULE_SOURCE);

        let rule = parse_rule_file(&rule_path).unwrap();
        assert_eq!(rule.license_expression, "mit");
        assert_eq!(rule.text, "MIT.txt");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 90);
    }

    #[test]
    fn test_deserialize_yes_no_bool() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(
            dir.path(),
            "test.RULE",
            r#"---
license_expression: mit
is_license_notice: yes
is_license_tag: no
---
MIT License"#,
        );

        let rule = parse_rule_file(&rule_path).unwrap();
        assert!(rule.is_license_notice());
        assert!(!rule.is_license_tag());
    }

    #[test]
    fn test_load_licenses_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test.LICENSE",
            r#"---
key: test
name: Test License
spdx_license_key: TEST
category: Permissive
---
Test license text here"#,
        );

        let licenses = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses.len(), 1);

        let license = &licenses[0];
        assert_eq!(license.key, "test");
        assert_eq!(license.name, "Test License");
        assert_eq!(license.spdx_license_key, Some("TEST".to_string()));
        assert!(!license.text.is_empty());
    }

    #[test]
    fn test_load_rules_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test_1.RULE",
            r#"---
license_expression: test
is_license_reference: yes
relevance: 85
referenced_filenames:
  - TEST.txt
---
TEST.txt"#,
        );

        let rules = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules.len(), 1);

        let rule = &rules[0];
        assert_eq!(rule.license_expression, "test");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 85);
    }

    // The `validate_rules` tests below only check that validation completes without
    // panicking; the function reports problems through log warnings.

    #[test]
    fn test_validate_rules_detects_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "MIT License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_accepts_false_positive_without_expression() {
        let rules = vec![Rule {
            rule_kind: RuleKind::None,
            is_false_positive: true,
            notes: Some("False positive for common pattern".to_string()),
            ..text_rule("fp.RULE", "", "Some text")
        }];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_no_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "Apache License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_load_licenses_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE_SOURCE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE_SOURCE);

        let licenses_without = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses_without.len(), 1);
        assert_eq!(licenses_without[0].key, "active");

        let licenses_with = load_licenses_from_directory(dir.path(), true).unwrap();
        assert_eq!(licenses_with.len(), 2);
    }

    #[test]
    fn test_load_rules_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE_SOURCE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE_SOURCE);

        let rules_without = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules_without.len(), 1);
        assert_eq!(rules_without[0].license_expression, "active");

        let rules_with = load_rules_from_directory(dir.path(), true).unwrap();
        assert_eq!(rules_with.len(), 2);
    }

    #[test]
    fn test_parse_rule_to_loaded() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_RULE_SOURCE);

        let loaded = parse_rule_to_loaded(&rule_path).unwrap();
        assert_eq!(loaded.identifier, "mit_1.RULE");
        assert_eq!(loaded.license_expression, "mit");
        assert_eq!(loaded.text, "MIT.txt");
        assert_eq!(loaded.rule_kind, RuleKind::Reference);
        assert_eq!(loaded.relevance, Some(90));
        assert_eq!(
            loaded.referenced_filenames,
            Some(vec!["MIT.txt".to_string()])
        );
        assert!(!loaded.is_deprecated);
    }

    #[test]
    fn test_parse_license_to_loaded() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_SOURCE);

        let loaded = parse_license_to_loaded(&license_path).unwrap();
        assert_eq!(loaded.key, "mit");
        assert_eq!(loaded.name, "MIT License");
        assert!(loaded.text.contains("MIT License text"));
        assert_eq!(loaded.spdx_license_key, Some("MIT".to_string()));
    }

    #[test]
    fn test_load_loaded_rules_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.RULE", ACTIVE_RULE_SOURCE);
        write_fixture(dir.path(), "deprecated.RULE", DEPRECATED_RULE_SOURCE);

        let loaded_rules = load_loaded_rules_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_rules.len(), 2);

        let active = loaded_rules
            .iter()
            .find(|r| r.license_expression == "active")
            .unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_rules
            .iter()
            .find(|r| r.license_expression == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }

    #[test]
    fn test_load_loaded_licenses_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_fixture(dir.path(), "active.LICENSE", ACTIVE_LICENSE_SOURCE);
        write_fixture(dir.path(), "deprecated.LICENSE", DEPRECATED_LICENSE_SOURCE);

        let loaded_licenses = load_loaded_licenses_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_licenses.len(), 2);

        let active = loaded_licenses.iter().find(|l| l.key == "active").unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_licenses
            .iter()
            .find(|l| l.key == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }
}
1295}