1use crate::license_detection::index::{loaded_license_to_license, loaded_rule_to_rule};
15use crate::license_detection::models::{License, LoadedLicense, LoadedRule, Rule};
16use anyhow::{Context, Result, anyhow};
17use log::warn;
18use regex::Regex;
19use serde::{Deserialize, Deserializer, Serialize};
20use std::collections::HashSet;
21use std::fs;
22use std::path::Path;
23use std::sync::LazyLock;
24
/// Frontmatter delimiter: a line made of three or more dashes, optionally
/// followed by whitespace. Multi-line mode makes `^`/`$` match at line edges.
/// The regex is compiled on first use; the pattern is a literal, so a
/// compilation failure would be a programming error (hence `expect`).
static FM_BOUNDARY: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(?m)^-{3,}\s*$").expect("Invalid frontmatter regex"));
27
28fn deserialize_yes_no_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
29where
30 D: Deserializer<'de>,
31{
32 #[derive(Deserialize, Serialize)]
33 #[serde(untagged)]
34 enum YesNoOrBool {
35 String(String),
36 Bool(bool),
37 }
38
39 match YesNoOrBool::deserialize(deserializer)? {
40 YesNoOrBool::Bool(b) => Ok(Some(b)),
41 YesNoOrBool::String(s) => {
42 let lower = s.to_lowercase();
43 if lower == "yes" || lower == "true" || lower == "1" {
44 Ok(Some(true))
45 } else if lower == "no" || lower == "false" || lower == "0" {
46 Ok(Some(false))
47 } else {
48 Ok(None)
49 }
50 }
51 }
52}
53
/// Extension trait for narrowing a parsed number to a `u8`.
trait ParseNumber {
    /// Returns the value as a `u8`, or `None` when it cannot be represented.
    fn as_u8(&self) -> Option<u8>;
}
57
58impl ParseNumber for yaml_serde::Number {
59 fn as_u8(&self) -> Option<u8> {
60 self.as_i64()
61 .and_then(|n| u8::try_from(n).ok())
62 .or_else(|| {
63 self.as_f64().and_then(|f| {
64 if f >= 0.0 && f <= f64::from(u8::MAX) {
65 #[allow(clippy::cast_sign_loss)]
67 Some(f as u8)
68 } else {
69 None
70 }
71 })
72 })
73 }
74}
75
/// YAML frontmatter of a license file, as deserialized by serde.
///
/// Every key is optional: a missing key deserializes to `None`. The `is_*`
/// flags are read through `deserialize_yes_no_bool`, so they accept both
/// native booleans and yes/no style strings.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct LicenseFrontmatter {
    /// License key declared in the file; checked against the key derived from
    /// the file name when the license is loaded.
    #[serde(default)]
    key: Option<String>,

    #[serde(default)]
    short_name: Option<String>,

    #[serde(default)]
    name: Option<String>,

    #[serde(default)]
    category: Option<String>,

    #[serde(default)]
    owner: Option<String>,

    #[serde(default)]
    homepage_url: Option<String>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    spdx_license_key: Option<String>,

    #[serde(default)]
    other_spdx_license_keys: Option<Vec<String>>,

    #[serde(default)]
    osi_license_key: Option<String>,

    #[serde(default)]
    text_urls: Option<Vec<String>>,

    #[serde(default)]
    osi_url: Option<String>,

    #[serde(default)]
    faq_url: Option<String>,

    #[serde(default)]
    other_urls: Option<Vec<String>>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_exception: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_unknown: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_generic: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` via `ParseNumber::as_u8`
    /// when the license is loaded.
    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default)]
    standard_notice: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,
}
157
/// A rule file split into its two sections by `parse_file_content`.
struct ParsedRuleFile {
    /// Raw YAML text found between the first two `---` delimiters.
    yaml_content: String,
    /// Everything after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
    /// Whether the frontmatter mapping contains a `minimum_coverage` key.
    has_stored_minimum_coverage: bool,
}
164
/// A license file split into its two sections by `parse_license_file_content`.
struct ParsedLicenseFile {
    /// Raw YAML text found between the first two `---` delimiters.
    yaml_content: String,
    /// Everything after the second delimiter, with surrounding whitespace trimmed.
    text_content: String,
}
170
171fn parse_file_content(content: &str, path: &Path) -> Result<ParsedRuleFile> {
176 if content.len() < 6 {
177 return Err(anyhow!("File content too short: {}", path.display()));
178 }
179
180 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
181
182 if parts.len() < 3 {
183 let trimmed = content.trim();
184 if trimmed.is_empty() {
185 return Err(anyhow!(
186 "File is empty or has no content: {}",
187 path.display()
188 ));
189 }
190 return Err(anyhow!("File missing delimiter '---': {}", path.display()));
191 }
192
193 let yaml_content = parts
194 .get(1)
195 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
196 .to_string();
197 let text_content = parts
198 .get(2)
199 .ok_or_else(|| {
200 anyhow!(
201 "Missing text content after frontmatter in {}",
202 path.display()
203 )
204 })?
205 .trim_start_matches('\n')
206 .trim()
207 .to_string();
208
209 let frontmatter_value: yaml_serde::Value =
210 yaml_serde::from_str(&yaml_content).map_err(|e| {
211 anyhow!(
212 "Failed to parse frontmatter YAML in {}: {}\nContent was:\n{}",
213 path.display(),
214 e,
215 yaml_content
216 )
217 })?;
218
219 let has_stored_minimum_coverage = frontmatter_value.as_mapping().is_some_and(|mapping| {
220 mapping.contains_key(yaml_serde::Value::String("minimum_coverage".to_string()))
221 });
222
223 Ok(ParsedRuleFile {
224 yaml_content,
225 text_content,
226 has_stored_minimum_coverage,
227 })
228}
229
/// YAML frontmatter of a rule file, as deserialized by serde.
///
/// Every key is optional: a missing key deserializes to `None`. The boolean
/// flags are read through `deserialize_yes_no_bool`, so they accept both
/// native booleans and yes/no style strings.
#[derive(Debug, Deserialize)]
#[allow(dead_code)]
struct RuleFrontmatter {
    #[serde(default)]
    license_expression: Option<String>,

    // The six `is_license_*` flags below are combined into a single rule kind
    // by `LoadedRule::derive_rule_kind` when the rule is loaded.
    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_text: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_notice: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_reference: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_tag: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_intro: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_license_clue: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_false_positive: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_required_phrase: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    skip_for_required_phrase_generation: Option<bool>,

    /// Kept as a raw YAML number; narrowed to `u8` via `ParseNumber::as_u8`
    /// when the rule is loaded.
    #[serde(default)]
    relevance: Option<yaml_serde::Number>,

    /// Kept as a raw YAML number; narrowed to `u8` via `ParseNumber::as_u8`
    /// when the rule is loaded.
    #[serde(default)]
    minimum_coverage: Option<yaml_serde::Number>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_continuous: Option<bool>,

    #[serde(default, deserialize_with = "deserialize_yes_no_bool")]
    is_deprecated: Option<bool>,

    #[serde(default)]
    referenced_filenames: Option<Vec<String>>,

    #[serde(default)]
    replaced_by: Option<Vec<String>>,

    #[serde(default)]
    ignorable_urls: Option<Vec<String>>,

    #[serde(default)]
    ignorable_emails: Option<Vec<String>>,

    #[serde(default)]
    notes: Option<String>,

    #[serde(default)]
    ignorable_copyrights: Option<Vec<String>>,

    #[serde(default)]
    ignorable_holders: Option<Vec<String>>,

    #[serde(default)]
    ignorable_authors: Option<Vec<String>>,

    #[serde(default)]
    language: Option<String>,
}
302
303fn parse_rule_source_to_loaded(
304 identifier: &str,
305 content: &str,
306 source_path: &Path,
307) -> Result<LoadedRule> {
308 let identifier = LoadedRule::derive_identifier(
309 source_path
310 .file_name()
311 .and_then(|s| s.to_str())
312 .unwrap_or(identifier),
313 );
314
315 let parsed = parse_file_content(content, source_path)?;
316
317 if parsed.text_content.is_empty() {
318 return Err(anyhow!(
319 "Rule file has empty text content: {}",
320 source_path.display()
321 ));
322 }
323
324 let fm: RuleFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
325 anyhow!(
326 "Failed to parse rule frontmatter YAML in {}: {}\nContent was:\n{}",
327 source_path.display(),
328 e,
329 parsed.yaml_content
330 )
331 })?;
332
333 let is_false_positive = fm.is_false_positive.unwrap_or(false);
334
335 let rule_kind = LoadedRule::derive_rule_kind(
336 fm.is_license_text.unwrap_or(false),
337 fm.is_license_notice.unwrap_or(false),
338 fm.is_license_reference.unwrap_or(false),
339 fm.is_license_tag.unwrap_or(false),
340 fm.is_license_intro.unwrap_or(false),
341 fm.is_license_clue.unwrap_or(false),
342 )
343 .map_err(|e| {
344 anyhow!(
345 "Rule file has invalid rule-kind flags: {}: {}",
346 source_path.display(),
347 e
348 )
349 })?;
350
351 LoadedRule::validate_rule_kind_flags(rule_kind, is_false_positive).map_err(|e| {
352 anyhow!(
353 "Rule file has invalid flags: {}: {}",
354 source_path.display(),
355 e
356 )
357 })?;
358
359 let license_expression = LoadedRule::normalize_license_expression(
360 fm.license_expression.as_deref(),
361 is_false_positive,
362 )
363 .map_err(|e| {
364 anyhow!(
365 "Rule file has invalid license_expression: {}: {}",
366 source_path.display(),
367 e
368 )
369 })?;
370
371 let relevance = fm.relevance.and_then(|n| n.as_u8());
372
373 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
374
375 Ok(LoadedRule {
376 identifier,
377 license_expression,
378 text: parsed.text_content,
379 rule_kind,
380 is_false_positive,
381 is_required_phrase: fm.is_required_phrase.unwrap_or(false),
382 skip_for_required_phrase_generation: fm
383 .skip_for_required_phrase_generation
384 .unwrap_or(false),
385 relevance,
386 minimum_coverage,
387 has_stored_minimum_coverage: parsed.has_stored_minimum_coverage,
388 is_continuous: fm.is_continuous.unwrap_or(false),
389 referenced_filenames: LoadedRule::normalize_optional_list(
390 fm.referenced_filenames.as_deref(),
391 ),
392 ignorable_urls: LoadedRule::normalize_optional_list(fm.ignorable_urls.as_deref()),
393 ignorable_emails: LoadedRule::normalize_optional_list(fm.ignorable_emails.as_deref()),
394 ignorable_copyrights: LoadedRule::normalize_optional_list(
395 fm.ignorable_copyrights.as_deref(),
396 ),
397 ignorable_holders: LoadedRule::normalize_optional_list(fm.ignorable_holders.as_deref()),
398 ignorable_authors: LoadedRule::normalize_optional_list(fm.ignorable_authors.as_deref()),
399 language: LoadedRule::normalize_optional_string(fm.language.as_deref()),
400 notes: LoadedRule::normalize_optional_string(fm.notes.as_deref()),
401 is_deprecated: fm.is_deprecated.unwrap_or(false),
402 replaced_by: fm.replaced_by.unwrap_or_default(),
403 })
404}
405
406pub fn parse_rule_to_loaded(path: &Path) -> Result<LoadedRule> {
411 let content = fs::read_to_string(path)
412 .with_context(|| format!("Failed to read rule file: {}", path.display()))?;
413 parse_rule_source_to_loaded(
414 path.file_name()
415 .and_then(|s| s.to_str())
416 .unwrap_or("unknown.RULE"),
417 &content,
418 path,
419 )
420}
421
422pub fn parse_rule_str_to_loaded(identifier: &str, content: &str) -> Result<LoadedRule> {
424 let synthetic_path = Path::new(identifier);
425 parse_rule_source_to_loaded(identifier, content, synthetic_path)
426}
427
428fn parse_license_source_to_loaded(
429 filename: &str,
430 content: &str,
431 source_path: &Path,
432) -> Result<LoadedLicense> {
433 let key = LoadedLicense::derive_key(Path::new(filename))?;
434
435 let parsed = parse_license_file_content(content, source_path)?;
436
437 let fm: LicenseFrontmatter = yaml_serde::from_str(&parsed.yaml_content).map_err(|e| {
438 anyhow!(
439 "Failed to parse license frontmatter YAML in {}: {}\nContent was:\n{}",
440 source_path.display(),
441 e,
442 parsed.yaml_content
443 )
444 })?;
445
446 LoadedLicense::validate_key_match(&key, fm.key.as_deref()).map_err(|e| {
447 anyhow!(
448 "License file has key mismatch: {}: {}",
449 source_path.display(),
450 e
451 )
452 })?;
453
454 let is_deprecated = fm.is_deprecated.unwrap_or(false);
455 let is_unknown = fm.is_unknown.unwrap_or(false);
456 let is_generic = fm.is_generic.unwrap_or(false);
457
458 LoadedLicense::validate_text_content(
459 &parsed.text_content,
460 is_deprecated,
461 is_unknown,
462 is_generic,
463 )
464 .map_err(|e| {
465 anyhow!(
466 "License file has invalid content: {}: {}",
467 source_path.display(),
468 e
469 )
470 })?;
471
472 let name = LoadedLicense::derive_name(fm.name.as_deref(), fm.short_name.as_deref(), &key);
473
474 let reference_urls = LoadedLicense::merge_reference_urls(
475 fm.text_urls.as_deref(),
476 fm.other_urls.as_deref(),
477 fm.osi_url.as_deref(),
478 fm.faq_url.as_deref(),
479 fm.homepage_url.as_deref(),
480 );
481
482 let minimum_coverage = fm.minimum_coverage.and_then(|n| n.as_u8());
483
484 Ok(LoadedLicense {
485 key,
486 short_name: LoadedLicense::normalize_optional_string(fm.short_name.as_deref()),
487 name,
488 language: Some("en".to_string()),
489 spdx_license_key: LoadedLicense::normalize_optional_string(fm.spdx_license_key.as_deref()),
490 other_spdx_license_keys: fm.other_spdx_license_keys.unwrap_or_default(),
491 category: LoadedLicense::normalize_optional_string(fm.category.as_deref()),
492 owner: LoadedLicense::normalize_optional_string(fm.owner.as_deref()),
493 homepage_url: LoadedLicense::normalize_optional_string(fm.homepage_url.as_deref()),
494 text: parsed.text_content,
495 reference_urls,
496 osi_license_key: LoadedLicense::normalize_optional_string(fm.osi_license_key.as_deref()),
497 text_urls: LoadedLicense::normalize_optional_list(fm.text_urls.as_deref())
498 .unwrap_or_default(),
499 osi_url: LoadedLicense::normalize_optional_string(fm.osi_url.as_deref()),
500 faq_url: LoadedLicense::normalize_optional_string(fm.faq_url.as_deref()),
501 other_urls: LoadedLicense::normalize_optional_list(fm.other_urls.as_deref())
502 .unwrap_or_default(),
503 notes: LoadedLicense::normalize_optional_string(fm.notes.as_deref()),
504 is_deprecated,
505 is_exception: fm.is_exception.unwrap_or(false),
506 is_unknown,
507 is_generic,
508 replaced_by: fm.replaced_by.unwrap_or_default(),
509 minimum_coverage,
510 standard_notice: LoadedLicense::normalize_optional_string(fm.standard_notice.as_deref()),
511 ignorable_copyrights: LoadedLicense::normalize_optional_list(
512 fm.ignorable_copyrights.as_deref(),
513 ),
514 ignorable_holders: LoadedLicense::normalize_optional_list(fm.ignorable_holders.as_deref()),
515 ignorable_authors: LoadedLicense::normalize_optional_list(fm.ignorable_authors.as_deref()),
516 ignorable_urls: LoadedLicense::normalize_optional_list(fm.ignorable_urls.as_deref()),
517 ignorable_emails: LoadedLicense::normalize_optional_list(fm.ignorable_emails.as_deref()),
518 })
519}
520
521pub fn parse_license_to_loaded(path: &Path) -> Result<LoadedLicense> {
526 let content = fs::read_to_string(path)
527 .with_context(|| format!("Failed to read license file: {}", path.display()))?;
528 parse_license_source_to_loaded(
529 path.file_name()
530 .and_then(|s| s.to_str())
531 .unwrap_or("unknown.LICENSE"),
532 &content,
533 path,
534 )
535}
536
537pub fn parse_license_str_to_loaded(filename: &str, content: &str) -> Result<LoadedLicense> {
539 let synthetic_path = Path::new(filename);
540 parse_license_source_to_loaded(filename, content, synthetic_path)
541}
542
543fn parse_license_file_content(content: &str, path: &Path) -> Result<ParsedLicenseFile> {
547 if content.len() < 6 {
548 return Err(anyhow!(
549 "License file content too short: {}",
550 path.display()
551 ));
552 }
553
554 let parts: Vec<&str> = FM_BOUNDARY.splitn(content, 3).collect();
555
556 if parts.len() < 3 {
557 let trimmed = content.trim();
558 if trimmed.is_empty() {
559 return Err(anyhow!(
560 "License file is empty or has no content: {}",
561 path.display()
562 ));
563 }
564 return Err(anyhow!(
565 "License file missing delimiter '---': {}",
566 path.display()
567 ));
568 }
569
570 let yaml_content = parts
571 .get(1)
572 .ok_or_else(|| anyhow!("Missing YAML frontmatter in {}", path.display()))?
573 .to_string();
574 let text_content = parts
575 .get(2)
576 .ok_or_else(|| {
577 anyhow!(
578 "Missing text content after frontmatter in {}",
579 path.display()
580 )
581 })?
582 .trim_start_matches('\n')
583 .trim()
584 .to_string();
585
586 Ok(ParsedLicenseFile {
587 yaml_content,
588 text_content,
589 })
590}
591
592pub fn load_loaded_rules_from_directory(dir: &Path) -> Result<Vec<LoadedRule>> {
604 let mut rules = Vec::new();
605
606 let entries = fs::read_dir(dir)
607 .with_context(|| format!("Failed to read rules directory: {}", dir.display()))?;
608
609 for entry in entries {
610 let entry = entry
611 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
612 let path = entry.path();
613
614 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("RULE") {
615 match parse_rule_to_loaded(&path) {
616 Ok(rule) => rules.push(rule),
617 Err(e) => {
618 warn!("Failed to parse rule file {}: {}", path.display(), e);
619 }
620 }
621 }
622 }
623
624 Ok(rules)
625}
626
627pub fn load_loaded_licenses_from_directory(dir: &Path) -> Result<Vec<LoadedLicense>> {
639 let mut licenses = Vec::new();
640
641 let entries = fs::read_dir(dir)
642 .with_context(|| format!("Failed to read licenses directory: {}", dir.display()))?;
643
644 for entry in entries {
645 let entry = entry
646 .with_context(|| format!("Failed to read directory entry in: {}", dir.display()))?;
647 let path = entry.path();
648
649 if path.is_file() && path.extension().and_then(|s| s.to_str()) == Some("LICENSE") {
650 match parse_license_to_loaded(&path) {
651 Ok(license) => licenses.push(license),
652 Err(e) => {
653 warn!("Failed to parse license file {}: {}", path.display(), e);
654 }
655 }
656 }
657 }
658
659 Ok(licenses)
660}
661
662#[allow(dead_code)]
674fn validate_rules(rules: &[Rule]) {
675 let mut seen_texts: HashSet<&str> = HashSet::new();
676 let mut duplicate_count = 0;
677
678 for rule in rules {
679 if !seen_texts.insert(&rule.text) {
680 warn!(
681 "Duplicate rule text found for license_expression: {}",
682 rule.license_expression
683 );
684 duplicate_count += 1;
685 }
686
687 if !rule.is_false_positive && rule.license_expression.trim().is_empty() {
688 warn!("Rule has empty license_expression but is not marked as false_positive");
689 }
690 }
691
692 if duplicate_count > 0 {
693 warn!(
694 "Found {} duplicate rule text(s) during rule validation",
695 duplicate_count
696 );
697 }
698}
699
700#[allow(dead_code)]
709pub fn load_rules_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<Rule>> {
710 let loaded = load_loaded_rules_from_directory(dir)?;
711 let rules: Vec<Rule> = loaded
712 .into_iter()
713 .filter(|r| with_deprecated || !r.is_deprecated)
714 .map(loaded_rule_to_rule)
715 .collect();
716 validate_rules(&rules);
717 Ok(rules)
718}
719
720#[allow(dead_code)]
729pub fn load_licenses_from_directory(dir: &Path, with_deprecated: bool) -> Result<Vec<License>> {
730 let loaded = load_loaded_licenses_from_directory(dir)?;
731 let licenses: Vec<License> = loaded
732 .into_iter()
733 .filter(|l| with_deprecated || !l.is_deprecated)
734 .map(loaded_license_to_license)
735 .collect();
736 Ok(licenses)
737}
738
/// Unit tests for the rule/license loaders.
///
/// Repeated YAML sources live in constants and repeated `Rule` literals are
/// built by `text_rule`, so each test shows only what is specific to it.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::license_detection::models::RuleKind;
    use std::collections::HashMap;
    use std::fs;
    use std::path::PathBuf;
    use tempfile::tempdir;

    const MIT_LICENSE_SOURCE: &str = r#"---
key: mit
short_name: MIT License
name: MIT License
category: Permissive
spdx_license_key: MIT
---
MIT License text here"#;

    const MIT_REFERENCE_RULE_SOURCE: &str = r#"---
license_expression: mit
is_license_reference: yes
relevance: 90
referenced_filenames:
  - MIT.txt
---
MIT.txt"#;

    const ACTIVE_LICENSE_SOURCE: &str = r#"---
key: active
name: Active License
---
Active license text"#;

    const DEPRECATED_LICENSE_SOURCE: &str = r#"---
key: deprecated
name: Deprecated License
is_deprecated: yes
---
Deprecated license text"#;

    const ACTIVE_RULE_SOURCE: &str = r#"---
license_expression: active
is_license_notice: yes
---
Active rule text"#;

    const DEPRECATED_RULE_SOURCE: &str = r#"---
license_expression: deprecated
is_license_notice: yes
is_deprecated: yes
---
Deprecated rule text"#;

    /// Parses the rule file at `path` and converts it to a `Rule`.
    pub fn parse_rule_file(path: &Path) -> Result<Rule> {
        let loaded = parse_rule_to_loaded(path)?;
        Ok(loaded_rule_to_rule(loaded))
    }

    /// Writes `content` to `name` inside `dir` and returns the file's path.
    fn write_fixture(dir: &Path, name: &str, content: &str) -> PathBuf {
        let path = dir.join(name);
        fs::write(&path, content).unwrap();
        path
    }

    /// Writes one active and one deprecated license into `dir`.
    fn write_license_pair(dir: &Path) {
        write_fixture(dir, "active.LICENSE", ACTIVE_LICENSE_SOURCE);
        write_fixture(dir, "deprecated.LICENSE", DEPRECATED_LICENSE_SOURCE);
    }

    /// Writes one active and one deprecated rule into `dir`.
    fn write_rule_pair(dir: &Path) {
        write_fixture(dir, "active.RULE", ACTIVE_RULE_SOURCE);
        write_fixture(dir, "deprecated.RULE", DEPRECATED_RULE_SOURCE);
    }

    /// Builds a plain license-text rule; tests override fields with `..`.
    fn text_rule(identifier: &str, license_expression: &str, text: &str) -> Rule {
        Rule {
            identifier: identifier.to_string(),
            license_expression: license_expression.to_string(),
            text: text.to_string(),
            tokens: vec![],
            rule_kind: RuleKind::Text,
            is_false_positive: false,
            is_required_phrase: false,
            is_from_license: false,
            relevance: 100,
            minimum_coverage: None,
            has_stored_minimum_coverage: false,
            is_continuous: false,
            required_phrase_spans: vec![],
            stopwords_by_pos: HashMap::new(),
            referenced_filenames: None,
            ignorable_urls: None,
            ignorable_emails: None,
            ignorable_copyrights: None,
            ignorable_holders: None,
            ignorable_authors: None,
            language: None,
            notes: None,
            length_unique: 0,
            high_length_unique: 0,
            high_length: 0,
            min_matched_length: 0,
            min_high_matched_length: 0,
            min_matched_length_unique: 0,
            min_high_matched_length_unique: 0,
            is_small: false,
            is_tiny: false,
            starts_with_license: false,
            ends_with_license: false,
            is_deprecated: false,
            spdx_license_key: None,
            other_spdx_license_keys: vec![],
        }
    }

    #[test]
    fn test_parse_number_as_u8() {
        let num_int: yaml_serde::Number = yaml_serde::from_str("100").unwrap();
        assert_eq!(num_int.as_u8(), Some(100));

        let num_max: yaml_serde::Number = yaml_serde::from_str("255").unwrap();
        assert_eq!(num_max.as_u8(), Some(255));

        let num_out_of_range: yaml_serde::Number = yaml_serde::from_str("500").unwrap();
        assert_eq!(num_out_of_range.as_u8(), None);

        let num_negative: yaml_serde::Number = yaml_serde::from_str("-1").unwrap();
        assert_eq!(num_negative.as_u8(), None);

        let num_float: yaml_serde::Number = yaml_serde::from_str("90.5").unwrap();
        assert_eq!(num_float.as_u8(), Some(90));
    }

    #[test]
    fn test_parse_simple_license_file() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_SOURCE);

        let license = parse_license_to_loaded(&license_path)
            .map(loaded_license_to_license)
            .unwrap();
        assert_eq!(license.key, "mit");
        assert_eq!(license.name, "MIT License");
        assert!(license.text.contains("MIT License text"));
    }

    #[test]
    fn test_parse_simple_rule_file() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_REFERENCE_RULE_SOURCE);

        let rule = parse_rule_file(&rule_path).unwrap();
        assert_eq!(rule.license_expression, "mit");
        assert_eq!(rule.text, "MIT.txt");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 90);
    }

    #[test]
    fn test_deserialize_yes_no_bool() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(
            dir.path(),
            "test.RULE",
            r#"---
license_expression: mit
is_license_notice: yes
is_license_tag: no
---
MIT License"#,
        );

        let rule = parse_rule_file(&rule_path).unwrap();
        assert!(rule.is_license_notice());
        assert!(!rule.is_license_tag());
    }

    #[test]
    fn test_load_licenses_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test.LICENSE",
            r#"---
key: test
name: Test License
spdx_license_key: TEST
category: Permissive
---
Test license text here"#,
        );

        let licenses = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses.len(), 1);

        let license = &licenses[0];
        assert_eq!(license.key, "test");
        assert_eq!(license.name, "Test License");
        assert_eq!(license.spdx_license_key, Some("TEST".to_string()));
        assert!(!license.text.is_empty());
    }

    #[test]
    fn test_load_rules_from_directory() {
        let dir = tempdir().unwrap();
        write_fixture(
            dir.path(),
            "test_1.RULE",
            r#"---
license_expression: test
is_license_reference: yes
relevance: 85
referenced_filenames:
  - TEST.txt
---
TEST.txt"#,
        );

        let rules = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules.len(), 1);

        let rule = &rules[0];
        assert_eq!(rule.license_expression, "test");
        assert!(rule.is_license_reference());
        assert_eq!(rule.relevance, 85);
    }

    // The `validate_rules` tests below only check that validation runs
    // without panicking; its sole output is log warnings.

    #[test]
    fn test_validate_rules_detects_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "MIT License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_accepts_false_positive_without_expression() {
        let rules = vec![Rule {
            rule_kind: RuleKind::None,
            is_false_positive: true,
            notes: Some("False positive for common pattern".to_string()),
            ..text_rule("fp.RULE", "", "Some text")
        }];

        validate_rules(&rules);
    }

    #[test]
    fn test_validate_rules_no_duplicates() {
        let rules = vec![
            text_rule("mit.LICENSE", "mit", "MIT License"),
            text_rule("apache-2.0.LICENSE", "apache-2.0", "Apache License"),
        ];

        validate_rules(&rules);
    }

    #[test]
    fn test_load_licenses_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_license_pair(dir.path());

        let licenses_without = load_licenses_from_directory(dir.path(), false).unwrap();
        assert_eq!(licenses_without.len(), 1);
        assert_eq!(licenses_without[0].key, "active");

        let licenses_with = load_licenses_from_directory(dir.path(), true).unwrap();
        assert_eq!(licenses_with.len(), 2);
    }

    #[test]
    fn test_load_rules_filters_deprecated_by_default() {
        let dir = tempdir().unwrap();
        write_rule_pair(dir.path());

        let rules_without = load_rules_from_directory(dir.path(), false).unwrap();
        assert_eq!(rules_without.len(), 1);
        assert_eq!(rules_without[0].license_expression, "active");

        let rules_with = load_rules_from_directory(dir.path(), true).unwrap();
        assert_eq!(rules_with.len(), 2);
    }

    #[test]
    fn test_parse_rule_to_loaded() {
        let dir = tempdir().unwrap();
        let rule_path = write_fixture(dir.path(), "mit_1.RULE", MIT_REFERENCE_RULE_SOURCE);

        let loaded = parse_rule_to_loaded(&rule_path).unwrap();
        assert_eq!(loaded.identifier, "mit_1.RULE");
        assert_eq!(loaded.license_expression, "mit");
        assert_eq!(loaded.text, "MIT.txt");
        assert_eq!(loaded.rule_kind, RuleKind::Reference);
        assert_eq!(loaded.relevance, Some(90));
        assert_eq!(
            loaded.referenced_filenames,
            Some(vec!["MIT.txt".to_string()])
        );
        assert!(!loaded.is_deprecated);
    }

    #[test]
    fn test_parse_license_to_loaded() {
        let dir = tempdir().unwrap();
        let license_path = write_fixture(dir.path(), "mit.LICENSE", MIT_LICENSE_SOURCE);

        let loaded = parse_license_to_loaded(&license_path).unwrap();
        assert_eq!(loaded.key, "mit");
        assert_eq!(loaded.name, "MIT License");
        assert!(loaded.text.contains("MIT License text"));
        assert_eq!(loaded.spdx_license_key, Some("MIT".to_string()));
    }

    #[test]
    fn test_load_loaded_rules_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_rule_pair(dir.path());

        let loaded_rules = load_loaded_rules_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_rules.len(), 2);

        let active = loaded_rules
            .iter()
            .find(|r| r.license_expression == "active")
            .unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_rules
            .iter()
            .find(|r| r.license_expression == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }

    #[test]
    fn test_load_loaded_licenses_from_directory_includes_deprecated() {
        let dir = tempdir().unwrap();
        write_license_pair(dir.path());

        let loaded_licenses = load_loaded_licenses_from_directory(dir.path()).unwrap();
        assert_eq!(loaded_licenses.len(), 2);

        let active = loaded_licenses.iter().find(|l| l.key == "active").unwrap();
        assert!(!active.is_deprecated);

        let deprecated = loaded_licenses
            .iter()
            .find(|l| l.key == "deprecated")
            .unwrap();
        assert!(deprecated.is_deprecated);
    }
}