1use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57 Gpl3,
60 Gpl2,
62 Agpl3,
64
65 Lgpl3,
68 Lgpl21,
70 Mpl2,
72 Epl2,
74
75 Mit,
78 Apache2,
80 Bsd3Clause,
82 Bsd2Clause,
84 Isc,
86 Unlicense,
88 Cc0,
90 Wtfpl,
92
93 Proprietary,
96
97 Unknown,
100}
101
102impl License {
103 pub fn spdx_id(&self) -> &'static str {
105 match self {
106 Self::Gpl3 => "GPL-3.0-only",
107 Self::Gpl2 => "GPL-2.0-only",
108 Self::Agpl3 => "AGPL-3.0-only",
109 Self::Lgpl3 => "LGPL-3.0-only",
110 Self::Lgpl21 => "LGPL-2.1-only",
111 Self::Mpl2 => "MPL-2.0",
112 Self::Epl2 => "EPL-2.0",
113 Self::Mit => "MIT",
114 Self::Apache2 => "Apache-2.0",
115 Self::Bsd3Clause => "BSD-3-Clause",
116 Self::Bsd2Clause => "BSD-2-Clause",
117 Self::Isc => "ISC",
118 Self::Unlicense => "Unlicense",
119 Self::Cc0 => "CC0-1.0",
120 Self::Wtfpl => "WTFPL",
121 Self::Proprietary => "PROPRIETARY",
122 Self::Unknown => "UNKNOWN",
123 }
124 }
125
126 pub fn name(&self) -> &'static str {
128 match self {
129 Self::Gpl3 => "GNU General Public License v3.0",
130 Self::Gpl2 => "GNU General Public License v2.0",
131 Self::Agpl3 => "GNU Affero General Public License v3.0",
132 Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133 Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134 Self::Mpl2 => "Mozilla Public License 2.0",
135 Self::Epl2 => "Eclipse Public License 2.0",
136 Self::Mit => "MIT License",
137 Self::Apache2 => "Apache License 2.0",
138 Self::Bsd3Clause => "BSD 3-Clause License",
139 Self::Bsd2Clause => "BSD 2-Clause License",
140 Self::Isc => "ISC License",
141 Self::Unlicense => "The Unlicense",
142 Self::Cc0 => "Creative Commons Zero v1.0",
143 Self::Wtfpl => "WTFPL",
144 Self::Proprietary => "Proprietary License",
145 Self::Unknown => "Unknown License",
146 }
147 }
148
149 pub fn risk(&self) -> LicenseRisk {
151 match self {
152 Self::Agpl3 => LicenseRisk::Critical,
153 Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154 Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155 Self::Mit
156 | Self::Apache2
157 | Self::Bsd3Clause
158 | Self::Bsd2Clause
159 | Self::Isc
160 | Self::Unlicense
161 | Self::Cc0
162 | Self::Wtfpl => LicenseRisk::Low,
163 Self::Proprietary => LicenseRisk::High,
164 Self::Unknown => LicenseRisk::Unknown,
165 }
166 }
167
168 pub fn is_copyleft(&self) -> bool {
170 matches!(
171 self,
172 Self::Gpl3
173 | Self::Gpl2
174 | Self::Agpl3
175 | Self::Lgpl3
176 | Self::Lgpl21
177 | Self::Mpl2
178 | Self::Epl2
179 )
180 }
181
182 pub fn is_strong_copyleft(&self) -> bool {
184 matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185 }
186
187 pub fn is_permissive(&self) -> bool {
189 matches!(
190 self,
191 Self::Mit
192 | Self::Apache2
193 | Self::Bsd3Clause
194 | Self::Bsd2Clause
195 | Self::Isc
196 | Self::Unlicense
197 | Self::Cc0
198 | Self::Wtfpl
199 )
200 }
201}
202
203#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207 Unknown,
209 Low,
211 Medium,
213 High,
215 Critical,
217}
218
219impl LicenseRisk {
220 pub fn as_str(&self) -> &'static str {
222 match self {
223 Self::Unknown => "unknown",
224 Self::Low => "low",
225 Self::Medium => "medium",
226 Self::High => "high",
227 Self::Critical => "critical",
228 }
229 }
230}
231
232#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235 pub file: String,
237
238 pub license: License,
240
241 pub line: u32,
243
244 pub confidence: f32,
246
247 pub matched_text: String,
249}
250
251#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254 pub min_confidence: f32,
256
257 pub min_risk: LicenseRisk,
259
260 pub scan_license_files: bool,
262
263 pub scan_headers: bool,
265
266 pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271 fn default() -> Self {
272 Self {
273 min_confidence: 0.7,
274 min_risk: LicenseRisk::Unknown,
275 scan_license_files: true,
276 scan_headers: true,
277 max_header_lines: 50,
278 }
279 }
280}
281
282pub struct LicenseScanner {
284 config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293impl LicenseScanner {
294 pub fn new() -> Self {
296 Self { config: LicenseScanConfig::default() }
297 }
298
299 pub fn with_config(config: LicenseScanConfig) -> Self {
301 Self { config }
302 }
303
304 pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
306 let mut findings = Vec::new();
307
308 let is_license_file = self.is_license_file(file_path);
310
311 if is_license_file && self.config.scan_license_files {
312 if let Some(finding) = self.scan_license_file(content, file_path) {
313 findings.push(finding);
314 }
315 }
316
317 if self.config.scan_headers {
318 findings.extend(self.scan_headers(content, file_path));
319 }
320
321 findings
323 .into_iter()
324 .filter(|f| {
325 f.confidence >= self.config.min_confidence
326 && f.license.risk() >= self.config.min_risk
327 })
328 .collect()
329 }
330
331 fn is_license_file(&self, file_path: &str) -> bool {
333 let path = Path::new(file_path);
334 let file_name = path
335 .file_name()
336 .and_then(|n| n.to_str())
337 .map(|s| s.to_uppercase())
338 .unwrap_or_default();
339
340 matches!(
341 file_name.as_str(),
342 "LICENSE"
343 | "LICENSE.MD"
344 | "LICENSE.TXT"
345 | "LICENCE"
346 | "LICENCE.MD"
347 | "LICENCE.TXT"
348 | "COPYING"
349 | "COPYING.MD"
350 | "COPYING.TXT"
351 | "LICENSE-MIT"
352 | "LICENSE-APACHE"
353 | "LICENSE.MIT"
354 | "LICENSE.APACHE"
355 )
356 }
357
358 fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
360 let content_lower = content.to_lowercase();
361
362 let detections: Vec<(License, f32, &str)> = vec![
364 (License::Agpl3, 0.95, "gnu affero general public license"),
366 (License::Agpl3, 0.9, "agpl-3.0"),
367 (License::Agpl3, 0.85, "agpl version 3"),
368 (License::Lgpl3, 0.95, "gnu lesser general public license version 3"),
370 (License::Lgpl3, 0.9, "lgpl-3.0"),
371 (License::Lgpl21, 0.95, "gnu lesser general public license version 2.1"),
372 (License::Lgpl21, 0.9, "lgpl-2.1"),
373 (License::Lgpl21, 0.9, "lgpl version 2.1"),
374 (License::Gpl3, 0.95, "gnu general public license version 3"),
376 (License::Gpl3, 0.95, "version 3, 29 june 2007"),
378 (License::Gpl3, 0.9, "gpl-3.0"),
379 (License::Gpl3, 0.85, "gplv3"),
380 (License::Gpl2, 0.95, "gnu general public license version 2"),
381 (License::Gpl2, 0.95, "version 2, june 1991"),
383 (License::Gpl2, 0.9, "gpl-2.0"),
384 (License::Gpl2, 0.85, "gplv2"),
385 (License::Mpl2, 0.95, "mozilla public license version 2.0"),
387 (License::Mpl2, 0.9, "mpl-2.0"),
388 (License::Epl2, 0.95, "eclipse public license - v 2.0"),
390 (License::Epl2, 0.9, "epl-2.0"),
391 (License::Apache2, 0.95, "apache license, version 2.0"),
393 (License::Apache2, 0.95, "apache license version 2.0"),
394 (License::Apache2, 0.9, "apache-2.0"),
395 (License::Apache2, 0.85, "licensed under the apache license"),
396 (License::Mit, 0.95, "mit license"),
398 (License::Mit, 0.9, "permission is hereby granted, free of charge"),
399 (License::Mit, 0.85, "the software is provided \"as is\", without warranty"),
400 (License::Bsd3Clause, 0.95, "3-clause bsd license"),
402 (License::Bsd3Clause, 0.9, "bsd-3-clause"),
403 (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
404 (License::Bsd2Clause, 0.95, "2-clause bsd license"),
405 (License::Bsd2Clause, 0.9, "bsd-2-clause"),
406 (License::Isc, 0.95, "isc license"),
408 (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
409 (License::Unlicense, 0.95, "this is free and unencumbered software"),
411 (License::Unlicense, 0.9, "unlicense"),
412 (License::Cc0, 0.95, "cc0 1.0 universal"),
414 (License::Cc0, 0.9, "creative commons zero"),
415 (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
417 (License::Wtfpl, 0.9, "wtfpl"),
418 ];
419
420 for (license, confidence, pattern) in detections {
421 if content_lower.contains(pattern) {
422 let line = content_lower
424 .lines()
425 .enumerate()
426 .find(|(_, l)| l.contains(pattern))
427 .map(|(i, _)| (i + 1) as u32)
428 .unwrap_or(1);
429
430 return Some(LicenseFinding {
431 file: file_path.to_string(),
432 license,
433 line,
434 confidence,
435 matched_text: pattern.to_string(),
436 });
437 }
438 }
439
440 None
441 }
442
443 fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
445 let mut findings = Vec::new();
446 let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
447
448 for (line_num, line) in lines.iter().enumerate() {
449 let line_lower = line.to_lowercase();
450
451 if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
453 {
454 findings.push(finding);
455 continue;
456 }
457
458 if let Some(finding) = self.check_license_comment(&line_lower, file_path, line_num + 1)
460 {
461 findings.push(finding);
462 }
463 }
464
465 findings
466 }
467
468 fn check_spdx_identifier(
470 &self,
471 line: &str,
472 file_path: &str,
473 line_num: usize,
474 ) -> Option<LicenseFinding> {
475 if !line.contains("spdx-license-identifier") {
477 return None;
478 }
479
480 let spdx_mappings: Vec<(&str, License)> = vec![
481 ("agpl-3.0", License::Agpl3),
482 ("gpl-3.0", License::Gpl3),
483 ("gpl-2.0", License::Gpl2),
484 ("lgpl-3.0", License::Lgpl3),
485 ("lgpl-2.1", License::Lgpl21),
486 ("mpl-2.0", License::Mpl2),
487 ("epl-2.0", License::Epl2),
488 ("apache-2.0", License::Apache2),
489 ("mit", License::Mit),
490 ("bsd-3-clause", License::Bsd3Clause),
491 ("bsd-2-clause", License::Bsd2Clause),
492 ("isc", License::Isc),
493 ("unlicense", License::Unlicense),
494 ("cc0-1.0", License::Cc0),
495 ];
496
497 for (spdx_id, license) in spdx_mappings {
498 if line.contains(spdx_id) {
499 return Some(LicenseFinding {
500 file: file_path.to_string(),
501 license,
502 line: line_num as u32,
503 confidence: 0.99, matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
505 });
506 }
507 }
508
509 None
510 }
511
512 fn check_license_comment(
514 &self,
515 line: &str,
516 file_path: &str,
517 line_num: usize,
518 ) -> Option<LicenseFinding> {
519 if !line.contains("//")
521 && !line.contains("/*")
522 && !line.contains("*")
523 && !line.contains("#")
524 {
525 return None;
526 }
527
528 let comment_patterns: Vec<(&str, License, f32)> = vec![
529 ("licensed under agpl", License::Agpl3, 0.85),
531 ("licensed under gpl", License::Gpl3, 0.8),
532 ("licensed under lgpl", License::Lgpl3, 0.8),
533 ("licensed under the mit license", License::Mit, 0.85),
534 ("licensed under apache", License::Apache2, 0.85),
535 ("this file is part of", License::Unknown, 0.5), ("copyright", License::Unknown, 0.3),
538 ];
539
540 for (pattern, license, confidence) in comment_patterns {
541 if line.contains(pattern) && license != License::Unknown {
542 return Some(LicenseFinding {
543 file: file_path.to_string(),
544 license,
545 line: line_num as u32,
546 confidence,
547 matched_text: pattern.to_string(),
548 });
549 }
550 }
551
552 None
553 }
554
555 pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
557 let content = std::fs::read_to_string(path)?;
558 let file_path = path.to_string_lossy();
559 Ok(self.scan(&content, &file_path))
560 }
561
562 pub fn scan_repository(&self, repo_path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
564 use ignore::WalkBuilder;
565
566 let mut all_findings = Vec::new();
567
568 let walker = WalkBuilder::new(repo_path)
569 .hidden(false)
570 .git_ignore(true)
571 .build();
572
573 for entry in walker.flatten() {
574 let path = entry.path();
575
576 if !path.is_file() {
577 continue;
578 }
579
580 if self.is_license_file(&path.to_string_lossy()) {
582 if let Ok(findings) = self.scan_file(path) {
583 all_findings.extend(findings);
584 }
585 continue;
586 }
587
588 if self.config.scan_headers {
590 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
591 let is_source = matches!(
592 ext,
593 "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
594 );
595
596 if is_source {
597 if let Ok(findings) = self.scan_file(path) {
598 all_findings.extend(findings);
599 }
600 }
601 }
602 }
603
604 all_findings.sort_by(|a, b| {
606 a.file
607 .cmp(&b.file)
608 .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
609 });
610 all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
611
612 Ok(all_findings)
613 }
614
615 pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
617 let mut summary = LicenseSummary::default();
618
619 for finding in findings {
620 match finding.license.risk() {
621 LicenseRisk::Critical => summary.critical_count += 1,
622 LicenseRisk::High => summary.high_count += 1,
623 LicenseRisk::Medium => summary.medium_count += 1,
624 LicenseRisk::Low => summary.low_count += 1,
625 LicenseRisk::Unknown => summary.unknown_count += 1,
626 }
627
628 if finding.license.is_copyleft() {
629 summary.copyleft_files.push(finding.file.clone());
630 }
631
632 if !summary.licenses.contains(&finding.license) {
634 summary.licenses.push(finding.license);
635 }
636 }
637
638 summary.copyleft_files.sort();
639 summary.copyleft_files.dedup();
640
641 summary
642 }
643}
644
645#[derive(Debug, Clone, Default, Serialize, Deserialize)]
647pub struct LicenseSummary {
648 pub critical_count: usize,
650
651 pub high_count: usize,
653
654 pub medium_count: usize,
656
657 pub low_count: usize,
659
660 pub unknown_count: usize,
662
663 pub copyleft_files: Vec<String>,
665
666 pub licenses: Vec<License>,
668}
669
670impl LicenseSummary {
671 pub fn has_copyleft(&self) -> bool {
673 !self.copyleft_files.is_empty()
674 }
675
676 pub fn has_high_risk(&self) -> bool {
678 self.critical_count > 0 || self.high_count > 0
679 }
680
681 pub fn total(&self) -> usize {
683 self.critical_count
684 + self.high_count
685 + self.medium_count
686 + self.low_count
687 + self.unknown_count
688 }
689}
690
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a finding on line 1 for the summary test.
    fn finding(file: &str, license: License, confidence: f32, matched: &str) -> LicenseFinding {
        LicenseFinding {
            file: file.to_string(),
            license,
            line: 1,
            confidence,
            matched_text: matched.to_string(),
        }
    }

    /// Each license family maps to its expected risk level.
    #[test]
    fn test_license_risk_levels() {
        let expectations = [
            (License::Agpl3, LicenseRisk::Critical),
            (License::Gpl3, LicenseRisk::High),
            (License::Lgpl3, LicenseRisk::Medium),
            (License::Mit, LicenseRisk::Low),
            (License::Unknown, LicenseRisk::Unknown),
        ];
        for (license, expected) in expectations.iter() {
            assert_eq!(license.risk(), *expected);
        }
    }

    /// Strong and weak copyleft licenses are copyleft; permissive ones are not.
    #[test]
    fn test_copyleft_detection() {
        for license in [License::Gpl3, License::Agpl3, License::Lgpl3].iter() {
            assert!(license.is_copyleft());
        }
        for license in [License::Mit, License::Apache2].iter() {
            assert!(!license.is_copyleft());
        }
    }

    /// Only the GPL/AGPL family counts as strong copyleft.
    #[test]
    fn test_strong_copyleft() {
        for license in [License::Gpl3, License::Agpl3].iter() {
            assert!(license.is_strong_copyleft());
        }
        for license in [License::Lgpl3, License::Mit].iter() {
            assert!(!license.is_strong_copyleft());
        }
    }

    /// An MIT license file yields exactly one high-confidence MIT finding.
    #[test]
    fn test_scan_mit_license() {
        let content = r#"
MIT License

Copyright (c) 2024 Example Corp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software.
"#;

        let findings = LicenseScanner::new().scan(content, "LICENSE");
        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.license, License::Mit);
        assert!(only.confidence >= 0.9);
    }

    /// A GPLv3 heading in a COPYING file is detected as GPL-3.0.
    #[test]
    fn test_scan_gpl3_license() {
        let content = r#"
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
"#;

        let findings = LicenseScanner::new().scan(content, "COPYING");
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Gpl3);
    }

    /// An SPDX tag in a source header is detected with very high confidence.
    #[test]
    fn test_scan_spdx_identifier() {
        let content = r#"
// SPDX-License-Identifier: Apache-2.0

fn main() {
    println!("Hello, world!");
}
"#;

        let findings = LicenseScanner::new().scan(content, "src/main.rs");
        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.license, License::Apache2);
        assert!(only.confidence >= 0.95);
    }

    /// A "Licensed under AGPL" comment in a header is reported as AGPL-3.0.
    #[test]
    fn test_scan_agpl_in_header() {
        let content = r#"
# Licensed under AGPL-3.0
# Copyright 2024 Example Corp

def main():
    pass
"#;

        let findings = LicenseScanner::new().scan(content, "main.py");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.license == License::Agpl3));
    }

    /// Summaries count findings per risk level and list copyleft files.
    #[test]
    fn test_license_summary() {
        let findings = vec![
            finding("lib/a.rs", License::Gpl3, 0.95, "gpl-3.0"),
            finding("lib/b.rs", License::Mit, 0.9, "mit"),
            finding("lib/c.rs", License::Agpl3, 0.95, "agpl-3.0"),
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }

    /// Well-known license file names are recognised case-insensitively.
    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        for name in ["LICENSE", "LICENSE.md", "COPYING", "LICENSE-MIT"].iter() {
            assert!(scanner.is_license_file(name));
        }
        for name in ["src/main.rs", "README.md"].iter() {
            assert!(!scanner.is_license_file(name));
        }
    }

    /// Risk levels order from `Unknown` (lowest) up to `Critical` (highest).
    #[test]
    fn test_risk_ordering() {
        assert!(LicenseRisk::Critical > LicenseRisk::High);
        assert!(LicenseRisk::High > LicenseRisk::Medium);
        assert!(LicenseRisk::Medium > LicenseRisk::Low);
        assert!(LicenseRisk::Low > LicenseRisk::Unknown);
    }

    /// SPDX ids of a few representative licenses.
    #[test]
    fn test_spdx_ids() {
        assert_eq!(License::Gpl3.spdx_id(), "GPL-3.0-only");
        assert_eq!(License::Mit.spdx_id(), "MIT");
        assert_eq!(License::Apache2.spdx_id(), "Apache-2.0");
    }
}