1use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57 Gpl3,
60 Gpl2,
62 Agpl3,
64
65 Lgpl3,
68 Lgpl21,
70 Mpl2,
72 Epl2,
74
75 Mit,
78 Apache2,
80 Bsd3Clause,
82 Bsd2Clause,
84 Isc,
86 Unlicense,
88 Cc0,
90 Wtfpl,
92
93 Proprietary,
96
97 Unknown,
100}
101
102impl License {
103 pub fn spdx_id(&self) -> &'static str {
105 match self {
106 Self::Gpl3 => "GPL-3.0-only",
107 Self::Gpl2 => "GPL-2.0-only",
108 Self::Agpl3 => "AGPL-3.0-only",
109 Self::Lgpl3 => "LGPL-3.0-only",
110 Self::Lgpl21 => "LGPL-2.1-only",
111 Self::Mpl2 => "MPL-2.0",
112 Self::Epl2 => "EPL-2.0",
113 Self::Mit => "MIT",
114 Self::Apache2 => "Apache-2.0",
115 Self::Bsd3Clause => "BSD-3-Clause",
116 Self::Bsd2Clause => "BSD-2-Clause",
117 Self::Isc => "ISC",
118 Self::Unlicense => "Unlicense",
119 Self::Cc0 => "CC0-1.0",
120 Self::Wtfpl => "WTFPL",
121 Self::Proprietary => "PROPRIETARY",
122 Self::Unknown => "UNKNOWN",
123 }
124 }
125
126 pub fn name(&self) -> &'static str {
128 match self {
129 Self::Gpl3 => "GNU General Public License v3.0",
130 Self::Gpl2 => "GNU General Public License v2.0",
131 Self::Agpl3 => "GNU Affero General Public License v3.0",
132 Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133 Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134 Self::Mpl2 => "Mozilla Public License 2.0",
135 Self::Epl2 => "Eclipse Public License 2.0",
136 Self::Mit => "MIT License",
137 Self::Apache2 => "Apache License 2.0",
138 Self::Bsd3Clause => "BSD 3-Clause License",
139 Self::Bsd2Clause => "BSD 2-Clause License",
140 Self::Isc => "ISC License",
141 Self::Unlicense => "The Unlicense",
142 Self::Cc0 => "Creative Commons Zero v1.0",
143 Self::Wtfpl => "WTFPL",
144 Self::Proprietary => "Proprietary License",
145 Self::Unknown => "Unknown License",
146 }
147 }
148
149 pub fn risk(&self) -> LicenseRisk {
151 match self {
152 Self::Agpl3 => LicenseRisk::Critical,
153 Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154 Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155 Self::Mit
156 | Self::Apache2
157 | Self::Bsd3Clause
158 | Self::Bsd2Clause
159 | Self::Isc
160 | Self::Unlicense
161 | Self::Cc0
162 | Self::Wtfpl => LicenseRisk::Low,
163 Self::Proprietary => LicenseRisk::High,
164 Self::Unknown => LicenseRisk::Unknown,
165 }
166 }
167
168 pub fn is_copyleft(&self) -> bool {
170 matches!(
171 self,
172 Self::Gpl3
173 | Self::Gpl2
174 | Self::Agpl3
175 | Self::Lgpl3
176 | Self::Lgpl21
177 | Self::Mpl2
178 | Self::Epl2
179 )
180 }
181
182 pub fn is_strong_copyleft(&self) -> bool {
184 matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185 }
186
187 pub fn is_permissive(&self) -> bool {
189 matches!(
190 self,
191 Self::Mit
192 | Self::Apache2
193 | Self::Bsd3Clause
194 | Self::Bsd2Clause
195 | Self::Isc
196 | Self::Unlicense
197 | Self::Cc0
198 | Self::Wtfpl
199 )
200 }
201}
202
203#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207 Unknown,
209 Low,
211 Medium,
213 High,
215 Critical,
217}
218
219impl LicenseRisk {
220 pub fn as_str(&self) -> &'static str {
222 match self {
223 Self::Unknown => "unknown",
224 Self::Low => "low",
225 Self::Medium => "medium",
226 Self::High => "high",
227 Self::Critical => "critical",
228 }
229 }
230}
231
232#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235 pub file: String,
237
238 pub license: License,
240
241 pub line: u32,
243
244 pub confidence: f32,
246
247 pub matched_text: String,
249}
250
251#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254 pub min_confidence: f32,
256
257 pub min_risk: LicenseRisk,
259
260 pub scan_license_files: bool,
262
263 pub scan_headers: bool,
265
266 pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271 fn default() -> Self {
272 Self {
273 min_confidence: 0.7,
274 min_risk: LicenseRisk::Unknown,
275 scan_license_files: true,
276 scan_headers: true,
277 max_header_lines: 50,
278 }
279 }
280}
281
282pub struct LicenseScanner {
284 config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293impl LicenseScanner {
294 pub fn new() -> Self {
296 Self {
297 config: LicenseScanConfig::default(),
298 }
299 }
300
301 pub fn with_config(config: LicenseScanConfig) -> Self {
303 Self { config }
304 }
305
306 pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
308 let mut findings = Vec::new();
309
310 let is_license_file = self.is_license_file(file_path);
312
313 if is_license_file && self.config.scan_license_files {
314 if let Some(finding) = self.scan_license_file(content, file_path) {
315 findings.push(finding);
316 }
317 }
318
319 if self.config.scan_headers {
320 findings.extend(self.scan_headers(content, file_path));
321 }
322
323 findings
325 .into_iter()
326 .filter(|f| {
327 f.confidence >= self.config.min_confidence && f.license.risk() >= self.config.min_risk
328 })
329 .collect()
330 }
331
332 fn is_license_file(&self, file_path: &str) -> bool {
334 let path = Path::new(file_path);
335 let file_name = path
336 .file_name()
337 .and_then(|n| n.to_str())
338 .map(|s| s.to_uppercase())
339 .unwrap_or_default();
340
341 matches!(
342 file_name.as_str(),
343 "LICENSE"
344 | "LICENSE.MD"
345 | "LICENSE.TXT"
346 | "LICENCE"
347 | "LICENCE.MD"
348 | "LICENCE.TXT"
349 | "COPYING"
350 | "COPYING.MD"
351 | "COPYING.TXT"
352 | "LICENSE-MIT"
353 | "LICENSE-APACHE"
354 | "LICENSE.MIT"
355 | "LICENSE.APACHE"
356 )
357 }
358
359 fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
361 let content_lower = content.to_lowercase();
362
363 let detections: Vec<(License, f32, &str)> = vec![
365 (
367 License::Agpl3,
368 0.95,
369 "gnu affero general public license",
370 ),
371 (License::Agpl3, 0.9, "agpl-3.0"),
372 (License::Agpl3, 0.85, "agpl version 3"),
373 (
375 License::Lgpl3,
376 0.95,
377 "gnu lesser general public license version 3",
378 ),
379 (License::Lgpl3, 0.9, "lgpl-3.0"),
380 (
381 License::Lgpl21,
382 0.95,
383 "gnu lesser general public license version 2.1",
384 ),
385 (License::Lgpl21, 0.9, "lgpl-2.1"),
386 (License::Lgpl21, 0.9, "lgpl version 2.1"),
387 (
389 License::Gpl3,
390 0.95,
391 "gnu general public license version 3",
392 ),
393 (License::Gpl3, 0.95, "version 3, 29 june 2007"),
395 (License::Gpl3, 0.9, "gpl-3.0"),
396 (License::Gpl3, 0.85, "gplv3"),
397 (
398 License::Gpl2,
399 0.95,
400 "gnu general public license version 2",
401 ),
402 (License::Gpl2, 0.95, "version 2, june 1991"),
404 (License::Gpl2, 0.9, "gpl-2.0"),
405 (License::Gpl2, 0.85, "gplv2"),
406 (License::Mpl2, 0.95, "mozilla public license version 2.0"),
408 (License::Mpl2, 0.9, "mpl-2.0"),
409 (License::Epl2, 0.95, "eclipse public license - v 2.0"),
411 (License::Epl2, 0.9, "epl-2.0"),
412 (License::Apache2, 0.95, "apache license, version 2.0"),
414 (License::Apache2, 0.95, "apache license version 2.0"),
415 (License::Apache2, 0.9, "apache-2.0"),
416 (License::Apache2, 0.85, "licensed under the apache license"),
417 (License::Mit, 0.95, "mit license"),
419 (License::Mit, 0.9, "permission is hereby granted, free of charge"),
420 (
421 License::Mit,
422 0.85,
423 "the software is provided \"as is\", without warranty",
424 ),
425 (License::Bsd3Clause, 0.95, "3-clause bsd license"),
427 (License::Bsd3Clause, 0.9, "bsd-3-clause"),
428 (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
429 (License::Bsd2Clause, 0.95, "2-clause bsd license"),
430 (License::Bsd2Clause, 0.9, "bsd-2-clause"),
431 (License::Isc, 0.95, "isc license"),
433 (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
434 (License::Unlicense, 0.95, "this is free and unencumbered software"),
436 (License::Unlicense, 0.9, "unlicense"),
437 (License::Cc0, 0.95, "cc0 1.0 universal"),
439 (License::Cc0, 0.9, "creative commons zero"),
440 (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
442 (License::Wtfpl, 0.9, "wtfpl"),
443 ];
444
445 for (license, confidence, pattern) in detections {
446 if content_lower.contains(pattern) {
447 let line = content_lower
449 .lines()
450 .enumerate()
451 .find(|(_, l)| l.contains(pattern))
452 .map(|(i, _)| (i + 1) as u32)
453 .unwrap_or(1);
454
455 return Some(LicenseFinding {
456 file: file_path.to_string(),
457 license,
458 line,
459 confidence,
460 matched_text: pattern.to_string(),
461 });
462 }
463 }
464
465 None
466 }
467
468 fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
470 let mut findings = Vec::new();
471 let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
472
473 for (line_num, line) in lines.iter().enumerate() {
474 let line_lower = line.to_lowercase();
475
476 if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
478 {
479 findings.push(finding);
480 continue;
481 }
482
483 if let Some(finding) =
485 self.check_license_comment(&line_lower, file_path, line_num + 1)
486 {
487 findings.push(finding);
488 }
489 }
490
491 findings
492 }
493
494 fn check_spdx_identifier(
496 &self,
497 line: &str,
498 file_path: &str,
499 line_num: usize,
500 ) -> Option<LicenseFinding> {
501 if !line.contains("spdx-license-identifier") {
503 return None;
504 }
505
506 let spdx_mappings: Vec<(&str, License)> = vec![
507 ("agpl-3.0", License::Agpl3),
508 ("gpl-3.0", License::Gpl3),
509 ("gpl-2.0", License::Gpl2),
510 ("lgpl-3.0", License::Lgpl3),
511 ("lgpl-2.1", License::Lgpl21),
512 ("mpl-2.0", License::Mpl2),
513 ("epl-2.0", License::Epl2),
514 ("apache-2.0", License::Apache2),
515 ("mit", License::Mit),
516 ("bsd-3-clause", License::Bsd3Clause),
517 ("bsd-2-clause", License::Bsd2Clause),
518 ("isc", License::Isc),
519 ("unlicense", License::Unlicense),
520 ("cc0-1.0", License::Cc0),
521 ];
522
523 for (spdx_id, license) in spdx_mappings {
524 if line.contains(spdx_id) {
525 return Some(LicenseFinding {
526 file: file_path.to_string(),
527 license,
528 line: line_num as u32,
529 confidence: 0.99, matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
531 });
532 }
533 }
534
535 None
536 }
537
538 fn check_license_comment(
540 &self,
541 line: &str,
542 file_path: &str,
543 line_num: usize,
544 ) -> Option<LicenseFinding> {
545 if !line.contains("//")
547 && !line.contains("/*")
548 && !line.contains("*")
549 && !line.contains("#")
550 {
551 return None;
552 }
553
554 let comment_patterns: Vec<(&str, License, f32)> = vec![
555 ("licensed under agpl", License::Agpl3, 0.85),
557 ("licensed under gpl", License::Gpl3, 0.8),
558 ("licensed under lgpl", License::Lgpl3, 0.8),
559 ("licensed under the mit license", License::Mit, 0.85),
560 ("licensed under apache", License::Apache2, 0.85),
561 ("this file is part of", License::Unknown, 0.5), ("copyright", License::Unknown, 0.3),
564 ];
565
566 for (pattern, license, confidence) in comment_patterns {
567 if line.contains(pattern) && license != License::Unknown {
568 return Some(LicenseFinding {
569 file: file_path.to_string(),
570 license,
571 line: line_num as u32,
572 confidence,
573 matched_text: pattern.to_string(),
574 });
575 }
576 }
577
578 None
579 }
580
581 pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
583 let content = std::fs::read_to_string(path)?;
584 let file_path = path.to_string_lossy();
585 Ok(self.scan(&content, &file_path))
586 }
587
588 pub fn scan_repository(
590 &self,
591 repo_path: &Path,
592 ) -> Result<Vec<LicenseFinding>, std::io::Error> {
593 use ignore::WalkBuilder;
594
595 let mut all_findings = Vec::new();
596
597 let walker = WalkBuilder::new(repo_path)
598 .hidden(false)
599 .git_ignore(true)
600 .build();
601
602 for entry in walker.flatten() {
603 let path = entry.path();
604
605 if !path.is_file() {
606 continue;
607 }
608
609 if self.is_license_file(&path.to_string_lossy()) {
611 if let Ok(findings) = self.scan_file(path) {
612 all_findings.extend(findings);
613 }
614 continue;
615 }
616
617 if self.config.scan_headers {
619 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
620 let is_source = matches!(
621 ext,
622 "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
623 );
624
625 if is_source {
626 if let Ok(findings) = self.scan_file(path) {
627 all_findings.extend(findings);
628 }
629 }
630 }
631 }
632
633 all_findings.sort_by(|a, b| {
635 a.file
636 .cmp(&b.file)
637 .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
638 });
639 all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
640
641 Ok(all_findings)
642 }
643
644 pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
646 let mut summary = LicenseSummary::default();
647
648 for finding in findings {
649 match finding.license.risk() {
650 LicenseRisk::Critical => summary.critical_count += 1,
651 LicenseRisk::High => summary.high_count += 1,
652 LicenseRisk::Medium => summary.medium_count += 1,
653 LicenseRisk::Low => summary.low_count += 1,
654 LicenseRisk::Unknown => summary.unknown_count += 1,
655 }
656
657 if finding.license.is_copyleft() {
658 summary.copyleft_files.push(finding.file.clone());
659 }
660
661 if !summary.licenses.contains(&finding.license) {
663 summary.licenses.push(finding.license);
664 }
665 }
666
667 summary.copyleft_files.sort();
668 summary.copyleft_files.dedup();
669
670 summary
671 }
672}
673
674#[derive(Debug, Clone, Default, Serialize, Deserialize)]
676pub struct LicenseSummary {
677 pub critical_count: usize,
679
680 pub high_count: usize,
682
683 pub medium_count: usize,
685
686 pub low_count: usize,
688
689 pub unknown_count: usize,
691
692 pub copyleft_files: Vec<String>,
694
695 pub licenses: Vec<License>,
697}
698
699impl LicenseSummary {
700 pub fn has_copyleft(&self) -> bool {
702 !self.copyleft_files.is_empty()
703 }
704
705 pub fn has_high_risk(&self) -> bool {
707 self.critical_count > 0 || self.high_count > 0
708 }
709
710 pub fn total(&self) -> usize {
712 self.critical_count + self.high_count + self.medium_count + self.low_count + self.unknown_count
713 }
714}
715
#[cfg(test)]
mod tests {
    use super::*;

    /// Each license family maps to the expected risk tier.
    #[test]
    fn test_license_risk_levels() {
        let expectations = [
            (License::Agpl3, LicenseRisk::Critical),
            (License::Gpl3, LicenseRisk::High),
            (License::Lgpl3, LicenseRisk::Medium),
            (License::Mit, LicenseRisk::Low),
            (License::Unknown, LicenseRisk::Unknown),
        ];

        for &(license, expected) in expectations.iter() {
            assert_eq!(license.risk(), expected);
        }
    }

    /// GPL-family licenses are copyleft; MIT and Apache are not.
    #[test]
    fn test_copyleft_detection() {
        for license in [License::Gpl3, License::Agpl3, License::Lgpl3].iter() {
            assert!(license.is_copyleft());
        }
        for license in [License::Mit, License::Apache2].iter() {
            assert!(!license.is_copyleft());
        }
    }

    /// Only GPL and AGPL count as strong copyleft.
    #[test]
    fn test_strong_copyleft() {
        for license in [License::Gpl3, License::Agpl3].iter() {
            assert!(license.is_strong_copyleft());
        }
        for license in [License::Lgpl3, License::Mit].iter() {
            assert!(!license.is_strong_copyleft());
        }
    }

    /// An MIT license file yields exactly one high-confidence MIT finding.
    #[test]
    fn test_scan_mit_license() {
        let license_text = r#"
MIT License

Copyright (c) 2024 Example Corp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software.
"#;

        let results = LicenseScanner::new().scan(license_text, "LICENSE");

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.license, License::Mit);
        assert!(only.confidence >= 0.9);
    }

    /// A GPL-3.0 header in a `COPYING` file is recognised as GPL-3.0.
    #[test]
    fn test_scan_gpl3_license() {
        let license_text = r#"
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
"#;

        let results = LicenseScanner::new().scan(license_text, "COPYING");

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].license, License::Gpl3);
    }

    /// An SPDX tag in a source header is picked up with high confidence.
    #[test]
    fn test_scan_spdx_identifier() {
        let source = r#"
// SPDX-License-Identifier: Apache-2.0

fn main() {
    println!("Hello, world!");
}
"#;

        let results = LicenseScanner::new().scan(source, "src/main.rs");

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.license, License::Apache2);
        assert!(only.confidence >= 0.95);
    }

    /// A "Licensed under AGPL" comment in a header is reported as AGPL-3.0.
    #[test]
    fn test_scan_agpl_in_header() {
        let source = r#"
# Licensed under AGPL-3.0
# Copyright 2024 Example Corp

def main():
    pass
"#;

        let results = LicenseScanner::new().scan(source, "main.py");

        assert!(!results.is_empty());
        assert!(results.iter().any(|f| f.license == License::Agpl3));
    }

    /// The summary counts findings per risk tier and tracks copyleft files.
    #[test]
    fn test_license_summary() {
        let finding = |file: &str, license: License, confidence: f32, matched: &str| {
            LicenseFinding {
                file: file.to_string(),
                license,
                line: 1,
                confidence,
                matched_text: matched.to_string(),
            }
        };
        let findings = vec![
            finding("lib/a.rs", License::Gpl3, 0.95, "gpl-3.0"),
            finding("lib/b.rs", License::Mit, 0.9, "mit"),
            finding("lib/c.rs", License::Agpl3, 0.95, "agpl-3.0"),
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }

    /// Conventional license file names are recognised; other files are not.
    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        for name in ["LICENSE", "LICENSE.md", "COPYING", "LICENSE-MIT"].iter() {
            assert!(scanner.is_license_file(name));
        }
        for name in ["src/main.rs", "README.md"].iter() {
            assert!(!scanner.is_license_file(name));
        }
    }

    /// Risk tiers are strictly ordered from `Unknown` up to `Critical`.
    #[test]
    fn test_risk_ordering() {
        let ascending = [
            LicenseRisk::Unknown,
            LicenseRisk::Low,
            LicenseRisk::Medium,
            LicenseRisk::High,
            LicenseRisk::Critical,
        ];

        for pair in ascending.windows(2) {
            assert!(pair[1] > pair[0]);
        }
    }

    /// SPDX identifiers use the canonical spelling.
    #[test]
    fn test_spdx_ids() {
        let expectations = [
            (License::Gpl3, "GPL-3.0-only"),
            (License::Mit, "MIT"),
            (License::Apache2, "Apache-2.0"),
        ];

        for &(license, expected) in expectations.iter() {
            assert_eq!(license.spdx_id(), expected);
        }
    }
}