1use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57 Gpl3,
60 Gpl2,
62 Agpl3,
64
65 Lgpl3,
68 Lgpl21,
70 Mpl2,
72 Epl2,
74
75 Mit,
78 Apache2,
80 Bsd3Clause,
82 Bsd2Clause,
84 Isc,
86 Unlicense,
88 Cc0,
90 Wtfpl,
92
93 Proprietary,
96
97 Unknown,
100}
101
102impl License {
103 pub fn spdx_id(&self) -> &'static str {
105 match self {
106 Self::Gpl3 => "GPL-3.0-only",
107 Self::Gpl2 => "GPL-2.0-only",
108 Self::Agpl3 => "AGPL-3.0-only",
109 Self::Lgpl3 => "LGPL-3.0-only",
110 Self::Lgpl21 => "LGPL-2.1-only",
111 Self::Mpl2 => "MPL-2.0",
112 Self::Epl2 => "EPL-2.0",
113 Self::Mit => "MIT",
114 Self::Apache2 => "Apache-2.0",
115 Self::Bsd3Clause => "BSD-3-Clause",
116 Self::Bsd2Clause => "BSD-2-Clause",
117 Self::Isc => "ISC",
118 Self::Unlicense => "Unlicense",
119 Self::Cc0 => "CC0-1.0",
120 Self::Wtfpl => "WTFPL",
121 Self::Proprietary => "PROPRIETARY",
122 Self::Unknown => "UNKNOWN",
123 }
124 }
125
126 pub fn name(&self) -> &'static str {
128 match self {
129 Self::Gpl3 => "GNU General Public License v3.0",
130 Self::Gpl2 => "GNU General Public License v2.0",
131 Self::Agpl3 => "GNU Affero General Public License v3.0",
132 Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133 Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134 Self::Mpl2 => "Mozilla Public License 2.0",
135 Self::Epl2 => "Eclipse Public License 2.0",
136 Self::Mit => "MIT License",
137 Self::Apache2 => "Apache License 2.0",
138 Self::Bsd3Clause => "BSD 3-Clause License",
139 Self::Bsd2Clause => "BSD 2-Clause License",
140 Self::Isc => "ISC License",
141 Self::Unlicense => "The Unlicense",
142 Self::Cc0 => "Creative Commons Zero v1.0",
143 Self::Wtfpl => "WTFPL",
144 Self::Proprietary => "Proprietary License",
145 Self::Unknown => "Unknown License",
146 }
147 }
148
149 pub fn risk(&self) -> LicenseRisk {
151 match self {
152 Self::Agpl3 => LicenseRisk::Critical,
153 Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154 Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155 Self::Mit
156 | Self::Apache2
157 | Self::Bsd3Clause
158 | Self::Bsd2Clause
159 | Self::Isc
160 | Self::Unlicense
161 | Self::Cc0
162 | Self::Wtfpl => LicenseRisk::Low,
163 Self::Proprietary => LicenseRisk::High,
164 Self::Unknown => LicenseRisk::Unknown,
165 }
166 }
167
168 pub fn is_copyleft(&self) -> bool {
170 matches!(
171 self,
172 Self::Gpl3
173 | Self::Gpl2
174 | Self::Agpl3
175 | Self::Lgpl3
176 | Self::Lgpl21
177 | Self::Mpl2
178 | Self::Epl2
179 )
180 }
181
182 pub fn is_strong_copyleft(&self) -> bool {
184 matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185 }
186
187 pub fn is_permissive(&self) -> bool {
189 matches!(
190 self,
191 Self::Mit
192 | Self::Apache2
193 | Self::Bsd3Clause
194 | Self::Bsd2Clause
195 | Self::Isc
196 | Self::Unlicense
197 | Self::Cc0
198 | Self::Wtfpl
199 )
200 }
201}
202
203#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207 Unknown,
209 Low,
211 Medium,
213 High,
215 Critical,
217}
218
219impl LicenseRisk {
220 pub fn as_str(&self) -> &'static str {
222 match self {
223 Self::Unknown => "unknown",
224 Self::Low => "low",
225 Self::Medium => "medium",
226 Self::High => "high",
227 Self::Critical => "critical",
228 }
229 }
230}
231
232#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235 pub file: String,
237
238 pub license: License,
240
241 pub line: u32,
243
244 pub confidence: f32,
246
247 pub matched_text: String,
249}
250
251#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254 pub min_confidence: f32,
256
257 pub min_risk: LicenseRisk,
259
260 pub scan_license_files: bool,
262
263 pub scan_headers: bool,
265
266 pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271 fn default() -> Self {
272 Self {
273 min_confidence: 0.7,
274 min_risk: LicenseRisk::Unknown,
275 scan_license_files: true,
276 scan_headers: true,
277 max_header_lines: 50,
278 }
279 }
280}
281
282pub struct LicenseScanner {
284 config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288 fn default() -> Self {
289 Self::new()
290 }
291}
292
293impl LicenseScanner {
294 pub fn new() -> Self {
296 Self { config: LicenseScanConfig::default() }
297 }
298
299 pub fn with_config(config: LicenseScanConfig) -> Self {
301 Self { config }
302 }
303
304 pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
306 let mut findings = Vec::new();
307
308 let is_license_file = self.is_license_file(file_path);
310
311 if is_license_file && self.config.scan_license_files {
312 if let Some(finding) = self.scan_license_file(content, file_path) {
313 findings.push(finding);
314 }
315 }
316
317 if self.config.scan_headers {
318 findings.extend(self.scan_headers(content, file_path));
319 }
320
321 findings
323 .into_iter()
324 .filter(|f| {
325 f.confidence >= self.config.min_confidence
326 && f.license.risk() >= self.config.min_risk
327 })
328 .collect()
329 }
330
331 fn is_license_file(&self, file_path: &str) -> bool {
333 let path = Path::new(file_path);
334 let file_name = path
335 .file_name()
336 .and_then(|n| n.to_str())
337 .map(|s| s.to_uppercase())
338 .unwrap_or_default();
339
340 matches!(
341 file_name.as_str(),
342 "LICENSE"
343 | "LICENSE.MD"
344 | "LICENSE.TXT"
345 | "LICENCE"
346 | "LICENCE.MD"
347 | "LICENCE.TXT"
348 | "COPYING"
349 | "COPYING.MD"
350 | "COPYING.TXT"
351 | "LICENSE-MIT"
352 | "LICENSE-APACHE"
353 | "LICENSE.MIT"
354 | "LICENSE.APACHE"
355 )
356 }
357
358 fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
360 let content_lower = content.to_lowercase();
361
362 let detections: Vec<(License, f32, &str)> = vec![
364 (License::Agpl3, 0.95, "gnu affero general public license"),
366 (License::Agpl3, 0.9, "agpl-3.0"),
367 (License::Agpl3, 0.85, "agpl version 3"),
368 (License::Lgpl3, 0.95, "gnu lesser general public license version 3"),
370 (License::Lgpl3, 0.9, "lgpl-3.0"),
371 (License::Lgpl21, 0.95, "gnu lesser general public license version 2.1"),
372 (License::Lgpl21, 0.9, "lgpl-2.1"),
373 (License::Lgpl21, 0.9, "lgpl version 2.1"),
374 (License::Gpl3, 0.95, "gnu general public license version 3"),
376 (License::Gpl3, 0.95, "version 3, 29 june 2007"),
378 (License::Gpl3, 0.9, "gpl-3.0"),
379 (License::Gpl3, 0.85, "gplv3"),
380 (License::Gpl2, 0.95, "gnu general public license version 2"),
381 (License::Gpl2, 0.95, "version 2, june 1991"),
383 (License::Gpl2, 0.9, "gpl-2.0"),
384 (License::Gpl2, 0.85, "gplv2"),
385 (License::Mpl2, 0.95, "mozilla public license version 2.0"),
387 (License::Mpl2, 0.9, "mpl-2.0"),
388 (License::Epl2, 0.95, "eclipse public license - v 2.0"),
390 (License::Epl2, 0.9, "epl-2.0"),
391 (License::Apache2, 0.95, "apache license, version 2.0"),
393 (License::Apache2, 0.95, "apache license version 2.0"),
394 (License::Apache2, 0.9, "apache-2.0"),
395 (License::Apache2, 0.85, "licensed under the apache license"),
396 (License::Mit, 0.95, "mit license"),
398 (License::Mit, 0.9, "permission is hereby granted, free of charge"),
399 (License::Mit, 0.85, "the software is provided \"as is\", without warranty"),
400 (License::Bsd3Clause, 0.95, "3-clause bsd license"),
402 (License::Bsd3Clause, 0.9, "bsd-3-clause"),
403 (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
404 (License::Bsd2Clause, 0.95, "2-clause bsd license"),
405 (License::Bsd2Clause, 0.9, "bsd-2-clause"),
406 (License::Isc, 0.95, "isc license"),
408 (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
409 (License::Unlicense, 0.95, "this is free and unencumbered software"),
411 (License::Unlicense, 0.9, "unlicense"),
412 (License::Cc0, 0.95, "cc0 1.0 universal"),
414 (License::Cc0, 0.9, "creative commons zero"),
415 (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
417 (License::Wtfpl, 0.9, "wtfpl"),
418 ];
419
420 for (license, confidence, pattern) in detections {
421 if content_lower.contains(pattern) {
422 let line = content_lower
424 .lines()
425 .enumerate()
426 .find(|(_, l)| l.contains(pattern))
427 .map_or(1, |(i, _)| (i + 1) as u32);
428
429 return Some(LicenseFinding {
430 file: file_path.to_owned(),
431 license,
432 line,
433 confidence,
434 matched_text: pattern.to_owned(),
435 });
436 }
437 }
438
439 None
440 }
441
442 fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
444 let mut findings = Vec::new();
445 let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
446
447 for (line_num, line) in lines.iter().enumerate() {
448 let line_lower = line.to_lowercase();
449
450 if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
452 {
453 findings.push(finding);
454 continue;
455 }
456
457 if let Some(finding) = self.check_license_comment(&line_lower, file_path, line_num + 1)
459 {
460 findings.push(finding);
461 }
462 }
463
464 findings
465 }
466
467 fn check_spdx_identifier(
469 &self,
470 line: &str,
471 file_path: &str,
472 line_num: usize,
473 ) -> Option<LicenseFinding> {
474 if !line.contains("spdx-license-identifier") {
476 return None;
477 }
478
479 let spdx_mappings: Vec<(&str, License)> = vec![
480 ("agpl-3.0", License::Agpl3),
481 ("gpl-3.0", License::Gpl3),
482 ("gpl-2.0", License::Gpl2),
483 ("lgpl-3.0", License::Lgpl3),
484 ("lgpl-2.1", License::Lgpl21),
485 ("mpl-2.0", License::Mpl2),
486 ("epl-2.0", License::Epl2),
487 ("apache-2.0", License::Apache2),
488 ("mit", License::Mit),
489 ("bsd-3-clause", License::Bsd3Clause),
490 ("bsd-2-clause", License::Bsd2Clause),
491 ("isc", License::Isc),
492 ("unlicense", License::Unlicense),
493 ("cc0-1.0", License::Cc0),
494 ];
495
496 for (spdx_id, license) in spdx_mappings {
497 if line.contains(spdx_id) {
498 return Some(LicenseFinding {
499 file: file_path.to_owned(),
500 license,
501 line: line_num as u32,
502 confidence: 0.99, matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
504 });
505 }
506 }
507
508 None
509 }
510
511 fn check_license_comment(
513 &self,
514 line: &str,
515 file_path: &str,
516 line_num: usize,
517 ) -> Option<LicenseFinding> {
518 if !line.contains("//")
520 && !line.contains("/*")
521 && !line.contains('*')
522 && !line.contains('#')
523 {
524 return None;
525 }
526
527 let comment_patterns: Vec<(&str, License, f32)> = vec![
528 ("licensed under agpl", License::Agpl3, 0.85),
530 ("licensed under gpl", License::Gpl3, 0.8),
531 ("licensed under lgpl", License::Lgpl3, 0.8),
532 ("licensed under the mit license", License::Mit, 0.85),
533 ("licensed under apache", License::Apache2, 0.85),
534 ("this file is part of", License::Unknown, 0.5), ("copyright", License::Unknown, 0.3),
537 ];
538
539 for (pattern, license, confidence) in comment_patterns {
540 if line.contains(pattern) && license != License::Unknown {
541 return Some(LicenseFinding {
542 file: file_path.to_owned(),
543 license,
544 line: line_num as u32,
545 confidence,
546 matched_text: pattern.to_owned(),
547 });
548 }
549 }
550
551 None
552 }
553
554 pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
556 let content = std::fs::read_to_string(path)?;
557 let file_path = path.to_string_lossy();
558 Ok(self.scan(&content, &file_path))
559 }
560
561 pub fn scan_repository(&self, repo_path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
563 use ignore::WalkBuilder;
564
565 let mut all_findings = Vec::new();
566
567 let walker = WalkBuilder::new(repo_path)
568 .hidden(false)
569 .git_ignore(true)
570 .build();
571
572 for entry in walker.flatten() {
573 let path = entry.path();
574
575 if !path.is_file() {
576 continue;
577 }
578
579 if self.is_license_file(&path.to_string_lossy()) {
581 if let Ok(findings) = self.scan_file(path) {
582 all_findings.extend(findings);
583 }
584 continue;
585 }
586
587 if self.config.scan_headers {
589 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
590 let is_source = matches!(
591 ext,
592 "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
593 );
594
595 if is_source {
596 if let Ok(findings) = self.scan_file(path) {
597 all_findings.extend(findings);
598 }
599 }
600 }
601 }
602
603 all_findings.sort_by(|a, b| {
605 a.file
606 .cmp(&b.file)
607 .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
608 });
609 all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
610
611 Ok(all_findings)
612 }
613
614 pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
616 let mut summary = LicenseSummary::default();
617
618 for finding in findings {
619 match finding.license.risk() {
620 LicenseRisk::Critical => summary.critical_count += 1,
621 LicenseRisk::High => summary.high_count += 1,
622 LicenseRisk::Medium => summary.medium_count += 1,
623 LicenseRisk::Low => summary.low_count += 1,
624 LicenseRisk::Unknown => summary.unknown_count += 1,
625 }
626
627 if finding.license.is_copyleft() {
628 summary.copyleft_files.push(finding.file.clone());
629 }
630
631 if !summary.licenses.contains(&finding.license) {
633 summary.licenses.push(finding.license);
634 }
635 }
636
637 summary.copyleft_files.sort();
638 summary.copyleft_files.dedup();
639
640 summary
641 }
642}
643
644#[derive(Debug, Clone, Default, Serialize, Deserialize)]
646pub struct LicenseSummary {
647 pub critical_count: usize,
649
650 pub high_count: usize,
652
653 pub medium_count: usize,
655
656 pub low_count: usize,
658
659 pub unknown_count: usize,
661
662 pub copyleft_files: Vec<String>,
664
665 pub licenses: Vec<License>,
667}
668
669impl LicenseSummary {
670 pub fn has_copyleft(&self) -> bool {
672 !self.copyleft_files.is_empty()
673 }
674
675 pub fn has_high_risk(&self) -> bool {
677 self.critical_count > 0 || self.high_count > 0
678 }
679
680 pub fn total(&self) -> usize {
682 self.critical_count
683 + self.high_count
684 + self.medium_count
685 + self.low_count
686 + self.unknown_count
687 }
688}
689
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a finding located on line 1 of `file`.
    fn finding(file: &str, license: License, confidence: f32, matched_text: &str) -> LicenseFinding {
        LicenseFinding {
            file: file.to_owned(),
            license,
            line: 1,
            confidence,
            matched_text: matched_text.to_owned(),
        }
    }

    /// Each license maps to its expected risk level.
    #[test]
    fn test_license_risk_levels() {
        let expected = [
            (License::Agpl3, LicenseRisk::Critical),
            (License::Gpl3, LicenseRisk::High),
            (License::Lgpl3, LicenseRisk::Medium),
            (License::Mit, LicenseRisk::Low),
            (License::Unknown, LicenseRisk::Unknown),
        ];

        for (license, risk) in expected.iter() {
            assert_eq!(license.risk(), *risk);
        }
    }

    /// GPL-family licenses are copyleft; MIT and Apache are not.
    #[test]
    fn test_copyleft_detection() {
        for license in [License::Gpl3, License::Agpl3, License::Lgpl3].iter() {
            assert!(license.is_copyleft());
        }
        for license in [License::Mit, License::Apache2].iter() {
            assert!(!license.is_copyleft());
        }
    }

    /// Only GPL and AGPL count as strong copyleft.
    #[test]
    fn test_strong_copyleft() {
        for license in [License::Gpl3, License::Agpl3].iter() {
            assert!(license.is_strong_copyleft());
        }
        for license in [License::Lgpl3, License::Mit].iter() {
            assert!(!license.is_strong_copyleft());
        }
    }

    /// An MIT `LICENSE` file yields exactly one high-confidence MIT finding.
    #[test]
    fn test_scan_mit_license() {
        let content = concat!(
            "\n",
            "MIT License\n",
            "\n",
            "Copyright (c) 2024 Example Corp\n",
            "\n",
            "Permission is hereby granted, free of charge, to any person obtaining a copy\n",
            "of this software and associated documentation files (the \"Software\"), to deal\n",
            "in the Software without restriction, including without limitation the rights\n",
            "to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n",
            "copies of the Software.\n",
        );

        let findings = LicenseScanner::new().scan(content, "LICENSE");

        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.license, License::Mit);
        assert!(only.confidence >= 0.9);
    }

    /// A GPL-3.0 `COPYING` file yields exactly one GPL-3.0 finding.
    #[test]
    fn test_scan_gpl3_license() {
        let content = concat!(
            "\n",
            "GNU GENERAL PUBLIC LICENSE\n",
            "Version 3, 29 June 2007\n",
            "\n",
            "Copyright (C) 2007 Free Software Foundation, Inc.\n",
        );

        let findings = LicenseScanner::new().scan(content, "COPYING");

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Gpl3);
    }

    /// An SPDX tag in a source header is detected with high confidence.
    #[test]
    fn test_scan_spdx_identifier() {
        let content = concat!(
            "\n",
            "// SPDX-License-Identifier: Apache-2.0\n",
            "\n",
            "fn main() {\n",
            "    println!(\"Hello, world!\");\n",
            "}\n",
        );

        let findings = LicenseScanner::new().scan(content, "src/main.rs");

        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.license, License::Apache2);
        assert!(only.confidence >= 0.95);
    }

    /// A "Licensed under AGPL" comment in a script header is detected.
    #[test]
    fn test_scan_agpl_in_header() {
        let content = concat!(
            "\n",
            "# Licensed under AGPL-3.0\n",
            "# Copyright 2024 Example Corp\n",
            "\n",
            "def main():\n",
            "    pass\n",
        );

        let findings = LicenseScanner::new().scan(content, "main.py");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.license == License::Agpl3));
    }

    /// The summary counts findings per risk level and tracks copyleft files.
    #[test]
    fn test_license_summary() {
        let findings = vec![
            finding("lib/a.rs", License::Gpl3, 0.95, "gpl-3.0"),
            finding("lib/b.rs", License::Mit, 0.9, "mit"),
            finding("lib/c.rs", License::Agpl3, 0.95, "agpl-3.0"),
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }

    /// Conventional license file names are recognised; ordinary files are not.
    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        for name in ["LICENSE", "LICENSE.md", "COPYING", "LICENSE-MIT"].iter() {
            assert!(scanner.is_license_file(name));
        }
        for name in ["src/main.rs", "README.md"].iter() {
            assert!(!scanner.is_license_file(name));
        }
    }

    /// Risk levels are ordered from `Unknown` up to `Critical`.
    #[test]
    fn test_risk_ordering() {
        let ascending = [
            LicenseRisk::Unknown,
            LicenseRisk::Low,
            LicenseRisk::Medium,
            LicenseRisk::High,
            LicenseRisk::Critical,
        ];

        for pair in ascending.windows(2) {
            assert!(pair[1] > pair[0]);
        }
    }

    /// Spot-checks the identifier strings.
    #[test]
    fn test_spdx_ids() {
        let expected = [
            (License::Gpl3, "GPL-3.0-only"),
            (License::Mit, "MIT"),
            (License::Apache2, "Apache-2.0"),
        ];

        for (license, id) in expected.iter() {
            assert_eq!(license.spdx_id(), *id);
        }
    }
}