infiniloom_engine/
license.rs

1//! License detection for compliance scanning
2//!
3//! This module detects open-source licenses in codebases, particularly
4//! focusing on copyleft licenses (GPL, AGPL, LGPL) that may require
5//! special handling in enterprise environments.
6//!
7//! # Compliance Use Cases
8//!
9//! - **Enterprise Code Audit**: Identify copyleft code before embedding
10//! - **Legal Review**: Flag files requiring license compliance
11//! - **CI/CD Gates**: Fail builds containing prohibited licenses
12//!
13//! # Supported Licenses
14//!
15//! | License | Risk Level | Notes |
16//! |---------|------------|-------|
17//! | GPL-3.0 | High | Strong copyleft, viral |
18//! | GPL-2.0 | High | Strong copyleft |
19//! | AGPL-3.0 | Critical | Network copyleft |
20//! | LGPL-3.0 | Medium | Weak copyleft |
21//! | LGPL-2.1 | Medium | Weak copyleft |
22//! | MIT | Low | Permissive |
23//! | Apache-2.0 | Low | Permissive |
24//! | BSD-3-Clause | Low | Permissive |
25//! | Unlicensed | Unknown | No license detected |
26//!
27//! # Example
28//!
29//! ```rust,ignore
30//! use infiniloom_engine::license::{LicenseScanner, LicenseRisk};
31//!
32//! let scanner = LicenseScanner::new();
33//!
//! // Scan a file (yields every finding in it, or an I/O error)
//! for finding in scanner.scan_file(Path::new("lib/crypto.rs"))? {
//!     if finding.license.risk() >= LicenseRisk::High {
//!         println!("Warning: {} contains {}", finding.file, finding.license.name());
//!     }
//! }
40//!
41//! // Scan entire repository
42//! let findings = scanner.scan_repository(repo_path)?;
43//! let copyleft_files: Vec<_> = findings
44//!     .iter()
45//!     .filter(|f| f.license.is_copyleft())
46//!     .collect();
47//! ```
48
49use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53/// Detected license types
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57    // === Strong Copyleft ===
58    /// GNU General Public License v3.0
59    Gpl3,
60    /// GNU General Public License v2.0
61    Gpl2,
62    /// GNU Affero General Public License v3.0 (network copyleft)
63    Agpl3,
64
65    // === Weak Copyleft ===
66    /// GNU Lesser General Public License v3.0
67    Lgpl3,
68    /// GNU Lesser General Public License v2.1
69    Lgpl21,
70    /// Mozilla Public License 2.0
71    Mpl2,
72    /// Eclipse Public License 2.0
73    Epl2,
74
75    // === Permissive ===
76    /// MIT License
77    Mit,
78    /// Apache License 2.0
79    Apache2,
80    /// BSD 3-Clause "New" License
81    Bsd3Clause,
82    /// BSD 2-Clause "Simplified" License
83    Bsd2Clause,
84    /// ISC License
85    Isc,
86    /// The Unlicense (public domain)
87    Unlicense,
88    /// Creative Commons Zero v1.0 Universal
89    Cc0,
90    /// Do What The Fuck You Want To Public License
91    Wtfpl,
92
93    // === Proprietary/Restricted ===
94    /// Proprietary/Commercial license
95    Proprietary,
96
97    // === Unknown ===
98    /// Unknown license
99    Unknown,
100}
101
102impl License {
103    /// Get the SPDX identifier for this license
104    pub fn spdx_id(&self) -> &'static str {
105        match self {
106            Self::Gpl3 => "GPL-3.0-only",
107            Self::Gpl2 => "GPL-2.0-only",
108            Self::Agpl3 => "AGPL-3.0-only",
109            Self::Lgpl3 => "LGPL-3.0-only",
110            Self::Lgpl21 => "LGPL-2.1-only",
111            Self::Mpl2 => "MPL-2.0",
112            Self::Epl2 => "EPL-2.0",
113            Self::Mit => "MIT",
114            Self::Apache2 => "Apache-2.0",
115            Self::Bsd3Clause => "BSD-3-Clause",
116            Self::Bsd2Clause => "BSD-2-Clause",
117            Self::Isc => "ISC",
118            Self::Unlicense => "Unlicense",
119            Self::Cc0 => "CC0-1.0",
120            Self::Wtfpl => "WTFPL",
121            Self::Proprietary => "PROPRIETARY",
122            Self::Unknown => "UNKNOWN",
123        }
124    }
125
126    /// Get human-readable name
127    pub fn name(&self) -> &'static str {
128        match self {
129            Self::Gpl3 => "GNU General Public License v3.0",
130            Self::Gpl2 => "GNU General Public License v2.0",
131            Self::Agpl3 => "GNU Affero General Public License v3.0",
132            Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133            Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134            Self::Mpl2 => "Mozilla Public License 2.0",
135            Self::Epl2 => "Eclipse Public License 2.0",
136            Self::Mit => "MIT License",
137            Self::Apache2 => "Apache License 2.0",
138            Self::Bsd3Clause => "BSD 3-Clause License",
139            Self::Bsd2Clause => "BSD 2-Clause License",
140            Self::Isc => "ISC License",
141            Self::Unlicense => "The Unlicense",
142            Self::Cc0 => "Creative Commons Zero v1.0",
143            Self::Wtfpl => "WTFPL",
144            Self::Proprietary => "Proprietary License",
145            Self::Unknown => "Unknown License",
146        }
147    }
148
149    /// Get the risk level for this license
150    pub fn risk(&self) -> LicenseRisk {
151        match self {
152            Self::Agpl3 => LicenseRisk::Critical,
153            Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154            Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155            Self::Mit
156            | Self::Apache2
157            | Self::Bsd3Clause
158            | Self::Bsd2Clause
159            | Self::Isc
160            | Self::Unlicense
161            | Self::Cc0
162            | Self::Wtfpl => LicenseRisk::Low,
163            Self::Proprietary => LicenseRisk::High,
164            Self::Unknown => LicenseRisk::Unknown,
165        }
166    }
167
168    /// Check if this is a copyleft license
169    pub fn is_copyleft(&self) -> bool {
170        matches!(
171            self,
172            Self::Gpl3
173                | Self::Gpl2
174                | Self::Agpl3
175                | Self::Lgpl3
176                | Self::Lgpl21
177                | Self::Mpl2
178                | Self::Epl2
179        )
180    }
181
182    /// Check if this is a strong (viral) copyleft license
183    pub fn is_strong_copyleft(&self) -> bool {
184        matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185    }
186
187    /// Check if this is a permissive license
188    pub fn is_permissive(&self) -> bool {
189        matches!(
190            self,
191            Self::Mit
192                | Self::Apache2
193                | Self::Bsd3Clause
194                | Self::Bsd2Clause
195                | Self::Isc
196                | Self::Unlicense
197                | Self::Cc0
198                | Self::Wtfpl
199        )
200    }
201}
202
203/// License risk levels for compliance
204#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207    /// Unknown risk (no license detected)
208    Unknown,
209    /// Low risk (permissive licenses)
210    Low,
211    /// Medium risk (weak copyleft)
212    Medium,
213    /// High risk (strong copyleft, proprietary)
214    High,
215    /// Critical risk (AGPL - network copyleft)
216    Critical,
217}
218
219impl LicenseRisk {
220    /// Get string representation
221    pub fn as_str(&self) -> &'static str {
222        match self {
223            Self::Unknown => "unknown",
224            Self::Low => "low",
225            Self::Medium => "medium",
226            Self::High => "high",
227            Self::Critical => "critical",
228        }
229    }
230}
231
232/// A license detection result
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235    /// File where license was found
236    pub file: String,
237
238    /// Detected license
239    pub license: License,
240
241    /// Line number where license indicator was found
242    pub line: u32,
243
244    /// Confidence score (0.0 - 1.0)
245    pub confidence: f32,
246
247    /// Text snippet that matched
248    pub matched_text: String,
249}
250
251/// Configuration for license scanning
252#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254    /// Minimum confidence threshold (0.0 - 1.0)
255    pub min_confidence: f32,
256
257    /// Risk level threshold (only report licenses >= this level)
258    pub min_risk: LicenseRisk,
259
260    /// Scan LICENSE/COPYING files
261    pub scan_license_files: bool,
262
263    /// Scan source code headers
264    pub scan_headers: bool,
265
266    /// Maximum lines to scan per file (for headers)
267    pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271    fn default() -> Self {
272        Self {
273            min_confidence: 0.7,
274            min_risk: LicenseRisk::Unknown,
275            scan_license_files: true,
276            scan_headers: true,
277            max_header_lines: 50,
278        }
279    }
280}
281
282/// License scanner for detecting licenses in codebases
283pub struct LicenseScanner {
284    config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288    fn default() -> Self {
289        Self::new()
290    }
291}
292
293impl LicenseScanner {
294    /// Create a new license scanner with default config
295    pub fn new() -> Self {
296        Self {
297            config: LicenseScanConfig::default(),
298        }
299    }
300
301    /// Create with custom configuration
302    pub fn with_config(config: LicenseScanConfig) -> Self {
303        Self { config }
304    }
305
306    /// Scan file content for license indicators
307    pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
308        let mut findings = Vec::new();
309
310        // Check if this is a license file
311        let is_license_file = self.is_license_file(file_path);
312
313        if is_license_file && self.config.scan_license_files {
314            if let Some(finding) = self.scan_license_file(content, file_path) {
315                findings.push(finding);
316            }
317        }
318
319        if self.config.scan_headers {
320            findings.extend(self.scan_headers(content, file_path));
321        }
322
323        // Filter by confidence and risk
324        findings
325            .into_iter()
326            .filter(|f| {
327                f.confidence >= self.config.min_confidence && f.license.risk() >= self.config.min_risk
328            })
329            .collect()
330    }
331
332    /// Check if a file is a license file
333    fn is_license_file(&self, file_path: &str) -> bool {
334        let path = Path::new(file_path);
335        let file_name = path
336            .file_name()
337            .and_then(|n| n.to_str())
338            .map(|s| s.to_uppercase())
339            .unwrap_or_default();
340
341        matches!(
342            file_name.as_str(),
343            "LICENSE"
344                | "LICENSE.MD"
345                | "LICENSE.TXT"
346                | "LICENCE"
347                | "LICENCE.MD"
348                | "LICENCE.TXT"
349                | "COPYING"
350                | "COPYING.MD"
351                | "COPYING.TXT"
352                | "LICENSE-MIT"
353                | "LICENSE-APACHE"
354                | "LICENSE.MIT"
355                | "LICENSE.APACHE"
356        )
357    }
358
359    /// Scan a LICENSE/COPYING file
360    fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
361        let content_lower = content.to_lowercase();
362
363        // Check for specific license texts (in order of specificity)
364        let detections: Vec<(License, f32, &str)> = vec![
365            // AGPL (must check before GPL due to substring match)
366            (
367                License::Agpl3,
368                0.95,
369                "gnu affero general public license",
370            ),
371            (License::Agpl3, 0.9, "agpl-3.0"),
372            (License::Agpl3, 0.85, "agpl version 3"),
373            // LGPL (must check before GPL)
374            (
375                License::Lgpl3,
376                0.95,
377                "gnu lesser general public license version 3",
378            ),
379            (License::Lgpl3, 0.9, "lgpl-3.0"),
380            (
381                License::Lgpl21,
382                0.95,
383                "gnu lesser general public license version 2.1",
384            ),
385            (License::Lgpl21, 0.9, "lgpl-2.1"),
386            (License::Lgpl21, 0.9, "lgpl version 2.1"),
387            // GPL
388            (
389                License::Gpl3,
390                0.95,
391                "gnu general public license version 3",
392            ),
393            // Canonical GPL3 header: "GNU GENERAL PUBLIC LICENSE\nVersion 3, 29 June 2007"
394            (License::Gpl3, 0.95, "version 3, 29 june 2007"),
395            (License::Gpl3, 0.9, "gpl-3.0"),
396            (License::Gpl3, 0.85, "gplv3"),
397            (
398                License::Gpl2,
399                0.95,
400                "gnu general public license version 2",
401            ),
402            // Canonical GPL2 header: "GNU GENERAL PUBLIC LICENSE\nVersion 2, June 1991"
403            (License::Gpl2, 0.95, "version 2, june 1991"),
404            (License::Gpl2, 0.9, "gpl-2.0"),
405            (License::Gpl2, 0.85, "gplv2"),
406            // MPL
407            (License::Mpl2, 0.95, "mozilla public license version 2.0"),
408            (License::Mpl2, 0.9, "mpl-2.0"),
409            // EPL
410            (License::Epl2, 0.95, "eclipse public license - v 2.0"),
411            (License::Epl2, 0.9, "epl-2.0"),
412            // Apache
413            (License::Apache2, 0.95, "apache license, version 2.0"),
414            (License::Apache2, 0.95, "apache license version 2.0"),
415            (License::Apache2, 0.9, "apache-2.0"),
416            (License::Apache2, 0.85, "licensed under the apache license"),
417            // MIT
418            (License::Mit, 0.95, "mit license"),
419            (License::Mit, 0.9, "permission is hereby granted, free of charge"),
420            (
421                License::Mit,
422                0.85,
423                "the software is provided \"as is\", without warranty",
424            ),
425            // BSD
426            (License::Bsd3Clause, 0.95, "3-clause bsd license"),
427            (License::Bsd3Clause, 0.9, "bsd-3-clause"),
428            (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
429            (License::Bsd2Clause, 0.95, "2-clause bsd license"),
430            (License::Bsd2Clause, 0.9, "bsd-2-clause"),
431            // ISC
432            (License::Isc, 0.95, "isc license"),
433            (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
434            // Unlicense
435            (License::Unlicense, 0.95, "this is free and unencumbered software"),
436            (License::Unlicense, 0.9, "unlicense"),
437            // CC0
438            (License::Cc0, 0.95, "cc0 1.0 universal"),
439            (License::Cc0, 0.9, "creative commons zero"),
440            // WTFPL
441            (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
442            (License::Wtfpl, 0.9, "wtfpl"),
443        ];
444
445        for (license, confidence, pattern) in detections {
446            if content_lower.contains(pattern) {
447                // Find the line number
448                let line = content_lower
449                    .lines()
450                    .enumerate()
451                    .find(|(_, l)| l.contains(pattern))
452                    .map(|(i, _)| (i + 1) as u32)
453                    .unwrap_or(1);
454
455                return Some(LicenseFinding {
456                    file: file_path.to_string(),
457                    license,
458                    line,
459                    confidence,
460                    matched_text: pattern.to_string(),
461                });
462            }
463        }
464
465        None
466    }
467
468    /// Scan source code headers for SPDX identifiers and license comments
469    fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
470        let mut findings = Vec::new();
471        let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
472
473        for (line_num, line) in lines.iter().enumerate() {
474            let line_lower = line.to_lowercase();
475
476            // Check for SPDX license identifiers
477            if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
478            {
479                findings.push(finding);
480                continue;
481            }
482
483            // Check for license comments
484            if let Some(finding) =
485                self.check_license_comment(&line_lower, file_path, line_num + 1)
486            {
487                findings.push(finding);
488            }
489        }
490
491        findings
492    }
493
494    /// Check for SPDX license identifiers
495    fn check_spdx_identifier(
496        &self,
497        line: &str,
498        file_path: &str,
499        line_num: usize,
500    ) -> Option<LicenseFinding> {
501        // Pattern: SPDX-License-Identifier: <license>
502        if !line.contains("spdx-license-identifier") {
503            return None;
504        }
505
506        let spdx_mappings: Vec<(&str, License)> = vec![
507            ("agpl-3.0", License::Agpl3),
508            ("gpl-3.0", License::Gpl3),
509            ("gpl-2.0", License::Gpl2),
510            ("lgpl-3.0", License::Lgpl3),
511            ("lgpl-2.1", License::Lgpl21),
512            ("mpl-2.0", License::Mpl2),
513            ("epl-2.0", License::Epl2),
514            ("apache-2.0", License::Apache2),
515            ("mit", License::Mit),
516            ("bsd-3-clause", License::Bsd3Clause),
517            ("bsd-2-clause", License::Bsd2Clause),
518            ("isc", License::Isc),
519            ("unlicense", License::Unlicense),
520            ("cc0-1.0", License::Cc0),
521        ];
522
523        for (spdx_id, license) in spdx_mappings {
524            if line.contains(spdx_id) {
525                return Some(LicenseFinding {
526                    file: file_path.to_string(),
527                    license,
528                    line: line_num as u32,
529                    confidence: 0.99, // SPDX identifiers are very reliable
530                    matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
531                });
532            }
533        }
534
535        None
536    }
537
538    /// Check for license mentions in comments
539    fn check_license_comment(
540        &self,
541        line: &str,
542        file_path: &str,
543        line_num: usize,
544    ) -> Option<LicenseFinding> {
545        // Must be in a comment
546        if !line.contains("//")
547            && !line.contains("/*")
548            && !line.contains("*")
549            && !line.contains("#")
550        {
551            return None;
552        }
553
554        let comment_patterns: Vec<(&str, License, f32)> = vec![
555            // High confidence patterns
556            ("licensed under agpl", License::Agpl3, 0.85),
557            ("licensed under gpl", License::Gpl3, 0.8),
558            ("licensed under lgpl", License::Lgpl3, 0.8),
559            ("licensed under the mit license", License::Mit, 0.85),
560            ("licensed under apache", License::Apache2, 0.85),
561            // Medium confidence patterns
562            ("this file is part of", License::Unknown, 0.5), // Often followed by license
563            ("copyright", License::Unknown, 0.3),
564        ];
565
566        for (pattern, license, confidence) in comment_patterns {
567            if line.contains(pattern) && license != License::Unknown {
568                return Some(LicenseFinding {
569                    file: file_path.to_string(),
570                    license,
571                    line: line_num as u32,
572                    confidence,
573                    matched_text: pattern.to_string(),
574                });
575            }
576        }
577
578        None
579    }
580
581    /// Scan a file path for license information
582    pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
583        let content = std::fs::read_to_string(path)?;
584        let file_path = path.to_string_lossy();
585        Ok(self.scan(&content, &file_path))
586    }
587
588    /// Scan a repository for license information
589    pub fn scan_repository(
590        &self,
591        repo_path: &Path,
592    ) -> Result<Vec<LicenseFinding>, std::io::Error> {
593        use ignore::WalkBuilder;
594
595        let mut all_findings = Vec::new();
596
597        let walker = WalkBuilder::new(repo_path)
598            .hidden(false)
599            .git_ignore(true)
600            .build();
601
602        for entry in walker.flatten() {
603            let path = entry.path();
604
605            if !path.is_file() {
606                continue;
607            }
608
609            // Scan license files
610            if self.is_license_file(&path.to_string_lossy()) {
611                if let Ok(findings) = self.scan_file(path) {
612                    all_findings.extend(findings);
613                }
614                continue;
615            }
616
617            // Scan source file headers if configured
618            if self.config.scan_headers {
619                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
620                let is_source = matches!(
621                    ext,
622                    "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
623                );
624
625                if is_source {
626                    if let Ok(findings) = self.scan_file(path) {
627                        all_findings.extend(findings);
628                    }
629                }
630            }
631        }
632
633        // Deduplicate by file and license
634        all_findings.sort_by(|a, b| {
635            a.file
636                .cmp(&b.file)
637                .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
638        });
639        all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
640
641        Ok(all_findings)
642    }
643
644    /// Get a summary of license findings
645    pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
646        let mut summary = LicenseSummary::default();
647
648        for finding in findings {
649            match finding.license.risk() {
650                LicenseRisk::Critical => summary.critical_count += 1,
651                LicenseRisk::High => summary.high_count += 1,
652                LicenseRisk::Medium => summary.medium_count += 1,
653                LicenseRisk::Low => summary.low_count += 1,
654                LicenseRisk::Unknown => summary.unknown_count += 1,
655            }
656
657            if finding.license.is_copyleft() {
658                summary.copyleft_files.push(finding.file.clone());
659            }
660
661            // Track unique licenses
662            if !summary.licenses.contains(&finding.license) {
663                summary.licenses.push(finding.license);
664            }
665        }
666
667        summary.copyleft_files.sort();
668        summary.copyleft_files.dedup();
669
670        summary
671    }
672}
673
674/// Summary of license findings
675#[derive(Debug, Clone, Default, Serialize, Deserialize)]
676pub struct LicenseSummary {
677    /// Count of critical risk licenses (AGPL)
678    pub critical_count: usize,
679
680    /// Count of high risk licenses (GPL, proprietary)
681    pub high_count: usize,
682
683    /// Count of medium risk licenses (LGPL, MPL)
684    pub medium_count: usize,
685
686    /// Count of low risk licenses (MIT, Apache, BSD)
687    pub low_count: usize,
688
689    /// Count of unknown licenses
690    pub unknown_count: usize,
691
692    /// Files containing copyleft licenses
693    pub copyleft_files: Vec<String>,
694
695    /// Unique licenses found
696    pub licenses: Vec<License>,
697}
698
699impl LicenseSummary {
700    /// Check if any copyleft licenses were found
701    pub fn has_copyleft(&self) -> bool {
702        !self.copyleft_files.is_empty()
703    }
704
705    /// Check if any high-risk licenses were found
706    pub fn has_high_risk(&self) -> bool {
707        self.critical_count > 0 || self.high_count > 0
708    }
709
710    /// Get total number of findings
711    pub fn total(&self) -> usize {
712        self.critical_count + self.high_count + self.medium_count + self.low_count + self.unknown_count
713    }
714}
715
/// Unit tests for license metadata, content scanning and summarizing.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_license_risk_levels() {
        assert_eq!(License::Agpl3.risk(), LicenseRisk::Critical);
        assert_eq!(License::Gpl3.risk(), LicenseRisk::High);
        assert_eq!(License::Lgpl3.risk(), LicenseRisk::Medium);
        assert_eq!(License::Mit.risk(), LicenseRisk::Low);
        assert_eq!(License::Unknown.risk(), LicenseRisk::Unknown);
    }

    #[test]
    fn test_copyleft_detection() {
        assert!(License::Gpl3.is_copyleft());
        assert!(License::Agpl3.is_copyleft());
        assert!(License::Lgpl3.is_copyleft());
        assert!(!License::Mit.is_copyleft());
        assert!(!License::Apache2.is_copyleft());
    }

    #[test]
    fn test_strong_copyleft() {
        assert!(License::Gpl3.is_strong_copyleft());
        assert!(License::Agpl3.is_strong_copyleft());
        assert!(!License::Lgpl3.is_strong_copyleft());
        assert!(!License::Mit.is_strong_copyleft());
    }

    #[test]
    fn test_scan_mit_license() {
        let scanner = LicenseScanner::new();
        let content = r#"
MIT License

Copyright (c) 2024 Example Corp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software.
"#;

        let findings = scanner.scan(content, "LICENSE");
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Mit);
        assert!(findings[0].confidence >= 0.9);
    }

    #[test]
    fn test_scan_gpl3_license() {
        let scanner = LicenseScanner::new();
        let content = r#"
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
"#;

        let findings = scanner.scan(content, "COPYING");
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Gpl3);
    }

    #[test]
    fn test_scan_spdx_identifier() {
        let scanner = LicenseScanner::new();
        let content = r#"
// SPDX-License-Identifier: Apache-2.0

fn main() {
    println!("Hello, world!");
}
"#;

        let findings = scanner.scan(content, "src/main.rs");
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Apache2);
        assert!(findings[0].confidence >= 0.95);
    }

    #[test]
    fn test_scan_agpl_in_header() {
        let scanner = LicenseScanner::new();
        let content = r#"
# Licensed under AGPL-3.0
# Copyright 2024 Example Corp

def main():
    pass
"#;

        let findings = scanner.scan(content, "main.py");
        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.license == License::Agpl3));
    }

    #[test]
    fn test_license_summary() {
        let findings = vec![
            LicenseFinding {
                file: "lib/a.rs".to_string(),
                license: License::Gpl3,
                line: 1,
                confidence: 0.95,
                matched_text: "gpl-3.0".to_string(),
            },
            LicenseFinding {
                file: "lib/b.rs".to_string(),
                license: License::Mit,
                line: 1,
                confidence: 0.9,
                matched_text: "mit".to_string(),
            },
            LicenseFinding {
                file: "lib/c.rs".to_string(),
                license: License::Agpl3,
                line: 1,
                confidence: 0.95,
                matched_text: "agpl-3.0".to_string(),
            },
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert_eq!(summary.total(), 3);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }

    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        assert!(scanner.is_license_file("LICENSE"));
        assert!(scanner.is_license_file("LICENSE.md"));
        assert!(scanner.is_license_file("COPYING"));
        assert!(scanner.is_license_file("LICENSE-MIT"));
        assert!(scanner.is_license_file("docs/licence.txt"));
        assert!(!scanner.is_license_file("src/main.rs"));
        assert!(!scanner.is_license_file("README.md"));
    }

    #[test]
    fn test_risk_ordering() {
        assert!(LicenseRisk::Critical > LicenseRisk::High);
        assert!(LicenseRisk::High > LicenseRisk::Medium);
        assert!(LicenseRisk::Medium > LicenseRisk::Low);
        assert!(LicenseRisk::Low > LicenseRisk::Unknown);
    }

    #[test]
    fn test_spdx_ids() {
        assert_eq!(License::Gpl3.spdx_id(), "GPL-3.0-only");
        assert_eq!(License::Mit.spdx_id(), "MIT");
        assert_eq!(License::Apache2.spdx_id(), "Apache-2.0");
    }
}