infiniloom_engine/
license.rs

1//! License detection for compliance scanning
2//!
3//! This module detects open-source licenses in codebases, particularly
4//! focusing on copyleft licenses (GPL, AGPL, LGPL) that may require
5//! special handling in enterprise environments.
6//!
7//! # Compliance Use Cases
8//!
9//! - **Enterprise Code Audit**: Identify copyleft code before embedding
10//! - **Legal Review**: Flag files requiring license compliance
11//! - **CI/CD Gates**: Fail builds containing prohibited licenses
12//!
13//! # Supported Licenses
14//!
15//! | License | Risk Level | Notes |
16//! |---------|------------|-------|
17//! | GPL-3.0 | High | Strong copyleft, viral |
18//! | GPL-2.0 | High | Strong copyleft |
19//! | AGPL-3.0 | Critical | Network copyleft |
20//! | LGPL-3.0 | Medium | Weak copyleft |
21//! | LGPL-2.1 | Medium | Weak copyleft |
22//! | MIT | Low | Permissive |
23//! | Apache-2.0 | Low | Permissive |
24//! | BSD-3-Clause | Low | Permissive |
25//! | Unlicensed | Unknown | No license detected |
26//!
27//! # Example
28//!
29//! ```rust,ignore
30//! use infiniloom_engine::license::{LicenseScanner, LicenseRisk};
31//!
32//! let scanner = LicenseScanner::new();
33//!
34//! // Scan a file
//! for finding in scanner.scan_file(Path::new("lib/crypto.rs"))? {
//!     if finding.license.risk() >= LicenseRisk::High {
//!         println!("Warning: {} contains {}", finding.file, finding.license.name());
//!     }
//! }
40//!
41//! // Scan entire repository
42//! let findings = scanner.scan_repository(repo_path)?;
43//! let copyleft_files: Vec<_> = findings
44//!     .iter()
45//!     .filter(|f| f.license.is_copyleft())
46//!     .collect();
47//! ```
48
49use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53/// Detected license types
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57    // === Strong Copyleft ===
58    /// GNU General Public License v3.0
59    Gpl3,
60    /// GNU General Public License v2.0
61    Gpl2,
62    /// GNU Affero General Public License v3.0 (network copyleft)
63    Agpl3,
64
65    // === Weak Copyleft ===
66    /// GNU Lesser General Public License v3.0
67    Lgpl3,
68    /// GNU Lesser General Public License v2.1
69    Lgpl21,
70    /// Mozilla Public License 2.0
71    Mpl2,
72    /// Eclipse Public License 2.0
73    Epl2,
74
75    // === Permissive ===
76    /// MIT License
77    Mit,
78    /// Apache License 2.0
79    Apache2,
80    /// BSD 3-Clause "New" License
81    Bsd3Clause,
82    /// BSD 2-Clause "Simplified" License
83    Bsd2Clause,
84    /// ISC License
85    Isc,
86    /// The Unlicense (public domain)
87    Unlicense,
88    /// Creative Commons Zero v1.0 Universal
89    Cc0,
90    /// Do What The Fuck You Want To Public License
91    Wtfpl,
92
93    // === Proprietary/Restricted ===
94    /// Proprietary/Commercial license
95    Proprietary,
96
97    // === Unknown ===
98    /// Unknown license
99    Unknown,
100}
101
102impl License {
103    /// Get the SPDX identifier for this license
104    pub fn spdx_id(&self) -> &'static str {
105        match self {
106            Self::Gpl3 => "GPL-3.0-only",
107            Self::Gpl2 => "GPL-2.0-only",
108            Self::Agpl3 => "AGPL-3.0-only",
109            Self::Lgpl3 => "LGPL-3.0-only",
110            Self::Lgpl21 => "LGPL-2.1-only",
111            Self::Mpl2 => "MPL-2.0",
112            Self::Epl2 => "EPL-2.0",
113            Self::Mit => "MIT",
114            Self::Apache2 => "Apache-2.0",
115            Self::Bsd3Clause => "BSD-3-Clause",
116            Self::Bsd2Clause => "BSD-2-Clause",
117            Self::Isc => "ISC",
118            Self::Unlicense => "Unlicense",
119            Self::Cc0 => "CC0-1.0",
120            Self::Wtfpl => "WTFPL",
121            Self::Proprietary => "PROPRIETARY",
122            Self::Unknown => "UNKNOWN",
123        }
124    }
125
126    /// Get human-readable name
127    pub fn name(&self) -> &'static str {
128        match self {
129            Self::Gpl3 => "GNU General Public License v3.0",
130            Self::Gpl2 => "GNU General Public License v2.0",
131            Self::Agpl3 => "GNU Affero General Public License v3.0",
132            Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133            Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134            Self::Mpl2 => "Mozilla Public License 2.0",
135            Self::Epl2 => "Eclipse Public License 2.0",
136            Self::Mit => "MIT License",
137            Self::Apache2 => "Apache License 2.0",
138            Self::Bsd3Clause => "BSD 3-Clause License",
139            Self::Bsd2Clause => "BSD 2-Clause License",
140            Self::Isc => "ISC License",
141            Self::Unlicense => "The Unlicense",
142            Self::Cc0 => "Creative Commons Zero v1.0",
143            Self::Wtfpl => "WTFPL",
144            Self::Proprietary => "Proprietary License",
145            Self::Unknown => "Unknown License",
146        }
147    }
148
149    /// Get the risk level for this license
150    pub fn risk(&self) -> LicenseRisk {
151        match self {
152            Self::Agpl3 => LicenseRisk::Critical,
153            Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154            Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155            Self::Mit
156            | Self::Apache2
157            | Self::Bsd3Clause
158            | Self::Bsd2Clause
159            | Self::Isc
160            | Self::Unlicense
161            | Self::Cc0
162            | Self::Wtfpl => LicenseRisk::Low,
163            Self::Proprietary => LicenseRisk::High,
164            Self::Unknown => LicenseRisk::Unknown,
165        }
166    }
167
168    /// Check if this is a copyleft license
169    pub fn is_copyleft(&self) -> bool {
170        matches!(
171            self,
172            Self::Gpl3
173                | Self::Gpl2
174                | Self::Agpl3
175                | Self::Lgpl3
176                | Self::Lgpl21
177                | Self::Mpl2
178                | Self::Epl2
179        )
180    }
181
182    /// Check if this is a strong (viral) copyleft license
183    pub fn is_strong_copyleft(&self) -> bool {
184        matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185    }
186
187    /// Check if this is a permissive license
188    pub fn is_permissive(&self) -> bool {
189        matches!(
190            self,
191            Self::Mit
192                | Self::Apache2
193                | Self::Bsd3Clause
194                | Self::Bsd2Clause
195                | Self::Isc
196                | Self::Unlicense
197                | Self::Cc0
198                | Self::Wtfpl
199        )
200    }
201}
202
203/// License risk levels for compliance
204#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207    /// Unknown risk (no license detected)
208    Unknown,
209    /// Low risk (permissive licenses)
210    Low,
211    /// Medium risk (weak copyleft)
212    Medium,
213    /// High risk (strong copyleft, proprietary)
214    High,
215    /// Critical risk (AGPL - network copyleft)
216    Critical,
217}
218
219impl LicenseRisk {
220    /// Get string representation
221    pub fn as_str(&self) -> &'static str {
222        match self {
223            Self::Unknown => "unknown",
224            Self::Low => "low",
225            Self::Medium => "medium",
226            Self::High => "high",
227            Self::Critical => "critical",
228        }
229    }
230}
231
232/// A license detection result
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235    /// File where license was found
236    pub file: String,
237
238    /// Detected license
239    pub license: License,
240
241    /// Line number where license indicator was found
242    pub line: u32,
243
244    /// Confidence score (0.0 - 1.0)
245    pub confidence: f32,
246
247    /// Text snippet that matched
248    pub matched_text: String,
249}
250
251/// Configuration for license scanning
252#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254    /// Minimum confidence threshold (0.0 - 1.0)
255    pub min_confidence: f32,
256
257    /// Risk level threshold (only report licenses >= this level)
258    pub min_risk: LicenseRisk,
259
260    /// Scan LICENSE/COPYING files
261    pub scan_license_files: bool,
262
263    /// Scan source code headers
264    pub scan_headers: bool,
265
266    /// Maximum lines to scan per file (for headers)
267    pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271    fn default() -> Self {
272        Self {
273            min_confidence: 0.7,
274            min_risk: LicenseRisk::Unknown,
275            scan_license_files: true,
276            scan_headers: true,
277            max_header_lines: 50,
278        }
279    }
280}
281
282/// License scanner for detecting licenses in codebases
283pub struct LicenseScanner {
284    config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288    fn default() -> Self {
289        Self::new()
290    }
291}
292
293impl LicenseScanner {
294    /// Create a new license scanner with default config
295    pub fn new() -> Self {
296        Self { config: LicenseScanConfig::default() }
297    }
298
299    /// Create with custom configuration
300    pub fn with_config(config: LicenseScanConfig) -> Self {
301        Self { config }
302    }
303
304    /// Scan file content for license indicators
305    pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
306        let mut findings = Vec::new();
307
308        // Check if this is a license file
309        let is_license_file = self.is_license_file(file_path);
310
311        if is_license_file && self.config.scan_license_files {
312            if let Some(finding) = self.scan_license_file(content, file_path) {
313                findings.push(finding);
314            }
315        }
316
317        if self.config.scan_headers {
318            findings.extend(self.scan_headers(content, file_path));
319        }
320
321        // Filter by confidence and risk
322        findings
323            .into_iter()
324            .filter(|f| {
325                f.confidence >= self.config.min_confidence
326                    && f.license.risk() >= self.config.min_risk
327            })
328            .collect()
329    }
330
331    /// Check if a file is a license file
332    fn is_license_file(&self, file_path: &str) -> bool {
333        let path = Path::new(file_path);
334        let file_name = path
335            .file_name()
336            .and_then(|n| n.to_str())
337            .map(|s| s.to_uppercase())
338            .unwrap_or_default();
339
340        matches!(
341            file_name.as_str(),
342            "LICENSE"
343                | "LICENSE.MD"
344                | "LICENSE.TXT"
345                | "LICENCE"
346                | "LICENCE.MD"
347                | "LICENCE.TXT"
348                | "COPYING"
349                | "COPYING.MD"
350                | "COPYING.TXT"
351                | "LICENSE-MIT"
352                | "LICENSE-APACHE"
353                | "LICENSE.MIT"
354                | "LICENSE.APACHE"
355        )
356    }
357
358    /// Scan a LICENSE/COPYING file
359    fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
360        let content_lower = content.to_lowercase();
361
362        // Check for specific license texts (in order of specificity)
363        let detections: Vec<(License, f32, &str)> = vec![
364            // AGPL (must check before GPL due to substring match)
365            (License::Agpl3, 0.95, "gnu affero general public license"),
366            (License::Agpl3, 0.9, "agpl-3.0"),
367            (License::Agpl3, 0.85, "agpl version 3"),
368            // LGPL (must check before GPL)
369            (License::Lgpl3, 0.95, "gnu lesser general public license version 3"),
370            (License::Lgpl3, 0.9, "lgpl-3.0"),
371            (License::Lgpl21, 0.95, "gnu lesser general public license version 2.1"),
372            (License::Lgpl21, 0.9, "lgpl-2.1"),
373            (License::Lgpl21, 0.9, "lgpl version 2.1"),
374            // GPL
375            (License::Gpl3, 0.95, "gnu general public license version 3"),
376            // Canonical GPL3 header: "GNU GENERAL PUBLIC LICENSE\nVersion 3, 29 June 2007"
377            (License::Gpl3, 0.95, "version 3, 29 june 2007"),
378            (License::Gpl3, 0.9, "gpl-3.0"),
379            (License::Gpl3, 0.85, "gplv3"),
380            (License::Gpl2, 0.95, "gnu general public license version 2"),
381            // Canonical GPL2 header: "GNU GENERAL PUBLIC LICENSE\nVersion 2, June 1991"
382            (License::Gpl2, 0.95, "version 2, june 1991"),
383            (License::Gpl2, 0.9, "gpl-2.0"),
384            (License::Gpl2, 0.85, "gplv2"),
385            // MPL
386            (License::Mpl2, 0.95, "mozilla public license version 2.0"),
387            (License::Mpl2, 0.9, "mpl-2.0"),
388            // EPL
389            (License::Epl2, 0.95, "eclipse public license - v 2.0"),
390            (License::Epl2, 0.9, "epl-2.0"),
391            // Apache
392            (License::Apache2, 0.95, "apache license, version 2.0"),
393            (License::Apache2, 0.95, "apache license version 2.0"),
394            (License::Apache2, 0.9, "apache-2.0"),
395            (License::Apache2, 0.85, "licensed under the apache license"),
396            // MIT
397            (License::Mit, 0.95, "mit license"),
398            (License::Mit, 0.9, "permission is hereby granted, free of charge"),
399            (License::Mit, 0.85, "the software is provided \"as is\", without warranty"),
400            // BSD
401            (License::Bsd3Clause, 0.95, "3-clause bsd license"),
402            (License::Bsd3Clause, 0.9, "bsd-3-clause"),
403            (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
404            (License::Bsd2Clause, 0.95, "2-clause bsd license"),
405            (License::Bsd2Clause, 0.9, "bsd-2-clause"),
406            // ISC
407            (License::Isc, 0.95, "isc license"),
408            (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
409            // Unlicense
410            (License::Unlicense, 0.95, "this is free and unencumbered software"),
411            (License::Unlicense, 0.9, "unlicense"),
412            // CC0
413            (License::Cc0, 0.95, "cc0 1.0 universal"),
414            (License::Cc0, 0.9, "creative commons zero"),
415            // WTFPL
416            (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
417            (License::Wtfpl, 0.9, "wtfpl"),
418        ];
419
420        for (license, confidence, pattern) in detections {
421            if content_lower.contains(pattern) {
422                // Find the line number
423                let line = content_lower
424                    .lines()
425                    .enumerate()
426                    .find(|(_, l)| l.contains(pattern))
427                    .map(|(i, _)| (i + 1) as u32)
428                    .unwrap_or(1);
429
430                return Some(LicenseFinding {
431                    file: file_path.to_string(),
432                    license,
433                    line,
434                    confidence,
435                    matched_text: pattern.to_string(),
436                });
437            }
438        }
439
440        None
441    }
442
443    /// Scan source code headers for SPDX identifiers and license comments
444    fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
445        let mut findings = Vec::new();
446        let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
447
448        for (line_num, line) in lines.iter().enumerate() {
449            let line_lower = line.to_lowercase();
450
451            // Check for SPDX license identifiers
452            if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
453            {
454                findings.push(finding);
455                continue;
456            }
457
458            // Check for license comments
459            if let Some(finding) = self.check_license_comment(&line_lower, file_path, line_num + 1)
460            {
461                findings.push(finding);
462            }
463        }
464
465        findings
466    }
467
468    /// Check for SPDX license identifiers
469    fn check_spdx_identifier(
470        &self,
471        line: &str,
472        file_path: &str,
473        line_num: usize,
474    ) -> Option<LicenseFinding> {
475        // Pattern: SPDX-License-Identifier: <license>
476        if !line.contains("spdx-license-identifier") {
477            return None;
478        }
479
480        let spdx_mappings: Vec<(&str, License)> = vec![
481            ("agpl-3.0", License::Agpl3),
482            ("gpl-3.0", License::Gpl3),
483            ("gpl-2.0", License::Gpl2),
484            ("lgpl-3.0", License::Lgpl3),
485            ("lgpl-2.1", License::Lgpl21),
486            ("mpl-2.0", License::Mpl2),
487            ("epl-2.0", License::Epl2),
488            ("apache-2.0", License::Apache2),
489            ("mit", License::Mit),
490            ("bsd-3-clause", License::Bsd3Clause),
491            ("bsd-2-clause", License::Bsd2Clause),
492            ("isc", License::Isc),
493            ("unlicense", License::Unlicense),
494            ("cc0-1.0", License::Cc0),
495        ];
496
497        for (spdx_id, license) in spdx_mappings {
498            if line.contains(spdx_id) {
499                return Some(LicenseFinding {
500                    file: file_path.to_string(),
501                    license,
502                    line: line_num as u32,
503                    confidence: 0.99, // SPDX identifiers are very reliable
504                    matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
505                });
506            }
507        }
508
509        None
510    }
511
512    /// Check for license mentions in comments
513    fn check_license_comment(
514        &self,
515        line: &str,
516        file_path: &str,
517        line_num: usize,
518    ) -> Option<LicenseFinding> {
519        // Must be in a comment
520        if !line.contains("//")
521            && !line.contains("/*")
522            && !line.contains("*")
523            && !line.contains("#")
524        {
525            return None;
526        }
527
528        let comment_patterns: Vec<(&str, License, f32)> = vec![
529            // High confidence patterns
530            ("licensed under agpl", License::Agpl3, 0.85),
531            ("licensed under gpl", License::Gpl3, 0.8),
532            ("licensed under lgpl", License::Lgpl3, 0.8),
533            ("licensed under the mit license", License::Mit, 0.85),
534            ("licensed under apache", License::Apache2, 0.85),
535            // Medium confidence patterns
536            ("this file is part of", License::Unknown, 0.5), // Often followed by license
537            ("copyright", License::Unknown, 0.3),
538        ];
539
540        for (pattern, license, confidence) in comment_patterns {
541            if line.contains(pattern) && license != License::Unknown {
542                return Some(LicenseFinding {
543                    file: file_path.to_string(),
544                    license,
545                    line: line_num as u32,
546                    confidence,
547                    matched_text: pattern.to_string(),
548                });
549            }
550        }
551
552        None
553    }
554
555    /// Scan a file path for license information
556    pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
557        let content = std::fs::read_to_string(path)?;
558        let file_path = path.to_string_lossy();
559        Ok(self.scan(&content, &file_path))
560    }
561
562    /// Scan a repository for license information
563    pub fn scan_repository(&self, repo_path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
564        use ignore::WalkBuilder;
565
566        let mut all_findings = Vec::new();
567
568        let walker = WalkBuilder::new(repo_path)
569            .hidden(false)
570            .git_ignore(true)
571            .build();
572
573        for entry in walker.flatten() {
574            let path = entry.path();
575
576            if !path.is_file() {
577                continue;
578            }
579
580            // Scan license files
581            if self.is_license_file(&path.to_string_lossy()) {
582                if let Ok(findings) = self.scan_file(path) {
583                    all_findings.extend(findings);
584                }
585                continue;
586            }
587
588            // Scan source file headers if configured
589            if self.config.scan_headers {
590                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
591                let is_source = matches!(
592                    ext,
593                    "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
594                );
595
596                if is_source {
597                    if let Ok(findings) = self.scan_file(path) {
598                        all_findings.extend(findings);
599                    }
600                }
601            }
602        }
603
604        // Deduplicate by file and license
605        all_findings.sort_by(|a, b| {
606            a.file
607                .cmp(&b.file)
608                .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
609        });
610        all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
611
612        Ok(all_findings)
613    }
614
615    /// Get a summary of license findings
616    pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
617        let mut summary = LicenseSummary::default();
618
619        for finding in findings {
620            match finding.license.risk() {
621                LicenseRisk::Critical => summary.critical_count += 1,
622                LicenseRisk::High => summary.high_count += 1,
623                LicenseRisk::Medium => summary.medium_count += 1,
624                LicenseRisk::Low => summary.low_count += 1,
625                LicenseRisk::Unknown => summary.unknown_count += 1,
626            }
627
628            if finding.license.is_copyleft() {
629                summary.copyleft_files.push(finding.file.clone());
630            }
631
632            // Track unique licenses
633            if !summary.licenses.contains(&finding.license) {
634                summary.licenses.push(finding.license);
635            }
636        }
637
638        summary.copyleft_files.sort();
639        summary.copyleft_files.dedup();
640
641        summary
642    }
643}
644
645/// Summary of license findings
646#[derive(Debug, Clone, Default, Serialize, Deserialize)]
647pub struct LicenseSummary {
648    /// Count of critical risk licenses (AGPL)
649    pub critical_count: usize,
650
651    /// Count of high risk licenses (GPL, proprietary)
652    pub high_count: usize,
653
654    /// Count of medium risk licenses (LGPL, MPL)
655    pub medium_count: usize,
656
657    /// Count of low risk licenses (MIT, Apache, BSD)
658    pub low_count: usize,
659
660    /// Count of unknown licenses
661    pub unknown_count: usize,
662
663    /// Files containing copyleft licenses
664    pub copyleft_files: Vec<String>,
665
666    /// Unique licenses found
667    pub licenses: Vec<License>,
668}
669
670impl LicenseSummary {
671    /// Check if any copyleft licenses were found
672    pub fn has_copyleft(&self) -> bool {
673        !self.copyleft_files.is_empty()
674    }
675
676    /// Check if any high-risk licenses were found
677    pub fn has_high_risk(&self) -> bool {
678        self.critical_count > 0 || self.high_count > 0
679    }
680
681    /// Get total number of findings
682    pub fn total(&self) -> usize {
683        self.critical_count
684            + self.high_count
685            + self.medium_count
686            + self.low_count
687            + self.unknown_count
688    }
689}
690
#[cfg(test)]
mod tests {
    use super::*;

    // ---- Fixtures -------------------------------------------------------

    /// Opening of an MIT license file.
    const MIT_LICENSE_TEXT: &str = r#"
MIT License

Copyright (c) 2024 Example Corp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software.
"#;

    /// Header of the GPL-3.0 license text.
    const GPL3_LICENSE_TEXT: &str = r#"
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
"#;

    /// Rust source file carrying an SPDX tag.
    const SPDX_SOURCE: &str = r#"
// SPDX-License-Identifier: Apache-2.0

fn main() {
    println!("Hello, world!");
}
"#;

    /// Python source file with a free-form license comment.
    const AGPL_SOURCE: &str = r#"
# Licensed under AGPL-3.0
# Copyright 2024 Example Corp

def main():
    pass
"#;

    /// Builds a finding on line 1 of `file`.
    fn finding(file: &str, license: License, confidence: f32, matched: &str) -> LicenseFinding {
        LicenseFinding {
            file: file.to_string(),
            license,
            line: 1,
            confidence,
            matched_text: matched.to_string(),
        }
    }

    // ---- License metadata ------------------------------------------------

    #[test]
    fn test_license_risk_levels() {
        assert_eq!(License::Agpl3.risk(), LicenseRisk::Critical);
        assert_eq!(License::Gpl3.risk(), LicenseRisk::High);
        assert_eq!(License::Lgpl3.risk(), LicenseRisk::Medium);
        assert_eq!(License::Mit.risk(), LicenseRisk::Low);
        assert_eq!(License::Unknown.risk(), LicenseRisk::Unknown);
    }

    #[test]
    fn test_copyleft_detection() {
        for license in [License::Gpl3, License::Agpl3, License::Lgpl3].iter() {
            assert!(license.is_copyleft());
        }
        for license in [License::Mit, License::Apache2].iter() {
            assert!(!license.is_copyleft());
        }
    }

    #[test]
    fn test_strong_copyleft() {
        for license in [License::Gpl3, License::Agpl3].iter() {
            assert!(license.is_strong_copyleft());
        }
        for license in [License::Lgpl3, License::Mit].iter() {
            assert!(!license.is_strong_copyleft());
        }
    }

    #[test]
    fn test_risk_ordering() {
        assert!(LicenseRisk::Critical > LicenseRisk::High);
        assert!(LicenseRisk::High > LicenseRisk::Medium);
        assert!(LicenseRisk::Medium > LicenseRisk::Low);
        assert!(LicenseRisk::Low > LicenseRisk::Unknown);
    }

    #[test]
    fn test_spdx_ids() {
        assert_eq!(License::Gpl3.spdx_id(), "GPL-3.0-only");
        assert_eq!(License::Mit.spdx_id(), "MIT");
        assert_eq!(License::Apache2.spdx_id(), "Apache-2.0");
    }

    // ---- Scanning --------------------------------------------------------

    #[test]
    fn test_scan_mit_license() {
        let findings = LicenseScanner::new().scan(MIT_LICENSE_TEXT, "LICENSE");

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Mit);
        assert!(findings[0].confidence >= 0.9);
    }

    #[test]
    fn test_scan_gpl3_license() {
        let findings = LicenseScanner::new().scan(GPL3_LICENSE_TEXT, "COPYING");

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Gpl3);
    }

    #[test]
    fn test_scan_spdx_identifier() {
        let findings = LicenseScanner::new().scan(SPDX_SOURCE, "src/main.rs");

        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].license, License::Apache2);
        assert!(findings[0].confidence >= 0.95);
    }

    #[test]
    fn test_scan_agpl_in_header() {
        let findings = LicenseScanner::new().scan(AGPL_SOURCE, "main.py");

        assert!(!findings.is_empty());
        assert!(findings.iter().any(|f| f.license == License::Agpl3));
    }

    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        for name in ["LICENSE", "LICENSE.md", "COPYING", "LICENSE-MIT"].iter() {
            assert!(scanner.is_license_file(name));
        }
        for name in ["src/main.rs", "README.md"].iter() {
            assert!(!scanner.is_license_file(name));
        }
    }

    // ---- Summary ---------------------------------------------------------

    #[test]
    fn test_license_summary() {
        let findings = vec![
            finding("lib/a.rs", License::Gpl3, 0.95, "gpl-3.0"),
            finding("lib/b.rs", License::Mit, 0.9, "mit"),
            finding("lib/c.rs", License::Agpl3, 0.95, "agpl-3.0"),
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }
}