infiniloom_engine/
license.rs

1//! License detection for compliance scanning
2//!
3//! This module detects open-source licenses in codebases, particularly
4//! focusing on copyleft licenses (GPL, AGPL, LGPL) that may require
5//! special handling in enterprise environments.
6//!
7//! # Compliance Use Cases
8//!
9//! - **Enterprise Code Audit**: Identify copyleft code before embedding
10//! - **Legal Review**: Flag files requiring license compliance
11//! - **CI/CD Gates**: Fail builds containing prohibited licenses
12//!
13//! # Supported Licenses
14//!
15//! | License | Risk Level | Notes |
16//! |---------|------------|-------|
17//! | GPL-3.0 | High | Strong copyleft, viral |
18//! | GPL-2.0 | High | Strong copyleft |
19//! | AGPL-3.0 | Critical | Network copyleft |
20//! | LGPL-3.0 | Medium | Weak copyleft |
21//! | LGPL-2.1 | Medium | Weak copyleft |
22//! | MIT | Low | Permissive |
23//! | Apache-2.0 | Low | Permissive |
24//! | BSD-3-Clause | Low | Permissive |
25//! | Unlicensed | Unknown | No license detected |
26//!
27//! # Example
28//!
29//! ```rust,ignore
30//! use infiniloom_engine::license::{LicenseScanner, LicenseRisk};
31//!
32//! let scanner = LicenseScanner::new();
33//!
//! // Scan a single file; `scan_file` returns every finding in that file
//! for finding in scanner.scan_file(Path::new("lib/crypto.rs"))? {
//!     if finding.license.risk() >= LicenseRisk::High {
//!         println!("Warning: {} contains {}", finding.file, finding.license.name());
//!     }
//! }
40//!
41//! // Scan entire repository
42//! let findings = scanner.scan_repository(repo_path)?;
43//! let copyleft_files: Vec<_> = findings
44//!     .iter()
45//!     .filter(|f| f.license.is_copyleft())
46//!     .collect();
47//! ```
48
49use std::path::Path;
50
51use serde::{Deserialize, Serialize};
52
53/// Detected license types
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
55#[serde(rename_all = "kebab-case")]
56pub enum License {
57    // === Strong Copyleft ===
58    /// GNU General Public License v3.0
59    Gpl3,
60    /// GNU General Public License v2.0
61    Gpl2,
62    /// GNU Affero General Public License v3.0 (network copyleft)
63    Agpl3,
64
65    // === Weak Copyleft ===
66    /// GNU Lesser General Public License v3.0
67    Lgpl3,
68    /// GNU Lesser General Public License v2.1
69    Lgpl21,
70    /// Mozilla Public License 2.0
71    Mpl2,
72    /// Eclipse Public License 2.0
73    Epl2,
74
75    // === Permissive ===
76    /// MIT License
77    Mit,
78    /// Apache License 2.0
79    Apache2,
80    /// BSD 3-Clause "New" License
81    Bsd3Clause,
82    /// BSD 2-Clause "Simplified" License
83    Bsd2Clause,
84    /// ISC License
85    Isc,
86    /// The Unlicense (public domain)
87    Unlicense,
88    /// Creative Commons Zero v1.0 Universal
89    Cc0,
90    /// Do What The Fuck You Want To Public License
91    Wtfpl,
92
93    // === Proprietary/Restricted ===
94    /// Proprietary/Commercial license
95    Proprietary,
96
97    // === Unknown ===
98    /// Unknown license
99    Unknown,
100}
101
102impl License {
103    /// Get the SPDX identifier for this license
104    pub fn spdx_id(&self) -> &'static str {
105        match self {
106            Self::Gpl3 => "GPL-3.0-only",
107            Self::Gpl2 => "GPL-2.0-only",
108            Self::Agpl3 => "AGPL-3.0-only",
109            Self::Lgpl3 => "LGPL-3.0-only",
110            Self::Lgpl21 => "LGPL-2.1-only",
111            Self::Mpl2 => "MPL-2.0",
112            Self::Epl2 => "EPL-2.0",
113            Self::Mit => "MIT",
114            Self::Apache2 => "Apache-2.0",
115            Self::Bsd3Clause => "BSD-3-Clause",
116            Self::Bsd2Clause => "BSD-2-Clause",
117            Self::Isc => "ISC",
118            Self::Unlicense => "Unlicense",
119            Self::Cc0 => "CC0-1.0",
120            Self::Wtfpl => "WTFPL",
121            Self::Proprietary => "PROPRIETARY",
122            Self::Unknown => "UNKNOWN",
123        }
124    }
125
126    /// Get human-readable name
127    pub fn name(&self) -> &'static str {
128        match self {
129            Self::Gpl3 => "GNU General Public License v3.0",
130            Self::Gpl2 => "GNU General Public License v2.0",
131            Self::Agpl3 => "GNU Affero General Public License v3.0",
132            Self::Lgpl3 => "GNU Lesser General Public License v3.0",
133            Self::Lgpl21 => "GNU Lesser General Public License v2.1",
134            Self::Mpl2 => "Mozilla Public License 2.0",
135            Self::Epl2 => "Eclipse Public License 2.0",
136            Self::Mit => "MIT License",
137            Self::Apache2 => "Apache License 2.0",
138            Self::Bsd3Clause => "BSD 3-Clause License",
139            Self::Bsd2Clause => "BSD 2-Clause License",
140            Self::Isc => "ISC License",
141            Self::Unlicense => "The Unlicense",
142            Self::Cc0 => "Creative Commons Zero v1.0",
143            Self::Wtfpl => "WTFPL",
144            Self::Proprietary => "Proprietary License",
145            Self::Unknown => "Unknown License",
146        }
147    }
148
149    /// Get the risk level for this license
150    pub fn risk(&self) -> LicenseRisk {
151        match self {
152            Self::Agpl3 => LicenseRisk::Critical,
153            Self::Gpl3 | Self::Gpl2 => LicenseRisk::High,
154            Self::Lgpl3 | Self::Lgpl21 | Self::Mpl2 | Self::Epl2 => LicenseRisk::Medium,
155            Self::Mit
156            | Self::Apache2
157            | Self::Bsd3Clause
158            | Self::Bsd2Clause
159            | Self::Isc
160            | Self::Unlicense
161            | Self::Cc0
162            | Self::Wtfpl => LicenseRisk::Low,
163            Self::Proprietary => LicenseRisk::High,
164            Self::Unknown => LicenseRisk::Unknown,
165        }
166    }
167
168    /// Check if this is a copyleft license
169    pub fn is_copyleft(&self) -> bool {
170        matches!(
171            self,
172            Self::Gpl3
173                | Self::Gpl2
174                | Self::Agpl3
175                | Self::Lgpl3
176                | Self::Lgpl21
177                | Self::Mpl2
178                | Self::Epl2
179        )
180    }
181
182    /// Check if this is a strong (viral) copyleft license
183    pub fn is_strong_copyleft(&self) -> bool {
184        matches!(self, Self::Gpl3 | Self::Gpl2 | Self::Agpl3)
185    }
186
187    /// Check if this is a permissive license
188    pub fn is_permissive(&self) -> bool {
189        matches!(
190            self,
191            Self::Mit
192                | Self::Apache2
193                | Self::Bsd3Clause
194                | Self::Bsd2Clause
195                | Self::Isc
196                | Self::Unlicense
197                | Self::Cc0
198                | Self::Wtfpl
199        )
200    }
201}
202
203/// License risk levels for compliance
204#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
205#[serde(rename_all = "lowercase")]
206pub enum LicenseRisk {
207    /// Unknown risk (no license detected)
208    Unknown,
209    /// Low risk (permissive licenses)
210    Low,
211    /// Medium risk (weak copyleft)
212    Medium,
213    /// High risk (strong copyleft, proprietary)
214    High,
215    /// Critical risk (AGPL - network copyleft)
216    Critical,
217}
218
219impl LicenseRisk {
220    /// Get string representation
221    pub fn as_str(&self) -> &'static str {
222        match self {
223            Self::Unknown => "unknown",
224            Self::Low => "low",
225            Self::Medium => "medium",
226            Self::High => "high",
227            Self::Critical => "critical",
228        }
229    }
230}
231
232/// A license detection result
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct LicenseFinding {
235    /// File where license was found
236    pub file: String,
237
238    /// Detected license
239    pub license: License,
240
241    /// Line number where license indicator was found
242    pub line: u32,
243
244    /// Confidence score (0.0 - 1.0)
245    pub confidence: f32,
246
247    /// Text snippet that matched
248    pub matched_text: String,
249}
250
251/// Configuration for license scanning
252#[derive(Debug, Clone)]
253pub struct LicenseScanConfig {
254    /// Minimum confidence threshold (0.0 - 1.0)
255    pub min_confidence: f32,
256
257    /// Risk level threshold (only report licenses >= this level)
258    pub min_risk: LicenseRisk,
259
260    /// Scan LICENSE/COPYING files
261    pub scan_license_files: bool,
262
263    /// Scan source code headers
264    pub scan_headers: bool,
265
266    /// Maximum lines to scan per file (for headers)
267    pub max_header_lines: usize,
268}
269
270impl Default for LicenseScanConfig {
271    fn default() -> Self {
272        Self {
273            min_confidence: 0.7,
274            min_risk: LicenseRisk::Unknown,
275            scan_license_files: true,
276            scan_headers: true,
277            max_header_lines: 50,
278        }
279    }
280}
281
282/// License scanner for detecting licenses in codebases
283pub struct LicenseScanner {
284    config: LicenseScanConfig,
285}
286
287impl Default for LicenseScanner {
288    fn default() -> Self {
289        Self::new()
290    }
291}
292
293impl LicenseScanner {
294    /// Create a new license scanner with default config
295    pub fn new() -> Self {
296        Self { config: LicenseScanConfig::default() }
297    }
298
299    /// Create with custom configuration
300    pub fn with_config(config: LicenseScanConfig) -> Self {
301        Self { config }
302    }
303
304    /// Scan file content for license indicators
305    pub fn scan(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
306        let mut findings = Vec::new();
307
308        // Check if this is a license file
309        let is_license_file = self.is_license_file(file_path);
310
311        if is_license_file && self.config.scan_license_files {
312            if let Some(finding) = self.scan_license_file(content, file_path) {
313                findings.push(finding);
314            }
315        }
316
317        if self.config.scan_headers {
318            findings.extend(self.scan_headers(content, file_path));
319        }
320
321        // Filter by confidence and risk
322        findings
323            .into_iter()
324            .filter(|f| {
325                f.confidence >= self.config.min_confidence
326                    && f.license.risk() >= self.config.min_risk
327            })
328            .collect()
329    }
330
331    /// Check if a file is a license file
332    fn is_license_file(&self, file_path: &str) -> bool {
333        let path = Path::new(file_path);
334        let file_name = path
335            .file_name()
336            .and_then(|n| n.to_str())
337            .map(|s| s.to_uppercase())
338            .unwrap_or_default();
339
340        matches!(
341            file_name.as_str(),
342            "LICENSE"
343                | "LICENSE.MD"
344                | "LICENSE.TXT"
345                | "LICENCE"
346                | "LICENCE.MD"
347                | "LICENCE.TXT"
348                | "COPYING"
349                | "COPYING.MD"
350                | "COPYING.TXT"
351                | "LICENSE-MIT"
352                | "LICENSE-APACHE"
353                | "LICENSE.MIT"
354                | "LICENSE.APACHE"
355        )
356    }
357
358    /// Scan a LICENSE/COPYING file
359    fn scan_license_file(&self, content: &str, file_path: &str) -> Option<LicenseFinding> {
360        let content_lower = content.to_lowercase();
361
362        // Check for specific license texts (in order of specificity)
363        let detections: Vec<(License, f32, &str)> = vec![
364            // AGPL (must check before GPL due to substring match)
365            (License::Agpl3, 0.95, "gnu affero general public license"),
366            (License::Agpl3, 0.9, "agpl-3.0"),
367            (License::Agpl3, 0.85, "agpl version 3"),
368            // LGPL (must check before GPL)
369            (License::Lgpl3, 0.95, "gnu lesser general public license version 3"),
370            (License::Lgpl3, 0.9, "lgpl-3.0"),
371            (License::Lgpl21, 0.95, "gnu lesser general public license version 2.1"),
372            (License::Lgpl21, 0.9, "lgpl-2.1"),
373            (License::Lgpl21, 0.9, "lgpl version 2.1"),
374            // GPL
375            (License::Gpl3, 0.95, "gnu general public license version 3"),
376            // Canonical GPL3 header: "GNU GENERAL PUBLIC LICENSE\nVersion 3, 29 June 2007"
377            (License::Gpl3, 0.95, "version 3, 29 june 2007"),
378            (License::Gpl3, 0.9, "gpl-3.0"),
379            (License::Gpl3, 0.85, "gplv3"),
380            (License::Gpl2, 0.95, "gnu general public license version 2"),
381            // Canonical GPL2 header: "GNU GENERAL PUBLIC LICENSE\nVersion 2, June 1991"
382            (License::Gpl2, 0.95, "version 2, june 1991"),
383            (License::Gpl2, 0.9, "gpl-2.0"),
384            (License::Gpl2, 0.85, "gplv2"),
385            // MPL
386            (License::Mpl2, 0.95, "mozilla public license version 2.0"),
387            (License::Mpl2, 0.9, "mpl-2.0"),
388            // EPL
389            (License::Epl2, 0.95, "eclipse public license - v 2.0"),
390            (License::Epl2, 0.9, "epl-2.0"),
391            // Apache
392            (License::Apache2, 0.95, "apache license, version 2.0"),
393            (License::Apache2, 0.95, "apache license version 2.0"),
394            (License::Apache2, 0.9, "apache-2.0"),
395            (License::Apache2, 0.85, "licensed under the apache license"),
396            // MIT
397            (License::Mit, 0.95, "mit license"),
398            (License::Mit, 0.9, "permission is hereby granted, free of charge"),
399            (License::Mit, 0.85, "the software is provided \"as is\", without warranty"),
400            // BSD
401            (License::Bsd3Clause, 0.95, "3-clause bsd license"),
402            (License::Bsd3Clause, 0.9, "bsd-3-clause"),
403            (License::Bsd3Clause, 0.85, "redistributions of source code must retain"),
404            (License::Bsd2Clause, 0.95, "2-clause bsd license"),
405            (License::Bsd2Clause, 0.9, "bsd-2-clause"),
406            // ISC
407            (License::Isc, 0.95, "isc license"),
408            (License::Isc, 0.9, "permission to use, copy, modify, and/or distribute"),
409            // Unlicense
410            (License::Unlicense, 0.95, "this is free and unencumbered software"),
411            (License::Unlicense, 0.9, "unlicense"),
412            // CC0
413            (License::Cc0, 0.95, "cc0 1.0 universal"),
414            (License::Cc0, 0.9, "creative commons zero"),
415            // WTFPL
416            (License::Wtfpl, 0.95, "do what the fuck you want to public license"),
417            (License::Wtfpl, 0.9, "wtfpl"),
418        ];
419
420        for (license, confidence, pattern) in detections {
421            if content_lower.contains(pattern) {
422                // Find the line number
423                let line = content_lower
424                    .lines()
425                    .enumerate()
426                    .find(|(_, l)| l.contains(pattern))
427                    .map_or(1, |(i, _)| (i + 1) as u32);
428
429                return Some(LicenseFinding {
430                    file: file_path.to_owned(),
431                    license,
432                    line,
433                    confidence,
434                    matched_text: pattern.to_owned(),
435                });
436            }
437        }
438
439        None
440    }
441
442    /// Scan source code headers for SPDX identifiers and license comments
443    fn scan_headers(&self, content: &str, file_path: &str) -> Vec<LicenseFinding> {
444        let mut findings = Vec::new();
445        let lines: Vec<&str> = content.lines().take(self.config.max_header_lines).collect();
446
447        for (line_num, line) in lines.iter().enumerate() {
448            let line_lower = line.to_lowercase();
449
450            // Check for SPDX license identifiers
451            if let Some(finding) = self.check_spdx_identifier(&line_lower, file_path, line_num + 1)
452            {
453                findings.push(finding);
454                continue;
455            }
456
457            // Check for license comments
458            if let Some(finding) = self.check_license_comment(&line_lower, file_path, line_num + 1)
459            {
460                findings.push(finding);
461            }
462        }
463
464        findings
465    }
466
467    /// Check for SPDX license identifiers
468    fn check_spdx_identifier(
469        &self,
470        line: &str,
471        file_path: &str,
472        line_num: usize,
473    ) -> Option<LicenseFinding> {
474        // Pattern: SPDX-License-Identifier: <license>
475        if !line.contains("spdx-license-identifier") {
476            return None;
477        }
478
479        let spdx_mappings: Vec<(&str, License)> = vec![
480            ("agpl-3.0", License::Agpl3),
481            ("gpl-3.0", License::Gpl3),
482            ("gpl-2.0", License::Gpl2),
483            ("lgpl-3.0", License::Lgpl3),
484            ("lgpl-2.1", License::Lgpl21),
485            ("mpl-2.0", License::Mpl2),
486            ("epl-2.0", License::Epl2),
487            ("apache-2.0", License::Apache2),
488            ("mit", License::Mit),
489            ("bsd-3-clause", License::Bsd3Clause),
490            ("bsd-2-clause", License::Bsd2Clause),
491            ("isc", License::Isc),
492            ("unlicense", License::Unlicense),
493            ("cc0-1.0", License::Cc0),
494        ];
495
496        for (spdx_id, license) in spdx_mappings {
497            if line.contains(spdx_id) {
498                return Some(LicenseFinding {
499                    file: file_path.to_owned(),
500                    license,
501                    line: line_num as u32,
502                    confidence: 0.99, // SPDX identifiers are very reliable
503                    matched_text: format!("SPDX-License-Identifier: {}", spdx_id),
504                });
505            }
506        }
507
508        None
509    }
510
511    /// Check for license mentions in comments
512    fn check_license_comment(
513        &self,
514        line: &str,
515        file_path: &str,
516        line_num: usize,
517    ) -> Option<LicenseFinding> {
518        // Must be in a comment
519        if !line.contains("//")
520            && !line.contains("/*")
521            && !line.contains('*')
522            && !line.contains('#')
523        {
524            return None;
525        }
526
527        let comment_patterns: Vec<(&str, License, f32)> = vec![
528            // High confidence patterns
529            ("licensed under agpl", License::Agpl3, 0.85),
530            ("licensed under gpl", License::Gpl3, 0.8),
531            ("licensed under lgpl", License::Lgpl3, 0.8),
532            ("licensed under the mit license", License::Mit, 0.85),
533            ("licensed under apache", License::Apache2, 0.85),
534            // Medium confidence patterns
535            ("this file is part of", License::Unknown, 0.5), // Often followed by license
536            ("copyright", License::Unknown, 0.3),
537        ];
538
539        for (pattern, license, confidence) in comment_patterns {
540            if line.contains(pattern) && license != License::Unknown {
541                return Some(LicenseFinding {
542                    file: file_path.to_owned(),
543                    license,
544                    line: line_num as u32,
545                    confidence,
546                    matched_text: pattern.to_owned(),
547                });
548            }
549        }
550
551        None
552    }
553
554    /// Scan a file path for license information
555    pub fn scan_file(&self, path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
556        let content = std::fs::read_to_string(path)?;
557        let file_path = path.to_string_lossy();
558        Ok(self.scan(&content, &file_path))
559    }
560
561    /// Scan a repository for license information
562    pub fn scan_repository(&self, repo_path: &Path) -> Result<Vec<LicenseFinding>, std::io::Error> {
563        use ignore::WalkBuilder;
564
565        let mut all_findings = Vec::new();
566
567        let walker = WalkBuilder::new(repo_path)
568            .hidden(false)
569            .git_ignore(true)
570            .build();
571
572        for entry in walker.flatten() {
573            let path = entry.path();
574
575            if !path.is_file() {
576                continue;
577            }
578
579            // Scan license files
580            if self.is_license_file(&path.to_string_lossy()) {
581                if let Ok(findings) = self.scan_file(path) {
582                    all_findings.extend(findings);
583                }
584                continue;
585            }
586
587            // Scan source file headers if configured
588            if self.config.scan_headers {
589                let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
590                let is_source = matches!(
591                    ext,
592                    "rs" | "py" | "js" | "ts" | "go" | "c" | "cpp" | "h" | "java" | "rb" | "php"
593                );
594
595                if is_source {
596                    if let Ok(findings) = self.scan_file(path) {
597                        all_findings.extend(findings);
598                    }
599                }
600            }
601        }
602
603        // Deduplicate by file and license
604        all_findings.sort_by(|a, b| {
605            a.file
606                .cmp(&b.file)
607                .then_with(|| a.license.spdx_id().cmp(b.license.spdx_id()))
608        });
609        all_findings.dedup_by(|a, b| a.file == b.file && a.license == b.license);
610
611        Ok(all_findings)
612    }
613
614    /// Get a summary of license findings
615    pub fn summarize(findings: &[LicenseFinding]) -> LicenseSummary {
616        let mut summary = LicenseSummary::default();
617
618        for finding in findings {
619            match finding.license.risk() {
620                LicenseRisk::Critical => summary.critical_count += 1,
621                LicenseRisk::High => summary.high_count += 1,
622                LicenseRisk::Medium => summary.medium_count += 1,
623                LicenseRisk::Low => summary.low_count += 1,
624                LicenseRisk::Unknown => summary.unknown_count += 1,
625            }
626
627            if finding.license.is_copyleft() {
628                summary.copyleft_files.push(finding.file.clone());
629            }
630
631            // Track unique licenses
632            if !summary.licenses.contains(&finding.license) {
633                summary.licenses.push(finding.license);
634            }
635        }
636
637        summary.copyleft_files.sort();
638        summary.copyleft_files.dedup();
639
640        summary
641    }
642}
643
644/// Summary of license findings
645#[derive(Debug, Clone, Default, Serialize, Deserialize)]
646pub struct LicenseSummary {
647    /// Count of critical risk licenses (AGPL)
648    pub critical_count: usize,
649
650    /// Count of high risk licenses (GPL, proprietary)
651    pub high_count: usize,
652
653    /// Count of medium risk licenses (LGPL, MPL)
654    pub medium_count: usize,
655
656    /// Count of low risk licenses (MIT, Apache, BSD)
657    pub low_count: usize,
658
659    /// Count of unknown licenses
660    pub unknown_count: usize,
661
662    /// Files containing copyleft licenses
663    pub copyleft_files: Vec<String>,
664
665    /// Unique licenses found
666    pub licenses: Vec<License>,
667}
668
669impl LicenseSummary {
670    /// Check if any copyleft licenses were found
671    pub fn has_copyleft(&self) -> bool {
672        !self.copyleft_files.is_empty()
673    }
674
675    /// Check if any high-risk licenses were found
676    pub fn has_high_risk(&self) -> bool {
677        self.critical_count > 0 || self.high_count > 0
678    }
679
680    /// Get total number of findings
681    pub fn total(&self) -> usize {
682        self.critical_count
683            + self.high_count
684            + self.medium_count
685            + self.low_count
686            + self.unknown_count
687    }
688}
689
#[cfg(test)]
mod tests {
    use super::*;

    /// Each license family maps to its documented risk level.
    #[test]
    fn test_license_risk_levels() {
        let expectations = [
            (License::Agpl3, LicenseRisk::Critical),
            (License::Gpl3, LicenseRisk::High),
            (License::Lgpl3, LicenseRisk::Medium),
            (License::Mit, LicenseRisk::Low),
            (License::Unknown, LicenseRisk::Unknown),
        ];

        for (license, expected) in expectations.iter() {
            assert_eq!(license.risk(), *expected);
        }
    }

    /// Strong and weak copyleft count as copyleft; permissive licenses do not.
    #[test]
    fn test_copyleft_detection() {
        for license in [License::Gpl3, License::Agpl3, License::Lgpl3].iter() {
            assert!(license.is_copyleft());
        }
        for license in [License::Mit, License::Apache2].iter() {
            assert!(!license.is_copyleft());
        }
    }

    /// Only the GPL/AGPL family is strong copyleft.
    #[test]
    fn test_strong_copyleft() {
        for license in [License::Gpl3, License::Agpl3].iter() {
            assert!(license.is_strong_copyleft());
        }
        for license in [License::Lgpl3, License::Mit].iter() {
            assert!(!license.is_strong_copyleft());
        }
    }

    /// An MIT license text in a LICENSE file yields exactly one MIT finding.
    #[test]
    fn test_scan_mit_license() {
        let license_text = r#"
MIT License

Copyright (c) 2024 Example Corp

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software.
"#;

        let results = LicenseScanner::new().scan(license_text, "LICENSE");

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.license, License::Mit);
        assert!(only.confidence >= 0.9);
    }

    /// The canonical GPL-3.0 title in a COPYING file is detected as GPL-3.0.
    #[test]
    fn test_scan_gpl3_license() {
        let license_text = r#"
GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
"#;

        let results = LicenseScanner::new().scan(license_text, "COPYING");

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].license, License::Gpl3);
    }

    /// An SPDX tag in a source header is reported with high confidence.
    #[test]
    fn test_scan_spdx_identifier() {
        let source = r#"
// SPDX-License-Identifier: Apache-2.0

fn main() {
    println!("Hello, world!");
}
"#;

        let results = LicenseScanner::new().scan(source, "src/main.rs");

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.license, License::Apache2);
        assert!(only.confidence >= 0.95);
    }

    /// A "Licensed under AGPL" comment in a script header is reported.
    #[test]
    fn test_scan_agpl_in_header() {
        let source = r#"
# Licensed under AGPL-3.0
# Copyright 2024 Example Corp

def main():
    pass
"#;

        let results = LicenseScanner::new().scan(source, "main.py");

        assert!(!results.is_empty());
        assert!(results.iter().any(|f| f.license == License::Agpl3));
    }

    /// Summaries count findings per risk level and collect copyleft files.
    #[test]
    fn test_license_summary() {
        let finding = |file: &str, license: License, confidence: f32, matched: &str| {
            LicenseFinding {
                file: file.to_owned(),
                license,
                line: 1,
                confidence,
                matched_text: matched.to_owned(),
            }
        };
        let findings = vec![
            finding("lib/a.rs", License::Gpl3, 0.95, "gpl-3.0"),
            finding("lib/b.rs", License::Mit, 0.9, "mit"),
            finding("lib/c.rs", License::Agpl3, 0.95, "agpl-3.0"),
        ];

        let summary = LicenseScanner::summarize(&findings);

        assert_eq!(summary.critical_count, 1);
        assert_eq!(summary.high_count, 1);
        assert_eq!(summary.low_count, 1);
        assert!(summary.has_copyleft());
        assert!(summary.has_high_risk());
        assert_eq!(summary.copyleft_files.len(), 2);
    }

    /// Conventional license file names are recognised; other files are not.
    #[test]
    fn test_is_license_file() {
        let scanner = LicenseScanner::new();

        for name in ["LICENSE", "LICENSE.md", "COPYING", "LICENSE-MIT"].iter() {
            assert!(scanner.is_license_file(name));
        }
        for name in ["src/main.rs", "README.md"].iter() {
            assert!(!scanner.is_license_file(name));
        }
    }

    /// Risk levels are strictly ordered from Unknown up to Critical.
    #[test]
    fn test_risk_ordering() {
        let ascending = [
            LicenseRisk::Unknown,
            LicenseRisk::Low,
            LicenseRisk::Medium,
            LicenseRisk::High,
            LicenseRisk::Critical,
        ];

        for pair in ascending.windows(2) {
            assert!(pair[1] > pair[0]);
        }
    }

    /// Spot-check the reported SPDX identifiers.
    #[test]
    fn test_spdx_ids() {
        let expectations = [
            (License::Gpl3, "GPL-3.0-only"),
            (License::Mit, "MIT"),
            (License::Apache2, "Apache-2.0"),
        ];

        for (license, expected) in expectations.iter() {
            assert_eq!(license.spdx_id(), *expected);
        }
    }
}