mod collect;
mod process;
use std::path::PathBuf;
use crate::models::FileInfo;
pub struct ProcessResult {
pub files: Vec<FileInfo>,
pub excluded_count: usize,
}
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
pub include_text: bool,
pub include_text_diagnostics: bool,
pub include_diagnostics: bool,
pub unknown_licenses: bool,
}
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
pub collect_info: bool,
pub detect_packages: bool,
pub detect_copyrights: bool,
pub detect_generated: bool,
pub detect_emails: bool,
pub detect_urls: bool,
pub max_emails: usize,
pub max_urls: usize,
pub timeout_seconds: f64,
pub scan_cache_dir: Option<PathBuf>,
}
impl Default for TextDetectionOptions {
fn default() -> Self {
Self {
collect_info: false,
detect_packages: false,
detect_copyrights: true,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
}
}
}
#[allow(unused_imports)]
pub use self::collect::{CollectedPaths, collect_paths};
pub use self::process::process_collected;
#[cfg(test)]
mod tests {
use std::fs;
use std::sync::Arc;
use tempfile::TempDir;
use crate::models::FileType;
use crate::progress::{ProgressMode, ScanProgress};
use super::{LicenseScanOptions, TextDetectionOptions, collect_paths, process_collected};
#[test]
fn default_options_keep_copyright_detection_enabled() {
let options = TextDetectionOptions::default();
assert!(!options.detect_packages);
assert!(options.detect_copyrights);
}
fn scan_single_file(
file_name: &str,
content: &str,
options: &TextDetectionOptions,
) -> crate::models::FileInfo {
let temp_dir = TempDir::new().expect("create temp dir");
let file_path = temp_dir.path().join(file_name);
fs::write(&file_path, content).expect("write test file");
let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
let collected = collect_paths(temp_dir.path(), 0, &[]);
let result = process_collected(
&collected,
progress,
None,
LicenseScanOptions::default(),
options,
);
result
.files
.into_iter()
.find(|entry| {
entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
})
.expect("scanned file entry")
}
#[test]
fn scanner_reports_repeated_email_occurrences() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: false,
detect_generated: false,
detect_emails: true,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let scanned = scan_single_file(
"contacts.txt",
"linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
&options,
);
let emails: Vec<(&str, usize)> = scanned
.emails
.iter()
.map(|email| (email.email.as_str(), email.start_line))
.collect();
assert_eq!(emails.len(), 4, "emails: {emails:#?}");
assert_eq!(
emails,
vec![
("linux@3ware.com", 1),
("linux@3ware.com", 2),
("andre@suse.com", 3),
("linux@3ware.com", 4),
]
);
}
#[test]
fn scanner_skips_pem_certificate_text_detection() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: true,
detect_generated: false,
detect_emails: true,
detect_urls: true,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let pem_fixture = concat!(
"-----BEGIN CERTIFICATE-----\n",
"MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
"ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
"-----END CERTIFICATE-----\n",
"Certificate:\n",
" Data:\n",
" Signature Algorithm: sha1WithRSAEncryption\n",
" Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
" Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
" Contact: cert-owner@example.com\n",
);
let scanned = scan_single_file("cert.pem", pem_fixture, &options);
assert!(
scanned.copyrights.is_empty(),
"copyrights: {:#?}",
scanned.copyrights
);
assert!(
scanned.holders.is_empty(),
"holders: {:#?}",
scanned.holders
);
assert!(
scanned.authors.is_empty(),
"authors: {:#?}",
scanned.authors
);
assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
assert!(
scanned.license_detections.is_empty(),
"licenses: {:#?}",
scanned.license_detections
);
assert!(
scanned.license_clues.is_empty(),
"license clues: {:#?}",
scanned.license_clues
);
}
#[test]
fn scanner_detects_structured_credits_authors() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: true,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let credits_fixture = concat!(
"N: Jack Lloyd\n",
"E: lloyd@randombit.net\n",
"W: http://www.randombit.net/\n",
);
let scanned = scan_single_file("CREDITS", credits_fixture, &options);
let authors: Vec<(&str, usize, usize)> = scanned
.authors
.iter()
.map(|author| (author.author.as_str(), author.start_line, author.end_line))
.collect();
assert_eq!(
authors,
vec![(
"Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
1,
3,
)]
);
assert!(scanned.copyrights.is_empty());
assert!(scanned.holders.is_empty());
}
#[test]
fn scanner_sets_generated_flag_when_enabled() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: false,
detect_generated: true,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let scanned = scan_single_file(
"generated.c",
"/* DO NOT EDIT THIS FILE - it is machine generated */\n",
&options,
);
assert_eq!(scanned.is_generated, Some(true));
}
#[test]
fn scanner_leaves_generated_flag_unset_when_disabled() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: false,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let scanned = scan_single_file(
"generated.c",
"/* DO NOT EDIT THIS FILE - it is machine generated */\n",
&options,
);
assert_eq!(scanned.is_generated, None);
}
#[test]
fn scanner_skips_package_parsing_when_disabled() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: false,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let scanned = scan_single_file(
"package.json",
r#"{"name":"demo","version":"1.0.0"}"#,
&options,
);
assert!(
scanned.package_data.is_empty(),
"package_data: {:#?}",
scanned.package_data
);
}
#[test]
fn scanner_parses_package_manifests_when_enabled() {
let options = TextDetectionOptions {
collect_info: false,
detect_packages: true,
detect_copyrights: false,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let scanned = scan_single_file(
"package.json",
r#"{"name":"demo","version":"1.0.0"}"#,
&options,
);
assert_eq!(
scanned.package_data.len(),
1,
"package_data: {:#?}",
scanned.package_data
);
}
#[test]
fn scanner_sets_is_source_only_when_info_enabled() {
let without_info = TextDetectionOptions {
collect_info: false,
detect_packages: false,
detect_copyrights: false,
detect_generated: false,
detect_emails: false,
detect_urls: false,
max_emails: 50,
max_urls: 50,
timeout_seconds: 120.0,
scan_cache_dir: None,
};
let with_info = TextDetectionOptions {
collect_info: true,
..without_info.clone()
};
let scanned_without_info = scan_single_file("main.rs", "fn main() {}\n", &without_info);
let scanned_with_info = scan_single_file("main.rs", "fn main() {}\n", &with_info);
assert_eq!(scanned_without_info.is_source, None);
assert_eq!(scanned_with_info.is_source, Some(true));
}
#[test]
fn collect_paths_includes_root_directory_entry() {
let temp_dir = TempDir::new().expect("create temp dir");
fs::create_dir_all(temp_dir.path().join("src")).expect("create nested dir");
fs::write(temp_dir.path().join("src").join("main.rs"), "fn main() {}")
.expect("write nested file");
let collected = collect_paths(temp_dir.path(), 0, &[]);
assert!(
collected
.directories
.iter()
.any(|(path, _)| path == temp_dir.path())
);
}
#[test]
fn collect_paths_supports_single_file_input() {
let temp_dir = TempDir::new().expect("create temp dir");
let file_path = temp_dir.path().join("main.rs");
fs::write(&file_path, "fn main() {}\n").expect("write file");
let collected = collect_paths(&file_path, 0, &[]);
assert_eq!(collected.files.len(), 1);
assert!(collected.directories.is_empty());
assert_eq!(collected.files[0].0, file_path);
}
}