1mod count;
2mod process;
3
4use std::path::PathBuf;
5
6use crate::models::FileInfo;
7
8pub struct ProcessResult {
13 pub files: Vec<FileInfo>,
15 pub excluded_count: usize,
17}
18
/// Switches and limits that control text-based detection during a scan.
///
/// Use [`Default::default`] for the stock configuration and override
/// individual fields as needed.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Turns copyright detection on or off.
    ///
    /// NOTE(review): presumably also governs holder/author detection — confirm.
    pub detect_copyrights: bool,
    /// Turns email detection on or off.
    pub detect_emails: bool,
    /// Turns URL detection on or off.
    pub detect_urls: bool,
    /// NOTE(review): presumably the maximum number of emails reported per
    /// file — confirm against the detection code.
    pub max_emails: usize,
    /// NOTE(review): presumably the maximum number of URLs reported per
    /// file — confirm against the detection code.
    pub max_urls: usize,
    /// Time budget expressed in seconds.
    ///
    /// NOTE(review): presumably applied per scanned file — confirm.
    pub timeout_seconds: f64,
    /// Optional directory path.
    ///
    /// NOTE(review): presumably where cached scan results live; `None`
    /// presumably disables caching — confirm.
    pub scan_cache_dir: Option<PathBuf>,
}
29
30impl Default for TextDetectionOptions {
31 fn default() -> Self {
32 Self {
33 detect_copyrights: true,
34 detect_emails: false,
35 detect_urls: false,
36 max_emails: 50,
37 max_urls: 50,
38 timeout_seconds: 120.0,
39 scan_cache_dir: None,
40 }
41 }
42}
43
44pub use self::count::count_with_size;
45pub use self::process::{process, process_with_options};
46
47#[cfg(test)]
48mod tests {
49 use std::fs;
50 use std::sync::Arc;
51
52 use tempfile::TempDir;
53
54 use crate::askalono::{ScanStrategy, Store};
55 use crate::models::FileType;
56 use crate::progress::{ProgressMode, ScanProgress};
57
58 use super::TextDetectionOptions;
59 use super::process_with_options;
60
/// Guards the default configuration: copyright detection must stay on.
#[test]
fn default_options_keep_copyright_detection_enabled() {
    let TextDetectionOptions {
        detect_copyrights, ..
    } = TextDetectionOptions::default();
    assert!(detect_copyrights);
}
66
67 fn scan_strategy_without_licenses() -> ScanStrategy<'static> {
68 let store = Box::leak(Box::new(Store::new()));
69 ScanStrategy::new(store)
70 }
71
72 fn scan_single_file(
73 file_name: &str,
74 content: &str,
75 options: &TextDetectionOptions,
76 ) -> crate::models::FileInfo {
77 let temp_dir = TempDir::new().expect("create temp dir");
78 let file_path = temp_dir.path().join(file_name);
79 fs::write(&file_path, content).expect("write test file");
80
81 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
82 let strategy = scan_strategy_without_licenses();
83 let result = process_with_options(temp_dir.path(), 0, progress, &[], &strategy, options)
84 .expect("scan should succeed");
85
86 result
87 .files
88 .into_iter()
89 .find(|entry| {
90 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
91 })
92 .expect("scanned file entry")
93 }
94
/// With only email detection enabled, every occurrence of an address is
/// reported with its own line number, including repeats of the same address.
#[test]
fn scanner_reports_repeated_email_occurrences() {
    // Only the detector switches matter here; limits and timeout stay at
    // their defaults.
    let email_only = TextDetectionOptions {
        detect_copyrights: false,
        detect_emails: true,
        detect_urls: false,
        ..TextDetectionOptions::default()
    };
    let fixture = concat!(
        "linux@3ware.com\n",
        "linux@3ware.com\n",
        "andre@suse.com\n",
        "linux@3ware.com\n",
    );
    let scanned = scan_single_file("contacts.txt", fixture, &email_only);

    let emails = scanned
        .emails
        .iter()
        .map(|hit| (hit.email.as_str(), hit.start_line))
        .collect::<Vec<(&str, usize)>>();
    let expected = vec![
        ("linux@3ware.com", 1),
        ("linux@3ware.com", 2),
        ("andre@suse.com", 3),
        ("linux@3ware.com", 4),
    ];

    // Check the count first so a mismatch prints the full list.
    assert_eq!(emails.len(), 4, "emails: {emails:#?}");
    assert_eq!(emails, expected);
}
129
/// A `.pem` file holding a certificate block followed by its decoded text
/// must produce no copyrights, holders, authors, emails, URLs, or license
/// detections, even with every text detector switched on.
#[test]
fn scanner_skips_pem_certificate_text_detection() {
    const PEM_FIXTURE: &str = concat!(
        "-----BEGIN CERTIFICATE-----\n",
        "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
        "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
        "-----END CERTIFICATE-----\n",
        "Certificate:\n",
        " Data:\n",
        " Signature Algorithm: sha1WithRSAEncryption\n",
        " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
        " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
        " Contact: cert-owner@example.com\n",
    );

    // Enable every detector; limits and timeout stay at their defaults.
    let all_detectors = TextDetectionOptions {
        detect_copyrights: true,
        detect_emails: true,
        detect_urls: true,
        ..TextDetectionOptions::default()
    };
    let scanned = scan_single_file("cert.pem", PEM_FIXTURE, &all_detectors);

    let copyrights = &scanned.copyrights;
    assert!(copyrights.is_empty(), "copyrights: {copyrights:#?}");

    let holders = &scanned.holders;
    assert!(holders.is_empty(), "holders: {holders:#?}");

    let authors = &scanned.authors;
    assert!(authors.is_empty(), "authors: {authors:#?}");

    let emails = &scanned.emails;
    assert!(emails.is_empty(), "emails: {emails:#?}");

    let urls = &scanned.urls;
    assert!(urls.is_empty(), "urls: {urls:#?}");

    let licenses = &scanned.license_detections;
    assert!(licenses.is_empty(), "licenses: {licenses:#?}");
}
178
/// A `CREDITS` file using `N:` / `E:` / `W:` records yields a single author
/// entry spanning lines 1-3 that combines name, email, and URL, and yields
/// no copyrights or holders.
#[test]
fn scanner_detects_structured_credits_authors() {
    // Copyright detection on, email/URL detection off; the rest is default.
    let copyright_only = TextDetectionOptions {
        detect_copyrights: true,
        detect_emails: false,
        detect_urls: false,
        ..TextDetectionOptions::default()
    };
    let credits_fixture = concat!(
        "N: Jack Lloyd\n",
        "E: lloyd@randombit.net\n",
        "W: http://www.randombit.net/\n",
    );
    let scanned = scan_single_file("CREDITS", credits_fixture, &copyright_only);

    let authors = scanned
        .authors
        .iter()
        .map(|found| (found.author.as_str(), found.start_line, found.end_line))
        .collect::<Vec<(&str, usize, usize)>>();
    let expected = vec![(
        "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
        1,
        3,
    )];

    assert_eq!(authors, expected);
    assert!(scanned.copyrights.is_empty());
    assert!(scanned.holders.is_empty());
}
214}