1mod collect;
2mod process;
3
4use std::path::PathBuf;
5
6use crate::models::FileInfo;
7
8pub struct ProcessResult {
9 pub files: Vec<FileInfo>,
10 pub excluded_count: usize,
11}
12
/// Switches for license scanning, passed by value to `process_collected`.
///
/// The type is `Copy`, and the derived [`Default`] leaves every switch `false`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
    /// NOTE(review): presumably includes the matched license text in the
    /// results — confirm against the consumer in `process`.
    pub include_text: bool,
    /// NOTE(review): presumably adds diagnostics about the matched text;
    /// likely only meaningful together with `include_text` — confirm.
    pub include_text_diagnostics: bool,
    /// NOTE(review): presumably adds license-detection diagnostics to the
    /// results — confirm.
    pub include_diagnostics: bool,
    /// NOTE(review): presumably enables reporting of unknown licenses —
    /// confirm.
    pub unknown_licenses: bool,
}
20
/// Per-scan switches and limits for the text-based detectors.
///
/// See the [`Default`] impl for the out-of-the-box configuration.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Collect extra per-file info. This module's tests show `is_source` is
    /// only populated on scanned entries when this is `true`.
    pub collect_info: bool,
    /// Parse package manifests. This module's tests show `package_data` stays
    /// empty for a `package.json` unless this is `true`.
    pub detect_packages: bool,
    /// Run copyright detection. This module's tests show author entries being
    /// produced with this flag set.
    pub detect_copyrights: bool,
    /// Populate the `is_generated` flag on scanned entries; it stays `None`
    /// when this is `false`.
    pub detect_generated: bool,
    /// Report e-mail addresses found in file content.
    pub detect_emails: bool,
    /// NOTE(review): presumably reports URLs found in file content, mirroring
    /// `detect_emails` — confirm.
    pub detect_urls: bool,
    /// NOTE(review): presumably a cap on reported e-mails per file; what `0`
    /// means is not visible from this file — confirm.
    pub max_emails: usize,
    /// NOTE(review): presumably a cap on reported URLs per file; what `0`
    /// means is not visible from this file — confirm.
    pub max_urls: usize,
    /// NOTE(review): presumably a per-file time budget in seconds — confirm
    /// against the consumer in `process`.
    pub timeout_seconds: f64,
    /// Optional scan-cache directory; `None` means none is configured. How the
    /// directory is used is not visible from this file.
    pub scan_cache_dir: Option<PathBuf>,
}

impl Default for TextDetectionOptions {
    /// Only copyright detection is enabled; e-mail and URL limits are 50, the
    /// timeout is 120 seconds and no cache directory is set.
    fn default() -> Self {
        const DEFAULT_MATCH_LIMIT: usize = 50;
        const DEFAULT_TIMEOUT_SECONDS: f64 = 120.0;

        Self {
            // The single detector that is on by default.
            detect_copyrights: true,

            // Everything else is opt-in.
            collect_info: false,
            detect_packages: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,

            max_emails: DEFAULT_MATCH_LIMIT,
            max_urls: DEFAULT_MATCH_LIMIT,
            timeout_seconds: DEFAULT_TIMEOUT_SECONDS,
            scan_cache_dir: None,
        }
    }
}
51
52#[allow(unused_imports)]
53pub use self::collect::{CollectedPaths, collect_paths};
54pub use self::process::process_collected;
55
#[cfg(test)]
mod tests {
    // Covers the option defaults, the collect -> process pipeline on
    // single-file fixtures, and `collect_paths` on directory and file inputs.

    use std::fs;
    use std::sync::Arc;

    use tempfile::TempDir;

    use crate::models::FileType;
    use crate::progress::{ProgressMode, ScanProgress};

    use super::{LicenseScanOptions, TextDetectionOptions, collect_paths, process_collected};

    /// Baseline options with every detector switched off.
    ///
    /// Spelled out field by field instead of going through `Default`, so the
    /// scanner tests keep their meaning if the crate-level defaults change.
    /// Tests override only the flags they exercise via struct-update syntax.
    fn all_detectors_off() -> TextDetectionOptions {
        TextDetectionOptions {
            collect_info: false,
            detect_packages: false,
            detect_copyrights: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,
            max_emails: 50,
            max_urls: 50,
            timeout_seconds: 120.0,
            scan_cache_dir: None,
        }
    }

    /// Writes `content` to `file_name` inside a fresh temp dir, runs
    /// `collect_paths` + `process_collected` over that directory with default
    /// license options, and returns the entry for the written file.
    ///
    /// # Panics
    ///
    /// Panics if the fixture cannot be created or if no `FileType::File` entry
    /// with the fixture's path is present in the result.
    fn scan_single_file(
        file_name: &str,
        content: &str,
        options: &TextDetectionOptions,
    ) -> crate::models::FileInfo {
        let temp_dir = TempDir::new().expect("create temp dir");
        let file_path = temp_dir.path().join(file_name);
        fs::write(&file_path, content).expect("write test file");

        let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
        // NOTE(review): `0` and `&[]` presumably mean "no depth limit" and
        // "no exclude patterns" — confirm against `collect_paths`.
        let collected = collect_paths(temp_dir.path(), 0, &[]);
        let result = process_collected(
            &collected,
            progress,
            None,
            LicenseScanOptions::default(),
            options,
        );

        // Computed once instead of per candidate entry.
        let expected_path = file_path.to_string_lossy();

        result
            .files
            .into_iter()
            .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
            .expect("scanned file entry")
    }

    #[test]
    fn default_options_keep_copyright_detection_enabled() {
        let options = TextDetectionOptions::default();
        assert!(!options.detect_packages);
        assert!(options.detect_copyrights);
    }

    #[test]
    fn scanner_reports_repeated_email_occurrences() {
        let options = TextDetectionOptions {
            detect_emails: true,
            ..all_detectors_off()
        };
        let scanned = scan_single_file(
            "contacts.txt",
            "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
            &options,
        );

        let emails: Vec<(&str, usize)> = scanned
            .emails
            .iter()
            .map(|email| (email.email.as_str(), email.start_line))
            .collect();

        // Length is checked first so a mismatch prints the whole list.
        assert_eq!(emails.len(), 4, "emails: {emails:#?}");
        assert_eq!(
            emails,
            vec![
                ("linux@3ware.com", 1),
                ("linux@3ware.com", 2),
                ("andre@suse.com", 3),
                ("linux@3ware.com", 4),
            ]
        );
    }

    #[test]
    fn scanner_skips_pem_certificate_text_detection() {
        let options = TextDetectionOptions {
            detect_copyrights: true,
            detect_emails: true,
            detect_urls: true,
            ..all_detectors_off()
        };
        let pem_fixture = concat!(
            "-----BEGIN CERTIFICATE-----\n",
            "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
            "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
            "-----END CERTIFICATE-----\n",
            "Certificate:\n",
            " Data:\n",
            " Signature Algorithm: sha1WithRSAEncryption\n",
            " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
            " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
            " Contact: cert-owner@example.com\n",
        );
        let scanned = scan_single_file("cert.pem", pem_fixture, &options);

        assert!(
            scanned.copyrights.is_empty(),
            "copyrights: {:#?}",
            scanned.copyrights
        );
        assert!(
            scanned.holders.is_empty(),
            "holders: {:#?}",
            scanned.holders
        );
        assert!(
            scanned.authors.is_empty(),
            "authors: {:#?}",
            scanned.authors
        );
        assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
        assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
        assert!(
            scanned.license_detections.is_empty(),
            "licenses: {:#?}",
            scanned.license_detections
        );
        assert!(
            scanned.license_clues.is_empty(),
            "license clues: {:#?}",
            scanned.license_clues
        );
    }

    #[test]
    fn scanner_detects_structured_credits_authors() {
        let options = TextDetectionOptions {
            detect_copyrights: true,
            ..all_detectors_off()
        };
        let credits_fixture = concat!(
            "N: Jack Lloyd\n",
            "E: lloyd@randombit.net\n",
            "W: http://www.randombit.net/\n",
        );
        let scanned = scan_single_file("CREDITS", credits_fixture, &options);

        let authors: Vec<(&str, usize, usize)> = scanned
            .authors
            .iter()
            .map(|author| (author.author.as_str(), author.start_line, author.end_line))
            .collect();

        assert_eq!(
            authors,
            vec![(
                "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
                1,
                3,
            )]
        );
        assert!(scanned.copyrights.is_empty());
        assert!(scanned.holders.is_empty());
    }

    #[test]
    fn scanner_sets_generated_flag_when_enabled() {
        let options = TextDetectionOptions {
            detect_generated: true,
            ..all_detectors_off()
        };
        let scanned = scan_single_file(
            "generated.c",
            "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
            &options,
        );

        assert_eq!(scanned.is_generated, Some(true));
    }

    #[test]
    fn scanner_leaves_generated_flag_unset_when_disabled() {
        let options = all_detectors_off();
        let scanned = scan_single_file(
            "generated.c",
            "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
            &options,
        );

        assert_eq!(scanned.is_generated, None);
    }

    #[test]
    fn scanner_skips_package_parsing_when_disabled() {
        let options = all_detectors_off();
        let scanned = scan_single_file(
            "package.json",
            r#"{"name":"demo","version":"1.0.0"}"#,
            &options,
        );

        assert!(
            scanned.package_data.is_empty(),
            "package_data: {:#?}",
            scanned.package_data
        );
    }

    #[test]
    fn scanner_parses_package_manifests_when_enabled() {
        let options = TextDetectionOptions {
            detect_packages: true,
            ..all_detectors_off()
        };
        let scanned = scan_single_file(
            "package.json",
            r#"{"name":"demo","version":"1.0.0"}"#,
            &options,
        );

        assert_eq!(
            scanned.package_data.len(),
            1,
            "package_data: {:#?}",
            scanned.package_data
        );
    }

    #[test]
    fn scanner_sets_is_source_only_when_info_enabled() {
        let without_info = all_detectors_off();
        let with_info = TextDetectionOptions {
            collect_info: true,
            ..all_detectors_off()
        };

        let scanned_without_info = scan_single_file("main.rs", "fn main() {}\n", &without_info);
        let scanned_with_info = scan_single_file("main.rs", "fn main() {}\n", &with_info);

        assert_eq!(scanned_without_info.is_source, None);
        assert_eq!(scanned_with_info.is_source, Some(true));
    }

    #[test]
    fn collect_paths_includes_root_directory_entry() {
        let temp_dir = TempDir::new().expect("create temp dir");
        fs::create_dir_all(temp_dir.path().join("src")).expect("create nested dir");
        fs::write(temp_dir.path().join("src").join("main.rs"), "fn main() {}")
            .expect("write nested file");

        let collected = collect_paths(temp_dir.path(), 0, &[]);

        assert!(
            collected
                .directories
                .iter()
                .any(|(path, _)| path == temp_dir.path())
        );
    }

    #[test]
    fn collect_paths_supports_single_file_input() {
        let temp_dir = TempDir::new().expect("create temp dir");
        let file_path = temp_dir.path().join("main.rs");
        fs::write(&file_path, "fn main() {}\n").expect("write file");

        let collected = collect_paths(&file_path, 0, &[]);

        assert_eq!(collected.files.len(), 1);
        assert!(collected.directories.is_empty());
        assert_eq!(collected.files[0].0, file_path);
    }
}