1mod collect;
2mod process;
3
4use std::path::PathBuf;
5
6use crate::models::FileInfo;
7
/// Result bundle holding the scanned entries and an exclusion counter.
///
/// NOTE(review): presumably the value returned by `process_collected` and
/// `process_collected_with_memory_limit` — confirm in the `process` module.
pub struct ProcessResult {
    /// Scanned entries. The tests in this file look up both file and directory
    /// entries in this list.
    pub files: Vec<FileInfo>,
    /// NOTE(review): presumably the number of paths skipped by exclusion
    /// rules — confirm against the code that fills it in.
    pub excluded_count: usize,
}
12
/// Switches that tune license scanning.
///
/// The derived `Default` turns every flag off and sets `min_score` to `0`.
/// Nothing in this file reads the individual fields, so the notes below are
/// assumptions to confirm against the license scanning code.
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
    /// NOTE(review): presumably includes matched license text in results — confirm.
    pub include_text: bool,
    /// NOTE(review): presumably adds diagnostics to the matched text — confirm.
    pub include_text_diagnostics: bool,
    /// NOTE(review): presumably adds license detection diagnostics — confirm.
    pub include_diagnostics: bool,
    /// NOTE(review): presumably enables reporting of unknown licenses — confirm.
    pub unknown_licenses: bool,
    /// NOTE(review): presumably the minimum match score to report; range not
    /// visible here (TODO confirm, likely 0-100).
    pub min_score: u8,
}
21
/// Per-scan switches selecting which detectors run on each file.
///
/// See the [`Default`] implementation for the baseline configuration.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Populate the file "info" surface (hashes, MIME type, date, programming
    /// language and the `is_*` classification flags). When off, the tests in
    /// this module expect all of those fields to stay `None`.
    pub collect_info: bool,
    /// Enable package manifest parsing. The tests always combine it with at
    /// least one of the category flags below.
    pub detect_packages: bool,
    /// Parse application package manifests such as `package.json`.
    pub detect_application_packages: bool,
    /// Parse system package databases such as `var/lib/dpkg/status`.
    pub detect_system_packages: bool,
    /// Also extract package data from compiled artifacts (exercised in the
    /// tests with a Go binary).
    pub detect_packages_in_compiled: bool,
    /// Run copyright detection; the tests also see authors reported with
    /// only this flag set.
    pub detect_copyrights: bool,
    /// Set `is_generated` on scanned files; left `None` when disabled.
    pub detect_generated: bool,
    /// Report email addresses found in file content.
    pub detect_emails: bool,
    /// Report URLs found in file content.
    pub detect_urls: bool,
    /// NOTE(review): presumably an upper bound on reported emails per file — confirm.
    pub max_emails: usize,
    /// NOTE(review): presumably an upper bound on reported URLs per file — confirm.
    pub max_urls: usize,
    /// NOTE(review): presumably a per-file scan timeout in seconds — confirm.
    pub timeout_seconds: f64,
    /// NOTE(review): presumably an optional directory for cached scan results — confirm.
    pub scan_cache_dir: Option<PathBuf>,
}

impl Default for TextDetectionOptions {
    /// Baseline configuration: copyright detection is the only detector that
    /// is switched on; every other detector is opt-in.
    fn default() -> Self {
        Self {
            // The single detector enabled out of the box.
            detect_copyrights: true,

            // Opt-in detectors and info collection.
            collect_info: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,

            // Package detection and its category switches.
            detect_packages: false,
            detect_application_packages: false,
            detect_system_packages: false,
            detect_packages_in_compiled: false,

            // Numeric settings and caching.
            max_emails: 50,
            max_urls: 50,
            timeout_seconds: 120.0,
            scan_cache_dir: None,
        }
    }
}
58
59pub use self::collect::{CollectedPaths, collect_paths};
60#[allow(unused_imports)]
61pub use self::process::{process_collected, process_collected_with_memory_limit};
62
63#[cfg(test)]
64mod tests {
65 use std::fs;
66 use std::sync::Arc;
67
68 use tempfile::TempDir;
69
70 use crate::models::FileType;
71 use crate::progress::{ProgressMode, ScanProgress};
72
73 use super::{
74 LicenseScanOptions, TextDetectionOptions, collect_paths, process_collected,
75 process_collected_with_memory_limit,
76 };
77
78 #[test]
79 fn default_options_keep_copyright_detection_enabled() {
80 let options = TextDetectionOptions::default();
81 assert!(!options.detect_packages);
82 assert!(options.detect_copyrights);
83 }
84
85 fn scan_single_file(
86 file_name: &str,
87 content: &str,
88 options: &TextDetectionOptions,
89 ) -> crate::models::FileInfo {
90 let temp_dir = TempDir::new().expect("create temp dir");
91 let file_path = temp_dir.path().join(file_name);
92 fs::write(&file_path, content).expect("write test file");
93
94 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
95 let collected = collect_paths(temp_dir.path(), 0, &[]);
96 let result = process_collected(
97 &collected,
98 progress,
99 None,
100 LicenseScanOptions::default(),
101 options,
102 );
103
104 result
105 .files
106 .into_iter()
107 .find(|entry| {
108 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
109 })
110 .expect("scanned file entry")
111 }
112
113 fn scan_file_at_relative_path(
114 relative_path: &str,
115 content: &[u8],
116 options: &TextDetectionOptions,
117 ) -> crate::models::FileInfo {
118 let temp_dir = TempDir::new().expect("create temp dir");
119 let file_path = temp_dir.path().join(relative_path);
120 if let Some(parent) = file_path.parent() {
121 fs::create_dir_all(parent).expect("create parent dirs");
122 }
123 fs::write(&file_path, content).expect("write test file");
124
125 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
126 let collected = collect_paths(temp_dir.path(), 0, &[]);
127 let result = process_collected(
128 &collected,
129 progress,
130 None,
131 LicenseScanOptions::default(),
132 options,
133 );
134
135 result
136 .files
137 .into_iter()
138 .find(|entry| {
139 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
140 })
141 .expect("scanned file entry")
142 }
143
144 #[test]
145 fn scanner_reports_repeated_email_occurrences() {
146 let options = TextDetectionOptions {
147 collect_info: false,
148 detect_packages: false,
149 detect_application_packages: false,
150 detect_system_packages: false,
151 detect_packages_in_compiled: false,
152 detect_copyrights: false,
153 detect_generated: false,
154 detect_emails: true,
155 detect_urls: false,
156 max_emails: 50,
157 max_urls: 50,
158 timeout_seconds: 120.0,
159 scan_cache_dir: None,
160 };
161 let scanned = scan_single_file(
162 "contacts.txt",
163 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
164 &options,
165 );
166
167 let emails: Vec<(&str, usize)> = scanned
168 .emails
169 .iter()
170 .map(|email| (email.email.as_str(), email.start_line))
171 .collect();
172
173 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
174 assert_eq!(
175 emails,
176 vec![
177 ("linux@3ware.com", 1),
178 ("linux@3ware.com", 2),
179 ("andre@suse.com", 3),
180 ("linux@3ware.com", 4),
181 ]
182 );
183 }
184
185 #[test]
186 fn scanner_skips_pem_certificate_text_detection() {
187 let options = TextDetectionOptions {
188 collect_info: false,
189 detect_packages: false,
190 detect_application_packages: false,
191 detect_system_packages: false,
192 detect_packages_in_compiled: false,
193 detect_copyrights: true,
194 detect_generated: false,
195 detect_emails: true,
196 detect_urls: true,
197 max_emails: 50,
198 max_urls: 50,
199 timeout_seconds: 120.0,
200 scan_cache_dir: None,
201 };
202 let pem_fixture = concat!(
203 "-----BEGIN CERTIFICATE-----\n",
204 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
205 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
206 "-----END CERTIFICATE-----\n",
207 "Certificate:\n",
208 " Data:\n",
209 " Signature Algorithm: sha1WithRSAEncryption\n",
210 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
211 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
212 " Contact: cert-owner@example.com\n",
213 );
214 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
215
216 assert!(
217 scanned.copyrights.is_empty(),
218 "copyrights: {:#?}",
219 scanned.copyrights
220 );
221 assert!(
222 scanned.holders.is_empty(),
223 "holders: {:#?}",
224 scanned.holders
225 );
226 assert!(
227 scanned.authors.is_empty(),
228 "authors: {:#?}",
229 scanned.authors
230 );
231 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
232 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
233 assert!(
234 scanned.license_detections.is_empty(),
235 "licenses: {:#?}",
236 scanned.license_detections
237 );
238 assert!(
239 scanned.license_clues.is_empty(),
240 "license clues: {:#?}",
241 scanned.license_clues
242 );
243 }
244
245 #[test]
246 fn scanner_detects_structured_credits_authors() {
247 let options = TextDetectionOptions {
248 collect_info: false,
249 detect_packages: false,
250 detect_application_packages: false,
251 detect_system_packages: false,
252 detect_packages_in_compiled: false,
253 detect_copyrights: true,
254 detect_generated: false,
255 detect_emails: false,
256 detect_urls: false,
257 max_emails: 50,
258 max_urls: 50,
259 timeout_seconds: 120.0,
260 scan_cache_dir: None,
261 };
262 let credits_fixture = concat!(
263 "N: Jack Lloyd\n",
264 "E: lloyd@randombit.net\n",
265 "W: http://www.randombit.net/\n",
266 );
267 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
268
269 let authors: Vec<(&str, usize, usize)> = scanned
270 .authors
271 .iter()
272 .map(|author| (author.author.as_str(), author.start_line, author.end_line))
273 .collect();
274
275 assert_eq!(
276 authors,
277 vec![(
278 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
279 1,
280 3,
281 )]
282 );
283 assert!(scanned.copyrights.is_empty());
284 assert!(scanned.holders.is_empty());
285 }
286
287 #[test]
288 fn scanner_sets_generated_flag_when_enabled() {
289 let options = TextDetectionOptions {
290 collect_info: false,
291 detect_packages: false,
292 detect_application_packages: false,
293 detect_system_packages: false,
294 detect_packages_in_compiled: false,
295 detect_copyrights: false,
296 detect_generated: true,
297 detect_emails: false,
298 detect_urls: false,
299 max_emails: 50,
300 max_urls: 50,
301 timeout_seconds: 120.0,
302 scan_cache_dir: None,
303 };
304 let scanned = scan_single_file(
305 "generated.c",
306 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
307 &options,
308 );
309
310 assert_eq!(scanned.is_generated, Some(true));
311 }
312
313 #[test]
314 fn scanner_leaves_generated_flag_unset_when_disabled() {
315 let options = TextDetectionOptions {
316 collect_info: false,
317 detect_packages: false,
318 detect_application_packages: false,
319 detect_system_packages: false,
320 detect_packages_in_compiled: false,
321 detect_copyrights: false,
322 detect_generated: false,
323 detect_emails: false,
324 detect_urls: false,
325 max_emails: 50,
326 max_urls: 50,
327 timeout_seconds: 120.0,
328 scan_cache_dir: None,
329 };
330 let scanned = scan_single_file(
331 "generated.c",
332 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
333 &options,
334 );
335
336 assert_eq!(scanned.is_generated, None);
337 }
338
339 #[test]
340 fn scanner_populates_info_surface_when_enabled() {
341 let options = TextDetectionOptions {
342 collect_info: true,
343 detect_packages: false,
344 detect_application_packages: false,
345 detect_system_packages: false,
346 detect_packages_in_compiled: false,
347 detect_copyrights: false,
348 detect_generated: false,
349 detect_emails: false,
350 detect_urls: false,
351 max_emails: 50,
352 max_urls: 50,
353 timeout_seconds: 120.0,
354 scan_cache_dir: None,
355 };
356 let scanned = scan_single_file(
357 "script.py",
358 "#!/usr/bin/env python3\nprint(\"hello\")\n",
359 &options,
360 );
361
362 assert!(scanned.sha1.is_some());
363 assert!(scanned.md5.is_some());
364 assert!(scanned.sha256.is_some());
365 assert!(scanned.sha1_git.is_some());
366 assert!(scanned.mime_type.is_some());
367 assert!(scanned.date.is_some());
368 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
369 assert_eq!(scanned.is_text, Some(true));
370 assert_eq!(scanned.is_script, Some(true));
371 assert_eq!(scanned.is_source, Some(true));
372 }
373
374 #[test]
375 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
376 let options = TextDetectionOptions {
377 collect_info: true,
378 detect_packages: false,
379 detect_application_packages: false,
380 detect_system_packages: false,
381 detect_packages_in_compiled: false,
382 detect_copyrights: false,
383 detect_generated: false,
384 detect_emails: false,
385 detect_urls: false,
386 max_emails: 50,
387 max_urls: 50,
388 timeout_seconds: 120.0,
389 scan_cache_dir: None,
390 };
391 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
392 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
393
394 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
395 assert_eq!(
396 scanned.file_type_label.as_deref(),
397 Some("python script, text executable")
398 );
399 assert_eq!(scanned.is_binary, Some(false));
400 assert_eq!(scanned.is_text, Some(true));
401 assert_eq!(scanned.is_script, Some(true));
402 assert_eq!(scanned.is_source, Some(true));
403 }
404
405 #[test]
406 fn scanner_skips_findings_for_zip_like_archives() {
407 let options = TextDetectionOptions {
408 collect_info: true,
409 detect_packages: false,
410 detect_application_packages: false,
411 detect_system_packages: false,
412 detect_packages_in_compiled: false,
413 detect_copyrights: true,
414 detect_generated: false,
415 detect_emails: true,
416 detect_urls: true,
417 max_emails: 50,
418 max_urls: 50,
419 timeout_seconds: 120.0,
420 scan_cache_dir: None,
421 };
422 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
423 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
424
425 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
426 assert_eq!(scanned.is_archive, Some(true));
427 assert!(scanned.license_detections.is_empty());
428 assert!(scanned.copyrights.is_empty());
429 assert!(scanned.emails.is_empty());
430 assert!(scanned.urls.is_empty());
431 }
432
433 #[test]
434 fn scanner_treats_typescript_sources_as_text_not_video_media() {
435 let options = TextDetectionOptions {
436 collect_info: true,
437 detect_packages: false,
438 detect_application_packages: false,
439 detect_system_packages: false,
440 detect_packages_in_compiled: false,
441 detect_copyrights: false,
442 detect_generated: false,
443 detect_emails: false,
444 detect_urls: false,
445 max_emails: 50,
446 max_urls: 50,
447 timeout_seconds: 120.0,
448 scan_cache_dir: None,
449 };
450 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
451
452 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
453 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
454 assert_eq!(
455 scanned.file_type_label.as_deref(),
456 Some("UTF-8 Unicode text")
457 );
458 assert_eq!(scanned.is_text, Some(true));
459 assert_eq!(scanned.is_media, Some(false));
460 assert_eq!(scanned.is_script, Some(false));
461 assert_eq!(scanned.is_source, Some(true));
462 }
463
464 #[test]
465 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
466 let options = TextDetectionOptions {
467 collect_info: true,
468 detect_packages: false,
469 detect_application_packages: false,
470 detect_system_packages: false,
471 detect_packages_in_compiled: false,
472 detect_copyrights: false,
473 detect_generated: false,
474 detect_emails: false,
475 detect_urls: false,
476 max_emails: 50,
477 max_urls: 50,
478 timeout_seconds: 120.0,
479 scan_cache_dir: None,
480 };
481 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
482
483 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
484 assert_eq!(
485 scanned.file_type_label.as_deref(),
486 Some("UTF-8 Unicode text")
487 );
488 assert_eq!(scanned.is_text, Some(true));
489 assert_eq!(scanned.is_media, Some(false));
490 assert_eq!(scanned.is_script, Some(false));
491 assert_eq!(scanned.is_source, Some(true));
492 }
493
494 #[test]
495 fn scanner_treats_empty_files_like_scancode_info_surface() {
496 let options = TextDetectionOptions {
497 collect_info: true,
498 detect_packages: false,
499 detect_application_packages: false,
500 detect_system_packages: false,
501 detect_packages_in_compiled: false,
502 detect_copyrights: false,
503 detect_generated: false,
504 detect_emails: false,
505 detect_urls: false,
506 max_emails: 50,
507 max_urls: 50,
508 timeout_seconds: 120.0,
509 scan_cache_dir: None,
510 };
511 let scanned = scan_single_file("test.txt", "", &options);
512
513 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
514 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
515 assert_eq!(scanned.programming_language, None);
516 assert_eq!(scanned.is_binary, Some(false));
517 assert_eq!(scanned.is_text, Some(true));
518 assert_eq!(scanned.is_archive, Some(false));
519 assert_eq!(scanned.is_media, Some(false));
520 assert_eq!(scanned.is_source, Some(false));
521 assert_eq!(scanned.is_script, Some(false));
522 }
523
524 #[test]
525 fn scanner_treats_package_json_as_text_not_source() {
526 let options = TextDetectionOptions {
527 collect_info: true,
528 detect_packages: false,
529 detect_application_packages: false,
530 detect_system_packages: false,
531 detect_packages_in_compiled: false,
532 detect_copyrights: false,
533 detect_generated: false,
534 detect_emails: false,
535 detect_urls: false,
536 max_emails: 50,
537 max_urls: 50,
538 timeout_seconds: 120.0,
539 scan_cache_dir: None,
540 };
541 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
542
543 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
544 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
545 assert_eq!(scanned.programming_language, None);
546 assert_eq!(scanned.is_text, Some(true));
547 assert_eq!(scanned.is_source, Some(false));
548 assert_eq!(scanned.is_script, Some(false));
549 }
550
551 #[test]
552 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
553 let options = TextDetectionOptions {
554 collect_info: true,
555 detect_packages: false,
556 detect_application_packages: false,
557 detect_system_packages: false,
558 detect_packages_in_compiled: false,
559 detect_copyrights: false,
560 detect_generated: false,
561 detect_emails: false,
562 detect_urls: false,
563 max_emails: 50,
564 max_urls: 50,
565 timeout_seconds: 120.0,
566 scan_cache_dir: None,
567 };
568
569 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
570 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
571
572 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
573 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
574 assert_eq!(gradle.is_source, Some(true));
575 assert_eq!(gradle.is_script, Some(false));
576
577 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
578 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
579 assert_eq!(nix.is_source, Some(true));
580 assert_eq!(nix.is_script, Some(false));
581 }
582
583 #[test]
584 fn scanner_treats_gitmodules_as_text_not_source() {
585 let options = TextDetectionOptions {
586 collect_info: true,
587 detect_packages: false,
588 detect_application_packages: false,
589 detect_system_packages: false,
590 detect_packages_in_compiled: false,
591 detect_copyrights: false,
592 detect_generated: false,
593 detect_emails: false,
594 detect_urls: false,
595 max_emails: 50,
596 max_urls: 50,
597 timeout_seconds: 120.0,
598 scan_cache_dir: None,
599 };
600 let scanned = scan_file_at_relative_path(
601 ".gitmodules",
602 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
603 &options,
604 );
605
606 assert_eq!(scanned.programming_language, None);
607 assert_eq!(
608 scanned.file_type_label.as_deref(),
609 Some("Git configuration text")
610 );
611 assert_eq!(scanned.is_text, Some(true));
612 assert_eq!(scanned.is_source, Some(false));
613 assert_eq!(scanned.is_script, Some(false));
614 }
615
616 #[test]
617 fn scanner_treats_javascript_shebang_files_as_scripts() {
618 let options = TextDetectionOptions {
619 collect_info: true,
620 detect_packages: false,
621 detect_application_packages: false,
622 detect_system_packages: false,
623 detect_packages_in_compiled: false,
624 detect_copyrights: false,
625 detect_generated: false,
626 detect_emails: false,
627 detect_urls: false,
628 max_emails: 50,
629 max_urls: 50,
630 timeout_seconds: 120.0,
631 scan_cache_dir: None,
632 };
633 let scanned = scan_file_at_relative_path(
634 "bin/run",
635 b"#!/usr/bin/env node\nconsole.log('hello');\n",
636 &options,
637 );
638
639 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
640 assert_eq!(
641 scanned.file_type_label.as_deref(),
642 Some("javascript script, UTF-8 Unicode text executable")
643 );
644 assert_eq!(scanned.is_script, Some(true));
645 assert_eq!(scanned.is_source, Some(true));
646 }
647
648 #[test]
649 fn scanner_treats_dockerfile_as_source() {
650 let options = TextDetectionOptions {
651 collect_info: true,
652 detect_packages: false,
653 detect_application_packages: false,
654 detect_system_packages: false,
655 detect_packages_in_compiled: false,
656 detect_copyrights: false,
657 detect_generated: false,
658 detect_emails: false,
659 detect_urls: false,
660 max_emails: 50,
661 max_urls: 50,
662 timeout_seconds: 120.0,
663 scan_cache_dir: None,
664 };
665 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
666
667 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
668 assert_eq!(
669 scanned.file_type_label.as_deref(),
670 Some("UTF-8 Unicode text")
671 );
672 assert_eq!(scanned.is_source, Some(true));
673 assert_eq!(scanned.is_script, Some(false));
674 }
675
676 #[test]
677 fn scanner_treats_makefile_as_text_not_source() {
678 let options = TextDetectionOptions {
679 collect_info: true,
680 detect_packages: false,
681 detect_application_packages: false,
682 detect_system_packages: false,
683 detect_packages_in_compiled: false,
684 detect_copyrights: false,
685 detect_generated: false,
686 detect_emails: false,
687 detect_urls: false,
688 max_emails: 50,
689 max_urls: 50,
690 timeout_seconds: 120.0,
691 scan_cache_dir: None,
692 };
693 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
694
695 assert_eq!(scanned.programming_language, None);
696 assert_eq!(
697 scanned.file_type_label.as_deref(),
698 Some("UTF-8 Unicode text")
699 );
700 assert_eq!(scanned.is_text, Some(true));
701 assert_eq!(scanned.is_source, Some(false));
702 assert_eq!(scanned.is_script, Some(false));
703 }
704
705 #[test]
706 fn scanner_omits_info_surface_when_disabled() {
707 let options = TextDetectionOptions {
708 collect_info: false,
709 detect_packages: false,
710 detect_application_packages: false,
711 detect_system_packages: false,
712 detect_packages_in_compiled: false,
713 detect_copyrights: false,
714 detect_generated: false,
715 detect_emails: false,
716 detect_urls: false,
717 max_emails: 50,
718 max_urls: 50,
719 timeout_seconds: 120.0,
720 scan_cache_dir: None,
721 };
722 let scanned = scan_single_file(
723 "script.py",
724 "#!/usr/bin/env python3\nprint(\"hello\")\n",
725 &options,
726 );
727
728 assert!(scanned.sha1.is_none());
729 assert!(scanned.md5.is_none());
730 assert!(scanned.sha256.is_none());
731 assert!(scanned.sha1_git.is_none());
732 assert!(scanned.mime_type.is_none());
733 assert!(scanned.date.is_none());
734 assert!(scanned.programming_language.is_none());
735 assert!(scanned.is_binary.is_none());
736 assert!(scanned.is_text.is_none());
737 assert!(scanned.is_archive.is_none());
738 assert!(scanned.is_media.is_none());
739 assert!(scanned.is_script.is_none());
740 assert!(scanned.is_source.is_none());
741 }
742
743 #[test]
744 fn scanner_skips_package_parsing_when_disabled() {
745 let options = TextDetectionOptions {
746 collect_info: false,
747 detect_packages: false,
748 detect_application_packages: false,
749 detect_system_packages: false,
750 detect_packages_in_compiled: false,
751 detect_copyrights: false,
752 detect_generated: false,
753 detect_emails: false,
754 detect_urls: false,
755 max_emails: 50,
756 max_urls: 50,
757 timeout_seconds: 120.0,
758 scan_cache_dir: None,
759 };
760 let scanned = scan_single_file(
761 "package.json",
762 r#"{"name":"demo","version":"1.0.0"}"#,
763 &options,
764 );
765
766 assert!(
767 scanned.package_data.is_empty(),
768 "package_data: {:#?}",
769 scanned.package_data
770 );
771 }
772
773 #[test]
774 fn scanner_parses_package_manifests_when_enabled() {
775 let options = TextDetectionOptions {
776 collect_info: false,
777 detect_packages: true,
778 detect_application_packages: true,
779 detect_system_packages: false,
780 detect_packages_in_compiled: false,
781 detect_copyrights: false,
782 detect_generated: false,
783 detect_emails: false,
784 detect_urls: false,
785 max_emails: 50,
786 max_urls: 50,
787 timeout_seconds: 120.0,
788 scan_cache_dir: None,
789 };
790 let scanned = scan_single_file(
791 "package.json",
792 r#"{"name":"demo","version":"1.0.0"}"#,
793 &options,
794 );
795
796 assert_eq!(
797 scanned.package_data.len(),
798 1,
799 "package_data: {:#?}",
800 scanned.package_data
801 );
802 }
803
804 #[test]
805 fn scanner_skips_application_packages_when_only_system_packages_enabled() {
806 let options = TextDetectionOptions {
807 collect_info: false,
808 detect_packages: true,
809 detect_application_packages: false,
810 detect_system_packages: true,
811 detect_packages_in_compiled: false,
812 detect_copyrights: false,
813 detect_generated: false,
814 detect_emails: false,
815 detect_urls: false,
816 max_emails: 50,
817 max_urls: 50,
818 timeout_seconds: 120.0,
819 scan_cache_dir: None,
820 };
821 let scanned = scan_single_file(
822 "package.json",
823 r#"{"name":"demo","version":"1.0.0"}"#,
824 &options,
825 );
826
827 assert!(
828 scanned.package_data.is_empty(),
829 "package_data: {:#?}",
830 scanned.package_data
831 );
832 }
833
834 #[test]
835 fn scanner_parses_system_package_files_when_enabled() {
836 let options = TextDetectionOptions {
837 collect_info: false,
838 detect_packages: true,
839 detect_application_packages: false,
840 detect_system_packages: true,
841 detect_packages_in_compiled: false,
842 detect_copyrights: false,
843 detect_generated: false,
844 detect_emails: false,
845 detect_urls: false,
846 max_emails: 50,
847 max_urls: 50,
848 timeout_seconds: 120.0,
849 scan_cache_dir: None,
850 };
851 let scanned = scan_file_at_relative_path(
852 "var/lib/dpkg/status",
853 b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n",
854 &options,
855 );
856
857 assert!(
858 !scanned.package_data.is_empty(),
859 "package_data: {:#?}",
860 scanned.package_data
861 );
862 }
863
864 #[test]
865 fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
866 let temp_dir = TempDir::new().expect("create temp dir");
867 fs::write(
868 temp_dir.path().join("go.mod"),
869 "module example.com/demo\n\ngo 1.23.0\n",
870 )
871 .expect("write go.mod");
872 fs::write(
873 temp_dir.path().join("main.go"),
874 "package main\nfunc main() {}\n",
875 )
876 .expect("write main.go");
877 let file_path = temp_dir.path().join("demo");
878 let status = std::process::Command::new("go")
879 .current_dir(temp_dir.path())
880 .args(["build", "-o"])
881 .arg(&file_path)
882 .status()
883 .expect("run go build");
884 assert!(status.success());
885
886 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
887 let collected = collect_paths(temp_dir.path(), 0, &[]);
888
889 let without_compiled = process_collected(
890 &collected,
891 Arc::clone(&progress),
892 None,
893 LicenseScanOptions::default(),
894 &TextDetectionOptions {
895 collect_info: false,
896 detect_packages: true,
897 detect_application_packages: true,
898 detect_system_packages: false,
899 detect_packages_in_compiled: false,
900 detect_copyrights: false,
901 detect_generated: false,
902 detect_emails: false,
903 detect_urls: false,
904 max_emails: 50,
905 max_urls: 50,
906 timeout_seconds: 120.0,
907 scan_cache_dir: None,
908 },
909 );
910 let with_compiled = process_collected(
911 &collected,
912 progress,
913 None,
914 LicenseScanOptions::default(),
915 &TextDetectionOptions {
916 collect_info: false,
917 detect_packages: true,
918 detect_application_packages: true,
919 detect_system_packages: false,
920 detect_packages_in_compiled: true,
921 detect_copyrights: false,
922 detect_generated: false,
923 detect_emails: false,
924 detect_urls: false,
925 max_emails: 50,
926 max_urls: 50,
927 timeout_seconds: 120.0,
928 scan_cache_dir: None,
929 },
930 );
931
932 let without_compiled = without_compiled
933 .files
934 .into_iter()
935 .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
936 .expect("compiled artifact present");
937 let with_compiled = with_compiled
938 .files
939 .into_iter()
940 .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
941 .expect("compiled artifact present");
942
943 assert!(
944 without_compiled.package_data.is_empty(),
945 "package_data: {:#?}",
946 without_compiled.package_data
947 );
948 assert!(!with_compiled.package_data.is_empty());
949 }
950
951 #[test]
952 fn scanner_sets_is_source_only_when_info_enabled() {
953 let without_info = TextDetectionOptions {
954 collect_info: false,
955 detect_packages: false,
956 detect_application_packages: false,
957 detect_system_packages: false,
958 detect_packages_in_compiled: false,
959 detect_copyrights: false,
960 detect_generated: false,
961 detect_emails: false,
962 detect_urls: false,
963 max_emails: 50,
964 max_urls: 50,
965 timeout_seconds: 120.0,
966 scan_cache_dir: None,
967 };
968 let with_info = TextDetectionOptions {
969 collect_info: true,
970 ..without_info.clone()
971 };
972
973 let scanned_without_info = scan_single_file("main.rs", "fn main() {}\n", &without_info);
974 let scanned_with_info = scan_single_file("main.rs", "fn main() {}\n", &with_info);
975
976 assert_eq!(scanned_without_info.is_source, None);
977 assert_eq!(scanned_with_info.is_source, Some(true));
978 }
979
980 #[test]
981 fn directory_omits_info_fields_when_info_disabled() {
982 let temp_dir = TempDir::new().expect("create temp dir");
983 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
984
985 let collected = collect_paths(temp_dir.path(), 0, &[]);
986 let result = process_collected(
987 &collected,
988 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
989 None,
990 LicenseScanOptions::default(),
991 &TextDetectionOptions {
992 collect_info: false,
993 detect_packages: false,
994 detect_application_packages: false,
995 detect_system_packages: false,
996 detect_packages_in_compiled: false,
997 detect_copyrights: false,
998 detect_generated: false,
999 detect_emails: false,
1000 detect_urls: false,
1001 max_emails: 50,
1002 max_urls: 50,
1003 timeout_seconds: 120.0,
1004 scan_cache_dir: None,
1005 },
1006 );
1007
1008 let directory = result
1009 .files
1010 .into_iter()
1011 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1012 .expect("directory entry");
1013
1014 assert!(directory.date.is_none());
1015 assert!(directory.file_type_label.is_none());
1016 assert!(directory.is_binary.is_none());
1017 assert!(directory.is_text.is_none());
1018 assert!(directory.is_archive.is_none());
1019 assert!(directory.is_media.is_none());
1020 assert!(directory.is_source.is_none());
1021 assert!(directory.is_script.is_none());
1022 }
1023
1024 #[test]
1025 fn directory_includes_info_fields_when_info_enabled() {
1026 let temp_dir = TempDir::new().expect("create temp dir");
1027 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
1028
1029 let collected = collect_paths(temp_dir.path(), 0, &[]);
1030 let result = process_collected(
1031 &collected,
1032 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1033 None,
1034 LicenseScanOptions::default(),
1035 &TextDetectionOptions {
1036 collect_info: true,
1037 detect_packages: false,
1038 detect_application_packages: false,
1039 detect_system_packages: false,
1040 detect_packages_in_compiled: false,
1041 detect_copyrights: false,
1042 detect_generated: false,
1043 detect_emails: false,
1044 detect_urls: false,
1045 max_emails: 50,
1046 max_urls: 50,
1047 timeout_seconds: 120.0,
1048 scan_cache_dir: None,
1049 },
1050 );
1051
1052 let directory = result
1053 .files
1054 .into_iter()
1055 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1056 .expect("directory entry");
1057
1058 assert!(directory.date.is_none());
1059 assert!(directory.file_type_label.is_none());
1060 assert_eq!(directory.is_binary, Some(false));
1061 assert_eq!(directory.is_text, Some(false));
1062 assert_eq!(directory.is_archive, Some(false));
1063 assert_eq!(directory.is_media, Some(false));
1064 assert_eq!(directory.is_source, Some(false));
1065 assert_eq!(directory.is_script, Some(false));
1066 assert_eq!(directory.files_count, Some(0));
1067 assert_eq!(directory.dirs_count, Some(0));
1068 assert_eq!(directory.size_count, Some(0));
1069 }
1070
1071 #[test]
1072 fn collect_paths_includes_root_directory_entry() {
1073 let temp_dir = TempDir::new().expect("create temp dir");
1074 fs::create_dir_all(temp_dir.path().join("src")).expect("create nested dir");
1075 fs::write(temp_dir.path().join("src").join("main.rs"), "fn main() {}")
1076 .expect("write nested file");
1077
1078 let collected = collect_paths(temp_dir.path(), 0, &[]);
1079
1080 assert!(
1081 collected
1082 .directories
1083 .iter()
1084 .any(|(path, _)| path == temp_dir.path())
1085 );
1086 }
1087
1088 #[test]
1089 fn collect_paths_supports_single_file_input() {
1090 let temp_dir = TempDir::new().expect("create temp dir");
1091 let file_path = temp_dir.path().join("main.rs");
1092 fs::write(&file_path, "fn main() {}\n").expect("write file");
1093
1094 let collected = collect_paths(&file_path, 0, &[]);
1095
1096 assert_eq!(collected.files.len(), 1);
1097 assert!(collected.directories.is_empty());
1098 assert_eq!(collected.files[0].0, file_path);
1099 }
1100
1101 #[test]
1102 fn process_collected_with_memory_limit_preserves_results_when_spilling() {
1103 let temp_dir = TempDir::new().expect("create temp dir");
1104 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1105 fs::write(temp_dir.path().join("b.txt"), "world").expect("write second file");
1106
1107 let collected = collect_paths(temp_dir.path(), 0, &[]);
1108 let result = process_collected_with_memory_limit(
1109 &collected,
1110 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1111 None,
1112 LicenseScanOptions::default(),
1113 &TextDetectionOptions {
1114 collect_info: false,
1115 detect_packages: false,
1116 detect_application_packages: false,
1117 detect_system_packages: false,
1118 detect_packages_in_compiled: false,
1119 detect_copyrights: false,
1120 detect_generated: false,
1121 detect_emails: false,
1122 detect_urls: false,
1123 max_emails: 50,
1124 max_urls: 50,
1125 timeout_seconds: 120.0,
1126 scan_cache_dir: None,
1127 },
1128 1,
1129 );
1130
1131 assert_eq!(result.files.len(), 3);
1132 }
1133
1134 #[test]
1135 fn process_collected_with_negative_one_uses_disk_only_mode() {
1136 let temp_dir = TempDir::new().expect("create temp dir");
1137 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1138
1139 let collected = collect_paths(temp_dir.path(), 0, &[]);
1140 let result = process_collected_with_memory_limit(
1141 &collected,
1142 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1143 None,
1144 LicenseScanOptions::default(),
1145 &TextDetectionOptions {
1146 collect_info: false,
1147 detect_packages: false,
1148 detect_application_packages: false,
1149 detect_system_packages: false,
1150 detect_packages_in_compiled: false,
1151 detect_copyrights: false,
1152 detect_generated: false,
1153 detect_emails: false,
1154 detect_urls: false,
1155 max_emails: 50,
1156 max_urls: 50,
1157 timeout_seconds: 120.0,
1158 scan_cache_dir: None,
1159 },
1160 -1,
1161 );
1162
1163 assert_eq!(result.files.len(), 2);
1164 }
1165}