1mod collect;
2mod process;
3
4use crate::license_detection::LicenseDetectionEngine;
5use crate::models::FileInfo;
6
/// Bundle of scanned file entries plus an exclusion counter.
///
/// NOTE(review): presumably the value returned by the `process_collected*`
/// functions re-exported from this module — confirm in `process`.
pub struct ProcessResult {
    /// One [`FileInfo`] per reported entry.
    pub files: Vec<FileInfo>,
    /// NOTE(review): presumably the number of paths skipped by exclusion
    /// rules — confirm against the code that fills it in.
    pub excluded_count: usize,
}
11
/// Switches for the license scan.
///
/// Every field is folded into the string built by
/// [`scan_options_fingerprint`]. The derived `Default` yields all flags
/// `false` and a `min_score` of `0`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
    /// Appears in the fingerprint as `license_text`.
    /// NOTE(review): presumably attaches matched text to results — confirm in `process`.
    pub include_text: bool,
    /// Appears in the fingerprint as `license_text_diagnostics`.
    pub include_text_diagnostics: bool,
    /// Appears in the fingerprint as `license_diagnostics`.
    pub include_diagnostics: bool,
    /// Appears in the fingerprint as `unknown_licenses`.
    pub unknown_licenses: bool,
    /// Appears in the fingerprint as `license_score`.
    /// NOTE(review): presumably a minimum match score threshold — confirm the valid range.
    pub min_score: u8,
}
20
/// Per-scan switches and limits for the non-license detectors.
///
/// Build one with struct-update syntax on top of [`Default`], e.g.
/// `TextDetectionOptions { detect_emails: true, ..Default::default() }`.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Populate the "info" fields of each file entry (hashes, MIME type,
    /// language, `is_*` flags).
    pub collect_info: bool,
    /// Master switch for package manifest parsing.
    pub detect_packages: bool,
    /// Parse application-level package manifests (e.g. `package.json`).
    pub detect_application_packages: bool,
    /// Parse system package databases (e.g. `var/lib/dpkg/status`).
    pub detect_system_packages: bool,
    /// Look for package data inside compiled binaries.
    pub detect_packages_in_compiled: bool,
    /// Detect copyrights, holders and authors.
    pub detect_copyrights: bool,
    /// Fill in the `is_generated` flag of each file entry.
    pub detect_generated: bool,
    /// Detect email addresses.
    pub detect_emails: bool,
    /// Detect URLs.
    pub detect_urls: bool,
    /// NOTE(review): presumably the per-file cap on reported emails — confirm in `process`.
    pub max_emails: usize,
    /// NOTE(review): presumably the per-file cap on reported URLs — confirm in `process`.
    pub max_urls: usize,
    /// NOTE(review): presumably a per-file time budget in seconds — confirm in `process`.
    pub timeout_seconds: f64,
}

impl Default for TextDetectionOptions {
    /// Returns the baseline configuration: only copyright detection is on,
    /// both finding limits are 50 and the timeout is 120 seconds.
    fn default() -> Self {
        const FINDINGS_LIMIT: usize = 50;
        const TIMEOUT_SECONDS: f64 = 120.0;

        Self {
            // The single detector that is enabled out of the box.
            detect_copyrights: true,

            // Everything else is opt-in.
            collect_info: false,
            detect_packages: false,
            detect_application_packages: false,
            detect_system_packages: false,
            detect_packages_in_compiled: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,

            // Limits.
            max_emails: FINDINGS_LIMIT,
            max_urls: FINDINGS_LIMIT,
            timeout_seconds: TIMEOUT_SECONDS,
        }
    }
}
55
56pub fn scan_options_fingerprint(
57 text_options: &TextDetectionOptions,
58 license_options: LicenseScanOptions,
59 license_engine: Option<&LicenseDetectionEngine>,
60) -> String {
61 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
62 Some(engine) => {
63 let rules = &engine.index().rules_by_rid;
64 (
65 true,
66 rules.len(),
67 rules
68 .first()
69 .map(|rule| rule.identifier.as_str())
70 .unwrap_or(""),
71 rules
72 .last()
73 .map(|rule| rule.identifier.as_str())
74 .unwrap_or(""),
75 )
76 }
77 None => (false, 0, "", ""),
78 };
79
80 format!(
81 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
82 crate::version::BUILD_VERSION,
83 text_options.collect_info,
84 text_options.detect_packages,
85 text_options.detect_application_packages,
86 text_options.detect_system_packages,
87 text_options.detect_packages_in_compiled,
88 text_options.detect_copyrights,
89 text_options.detect_generated,
90 text_options.detect_emails,
91 text_options.detect_urls,
92 text_options.max_emails,
93 text_options.max_urls,
94 text_options.timeout_seconds,
95 license_enabled,
96 rules_count,
97 first_rule_id,
98 last_rule_id,
99 license_options.include_text,
100 license_options.include_text_diagnostics,
101 license_options.include_diagnostics,
102 license_options.unknown_licenses,
103 license_options.min_score,
104 )
105}
106
107pub use self::collect::{CollectedPaths, collect_paths};
108#[allow(unused_imports)]
109pub use self::process::{
110 MemoryMode, process_collected, process_collected_sequential,
111 process_collected_with_memory_limit, process_collected_with_memory_limit_sequential,
112};
113
114#[cfg(test)]
115mod tests {
116 use std::fs;
117 use std::sync::Arc;
118
119 use tempfile::TempDir;
120
121 use crate::license_detection::LicenseDetectionEngine;
122 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
123 use crate::progress::{ProgressMode, ScanProgress};
124
125 use super::{
126 LicenseScanOptions, MemoryMode, TextDetectionOptions, collect_paths, process_collected,
127 process_collected_with_memory_limit, scan_options_fingerprint,
128 };
129
130 #[test]
131 fn default_options_keep_copyright_detection_enabled() {
132 let options = TextDetectionOptions::default();
133 assert!(!options.detect_packages);
134 assert!(options.detect_copyrights);
135 }
136
137 #[test]
138 fn test_scan_options_fingerprint_changes_with_license_score() {
139 let text_options = TextDetectionOptions::default();
140 let default_fingerprint = scan_options_fingerprint(
141 &text_options,
142 LicenseScanOptions {
143 min_score: 0,
144 ..LicenseScanOptions::default()
145 },
146 None,
147 );
148 let filtered_fingerprint = scan_options_fingerprint(
149 &text_options,
150 LicenseScanOptions {
151 min_score: 70,
152 ..LicenseScanOptions::default()
153 },
154 None,
155 );
156
157 assert_ne!(default_fingerprint, filtered_fingerprint);
158 }
159
160 fn scan_single_file(
161 file_name: &str,
162 content: &str,
163 options: &TextDetectionOptions,
164 ) -> crate::models::FileInfo {
165 let temp_dir = TempDir::new().expect("create temp dir");
166 let file_path = temp_dir.path().join(file_name);
167 fs::write(&file_path, content).expect("write test file");
168
169 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
170 let collected = collect_paths(temp_dir.path(), 0, &[]);
171 let result = process_collected(
172 &collected,
173 progress,
174 None,
175 LicenseScanOptions::default(),
176 options,
177 );
178
179 result
180 .files
181 .into_iter()
182 .find(|entry| {
183 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
184 })
185 .expect("scanned file entry")
186 }
187
188 fn scan_file_at_relative_path(
189 relative_path: &str,
190 content: &[u8],
191 options: &TextDetectionOptions,
192 ) -> crate::models::FileInfo {
193 let temp_dir = TempDir::new().expect("create temp dir");
194 let file_path = temp_dir.path().join(relative_path);
195 if let Some(parent) = file_path.parent() {
196 fs::create_dir_all(parent).expect("create parent dirs");
197 }
198 fs::write(&file_path, content).expect("write test file");
199
200 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
201 let collected = collect_paths(temp_dir.path(), 0, &[]);
202 let result = process_collected(
203 &collected,
204 progress,
205 None,
206 LicenseScanOptions::default(),
207 options,
208 );
209
210 result
211 .files
212 .into_iter()
213 .find(|entry| {
214 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
215 })
216 .expect("scanned file entry")
217 }
218
219 fn scan_single_file_with_license_engine(
220 file_name: &str,
221 content: &str,
222 options: &TextDetectionOptions,
223 ) -> crate::models::FileInfo {
224 let temp_dir = TempDir::new().expect("create temp dir");
225 let file_path = temp_dir.path().join(file_name);
226 fs::write(&file_path, content).expect("write test file");
227
228 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
229 let collected = collect_paths(temp_dir.path(), 0, &[]);
230 let engine =
231 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
232 let result = process_collected(
233 &collected,
234 progress,
235 Some(engine),
236 LicenseScanOptions::default(),
237 options,
238 );
239
240 result
241 .files
242 .into_iter()
243 .find(|entry| {
244 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
245 })
246 .expect("scanned file entry")
247 }
248
249 #[test]
250 fn scanner_reports_repeated_email_occurrences() {
251 let options = TextDetectionOptions {
252 collect_info: false,
253 detect_packages: false,
254 detect_application_packages: false,
255 detect_system_packages: false,
256 detect_packages_in_compiled: false,
257 detect_copyrights: false,
258 detect_generated: false,
259 detect_emails: true,
260 detect_urls: false,
261 max_emails: 50,
262 max_urls: 50,
263 timeout_seconds: 120.0,
264 };
265 let scanned = scan_single_file(
266 "contacts.txt",
267 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
268 &options,
269 );
270
271 let emails: Vec<(&str, usize)> = scanned
272 .emails
273 .iter()
274 .map(|email| (email.email.as_str(), email.start_line.get()))
275 .collect();
276
277 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
278 assert_eq!(
279 emails,
280 vec![
281 ("linux@3ware.com", 1),
282 ("linux@3ware.com", 2),
283 ("andre@suse.com", 3),
284 ("linux@3ware.com", 4),
285 ]
286 );
287 }
288
289 #[test]
290 fn scanner_skips_pem_certificate_text_detection() {
291 let options = TextDetectionOptions {
292 collect_info: false,
293 detect_packages: false,
294 detect_application_packages: false,
295 detect_system_packages: false,
296 detect_packages_in_compiled: false,
297 detect_copyrights: true,
298 detect_generated: false,
299 detect_emails: true,
300 detect_urls: true,
301 max_emails: 50,
302 max_urls: 50,
303 timeout_seconds: 120.0,
304 };
305 let pem_fixture = concat!(
306 "-----BEGIN CERTIFICATE-----\n",
307 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
308 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
309 "-----END CERTIFICATE-----\n",
310 "Certificate:\n",
311 " Data:\n",
312 " Signature Algorithm: sha1WithRSAEncryption\n",
313 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
314 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
315 " Contact: cert-owner@example.com\n",
316 );
317 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
318
319 assert!(
320 scanned.copyrights.is_empty(),
321 "copyrights: {:#?}",
322 scanned.copyrights
323 );
324 assert!(
325 scanned.holders.is_empty(),
326 "holders: {:#?}",
327 scanned.holders
328 );
329 assert!(
330 scanned.authors.is_empty(),
331 "authors: {:#?}",
332 scanned.authors
333 );
334 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
335 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
336 assert!(
337 scanned.license_detections.is_empty(),
338 "licenses: {:#?}",
339 scanned.license_detections
340 );
341 assert!(
342 scanned.license_clues.is_empty(),
343 "license clues: {:#?}",
344 scanned.license_clues
345 );
346 }
347
348 #[test]
349 fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
350 let options = TextDetectionOptions {
351 collect_info: false,
352 detect_packages: false,
353 detect_application_packages: false,
354 detect_system_packages: false,
355 detect_packages_in_compiled: false,
356 detect_copyrights: true,
357 detect_generated: false,
358 detect_emails: false,
359 detect_urls: true,
360 max_emails: 50,
361 max_urls: 50,
362 timeout_seconds: 120.0,
363 };
364 let fixture = concat!(
365 "/*\n",
366 "Copyright 2022 The Kubernetes Authors.\n\n",
367 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
368 "you may not use this file except in compliance with the License.\n",
369 "You may obtain a copy of the License at\n\n",
370 " http://www.apache.org/licenses/LICENSE-2.0\n",
371 "*/\n\n",
372 "package storage\n\n",
373 "const validCert = `\n",
374 "-----BEGIN CERTIFICATE-----\n",
375 "MIIDmTCCAoGgAwIBAgIUWQ==\n",
376 "-----END CERTIFICATE-----\n",
377 "`\n",
378 );
379 let temp_dir = TempDir::new().expect("create temp dir");
380 let file_path = temp_dir.path().join("storage_test.go");
381 fs::write(&file_path, fixture).expect("write fixture");
382
383 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
384 let collected = collect_paths(temp_dir.path(), 0, &[]);
385 let engine =
386 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
387 let result = process_collected(
388 &collected,
389 progress,
390 Some(engine),
391 LicenseScanOptions::default(),
392 &options,
393 );
394 let scanned = result
395 .files
396 .into_iter()
397 .find(|entry| {
398 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
399 })
400 .expect("scanned file entry");
401
402 assert!(
403 scanned
404 .copyrights
405 .iter()
406 .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors"),
407 "copyrights: {:#?}",
408 scanned.copyrights
409 );
410 assert!(
411 scanned
412 .holders
413 .iter()
414 .any(|h| h.holder == "The Kubernetes Authors"),
415 "holders: {:#?}",
416 scanned.holders
417 );
418 assert!(
419 scanned
420 .urls
421 .iter()
422 .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
423 "urls: {:#?}",
424 scanned.urls
425 );
426 assert_eq!(scanned.license_expression.as_deref(), Some("Apache-2.0"));
427 }
428
429 #[test]
430 fn scanner_detects_structured_credits_authors() {
431 let options = TextDetectionOptions {
432 collect_info: false,
433 detect_packages: false,
434 detect_application_packages: false,
435 detect_system_packages: false,
436 detect_packages_in_compiled: false,
437 detect_copyrights: true,
438 detect_generated: false,
439 detect_emails: false,
440 detect_urls: false,
441 max_emails: 50,
442 max_urls: 50,
443 timeout_seconds: 120.0,
444 };
445 let credits_fixture = concat!(
446 "N: Jack Lloyd\n",
447 "E: lloyd@randombit.net\n",
448 "W: http://www.randombit.net/\n",
449 );
450 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
451
452 let authors: Vec<(&str, usize, usize)> = scanned
453 .authors
454 .iter()
455 .map(|author| {
456 (
457 author.author.as_str(),
458 author.start_line.get(),
459 author.end_line.get(),
460 )
461 })
462 .collect();
463
464 assert_eq!(
465 authors,
466 vec![(
467 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
468 1,
469 3,
470 )]
471 );
472 assert!(scanned.copyrights.is_empty());
473 assert!(scanned.holders.is_empty());
474 }
475
476 #[test]
477 fn scanner_uses_or_for_alternative_license_header() {
478 let fixture =
479 include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");
480 let temp_dir = TempDir::new().expect("create temp dir");
481 let file_path = temp_dir.path().join("d2s.ipp");
482 fs::write(&file_path, fixture).expect("write fixture");
483
484 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
485 let collected = collect_paths(temp_dir.path(), 0, &[]);
486 let engine =
487 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
488 let result = process_collected(
489 &collected,
490 progress,
491 Some(engine),
492 LicenseScanOptions::default(),
493 &TextDetectionOptions::default(),
494 );
495 let scanned = result
496 .files
497 .into_iter()
498 .find(|entry| {
499 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
500 })
501 .expect("scanned file entry");
502
503 assert_eq!(
504 scanned.license_expression.as_deref(),
505 Some("Apache-2.0 OR BSL-1.0")
506 );
507 assert!(
508 scanned.license_clues.is_empty(),
509 "license clues: {:#?}",
510 scanned.license_clues
511 );
512 assert_eq!(
513 scanned.license_detections.len(),
514 1,
515 "detections: {:#?}",
516 scanned.license_detections
517 );
518
519 let detection = &scanned.license_detections[0];
520 assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");
521
522 let match_expressions: Vec<_> = detection
523 .matches
524 .iter()
525 .map(|m| m.license_expression_spdx.as_str())
526 .collect();
527 assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
528 }
529
530 #[test]
531 fn scanner_sets_generated_flag_when_enabled() {
532 let options = TextDetectionOptions {
533 collect_info: false,
534 detect_packages: false,
535 detect_application_packages: false,
536 detect_system_packages: false,
537 detect_packages_in_compiled: false,
538 detect_copyrights: false,
539 detect_generated: true,
540 detect_emails: false,
541 detect_urls: false,
542 max_emails: 50,
543 max_urls: 50,
544 timeout_seconds: 120.0,
545 };
546 let scanned = scan_single_file(
547 "generated.c",
548 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
549 &options,
550 );
551
552 assert_eq!(scanned.is_generated, Some(true));
553 }
554
555 #[test]
556 fn scanner_leaves_generated_flag_unset_when_disabled() {
557 let options = TextDetectionOptions {
558 collect_info: false,
559 detect_packages: false,
560 detect_application_packages: false,
561 detect_system_packages: false,
562 detect_packages_in_compiled: false,
563 detect_copyrights: false,
564 detect_generated: false,
565 detect_emails: false,
566 detect_urls: false,
567 max_emails: 50,
568 max_urls: 50,
569 timeout_seconds: 120.0,
570 };
571 let scanned = scan_single_file(
572 "generated.c",
573 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
574 &options,
575 );
576
577 assert_eq!(scanned.is_generated, None);
578 }
579
580 #[test]
581 fn scanner_populates_info_surface_when_enabled() {
582 let options = TextDetectionOptions {
583 collect_info: true,
584 detect_packages: false,
585 detect_application_packages: false,
586 detect_system_packages: false,
587 detect_packages_in_compiled: false,
588 detect_copyrights: false,
589 detect_generated: false,
590 detect_emails: false,
591 detect_urls: false,
592 max_emails: 50,
593 max_urls: 50,
594 timeout_seconds: 120.0,
595 };
596 let scanned = scan_single_file(
597 "script.py",
598 "#!/usr/bin/env python3\nprint(\"hello\")\n",
599 &options,
600 );
601
602 assert!(scanned.sha1.is_some());
603 assert!(scanned.md5.is_some());
604 assert!(scanned.sha256.is_some());
605 assert!(scanned.sha1_git.is_some());
606 assert!(scanned.mime_type.is_some());
607 assert!(scanned.date.is_some());
608 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
609 assert_eq!(scanned.is_text, Some(true));
610 assert_eq!(scanned.is_script, Some(true));
611 assert_eq!(scanned.is_source, Some(true));
612 }
613
614 #[test]
615 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
616 let options = TextDetectionOptions {
617 collect_info: true,
618 detect_packages: false,
619 detect_application_packages: false,
620 detect_system_packages: false,
621 detect_packages_in_compiled: false,
622 detect_copyrights: false,
623 detect_generated: false,
624 detect_emails: false,
625 detect_urls: false,
626 max_emails: 50,
627 max_urls: 50,
628 timeout_seconds: 120.0,
629 };
630 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
631 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
632
633 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
634 assert_eq!(
635 scanned.file_type_label.as_deref(),
636 Some("python script, text executable")
637 );
638 assert_eq!(scanned.is_binary, Some(false));
639 assert_eq!(scanned.is_text, Some(true));
640 assert_eq!(scanned.is_script, Some(true));
641 assert_eq!(scanned.is_source, Some(true));
642 }
643
644 #[test]
645 fn scanner_skips_findings_for_zip_like_archives() {
646 let options = TextDetectionOptions {
647 collect_info: true,
648 detect_packages: false,
649 detect_application_packages: false,
650 detect_system_packages: false,
651 detect_packages_in_compiled: false,
652 detect_copyrights: true,
653 detect_generated: false,
654 detect_emails: true,
655 detect_urls: true,
656 max_emails: 50,
657 max_urls: 50,
658 timeout_seconds: 120.0,
659 };
660 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
661 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
662
663 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
664 assert_eq!(scanned.is_archive, Some(true));
665 assert!(scanned.license_detections.is_empty());
666 assert!(scanned.copyrights.is_empty());
667 assert!(scanned.emails.is_empty());
668 assert!(scanned.urls.is_empty());
669 }
670
671 #[test]
672 fn scanner_treats_typescript_sources_as_text_not_video_media() {
673 let options = TextDetectionOptions {
674 collect_info: true,
675 detect_packages: false,
676 detect_application_packages: false,
677 detect_system_packages: false,
678 detect_packages_in_compiled: false,
679 detect_copyrights: false,
680 detect_generated: false,
681 detect_emails: false,
682 detect_urls: false,
683 max_emails: 50,
684 max_urls: 50,
685 timeout_seconds: 120.0,
686 };
687 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
688
689 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
690 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
691 assert_eq!(
692 scanned.file_type_label.as_deref(),
693 Some("UTF-8 Unicode text")
694 );
695 assert_eq!(scanned.is_text, Some(true));
696 assert_eq!(scanned.is_media, Some(false));
697 assert_eq!(scanned.is_script, Some(false));
698 assert_eq!(scanned.is_source, Some(true));
699 }
700
701 #[test]
702 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
703 let options = TextDetectionOptions {
704 collect_info: true,
705 detect_packages: false,
706 detect_application_packages: false,
707 detect_system_packages: false,
708 detect_packages_in_compiled: false,
709 detect_copyrights: false,
710 detect_generated: false,
711 detect_emails: false,
712 detect_urls: false,
713 max_emails: 50,
714 max_urls: 50,
715 timeout_seconds: 120.0,
716 };
717 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
718
719 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
720 assert_eq!(
721 scanned.file_type_label.as_deref(),
722 Some("UTF-8 Unicode text")
723 );
724 assert_eq!(scanned.is_text, Some(true));
725 assert_eq!(scanned.is_media, Some(false));
726 assert_eq!(scanned.is_script, Some(false));
727 assert_eq!(scanned.is_source, Some(true));
728 }
729
730 #[test]
731 fn scanner_treats_empty_files_like_scancode_info_surface() {
732 let options = TextDetectionOptions {
733 collect_info: true,
734 detect_packages: false,
735 detect_application_packages: false,
736 detect_system_packages: false,
737 detect_packages_in_compiled: false,
738 detect_copyrights: false,
739 detect_generated: false,
740 detect_emails: false,
741 detect_urls: false,
742 max_emails: 50,
743 max_urls: 50,
744 timeout_seconds: 120.0,
745 };
746 let scanned = scan_single_file("test.txt", "", &options);
747
748 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
749 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
750 assert_eq!(scanned.programming_language, None);
751 assert_eq!(scanned.is_binary, Some(false));
752 assert_eq!(scanned.is_text, Some(true));
753 assert_eq!(scanned.is_archive, Some(false));
754 assert_eq!(scanned.is_media, Some(false));
755 assert_eq!(scanned.is_source, Some(false));
756 assert_eq!(scanned.is_script, Some(false));
757 }
758
759 #[test]
760 fn scanner_treats_package_json_as_text_not_source() {
761 let options = TextDetectionOptions {
762 collect_info: true,
763 detect_packages: false,
764 detect_application_packages: false,
765 detect_system_packages: false,
766 detect_packages_in_compiled: false,
767 detect_copyrights: false,
768 detect_generated: false,
769 detect_emails: false,
770 detect_urls: false,
771 max_emails: 50,
772 max_urls: 50,
773 timeout_seconds: 120.0,
774 };
775 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
776
777 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
778 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
779 assert_eq!(scanned.programming_language, None);
780 assert_eq!(scanned.is_text, Some(true));
781 assert_eq!(scanned.is_source, Some(false));
782 assert_eq!(scanned.is_script, Some(false));
783 }
784
785 #[test]
786 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
787 let options = TextDetectionOptions {
788 collect_info: true,
789 detect_packages: false,
790 detect_application_packages: false,
791 detect_system_packages: false,
792 detect_packages_in_compiled: false,
793 detect_copyrights: false,
794 detect_generated: false,
795 detect_emails: false,
796 detect_urls: false,
797 max_emails: 50,
798 max_urls: 50,
799 timeout_seconds: 120.0,
800 };
801
802 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
803 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
804
805 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
806 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
807 assert_eq!(gradle.is_source, Some(true));
808 assert_eq!(gradle.is_script, Some(false));
809
810 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
811 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
812 assert_eq!(nix.is_source, Some(true));
813 assert_eq!(nix.is_script, Some(false));
814 }
815
816 #[test]
817 fn scanner_treats_gitmodules_as_text_not_source() {
818 let options = TextDetectionOptions {
819 collect_info: true,
820 detect_packages: false,
821 detect_application_packages: false,
822 detect_system_packages: false,
823 detect_packages_in_compiled: false,
824 detect_copyrights: false,
825 detect_generated: false,
826 detect_emails: false,
827 detect_urls: false,
828 max_emails: 50,
829 max_urls: 50,
830 timeout_seconds: 120.0,
831 };
832 let scanned = scan_file_at_relative_path(
833 ".gitmodules",
834 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
835 &options,
836 );
837
838 assert_eq!(scanned.programming_language, None);
839 assert_eq!(
840 scanned.file_type_label.as_deref(),
841 Some("Git configuration text")
842 );
843 assert_eq!(scanned.is_text, Some(true));
844 assert_eq!(scanned.is_source, Some(false));
845 assert_eq!(scanned.is_script, Some(false));
846 }
847
848 #[test]
849 fn scanner_treats_javascript_shebang_files_as_scripts() {
850 let options = TextDetectionOptions {
851 collect_info: true,
852 detect_packages: false,
853 detect_application_packages: false,
854 detect_system_packages: false,
855 detect_packages_in_compiled: false,
856 detect_copyrights: false,
857 detect_generated: false,
858 detect_emails: false,
859 detect_urls: false,
860 max_emails: 50,
861 max_urls: 50,
862 timeout_seconds: 120.0,
863 };
864 let scanned = scan_file_at_relative_path(
865 "bin/run",
866 b"#!/usr/bin/env node\nconsole.log('hello');\n",
867 &options,
868 );
869
870 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
871 assert_eq!(
872 scanned.file_type_label.as_deref(),
873 Some("javascript script, UTF-8 Unicode text executable")
874 );
875 assert_eq!(scanned.is_script, Some(true));
876 assert_eq!(scanned.is_source, Some(true));
877 }
878
879 #[test]
880 fn scanner_treats_dockerfile_as_source() {
881 let options = TextDetectionOptions {
882 collect_info: true,
883 detect_packages: false,
884 detect_application_packages: false,
885 detect_system_packages: false,
886 detect_packages_in_compiled: false,
887 detect_copyrights: false,
888 detect_generated: false,
889 detect_emails: false,
890 detect_urls: false,
891 max_emails: 50,
892 max_urls: 50,
893 timeout_seconds: 120.0,
894 };
895 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
896
897 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
898 assert_eq!(
899 scanned.file_type_label.as_deref(),
900 Some("UTF-8 Unicode text")
901 );
902 assert_eq!(scanned.is_source, Some(true));
903 assert_eq!(scanned.is_script, Some(false));
904 }
905
906 #[test]
907 fn scanner_treats_makefile_as_text_not_source() {
908 let options = TextDetectionOptions {
909 collect_info: true,
910 detect_packages: false,
911 detect_application_packages: false,
912 detect_system_packages: false,
913 detect_packages_in_compiled: false,
914 detect_copyrights: false,
915 detect_generated: false,
916 detect_emails: false,
917 detect_urls: false,
918 max_emails: 50,
919 max_urls: 50,
920 timeout_seconds: 120.0,
921 };
922 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
923
924 assert_eq!(scanned.programming_language, None);
925 assert_eq!(
926 scanned.file_type_label.as_deref(),
927 Some("UTF-8 Unicode text")
928 );
929 assert_eq!(scanned.is_text, Some(true));
930 assert_eq!(scanned.is_source, Some(false));
931 assert_eq!(scanned.is_script, Some(false));
932 }
933
934 #[test]
935 fn scanner_omits_info_surface_when_disabled() {
936 let options = TextDetectionOptions {
937 collect_info: false,
938 detect_packages: false,
939 detect_application_packages: false,
940 detect_system_packages: false,
941 detect_packages_in_compiled: false,
942 detect_copyrights: false,
943 detect_generated: false,
944 detect_emails: false,
945 detect_urls: false,
946 max_emails: 50,
947 max_urls: 50,
948 timeout_seconds: 120.0,
949 };
950 let scanned = scan_single_file(
951 "script.py",
952 "#!/usr/bin/env python3\nprint(\"hello\")\n",
953 &options,
954 );
955
956 assert!(scanned.sha1.is_none());
957 assert!(scanned.md5.is_none());
958 assert!(scanned.sha256.is_none());
959 assert!(scanned.sha1_git.is_none());
960 assert!(scanned.mime_type.is_none());
961 assert!(scanned.date.is_none());
962 assert!(scanned.programming_language.is_none());
963 assert!(scanned.is_binary.is_none());
964 assert!(scanned.is_text.is_none());
965 assert!(scanned.is_archive.is_none());
966 assert!(scanned.is_media.is_none());
967 assert!(scanned.is_script.is_none());
968 assert!(scanned.is_source.is_none());
969 }
970
971 #[test]
972 fn scanner_skips_package_parsing_when_disabled() {
973 let options = TextDetectionOptions {
974 collect_info: false,
975 detect_packages: false,
976 detect_application_packages: false,
977 detect_system_packages: false,
978 detect_packages_in_compiled: false,
979 detect_copyrights: false,
980 detect_generated: false,
981 detect_emails: false,
982 detect_urls: false,
983 max_emails: 50,
984 max_urls: 50,
985 timeout_seconds: 120.0,
986 };
987 let scanned = scan_single_file(
988 "package.json",
989 r#"{"name":"demo","version":"1.0.0"}"#,
990 &options,
991 );
992
993 assert!(
994 scanned.package_data.is_empty(),
995 "package_data: {:#?}",
996 scanned.package_data
997 );
998 }
999
1000 #[test]
1001 fn scanner_parses_package_manifests_when_enabled() {
1002 let options = TextDetectionOptions {
1003 collect_info: false,
1004 detect_packages: true,
1005 detect_application_packages: true,
1006 detect_system_packages: false,
1007 detect_packages_in_compiled: false,
1008 detect_copyrights: false,
1009 detect_generated: false,
1010 detect_emails: false,
1011 detect_urls: false,
1012 max_emails: 50,
1013 max_urls: 50,
1014 timeout_seconds: 120.0,
1015 };
1016 let scanned = scan_single_file(
1017 "package.json",
1018 r#"{"name":"demo","version":"1.0.0"}"#,
1019 &options,
1020 );
1021
1022 assert_eq!(
1023 scanned.package_data.len(),
1024 1,
1025 "package_data: {:#?}",
1026 scanned.package_data
1027 );
1028 }
1029
1030 #[test]
1031 fn scanner_skips_application_packages_when_only_system_packages_enabled() {
1032 let options = TextDetectionOptions {
1033 collect_info: false,
1034 detect_packages: true,
1035 detect_application_packages: false,
1036 detect_system_packages: true,
1037 detect_packages_in_compiled: false,
1038 detect_copyrights: false,
1039 detect_generated: false,
1040 detect_emails: false,
1041 detect_urls: false,
1042 max_emails: 50,
1043 max_urls: 50,
1044 timeout_seconds: 120.0,
1045 };
1046 let scanned = scan_single_file(
1047 "package.json",
1048 r#"{"name":"demo","version":"1.0.0"}"#,
1049 &options,
1050 );
1051
1052 assert!(
1053 scanned.package_data.is_empty(),
1054 "package_data: {:#?}",
1055 scanned.package_data
1056 );
1057 }
1058
1059 #[test]
1060 fn scanner_parses_system_package_files_when_enabled() {
1061 let options = TextDetectionOptions {
1062 collect_info: false,
1063 detect_packages: true,
1064 detect_application_packages: false,
1065 detect_system_packages: true,
1066 detect_packages_in_compiled: false,
1067 detect_copyrights: false,
1068 detect_generated: false,
1069 detect_emails: false,
1070 detect_urls: false,
1071 max_emails: 50,
1072 max_urls: 50,
1073 timeout_seconds: 120.0,
1074 };
1075 let scanned = scan_file_at_relative_path(
1076 "var/lib/dpkg/status",
1077 b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n",
1078 &options,
1079 );
1080
1081 assert!(
1082 !scanned.package_data.is_empty(),
1083 "package_data: {:#?}",
1084 scanned.package_data
1085 );
1086 }
1087
/// Builds a tiny Go binary and scans it twice: a regular application package
/// scan must leave the binary's `package_data` empty, while the same scan
/// with `detect_packages_in_compiled` enabled must populate it.
///
/// The test returns early (skips) when no working `go` toolchain is found.
#[test]
fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
    // Probe with `output()` so `go version` does not write into the test
    // run's stdout, and treat a non-zero exit the same as a missing binary.
    let go_available = std::process::Command::new("go")
        .arg("version")
        .output()
        .is_ok_and(|probe| probe.status.success());
    if !go_available {
        eprintln!("skipping: no working `go` toolchain available");
        return;
    }

    let temp_dir = TempDir::new().expect("create temp dir");
    fs::write(
        temp_dir.path().join("go.mod"),
        "module example.com/demo\n\ngo 1.23.0\n",
    )
    .expect("write go.mod");
    fs::write(
        temp_dir.path().join("main.go"),
        "package main\nfunc main() {}\n",
    )
    .expect("write main.go");
    let file_path = temp_dir.path().join("demo");
    let status = std::process::Command::new("go")
        .current_dir(temp_dir.path())
        .args(["build", "-o"])
        .arg(&file_path)
        .status()
        .expect("run go build");
    assert!(status.success(), "go build failed: {}", status);

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);

    // The two scans differ only in `detect_packages_in_compiled`.
    let application_scan = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let compiled_scan = TextDetectionOptions {
        detect_packages_in_compiled: true,
        ..application_scan.clone()
    };

    let without_compiled = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &application_scan,
    );
    let with_compiled = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &compiled_scan,
    );

    let without_compiled = without_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");
    let with_compiled = with_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");

    assert!(
        without_compiled.package_data.is_empty(),
        "package_data: {:#?}",
        without_compiled.package_data
    );
    assert!(
        !with_compiled.package_data.is_empty(),
        "expected package data for the compiled binary when compiled package detection is on"
    );
}
1180
/// Scans a copy of the `libiconv2.dll` PE fixture twice: with package
/// detection off it must yield no package data, and with application package
/// detection on it must yield exactly one `Winexe` package reported by the
/// `WindowsExecutable` datasource.
#[test]
fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
    let scratch = TempDir::new().expect("create temp dir");
    let dll_path = scratch.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(scratch.path(), 0, &[]);

    // Every detector off (copyright detection defaults to on).
    let packages_off = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    // Same as above, plus application package detection.
    let packages_on = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        ..packages_off.clone()
    };

    let scan_without = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &packages_off,
    );
    let scan_with = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &packages_on,
    );

    let dll_without = scan_without
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");
    let dll_with = scan_with
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");

    assert!(dll_without.package_data.is_empty());
    assert_eq!(dll_with.package_data.len(), 1);

    let package = &dll_with.package_data[0];
    assert_eq!(package.package_type, Some(FilePackageType::Winexe));
    assert_eq!(package.datasource_id, Some(DatasourceId::WindowsExecutable));
}
1259
/// Scans a copy of the `Lato-Bold.ttf` fixture with the embedded license
/// engine and expects a license expression that names OFL-1.1.
#[test]
fn scanner_detects_license_from_font_metadata() {
    let scratch = TempDir::new().expect("create temp dir");
    let font_path = scratch.path().join("Lato-Bold.ttf");
    let font_bytes = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
    fs::write(&font_path, font_bytes).expect("write font fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(scratch.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let outcome = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    // The entry is looked up by the full path of the copied fixture.
    let wanted_path = font_path.to_string_lossy();
    let scanned = outcome
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == wanted_path)
        .expect("scanned file entry");

    let expression = scanned.license_expression.as_deref();
    assert!(
        expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    // Either spelling of the identifier is accepted.
    let names_ofl =
        matches!(expression, Some(text) if text.contains("OFL-1.1") || text.contains("ofl-1.1"));
    assert!(
        names_ofl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1302
/// Scans a copy of the `libiconv2.dll` PE fixture with the embedded license
/// engine and expects a license expression that mentions LGPL.
#[test]
fn scanner_detects_license_from_windows_executable_metadata() {
    let scratch = TempDir::new().expect("create temp dir");
    let dll_path = scratch.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(scratch.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let outcome = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    // The entry is looked up by the full path of the copied fixture.
    let wanted_path = dll_path.to_string_lossy();
    let scanned = outcome
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == wanted_path)
        .expect("scanned file entry");

    let expression = scanned.license_expression.as_deref();
    assert!(
        expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    // Either spelling of the identifier is accepted.
    let names_lgpl = matches!(expression, Some(text) if text.contains("lgpl") || text.contains("LGPL"));
    assert!(
        names_lgpl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1346
/// A Markdown file whose HTML comments state "CC BY 4.0" and link to the
/// Creative Commons BY 4.0 URL must be detected as CC-BY-4.0.
#[test]
fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
    let banner = "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n";
    let options = TextDetectionOptions::default();

    let scanned = scan_single_file_with_license_engine("navbar.md", banner, &options);

    // A missing expression is treated as an empty one, which fails the check.
    let expression = scanned.license_expression.as_deref().unwrap_or("");
    let names_cc_by = expression.contains("cc-by-4.0") || expression.contains("CC-BY-4.0");
    assert!(
        names_cc_by,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1366
/// A README consisting of a single Markdown link to
/// `https://opensource.org/licenses/MIT` must be detected as MIT.
#[test]
fn scanner_detects_mit_license_from_shields_badge_markdown() {
    // NOTE(review): the link text in this fixture is empty (`[]`), so only the
    // link target references MIT. The test name suggests a badge image was
    // intended as the link text; confirm the fixture is complete.
    let readme = "[](https://opensource.org/licenses/MIT)\n";
    let options = TextDetectionOptions::default();

    let scanned = scan_single_file_with_license_engine("README.md", readme, &options);

    // A missing expression is treated as an empty one, which fails the check.
    let expression = scanned.license_expression.as_deref().unwrap_or("");
    let names_mit = expression.contains("mit") || expression.contains("MIT");
    assert!(
        names_mit,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1386
/// A README sentence naming "the Apache License (Version 2.0)" must be
/// detected as Apache-2.0.
#[test]
fn scanner_detects_apache_license_from_markdown_readme_phrase() {
    let readme =
        "This crate is distributed under the terms of the Apache License (Version 2.0).\n";
    let options = TextDetectionOptions::default();

    let scanned = scan_single_file_with_license_engine("README.md", readme, &options);

    // A missing expression is treated as an empty one, which fails the check.
    let expression = scanned.license_expression.as_deref().unwrap_or("");
    let names_apache = expression.contains("apache-2.0") || expression.contains("Apache-2.0");
    assert!(
        names_apache,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1406
/// `is_source` must stay `None` when `collect_info` is off and become
/// `Some(true)` for a Rust source file when `collect_info` is on.
#[test]
fn scanner_sets_is_source_only_when_info_enabled() {
    // Builds options with every detector off (copyright detection defaults to
    // on) and `collect_info` set as requested.
    let options_with_info = |collect_info: bool| TextDetectionOptions {
        collect_info,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let info_off = options_with_info(false);
    let info_on = options_with_info(true);

    let source = "fn main() {}\n";
    let scanned_info_off = scan_single_file("main.rs", source, &info_off);
    let scanned_info_on = scan_single_file("main.rs", source, &info_on);

    assert_eq!(scanned_info_off.is_source, None);
    assert_eq!(scanned_info_on.is_source, Some(true));
}
1434
/// With `collect_info` off, a scanned directory entry must carry no info
/// fields: date, file type label and all classification flags stay unset.
#[test]
fn directory_omits_info_fields_when_info_disabled() {
    let scratch = TempDir::new().expect("create temp dir");
    fs::create_dir_all(scratch.path().join("nested")).expect("create nested dir");

    // Every detector off (copyright detection defaults to on).
    let nothing_enabled = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(scratch.path(), 0, &[]);
    let outcome = process_collected(
        &collected,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &nothing_enabled,
    );

    let nested = outcome
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    assert!(nested.date.is_none());
    assert!(nested.file_type_label.is_none());
    // All six classification flags are expected to be unset.
    assert_eq!(
        [
            nested.is_binary,
            nested.is_text,
            nested.is_archive,
            nested.is_media,
            nested.is_source,
            nested.is_script,
        ],
        [None; 6]
    );
}
1477
/// With `collect_info` on, an empty scanned directory must report every
/// classification flag as `Some(false)` and zero for its file, directory and
/// size counts, while date and file type label remain unset.
#[test]
fn directory_includes_info_fields_when_info_enabled() {
    let scratch = TempDir::new().expect("create temp dir");
    fs::create_dir_all(scratch.path().join("nested")).expect("create nested dir");

    // Only info collection is on (copyright detection defaults to on).
    let info_only = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(scratch.path(), 0, &[]);
    let outcome = process_collected(
        &collected,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &info_only,
    );

    let nested = outcome
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    assert!(nested.date.is_none());
    assert!(nested.file_type_label.is_none());
    // All six classification flags are expected to be explicitly false.
    assert_eq!(
        [
            nested.is_binary,
            nested.is_text,
            nested.is_archive,
            nested.is_media,
            nested.is_source,
            nested.is_script,
        ],
        [Some(false); 6]
    );
    assert_eq!(nested.files_count, Some(0));
    assert_eq!(nested.dirs_count, Some(0));
    assert_eq!(nested.size_count, Some(0));
}
1523
/// `collect_paths` must list the scan root itself among the collected
/// directories, not only the directories beneath it.
#[test]
fn collect_paths_includes_root_directory_entry() {
    let scratch = TempDir::new().expect("create temp dir");
    let root = scratch.path();
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir).expect("create nested dir");
    fs::write(source_dir.join("main.rs"), "fn main() {}").expect("write nested file");

    let collected = collect_paths(root, 0, &[]);

    let lists_root = collected.directories.iter().any(|(dir, _)| dir == root);
    assert!(lists_root);
}
1540
/// When given a single file instead of a directory, `collect_paths` must
/// return exactly that file and no directories.
#[test]
fn collect_paths_supports_single_file_input() {
    let scratch = TempDir::new().expect("create temp dir");
    let source_file = scratch.path().join("main.rs");
    fs::write(&source_file, "fn main() {}\n").expect("write file");

    let collected = collect_paths(&source_file, 0, &[]);

    assert_eq!(collected.files.len(), 1);
    assert!(collected.directories.is_empty());

    let only_entry = &collected.files[0];
    assert_eq!(only_entry.0, source_file);
}
1553
/// Processing two files under `MemoryMode::Limit(1)` must still return three
/// entries (presumably the root directory plus the two files).
#[test]
fn process_collected_with_memory_limit_preserves_results_when_spilling() {
    let scratch = TempDir::new().expect("create temp dir");
    fs::write(scratch.path().join("a.txt"), "hello").expect("write first file");
    fs::write(scratch.path().join("b.txt"), "world").expect("write second file");

    // Every detector off (copyright detection defaults to on).
    let nothing_enabled = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(scratch.path(), 0, &[]);
    let outcome = process_collected_with_memory_limit(
        &collected,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &nothing_enabled,
        MemoryMode::Limit(1),
    );

    let entry_count = outcome.files.len();
    assert_eq!(entry_count, 3);
}
1585
/// Processing a single file under `MemoryMode::StreamUnlimited` must return
/// two entries (presumably the root directory plus the file).
#[test]
fn process_collected_with_negative_one_uses_disk_only_mode() {
    let scratch = TempDir::new().expect("create temp dir");
    fs::write(scratch.path().join("a.txt"), "hello").expect("write first file");

    // Every detector off (copyright detection defaults to on).
    let nothing_enabled = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(scratch.path(), 0, &[]);
    let outcome = process_collected_with_memory_limit(
        &collected,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &nothing_enabled,
        MemoryMode::StreamUnlimited,
    );

    let entry_count = outcome.files.len();
    assert_eq!(entry_count, 2);
}
1616}