1mod collect;
2mod process;
3
4use crate::license_detection::LicenseDetectionEngine;
5use crate::models::FileInfo;
6
/// Result of a processing run over collected paths.
///
/// NOTE(review): presumably the value returned by the `process_collected*`
/// functions re-exported from this module — confirm in `process`.
pub struct ProcessResult {
    /// One entry per scanned filesystem item; the tests in this module look up
    /// individual files here by `path` and `file_type`.
    pub files: Vec<FileInfo>,
    /// Presumably the number of paths that were excluded from the scan —
    /// TODO confirm against the collector.
    pub excluded_count: usize,
}
11
/// Knobs for the license scanning step.
///
/// The type is `Copy`, so it is passed around by value. The derived `Default`
/// turns every flag off and sets `min_score` to `0`. Every field is recorded by
/// `scan_options_fingerprint`.
#[derive(Clone, Copy, Debug, Default)]
pub struct LicenseScanOptions {
    /// NOTE(review): presumably attaches the matched license text to results — confirm.
    pub include_text: bool,
    /// NOTE(review): presumably attaches diagnostics about the matched text — confirm.
    pub include_text_diagnostics: bool,
    /// NOTE(review): presumably attaches license-match diagnostics — confirm.
    pub include_diagnostics: bool,
    /// NOTE(review): presumably enables reporting of unknown licenses — confirm.
    pub unknown_licenses: bool,
    /// NOTE(review): presumably the minimum match score to report — confirm range and unit.
    pub min_score: u8,
}
20
/// Switches and limits for the per-file detection steps.
///
/// The tests in this module show what the main switches affect: `collect_info`
/// controls hashes, MIME type, language and the `is_*` classification flags;
/// the `detect_*` flags gate packages, copyrights, the generated marker,
/// emails and URLs.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Populate the file "info" fields (checksums, MIME type, language, `is_*` flags).
    pub collect_info: bool,
    /// Master switch for package detection.
    pub detect_packages: bool,
    /// Parse application package manifests (e.g. `package.json`).
    pub detect_application_packages: bool,
    /// Parse system package databases (e.g. `var/lib/dpkg/status`).
    pub detect_system_packages: bool,
    /// Also look for package data inside compiled artifacts.
    pub detect_packages_in_compiled: bool,
    /// Detect copyrights; the only detector enabled by `Default`.
    pub detect_copyrights: bool,
    /// Set the `is_generated` flag on scanned files.
    pub detect_generated: bool,
    /// Collect email addresses.
    pub detect_emails: bool,
    /// Collect URLs.
    pub detect_urls: bool,
    /// NOTE(review): presumably caps the emails reported per file — confirm.
    pub max_emails: usize,
    /// NOTE(review): presumably caps the URLs reported per file — confirm.
    pub max_urls: usize,
    /// NOTE(review): presumably a per-file time budget in seconds — confirm.
    pub timeout_seconds: f64,
}

impl Default for TextDetectionOptions {
    /// Copyright detection on, every other detector off, 50 emails / 50 URLs,
    /// and a 120 second timeout.
    fn default() -> Self {
        const DEFAULT_FINDING_LIMIT: usize = 50;
        const DEFAULT_TIMEOUT_SECONDS: f64 = 120.0;

        Self {
            // The only detector that is on by default.
            detect_copyrights: true,

            collect_info: false,
            detect_packages: false,
            detect_application_packages: false,
            detect_system_packages: false,
            detect_packages_in_compiled: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,

            max_emails: DEFAULT_FINDING_LIMIT,
            max_urls: DEFAULT_FINDING_LIMIT,
            timeout_seconds: DEFAULT_TIMEOUT_SECONDS,
        }
    }
}
55
56pub fn scan_options_fingerprint(
57 text_options: &TextDetectionOptions,
58 license_options: LicenseScanOptions,
59 license_engine: Option<&LicenseDetectionEngine>,
60) -> String {
61 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
62 Some(engine) => {
63 let rules = &engine.index().rules_by_rid;
64 (
65 true,
66 rules.len(),
67 rules
68 .first()
69 .map(|rule| rule.identifier.as_str())
70 .unwrap_or(""),
71 rules
72 .last()
73 .map(|rule| rule.identifier.as_str())
74 .unwrap_or(""),
75 )
76 }
77 None => (false, 0, "", ""),
78 };
79
80 format!(
81 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
82 crate::version::BUILD_VERSION,
83 text_options.collect_info,
84 text_options.detect_packages,
85 text_options.detect_application_packages,
86 text_options.detect_system_packages,
87 text_options.detect_packages_in_compiled,
88 text_options.detect_copyrights,
89 text_options.detect_generated,
90 text_options.detect_emails,
91 text_options.detect_urls,
92 text_options.max_emails,
93 text_options.max_urls,
94 text_options.timeout_seconds,
95 license_enabled,
96 rules_count,
97 first_rule_id,
98 last_rule_id,
99 license_options.include_text,
100 license_options.include_text_diagnostics,
101 license_options.include_diagnostics,
102 license_options.unknown_licenses,
103 license_options.min_score,
104 )
105}
106
107pub use self::collect::{CollectedPaths, collect_paths};
108#[allow(unused_imports)]
109pub use self::process::{
110 MemoryMode, process_collected, process_collected_sequential,
111 process_collected_with_memory_limit, process_collected_with_memory_limit_sequential,
112};
113
114#[cfg(test)]
115mod tests {
116 use std::fs;
117 use std::sync::Arc;
118
119 use tempfile::TempDir;
120
121 use crate::license_detection::LicenseDetectionEngine;
122 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
123 use crate::progress::{ProgressMode, ScanProgress};
124
125 use super::{
126 LicenseScanOptions, MemoryMode, TextDetectionOptions, collect_paths, process_collected,
127 process_collected_with_memory_limit,
128 };
129
/// The default options must leave package detection off and copyright
/// detection on.
#[test]
fn default_options_keep_copyright_detection_enabled() {
    let TextDetectionOptions {
        detect_packages,
        detect_copyrights,
        ..
    } = TextDetectionOptions::default();

    assert!(!detect_packages);
    assert!(detect_copyrights);
}
136
137 fn scan_single_file(
138 file_name: &str,
139 content: &str,
140 options: &TextDetectionOptions,
141 ) -> crate::models::FileInfo {
142 let temp_dir = TempDir::new().expect("create temp dir");
143 let file_path = temp_dir.path().join(file_name);
144 fs::write(&file_path, content).expect("write test file");
145
146 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
147 let collected = collect_paths(temp_dir.path(), 0, &[]);
148 let result = process_collected(
149 &collected,
150 progress,
151 None,
152 LicenseScanOptions::default(),
153 options,
154 );
155
156 result
157 .files
158 .into_iter()
159 .find(|entry| {
160 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
161 })
162 .expect("scanned file entry")
163 }
164
165 fn scan_file_at_relative_path(
166 relative_path: &str,
167 content: &[u8],
168 options: &TextDetectionOptions,
169 ) -> crate::models::FileInfo {
170 let temp_dir = TempDir::new().expect("create temp dir");
171 let file_path = temp_dir.path().join(relative_path);
172 if let Some(parent) = file_path.parent() {
173 fs::create_dir_all(parent).expect("create parent dirs");
174 }
175 fs::write(&file_path, content).expect("write test file");
176
177 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
178 let collected = collect_paths(temp_dir.path(), 0, &[]);
179 let result = process_collected(
180 &collected,
181 progress,
182 None,
183 LicenseScanOptions::default(),
184 options,
185 );
186
187 result
188 .files
189 .into_iter()
190 .find(|entry| {
191 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
192 })
193 .expect("scanned file entry")
194 }
195
196 fn scan_single_file_with_license_engine(
197 file_name: &str,
198 content: &str,
199 options: &TextDetectionOptions,
200 ) -> crate::models::FileInfo {
201 let temp_dir = TempDir::new().expect("create temp dir");
202 let file_path = temp_dir.path().join(file_name);
203 fs::write(&file_path, content).expect("write test file");
204
205 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
206 let collected = collect_paths(temp_dir.path(), 0, &[]);
207 let engine =
208 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
209 let result = process_collected(
210 &collected,
211 progress,
212 Some(engine),
213 LicenseScanOptions::default(),
214 options,
215 );
216
217 result
218 .files
219 .into_iter()
220 .find(|entry| {
221 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
222 })
223 .expect("scanned file entry")
224 }
225
/// Every occurrence of an email address is reported with its own line number,
/// even when the same address repeats.
#[test]
fn scanner_reports_repeated_email_occurrences() {
    // Only email detection is on; everything else keeps its default value.
    let options = TextDetectionOptions {
        detect_copyrights: false,
        detect_emails: true,
        ..TextDetectionOptions::default()
    };
    let scanned = scan_single_file(
        "contacts.txt",
        "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
        &options,
    );

    let emails: Vec<(&str, usize)> = scanned
        .emails
        .iter()
        .map(|hit| (hit.email.as_str(), hit.start_line.get()))
        .collect();
    let expected = vec![
        ("linux@3ware.com", 1),
        ("linux@3ware.com", 2),
        ("andre@suse.com", 3),
        ("linux@3ware.com", 4),
    ];

    assert_eq!(emails.len(), 4, "emails: {emails:#?}");
    assert_eq!(emails, expected);
}
265
/// A PEM certificate file yields no copyright, holder, author, email, URL or
/// license findings, even though its textual dump contains matching strings.
#[test]
fn scanner_skips_pem_certificate_text_detection() {
    // Copyright, email and URL detection are all on, so an empty result means
    // the file was skipped rather than the detectors being disabled.
    let options = TextDetectionOptions {
        detect_copyrights: true,
        detect_emails: true,
        detect_urls: true,
        ..TextDetectionOptions::default()
    };
    let pem_fixture = concat!(
        "-----BEGIN CERTIFICATE-----\n",
        "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
        "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
        "-----END CERTIFICATE-----\n",
        "Certificate:\n",
        " Data:\n",
        " Signature Algorithm: sha1WithRSAEncryption\n",
        " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
        " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
        " Contact: cert-owner@example.com\n",
    );

    let scanned = scan_single_file("cert.pem", pem_fixture, &options);

    // Copyright-related findings.
    assert!(
        scanned.copyrights.is_empty(),
        "copyrights: {:#?}",
        scanned.copyrights
    );
    assert!(
        scanned.holders.is_empty(),
        "holders: {:#?}",
        scanned.holders
    );
    assert!(
        scanned.authors.is_empty(),
        "authors: {:#?}",
        scanned.authors
    );

    // Contact findings.
    assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
    assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);

    // License findings.
    assert!(
        scanned.license_detections.is_empty(),
        "licenses: {:#?}",
        scanned.license_detections
    );
    assert!(
        scanned.license_clues.is_empty(),
        "license clues: {:#?}",
        scanned.license_clues
    );
}
324
/// A source file that merely embeds a PEM block must still have its license
/// header scanned: copyright, holder, URL and license expression are reported.
#[test]
fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
    let options = TextDetectionOptions {
        detect_copyrights: true,
        detect_urls: true,
        ..TextDetectionOptions::default()
    };
    let fixture = concat!(
        "/*\n",
        "Copyright 2022 The Kubernetes Authors.\n\n",
        "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
        "you may not use this file except in compliance with the License.\n",
        "You may obtain a copy of the License at\n\n",
        " http://www.apache.org/licenses/LICENSE-2.0\n",
        "*/\n\n",
        "package storage\n\n",
        "const validCert = `\n",
        "-----BEGIN CERTIFICATE-----\n",
        "MIIDmTCCAoGgAwIBAgIUWQ==\n",
        "-----END CERTIFICATE-----\n",
        "`\n",
    );

    // Reuse the shared helper instead of repeating the temp-dir / engine /
    // lookup sequence inline.
    let scanned = scan_single_file_with_license_engine("storage_test.go", fixture, &options);

    assert!(
        scanned
            .copyrights
            .iter()
            .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors"),
        "copyrights: {:#?}",
        scanned.copyrights
    );
    assert!(
        scanned
            .holders
            .iter()
            .any(|h| h.holder == "The Kubernetes Authors"),
        "holders: {:#?}",
        scanned.holders
    );
    assert!(
        scanned
            .urls
            .iter()
            .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
        "urls: {:#?}",
        scanned.urls
    );
    assert_eq!(scanned.license_expression.as_deref(), Some("Apache-2.0"));
}
405
/// A structured `CREDITS` entry (`N:` / `E:` / `W:` lines) is reported as one
/// author spanning all three lines, with no copyright or holder.
#[test]
fn scanner_detects_structured_credits_authors() {
    let options = TextDetectionOptions {
        detect_copyrights: true,
        ..TextDetectionOptions::default()
    };
    let credits_fixture = concat!(
        "N: Jack Lloyd\n",
        "E: lloyd@randombit.net\n",
        "W: http://www.randombit.net/\n",
    );

    let scanned = scan_single_file("CREDITS", credits_fixture, &options);

    // (author text, first line, last line)
    let authors: Vec<(&str, usize, usize)> = scanned
        .authors
        .iter()
        .map(|found| {
            let first_line = found.start_line.get();
            let last_line = found.end_line.get();
            (found.author.as_str(), first_line, last_line)
        })
        .collect();
    let expected = vec![(
        "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
        1,
        3,
    )];

    assert_eq!(authors, expected);
    assert!(scanned.copyrights.is_empty());
    assert!(scanned.holders.is_empty());
}
452
/// A header offering two alternative licenses is reported as a single
/// `OR` expression backed by one detection with two matches.
#[test]
fn scanner_uses_or_for_alternative_license_header() {
    let fixture =
        include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");

    // Reuse the shared helper instead of repeating the temp-dir / engine /
    // lookup sequence inline.
    let scanned = scan_single_file_with_license_engine(
        "d2s.ipp",
        fixture,
        &TextDetectionOptions::default(),
    );

    assert_eq!(
        scanned.license_expression.as_deref(),
        Some("Apache-2.0 OR BSL-1.0")
    );
    assert!(
        scanned.license_clues.is_empty(),
        "license clues: {:#?}",
        scanned.license_clues
    );
    assert_eq!(
        scanned.license_detections.len(),
        1,
        "detections: {:#?}",
        scanned.license_detections
    );

    let detection = &scanned.license_detections[0];
    assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");

    let match_expressions: Vec<_> = detection
        .matches
        .iter()
        .map(|m| m.license_expression_spdx.as_str())
        .collect();
    assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
}
506
/// With generated-code detection on, a "DO NOT EDIT" banner marks the file as
/// generated.
#[test]
fn scanner_sets_generated_flag_when_enabled() {
    let options = TextDetectionOptions {
        detect_copyrights: false,
        detect_generated: true,
        ..TextDetectionOptions::default()
    };
    let banner = "/* DO NOT EDIT THIS FILE - it is machine generated */\n";

    let scanned = scan_single_file("generated.c", banner, &options);

    assert_eq!(scanned.is_generated, Some(true));
}
531
/// With generated-code detection off, the flag stays `None` even for a file
/// carrying a "DO NOT EDIT" banner.
#[test]
fn scanner_leaves_generated_flag_unset_when_disabled() {
    let options = TextDetectionOptions {
        detect_copyrights: false,
        detect_generated: false,
        ..TextDetectionOptions::default()
    };
    let banner = "/* DO NOT EDIT THIS FILE - it is machine generated */\n";

    let scanned = scan_single_file("generated.c", banner, &options);

    assert_eq!(scanned.is_generated, None);
}
556
/// With `collect_info` on, checksums, MIME type, date, language and the
/// classification flags are all populated for a Python script.
#[test]
fn scanner_populates_info_surface_when_enabled() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let script = "#!/usr/bin/env python3\nprint(\"hello\")\n";

    let scanned = scan_single_file("script.py", script, &options);

    // Checksums and basic metadata.
    assert!(scanned.sha1.is_some());
    assert!(scanned.md5.is_some());
    assert!(scanned.sha256.is_some());
    assert!(scanned.sha1_git.is_some());
    assert!(scanned.mime_type.is_some());
    assert!(scanned.date.is_some());

    // Classification.
    assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_script, Some(true));
    assert_eq!(scanned.is_source, Some(true));
}
590
/// A Python source containing a non-UTF-8 (Latin-1) byte is still classified
/// as a textual Python script rather than as binary.
#[test]
fn scanner_treats_latin1_python_sources_as_textual_scripts() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    // `\xe9` is "é" in Latin-1 and is not valid UTF-8 on its own.
    let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";

    let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);

    assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("python script, text executable")
    );
    assert_eq!(scanned.is_binary, Some(false));
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_script, Some(true));
    assert_eq!(scanned.is_source, Some(true));
}
620
/// A file that starts with a ZIP signature is classified as an archive and
/// produces no license, copyright, email or URL findings from its raw bytes.
#[test]
fn scanner_skips_findings_for_zip_like_archives() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: true,
        detect_emails: true,
        detect_urls: true,
        ..TextDetectionOptions::default()
    };
    // ZIP local-file-header magic followed by text the detectors would
    // otherwise pick up.
    let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";

    let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);

    assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
    assert_eq!(scanned.is_archive, Some(true));
    assert!(scanned.license_detections.is_empty());
    assert!(scanned.copyrights.is_empty());
    assert!(scanned.emails.is_empty());
    assert!(scanned.urls.is_empty());
}
647
/// A `.ts` file with TypeScript code is reported as plain-text TypeScript
/// source, not as media.
#[test]
fn scanner_treats_typescript_sources_as_text_not_video_media() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let source = "export const answer: number = 42;\n";

    let scanned = scan_single_file("main.ts", source, &options);

    assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
    assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("UTF-8 Unicode text")
    );
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_media, Some(false));
    assert_eq!(scanned.is_script, Some(false));
    assert_eq!(scanned.is_source, Some(true));
}
677
/// A `.ts` file containing only a comment is still reported as plain-text
/// source rather than as media.
#[test]
fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let source = "// comment-only TypeScript fixture\n";

    let scanned = scan_single_file("main.ts", source, &options);

    assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("UTF-8 Unicode text")
    );
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_media, Some(false));
    assert_eq!(scanned.is_script, Some(false));
    assert_eq!(scanned.is_source, Some(true));
}
706
/// An empty file gets the `inode/x-empty` MIME type and the `empty` label, is
/// flagged as text, and has every other classification flag set to false.
#[test]
fn scanner_treats_empty_files_like_scancode_info_surface() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let scanned = scan_single_file("test.txt", "", &options);

    assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
    assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
    assert_eq!(scanned.programming_language, None);

    // Classification flags.
    assert_eq!(scanned.is_binary, Some(false));
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_archive, Some(false));
    assert_eq!(scanned.is_media, Some(false));
    assert_eq!(scanned.is_source, Some(false));
    assert_eq!(scanned.is_script, Some(false));
}
735
/// `package.json` is reported as JSON text data with no programming language,
/// and is neither source nor script.
#[test]
fn scanner_treats_package_json_as_text_not_source() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo"}"#;

    let scanned = scan_single_file("package.json", manifest, &options);

    assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
    assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
    assert_eq!(scanned.programming_language, None);
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_source, Some(false));
    assert_eq!(scanned.is_script, Some(false));
}
761
/// Gradle build files and Nix flakes are classified as source (Groovy and Nix
/// respectively) but not as scripts.
#[test]
fn scanner_classifies_gradle_and_nix_manifests_as_source() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    // Gradle build file.
    let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
    // Nix flake.
    let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);

    assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
    assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
    assert_eq!(gradle.is_source, Some(true));
    assert_eq!(gradle.is_script, Some(false));

    assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
    assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
    assert_eq!(nix.is_source, Some(true));
    assert_eq!(nix.is_script, Some(false));
}
792
/// `.gitmodules` is labelled as Git configuration text with no programming
/// language, and is neither source nor script.
#[test]
fn scanner_treats_gitmodules_as_text_not_source() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let gitmodules = b"[submodule \"demo\"]\n\tpath = vendor/demo\n";

    let scanned = scan_file_at_relative_path(".gitmodules", gitmodules, &options);

    assert_eq!(scanned.programming_language, None);
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("Git configuration text")
    );
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_source, Some(false));
    assert_eq!(scanned.is_script, Some(false));
}
824
/// An extensionless file with a `node` shebang is classified as a JavaScript
/// script and as source.
#[test]
fn scanner_treats_javascript_shebang_files_as_scripts() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let launcher = b"#!/usr/bin/env node\nconsole.log('hello');\n";

    let scanned = scan_file_at_relative_path("bin/run", launcher, &options);

    assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("javascript script, UTF-8 Unicode text executable")
    );
    assert_eq!(scanned.is_script, Some(true));
    assert_eq!(scanned.is_source, Some(true));
}
855
/// A `Dockerfile` is classified as source in the "Dockerfile" language, but
/// not as a script.
#[test]
fn scanner_treats_dockerfile_as_source() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);

    assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("UTF-8 Unicode text")
    );
    assert_eq!(scanned.is_source, Some(true));
    assert_eq!(scanned.is_script, Some(false));
}
882
/// A `Makefile` is plain text with no programming language, and is neither
/// source nor script.
#[test]
fn scanner_treats_makefile_as_text_not_source() {
    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);

    assert_eq!(scanned.programming_language, None);
    assert_eq!(
        scanned.file_type_label.as_deref(),
        Some("UTF-8 Unicode text")
    );
    assert_eq!(scanned.is_text, Some(true));
    assert_eq!(scanned.is_source, Some(false));
    assert_eq!(scanned.is_script, Some(false));
}
910
/// With `collect_info` off, none of the info fields (checksums, MIME type,
/// date, language, classification flags) are populated.
#[test]
fn scanner_omits_info_surface_when_disabled() {
    let options = TextDetectionOptions {
        collect_info: false,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let script = "#!/usr/bin/env python3\nprint(\"hello\")\n";

    let scanned = scan_single_file("script.py", script, &options);

    // Checksums and basic metadata.
    assert!(scanned.sha1.is_none());
    assert!(scanned.md5.is_none());
    assert!(scanned.sha256.is_none());
    assert!(scanned.sha1_git.is_none());
    assert!(scanned.mime_type.is_none());
    assert!(scanned.date.is_none());

    // Classification.
    assert!(scanned.programming_language.is_none());
    assert!(scanned.is_binary.is_none());
    assert!(scanned.is_text.is_none());
    assert!(scanned.is_archive.is_none());
    assert!(scanned.is_media.is_none());
    assert!(scanned.is_script.is_none());
    assert!(scanned.is_source.is_none());
}
947
/// With package detection off, a `package.json` manifest yields no package data.
#[test]
fn scanner_skips_package_parsing_when_disabled() {
    let options = TextDetectionOptions {
        detect_packages: false,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo","version":"1.0.0"}"#;

    let scanned = scan_single_file("package.json", manifest, &options);

    assert!(
        scanned.package_data.is_empty(),
        "package_data: {:#?}",
        scanned.package_data
    );
}
976
/// With application package detection on, a `package.json` manifest yields
/// exactly one package record.
#[test]
fn scanner_parses_package_manifests_when_enabled() {
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo","version":"1.0.0"}"#;

    let scanned = scan_single_file("package.json", manifest, &options);

    assert_eq!(
        scanned.package_data.len(),
        1,
        "package_data: {:#?}",
        scanned.package_data
    );
}
1006
/// When only system package detection is on, an application manifest such as
/// `package.json` is not parsed.
#[test]
fn scanner_skips_application_packages_when_only_system_packages_enabled() {
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: false,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo","version":"1.0.0"}"#;

    let scanned = scan_single_file("package.json", manifest, &options);

    assert!(
        scanned.package_data.is_empty(),
        "package_data: {:#?}",
        scanned.package_data
    );
}
1035
/// With system package detection on, a dpkg status database yields package data.
#[test]
fn scanner_parses_system_package_files_when_enabled() {
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: false,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let dpkg_status =
        b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n";

    let scanned = scan_file_at_relative_path("var/lib/dpkg/status", dpkg_status, &options);

    assert!(
        !scanned.package_data.is_empty(),
        "package_data: {:#?}",
        scanned.package_data
    );
}
1064
/// A compiled Go binary only yields package data when
/// `detect_packages_in_compiled` is enabled.
///
/// The fixture is built with the local Go toolchain. When `go` cannot be
/// launched the test returns early, printing a notice so the skip is visible
/// in the test output instead of looking like a silent pass.
#[test]
fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
    if std::process::Command::new("go")
        .arg("version")
        .status()
        .is_err()
    {
        eprintln!(
            "skipping scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled: \
             `go` toolchain not available"
        );
        return;
    }

    // Build a minimal Go program into `<temp>/demo`.
    let temp_dir = TempDir::new().expect("create temp dir");
    fs::write(
        temp_dir.path().join("go.mod"),
        "module example.com/demo\n\ngo 1.23.0\n",
    )
    .expect("write go.mod");
    fs::write(
        temp_dir.path().join("main.go"),
        "package main\nfunc main() {}\n",
    )
    .expect("write main.go");
    let file_path = temp_dir.path().join("demo");
    let status = std::process::Command::new("go")
        .current_dir(temp_dir.path())
        .args(["build", "-o"])
        .arg(&file_path)
        .status()
        .expect("run go build");
    assert!(status.success());

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);

    // Scans the same collected paths and returns the entry for the built
    // binary; the two runs differ only in `detect_packages_in_compiled`.
    let scan_artifact = |detect_packages_in_compiled: bool| {
        let options = TextDetectionOptions {
            detect_packages: true,
            detect_application_packages: true,
            detect_packages_in_compiled,
            detect_copyrights: false,
            ..TextDetectionOptions::default()
        };
        process_collected(
            &collected,
            Arc::clone(&progress),
            None,
            LicenseScanOptions::default(),
            &options,
        )
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present")
    };

    let without_compiled = scan_artifact(false);
    let with_compiled = scan_artifact(true);

    assert!(
        without_compiled.package_data.is_empty(),
        "package_data: {:#?}",
        without_compiled.package_data
    );
    assert!(!with_compiled.package_data.is_empty());
}
1157
1158 #[test]
1159 fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
1160 let temp_dir = TempDir::new().expect("create temp dir");
1161 let file_path = temp_dir.path().join("libiconv2.dll");
1162 let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
1163 .expect("read PE fixture");
1164 fs::write(&file_path, fixture).expect("write PE fixture");
1165
1166 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1167 let collected = collect_paths(temp_dir.path(), 0, &[]);
1168
1169 let without_package = process_collected(
1170 &collected,
1171 Arc::clone(&progress),
1172 None,
1173 LicenseScanOptions::default(),
1174 &TextDetectionOptions {
1175 collect_info: false,
1176 detect_packages: false,
1177 detect_application_packages: false,
1178 detect_system_packages: false,
1179 detect_packages_in_compiled: false,
1180 detect_copyrights: false,
1181 detect_generated: false,
1182 detect_emails: false,
1183 detect_urls: false,
1184 max_emails: 50,
1185 max_urls: 50,
1186 timeout_seconds: 120.0,
1187 },
1188 );
1189 let with_package = process_collected(
1190 &collected,
1191 progress,
1192 None,
1193 LicenseScanOptions::default(),
1194 &TextDetectionOptions {
1195 collect_info: false,
1196 detect_packages: true,
1197 detect_application_packages: true,
1198 detect_system_packages: false,
1199 detect_packages_in_compiled: false,
1200 detect_copyrights: false,
1201 detect_generated: false,
1202 detect_emails: false,
1203 detect_urls: false,
1204 max_emails: 50,
1205 max_urls: 50,
1206 timeout_seconds: 120.0,
1207 },
1208 );
1209
1210 let without_package = without_package
1211 .files
1212 .into_iter()
1213 .find(|entry| {
1214 entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll")
1215 })
1216 .expect("compiled artifact present");
1217 let with_package = with_package
1218 .files
1219 .into_iter()
1220 .find(|entry| {
1221 entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll")
1222 })
1223 .expect("compiled artifact present");
1224
1225 assert!(without_package.package_data.is_empty());
1226 assert_eq!(with_package.package_data.len(), 1);
1227 assert_eq!(
1228 with_package.package_data[0].package_type,
1229 Some(FilePackageType::Winexe)
1230 );
1231 assert_eq!(
1232 with_package.package_data[0].datasource_id,
1233 Some(DatasourceId::WindowsExecutable)
1234 );
1235 }
1236
1237 #[test]
1238 fn scanner_detects_license_from_font_metadata() {
1239 let temp_dir = TempDir::new().expect("create temp dir");
1240 let file_path = temp_dir.path().join("Lato-Bold.ttf");
1241 let fixture = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
1242 fs::write(&file_path, fixture).expect("write font fixture");
1243
1244 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1245 let collected = collect_paths(temp_dir.path(), 0, &[]);
1246 let engine =
1247 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
1248 let result = process_collected(
1249 &collected,
1250 progress,
1251 Some(engine),
1252 LicenseScanOptions::default(),
1253 &TextDetectionOptions::default(),
1254 );
1255 let scanned = result
1256 .files
1257 .into_iter()
1258 .find(|entry| {
1259 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1260 })
1261 .expect("scanned file entry");
1262
1263 assert!(
1264 scanned.license_expression.is_some(),
1265 "license detections: {:#?}",
1266 scanned.license_detections
1267 );
1268 assert!(
1269 scanned
1270 .license_expression
1271 .as_deref()
1272 .is_some_and(
1273 |expression| expression.contains("OFL-1.1") || expression.contains("ofl-1.1")
1274 ),
1275 "license expression: {:?}",
1276 scanned.license_expression
1277 );
1278 }
1279
1280 #[test]
1281 fn scanner_detects_license_from_windows_executable_metadata() {
1282 let temp_dir = TempDir::new().expect("create temp dir");
1283 let file_path = temp_dir.path().join("libiconv2.dll");
1284 let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
1285 .expect("read PE fixture");
1286 fs::write(&file_path, fixture).expect("write PE fixture");
1287
1288 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1289 let collected = collect_paths(temp_dir.path(), 0, &[]);
1290 let engine =
1291 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
1292 let result = process_collected(
1293 &collected,
1294 progress,
1295 Some(engine),
1296 LicenseScanOptions::default(),
1297 &TextDetectionOptions::default(),
1298 );
1299 let scanned = result
1300 .files
1301 .into_iter()
1302 .find(|entry| {
1303 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1304 })
1305 .expect("scanned file entry");
1306
1307 assert!(
1308 scanned.license_expression.is_some(),
1309 "license detections: {:#?}",
1310 scanned.license_detections
1311 );
1312 assert!(
1313 scanned
1314 .license_expression
1315 .as_deref()
1316 .is_some_and(|expression| {
1317 expression.contains("lgpl") || expression.contains("LGPL")
1318 }),
1319 "license expression: {:?}",
1320 scanned.license_expression
1321 );
1322 }
1323
1324 #[test]
1325 fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
1326 let scanned = scan_single_file_with_license_engine(
1327 "navbar.md",
1328 "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n",
1329 &TextDetectionOptions::default(),
1330 );
1331
1332 assert!(
1333 scanned
1334 .license_expression
1335 .as_deref()
1336 .is_some_and(|expression| {
1337 expression.contains("cc-by-4.0") || expression.contains("CC-BY-4.0")
1338 }),
1339 "license expression: {:?}",
1340 scanned.license_expression
1341 );
1342 }
1343
1344 #[test]
1345 fn scanner_detects_mit_license_from_shields_badge_markdown() {
1346 let scanned = scan_single_file_with_license_engine(
1347 "README.md",
1348 "[](https://opensource.org/licenses/MIT)\n",
1349 &TextDetectionOptions::default(),
1350 );
1351
1352 assert!(
1353 scanned
1354 .license_expression
1355 .as_deref()
1356 .is_some_and(|expression| {
1357 expression.contains("mit") || expression.contains("MIT")
1358 }),
1359 "license expression: {:?}",
1360 scanned.license_expression
1361 );
1362 }
1363
1364 #[test]
1365 fn scanner_detects_apache_license_from_markdown_readme_phrase() {
1366 let scanned = scan_single_file_with_license_engine(
1367 "README.md",
1368 "This crate is distributed under the terms of the Apache License (Version 2.0).\n",
1369 &TextDetectionOptions::default(),
1370 );
1371
1372 assert!(
1373 scanned
1374 .license_expression
1375 .as_deref()
1376 .is_some_and(|expression| {
1377 expression.contains("apache-2.0") || expression.contains("Apache-2.0")
1378 }),
1379 "license expression: {:?}",
1380 scanned.license_expression
1381 );
1382 }
1383
1384 #[test]
1385 fn scanner_sets_is_source_only_when_info_enabled() {
1386 let without_info = TextDetectionOptions {
1387 collect_info: false,
1388 detect_packages: false,
1389 detect_application_packages: false,
1390 detect_system_packages: false,
1391 detect_packages_in_compiled: false,
1392 detect_copyrights: false,
1393 detect_generated: false,
1394 detect_emails: false,
1395 detect_urls: false,
1396 max_emails: 50,
1397 max_urls: 50,
1398 timeout_seconds: 120.0,
1399 };
1400 let with_info = TextDetectionOptions {
1401 collect_info: true,
1402 ..without_info.clone()
1403 };
1404
1405 let scanned_without_info = scan_single_file("main.rs", "fn main() {}\n", &without_info);
1406 let scanned_with_info = scan_single_file("main.rs", "fn main() {}\n", &with_info);
1407
1408 assert_eq!(scanned_without_info.is_source, None);
1409 assert_eq!(scanned_with_info.is_source, Some(true));
1410 }
1411
1412 #[test]
1413 fn directory_omits_info_fields_when_info_disabled() {
1414 let temp_dir = TempDir::new().expect("create temp dir");
1415 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
1416
1417 let collected = collect_paths(temp_dir.path(), 0, &[]);
1418 let result = process_collected(
1419 &collected,
1420 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1421 None,
1422 LicenseScanOptions::default(),
1423 &TextDetectionOptions {
1424 collect_info: false,
1425 detect_packages: false,
1426 detect_application_packages: false,
1427 detect_system_packages: false,
1428 detect_packages_in_compiled: false,
1429 detect_copyrights: false,
1430 detect_generated: false,
1431 detect_emails: false,
1432 detect_urls: false,
1433 max_emails: 50,
1434 max_urls: 50,
1435 timeout_seconds: 120.0,
1436 },
1437 );
1438
1439 let directory = result
1440 .files
1441 .into_iter()
1442 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1443 .expect("directory entry");
1444
1445 assert!(directory.date.is_none());
1446 assert!(directory.file_type_label.is_none());
1447 assert!(directory.is_binary.is_none());
1448 assert!(directory.is_text.is_none());
1449 assert!(directory.is_archive.is_none());
1450 assert!(directory.is_media.is_none());
1451 assert!(directory.is_source.is_none());
1452 assert!(directory.is_script.is_none());
1453 }
1454
1455 #[test]
1456 fn directory_includes_info_fields_when_info_enabled() {
1457 let temp_dir = TempDir::new().expect("create temp dir");
1458 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
1459
1460 let collected = collect_paths(temp_dir.path(), 0, &[]);
1461 let result = process_collected(
1462 &collected,
1463 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1464 None,
1465 LicenseScanOptions::default(),
1466 &TextDetectionOptions {
1467 collect_info: true,
1468 detect_packages: false,
1469 detect_application_packages: false,
1470 detect_system_packages: false,
1471 detect_packages_in_compiled: false,
1472 detect_copyrights: false,
1473 detect_generated: false,
1474 detect_emails: false,
1475 detect_urls: false,
1476 max_emails: 50,
1477 max_urls: 50,
1478 timeout_seconds: 120.0,
1479 },
1480 );
1481
1482 let directory = result
1483 .files
1484 .into_iter()
1485 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1486 .expect("directory entry");
1487
1488 assert!(directory.date.is_none());
1489 assert!(directory.file_type_label.is_none());
1490 assert_eq!(directory.is_binary, Some(false));
1491 assert_eq!(directory.is_text, Some(false));
1492 assert_eq!(directory.is_archive, Some(false));
1493 assert_eq!(directory.is_media, Some(false));
1494 assert_eq!(directory.is_source, Some(false));
1495 assert_eq!(directory.is_script, Some(false));
1496 assert_eq!(directory.files_count, Some(0));
1497 assert_eq!(directory.dirs_count, Some(0));
1498 assert_eq!(directory.size_count, Some(0));
1499 }
1500
1501 #[test]
1502 fn collect_paths_includes_root_directory_entry() {
1503 let temp_dir = TempDir::new().expect("create temp dir");
1504 fs::create_dir_all(temp_dir.path().join("src")).expect("create nested dir");
1505 fs::write(temp_dir.path().join("src").join("main.rs"), "fn main() {}")
1506 .expect("write nested file");
1507
1508 let collected = collect_paths(temp_dir.path(), 0, &[]);
1509
1510 assert!(
1511 collected
1512 .directories
1513 .iter()
1514 .any(|(path, _)| path == temp_dir.path())
1515 );
1516 }
1517
1518 #[test]
1519 fn collect_paths_supports_single_file_input() {
1520 let temp_dir = TempDir::new().expect("create temp dir");
1521 let file_path = temp_dir.path().join("main.rs");
1522 fs::write(&file_path, "fn main() {}\n").expect("write file");
1523
1524 let collected = collect_paths(&file_path, 0, &[]);
1525
1526 assert_eq!(collected.files.len(), 1);
1527 assert!(collected.directories.is_empty());
1528 assert_eq!(collected.files[0].0, file_path);
1529 }
1530
1531 #[test]
1532 fn process_collected_with_memory_limit_preserves_results_when_spilling() {
1533 let temp_dir = TempDir::new().expect("create temp dir");
1534 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1535 fs::write(temp_dir.path().join("b.txt"), "world").expect("write second file");
1536
1537 let collected = collect_paths(temp_dir.path(), 0, &[]);
1538 let result = process_collected_with_memory_limit(
1539 &collected,
1540 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1541 None,
1542 LicenseScanOptions::default(),
1543 &TextDetectionOptions {
1544 collect_info: false,
1545 detect_packages: false,
1546 detect_application_packages: false,
1547 detect_system_packages: false,
1548 detect_packages_in_compiled: false,
1549 detect_copyrights: false,
1550 detect_generated: false,
1551 detect_emails: false,
1552 detect_urls: false,
1553 max_emails: 50,
1554 max_urls: 50,
1555 timeout_seconds: 120.0,
1556 },
1557 MemoryMode::Limit(1),
1558 );
1559
1560 assert_eq!(result.files.len(), 3);
1561 }
1562
1563 #[test]
1564 fn process_collected_with_negative_one_uses_disk_only_mode() {
1565 let temp_dir = TempDir::new().expect("create temp dir");
1566 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1567
1568 let collected = collect_paths(temp_dir.path(), 0, &[]);
1569 let result = process_collected_with_memory_limit(
1570 &collected,
1571 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1572 None,
1573 LicenseScanOptions::default(),
1574 &TextDetectionOptions {
1575 collect_info: false,
1576 detect_packages: false,
1577 detect_application_packages: false,
1578 detect_system_packages: false,
1579 detect_packages_in_compiled: false,
1580 detect_copyrights: false,
1581 detect_generated: false,
1582 detect_emails: false,
1583 detect_urls: false,
1584 max_emails: 50,
1585 max_urls: 50,
1586 timeout_seconds: 120.0,
1587 },
1588 MemoryMode::StreamUnlimited,
1589 );
1590
1591 assert_eq!(result.files.len(), 2);
1592 }
1593}