1mod collect;
2mod process;
3
4use crate::license_detection::LicenseDetectionEngine;
5use crate::models::FileInfo;
6
7pub struct ProcessResult {
8 pub files: Vec<FileInfo>,
9 pub excluded_count: usize,
10}
11
/// Switches that shape license scanning output.
///
/// The derived `Default` leaves every flag `false` and `min_score` at `0`.
/// The type is `Copy`, so it is passed around by value.
#[derive(Clone, Copy, Debug, Default)]
pub struct LicenseScanOptions {
    /// Presumably includes matched license text in the output — confirm.
    pub include_text: bool,
    /// Presumably includes diagnostics about matched text — confirm.
    pub include_text_diagnostics: bool,
    /// Presumably includes license detection diagnostics — confirm.
    pub include_diagnostics: bool,
    /// Presumably enables detection of unknown licenses — confirm.
    pub unknown_licenses: bool,
    /// Score threshold; NOTE(review): looks like a 0-100 minimum match score — confirm.
    pub min_score: u8,
}
20
/// Per-scan switches and limits for the non-license detectors.
///
/// Field meanings below are partly inferred from names and from how the tests
/// in this file use them; items marked "presumably" should be confirmed
/// against the processing code.
#[derive(Clone, Debug)]
pub struct TextDetectionOptions {
    /// Populate file info fields (hashes, mime type, language, `is_*` flags).
    pub collect_info: bool,
    /// Master switch for package manifest parsing.
    pub detect_packages: bool,
    /// Parse application package manifests (e.g. `package.json`).
    pub detect_application_packages: bool,
    /// Parse system package databases (e.g. a dpkg `status` file).
    pub detect_system_packages: bool,
    /// Presumably extracts package data from compiled binaries — confirm.
    pub detect_packages_in_compiled: bool,
    /// Detect copyrights, holders and authors.
    pub detect_copyrights: bool,
    /// Set the `is_generated` flag on scanned files.
    pub detect_generated: bool,
    /// Detect email addresses.
    pub detect_emails: bool,
    /// Detect URLs.
    pub detect_urls: bool,
    /// Presumably an upper bound on reported emails per file — confirm.
    pub max_emails: usize,
    /// Presumably an upper bound on reported URLs per file — confirm.
    pub max_urls: usize,
    /// Presumably a per-file time budget in seconds — confirm.
    pub timeout_seconds: f64,
}
36
37impl Default for TextDetectionOptions {
38 fn default() -> Self {
39 Self {
40 collect_info: false,
41 detect_packages: false,
42 detect_application_packages: false,
43 detect_system_packages: false,
44 detect_packages_in_compiled: false,
45 detect_copyrights: true,
46 detect_generated: false,
47 detect_emails: false,
48 detect_urls: false,
49 max_emails: 50,
50 max_urls: 50,
51 timeout_seconds: 120.0,
52 }
53 }
54}
55
56pub fn scan_options_fingerprint(
57 text_options: &TextDetectionOptions,
58 license_options: LicenseScanOptions,
59 license_engine: Option<&LicenseDetectionEngine>,
60) -> String {
61 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
62 Some(engine) => {
63 let rules = &engine.index().rules_by_rid;
64 (
65 true,
66 rules.len(),
67 rules
68 .first()
69 .map(|rule| rule.identifier.as_str())
70 .unwrap_or(""),
71 rules
72 .last()
73 .map(|rule| rule.identifier.as_str())
74 .unwrap_or(""),
75 )
76 }
77 None => (false, 0, "", ""),
78 };
79
80 format!(
81 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
82 env!("CARGO_PKG_VERSION"),
83 text_options.collect_info,
84 text_options.detect_packages,
85 text_options.detect_application_packages,
86 text_options.detect_system_packages,
87 text_options.detect_packages_in_compiled,
88 text_options.detect_copyrights,
89 text_options.detect_generated,
90 text_options.detect_emails,
91 text_options.detect_urls,
92 text_options.max_emails,
93 text_options.max_urls,
94 text_options.timeout_seconds,
95 license_enabled,
96 rules_count,
97 first_rule_id,
98 last_rule_id,
99 license_options.include_text,
100 license_options.include_text_diagnostics,
101 license_options.include_diagnostics,
102 license_options.unknown_licenses,
103 license_options.min_score,
104 )
105}
106
107pub use self::collect::{CollectedPaths, collect_paths};
108#[allow(unused_imports)]
109pub use self::process::{process_collected, process_collected_with_memory_limit};
110
111#[cfg(test)]
112mod tests {
113 use std::fs;
114 use std::sync::Arc;
115
116 use tempfile::TempDir;
117
118 use crate::license_detection::LicenseDetectionEngine;
119 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
120 use crate::progress::{ProgressMode, ScanProgress};
121
122 use super::{
123 LicenseScanOptions, TextDetectionOptions, collect_paths, process_collected,
124 process_collected_with_memory_limit,
125 };
126
127 #[test]
128 fn default_options_keep_copyright_detection_enabled() {
129 let options = TextDetectionOptions::default();
130 assert!(!options.detect_packages);
131 assert!(options.detect_copyrights);
132 }
133
134 fn scan_single_file(
135 file_name: &str,
136 content: &str,
137 options: &TextDetectionOptions,
138 ) -> crate::models::FileInfo {
139 let temp_dir = TempDir::new().expect("create temp dir");
140 let file_path = temp_dir.path().join(file_name);
141 fs::write(&file_path, content).expect("write test file");
142
143 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
144 let collected = collect_paths(temp_dir.path(), 0, &[]);
145 let result = process_collected(
146 &collected,
147 progress,
148 None,
149 LicenseScanOptions::default(),
150 options,
151 );
152
153 result
154 .files
155 .into_iter()
156 .find(|entry| {
157 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
158 })
159 .expect("scanned file entry")
160 }
161
162 fn scan_file_at_relative_path(
163 relative_path: &str,
164 content: &[u8],
165 options: &TextDetectionOptions,
166 ) -> crate::models::FileInfo {
167 let temp_dir = TempDir::new().expect("create temp dir");
168 let file_path = temp_dir.path().join(relative_path);
169 if let Some(parent) = file_path.parent() {
170 fs::create_dir_all(parent).expect("create parent dirs");
171 }
172 fs::write(&file_path, content).expect("write test file");
173
174 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
175 let collected = collect_paths(temp_dir.path(), 0, &[]);
176 let result = process_collected(
177 &collected,
178 progress,
179 None,
180 LicenseScanOptions::default(),
181 options,
182 );
183
184 result
185 .files
186 .into_iter()
187 .find(|entry| {
188 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
189 })
190 .expect("scanned file entry")
191 }
192
193 fn scan_single_file_with_license_engine(
194 file_name: &str,
195 content: &str,
196 options: &TextDetectionOptions,
197 ) -> crate::models::FileInfo {
198 let temp_dir = TempDir::new().expect("create temp dir");
199 let file_path = temp_dir.path().join(file_name);
200 fs::write(&file_path, content).expect("write test file");
201
202 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
203 let collected = collect_paths(temp_dir.path(), 0, &[]);
204 let engine =
205 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
206 let result = process_collected(
207 &collected,
208 progress,
209 Some(engine),
210 LicenseScanOptions::default(),
211 options,
212 );
213
214 result
215 .files
216 .into_iter()
217 .find(|entry| {
218 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
219 })
220 .expect("scanned file entry")
221 }
222
223 #[test]
224 fn scanner_reports_repeated_email_occurrences() {
225 let options = TextDetectionOptions {
226 collect_info: false,
227 detect_packages: false,
228 detect_application_packages: false,
229 detect_system_packages: false,
230 detect_packages_in_compiled: false,
231 detect_copyrights: false,
232 detect_generated: false,
233 detect_emails: true,
234 detect_urls: false,
235 max_emails: 50,
236 max_urls: 50,
237 timeout_seconds: 120.0,
238 };
239 let scanned = scan_single_file(
240 "contacts.txt",
241 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
242 &options,
243 );
244
245 let emails: Vec<(&str, usize)> = scanned
246 .emails
247 .iter()
248 .map(|email| (email.email.as_str(), email.start_line))
249 .collect();
250
251 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
252 assert_eq!(
253 emails,
254 vec![
255 ("linux@3ware.com", 1),
256 ("linux@3ware.com", 2),
257 ("andre@suse.com", 3),
258 ("linux@3ware.com", 4),
259 ]
260 );
261 }
262
263 #[test]
264 fn scanner_skips_pem_certificate_text_detection() {
265 let options = TextDetectionOptions {
266 collect_info: false,
267 detect_packages: false,
268 detect_application_packages: false,
269 detect_system_packages: false,
270 detect_packages_in_compiled: false,
271 detect_copyrights: true,
272 detect_generated: false,
273 detect_emails: true,
274 detect_urls: true,
275 max_emails: 50,
276 max_urls: 50,
277 timeout_seconds: 120.0,
278 };
279 let pem_fixture = concat!(
280 "-----BEGIN CERTIFICATE-----\n",
281 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
282 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
283 "-----END CERTIFICATE-----\n",
284 "Certificate:\n",
285 " Data:\n",
286 " Signature Algorithm: sha1WithRSAEncryption\n",
287 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
288 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
289 " Contact: cert-owner@example.com\n",
290 );
291 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
292
293 assert!(
294 scanned.copyrights.is_empty(),
295 "copyrights: {:#?}",
296 scanned.copyrights
297 );
298 assert!(
299 scanned.holders.is_empty(),
300 "holders: {:#?}",
301 scanned.holders
302 );
303 assert!(
304 scanned.authors.is_empty(),
305 "authors: {:#?}",
306 scanned.authors
307 );
308 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
309 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
310 assert!(
311 scanned.license_detections.is_empty(),
312 "licenses: {:#?}",
313 scanned.license_detections
314 );
315 assert!(
316 scanned.license_clues.is_empty(),
317 "license clues: {:#?}",
318 scanned.license_clues
319 );
320 }
321
322 #[test]
323 fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
324 let options = TextDetectionOptions {
325 collect_info: false,
326 detect_packages: false,
327 detect_application_packages: false,
328 detect_system_packages: false,
329 detect_packages_in_compiled: false,
330 detect_copyrights: true,
331 detect_generated: false,
332 detect_emails: false,
333 detect_urls: true,
334 max_emails: 50,
335 max_urls: 50,
336 timeout_seconds: 120.0,
337 };
338 let fixture = concat!(
339 "/*\n",
340 "Copyright 2022 The Kubernetes Authors.\n\n",
341 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
342 "you may not use this file except in compliance with the License.\n",
343 "You may obtain a copy of the License at\n\n",
344 " http://www.apache.org/licenses/LICENSE-2.0\n",
345 "*/\n\n",
346 "package storage\n\n",
347 "const validCert = `\n",
348 "-----BEGIN CERTIFICATE-----\n",
349 "MIIDmTCCAoGgAwIBAgIUWQ==\n",
350 "-----END CERTIFICATE-----\n",
351 "`\n",
352 );
353 let temp_dir = TempDir::new().expect("create temp dir");
354 let file_path = temp_dir.path().join("storage_test.go");
355 fs::write(&file_path, fixture).expect("write fixture");
356
357 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
358 let collected = collect_paths(temp_dir.path(), 0, &[]);
359 let engine =
360 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
361 let result = process_collected(
362 &collected,
363 progress,
364 Some(engine),
365 LicenseScanOptions::default(),
366 &options,
367 );
368 let scanned = result
369 .files
370 .into_iter()
371 .find(|entry| {
372 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
373 })
374 .expect("scanned file entry");
375
376 assert!(
377 scanned
378 .copyrights
379 .iter()
380 .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors"),
381 "copyrights: {:#?}",
382 scanned.copyrights
383 );
384 assert!(
385 scanned
386 .holders
387 .iter()
388 .any(|h| h.holder == "The Kubernetes Authors"),
389 "holders: {:#?}",
390 scanned.holders
391 );
392 assert!(
393 scanned
394 .urls
395 .iter()
396 .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
397 "urls: {:#?}",
398 scanned.urls
399 );
400 assert_eq!(scanned.license_expression.as_deref(), Some("Apache-2.0"));
401 }
402
403 #[test]
404 fn scanner_detects_structured_credits_authors() {
405 let options = TextDetectionOptions {
406 collect_info: false,
407 detect_packages: false,
408 detect_application_packages: false,
409 detect_system_packages: false,
410 detect_packages_in_compiled: false,
411 detect_copyrights: true,
412 detect_generated: false,
413 detect_emails: false,
414 detect_urls: false,
415 max_emails: 50,
416 max_urls: 50,
417 timeout_seconds: 120.0,
418 };
419 let credits_fixture = concat!(
420 "N: Jack Lloyd\n",
421 "E: lloyd@randombit.net\n",
422 "W: http://www.randombit.net/\n",
423 );
424 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
425
426 let authors: Vec<(&str, usize, usize)> = scanned
427 .authors
428 .iter()
429 .map(|author| (author.author.as_str(), author.start_line, author.end_line))
430 .collect();
431
432 assert_eq!(
433 authors,
434 vec![(
435 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
436 1,
437 3,
438 )]
439 );
440 assert!(scanned.copyrights.is_empty());
441 assert!(scanned.holders.is_empty());
442 }
443
444 #[test]
445 fn scanner_uses_or_for_alternative_license_header() {
446 let fixture =
447 include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");
448 let temp_dir = TempDir::new().expect("create temp dir");
449 let file_path = temp_dir.path().join("d2s.ipp");
450 fs::write(&file_path, fixture).expect("write fixture");
451
452 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
453 let collected = collect_paths(temp_dir.path(), 0, &[]);
454 let engine =
455 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
456 let result = process_collected(
457 &collected,
458 progress,
459 Some(engine),
460 LicenseScanOptions::default(),
461 &TextDetectionOptions::default(),
462 );
463 let scanned = result
464 .files
465 .into_iter()
466 .find(|entry| {
467 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
468 })
469 .expect("scanned file entry");
470
471 assert_eq!(
472 scanned.license_expression.as_deref(),
473 Some("Apache-2.0 OR BSL-1.0")
474 );
475 assert!(
476 scanned.license_clues.is_empty(),
477 "license clues: {:#?}",
478 scanned.license_clues
479 );
480 assert_eq!(
481 scanned.license_detections.len(),
482 1,
483 "detections: {:#?}",
484 scanned.license_detections
485 );
486
487 let detection = &scanned.license_detections[0];
488 assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");
489
490 let match_expressions: Vec<_> = detection
491 .matches
492 .iter()
493 .map(|m| m.license_expression_spdx.as_str())
494 .collect();
495 assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
496 }
497
498 #[test]
499 fn scanner_sets_generated_flag_when_enabled() {
500 let options = TextDetectionOptions {
501 collect_info: false,
502 detect_packages: false,
503 detect_application_packages: false,
504 detect_system_packages: false,
505 detect_packages_in_compiled: false,
506 detect_copyrights: false,
507 detect_generated: true,
508 detect_emails: false,
509 detect_urls: false,
510 max_emails: 50,
511 max_urls: 50,
512 timeout_seconds: 120.0,
513 };
514 let scanned = scan_single_file(
515 "generated.c",
516 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
517 &options,
518 );
519
520 assert_eq!(scanned.is_generated, Some(true));
521 }
522
523 #[test]
524 fn scanner_leaves_generated_flag_unset_when_disabled() {
525 let options = TextDetectionOptions {
526 collect_info: false,
527 detect_packages: false,
528 detect_application_packages: false,
529 detect_system_packages: false,
530 detect_packages_in_compiled: false,
531 detect_copyrights: false,
532 detect_generated: false,
533 detect_emails: false,
534 detect_urls: false,
535 max_emails: 50,
536 max_urls: 50,
537 timeout_seconds: 120.0,
538 };
539 let scanned = scan_single_file(
540 "generated.c",
541 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
542 &options,
543 );
544
545 assert_eq!(scanned.is_generated, None);
546 }
547
548 #[test]
549 fn scanner_populates_info_surface_when_enabled() {
550 let options = TextDetectionOptions {
551 collect_info: true,
552 detect_packages: false,
553 detect_application_packages: false,
554 detect_system_packages: false,
555 detect_packages_in_compiled: false,
556 detect_copyrights: false,
557 detect_generated: false,
558 detect_emails: false,
559 detect_urls: false,
560 max_emails: 50,
561 max_urls: 50,
562 timeout_seconds: 120.0,
563 };
564 let scanned = scan_single_file(
565 "script.py",
566 "#!/usr/bin/env python3\nprint(\"hello\")\n",
567 &options,
568 );
569
570 assert!(scanned.sha1.is_some());
571 assert!(scanned.md5.is_some());
572 assert!(scanned.sha256.is_some());
573 assert!(scanned.sha1_git.is_some());
574 assert!(scanned.mime_type.is_some());
575 assert!(scanned.date.is_some());
576 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
577 assert_eq!(scanned.is_text, Some(true));
578 assert_eq!(scanned.is_script, Some(true));
579 assert_eq!(scanned.is_source, Some(true));
580 }
581
582 #[test]
583 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
584 let options = TextDetectionOptions {
585 collect_info: true,
586 detect_packages: false,
587 detect_application_packages: false,
588 detect_system_packages: false,
589 detect_packages_in_compiled: false,
590 detect_copyrights: false,
591 detect_generated: false,
592 detect_emails: false,
593 detect_urls: false,
594 max_emails: 50,
595 max_urls: 50,
596 timeout_seconds: 120.0,
597 };
598 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
599 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
600
601 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
602 assert_eq!(
603 scanned.file_type_label.as_deref(),
604 Some("python script, text executable")
605 );
606 assert_eq!(scanned.is_binary, Some(false));
607 assert_eq!(scanned.is_text, Some(true));
608 assert_eq!(scanned.is_script, Some(true));
609 assert_eq!(scanned.is_source, Some(true));
610 }
611
612 #[test]
613 fn scanner_skips_findings_for_zip_like_archives() {
614 let options = TextDetectionOptions {
615 collect_info: true,
616 detect_packages: false,
617 detect_application_packages: false,
618 detect_system_packages: false,
619 detect_packages_in_compiled: false,
620 detect_copyrights: true,
621 detect_generated: false,
622 detect_emails: true,
623 detect_urls: true,
624 max_emails: 50,
625 max_urls: 50,
626 timeout_seconds: 120.0,
627 };
628 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
629 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
630
631 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
632 assert_eq!(scanned.is_archive, Some(true));
633 assert!(scanned.license_detections.is_empty());
634 assert!(scanned.copyrights.is_empty());
635 assert!(scanned.emails.is_empty());
636 assert!(scanned.urls.is_empty());
637 }
638
639 #[test]
640 fn scanner_treats_typescript_sources_as_text_not_video_media() {
641 let options = TextDetectionOptions {
642 collect_info: true,
643 detect_packages: false,
644 detect_application_packages: false,
645 detect_system_packages: false,
646 detect_packages_in_compiled: false,
647 detect_copyrights: false,
648 detect_generated: false,
649 detect_emails: false,
650 detect_urls: false,
651 max_emails: 50,
652 max_urls: 50,
653 timeout_seconds: 120.0,
654 };
655 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
656
657 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
658 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
659 assert_eq!(
660 scanned.file_type_label.as_deref(),
661 Some("UTF-8 Unicode text")
662 );
663 assert_eq!(scanned.is_text, Some(true));
664 assert_eq!(scanned.is_media, Some(false));
665 assert_eq!(scanned.is_script, Some(false));
666 assert_eq!(scanned.is_source, Some(true));
667 }
668
669 #[test]
670 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
671 let options = TextDetectionOptions {
672 collect_info: true,
673 detect_packages: false,
674 detect_application_packages: false,
675 detect_system_packages: false,
676 detect_packages_in_compiled: false,
677 detect_copyrights: false,
678 detect_generated: false,
679 detect_emails: false,
680 detect_urls: false,
681 max_emails: 50,
682 max_urls: 50,
683 timeout_seconds: 120.0,
684 };
685 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
686
687 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
688 assert_eq!(
689 scanned.file_type_label.as_deref(),
690 Some("UTF-8 Unicode text")
691 );
692 assert_eq!(scanned.is_text, Some(true));
693 assert_eq!(scanned.is_media, Some(false));
694 assert_eq!(scanned.is_script, Some(false));
695 assert_eq!(scanned.is_source, Some(true));
696 }
697
698 #[test]
699 fn scanner_treats_empty_files_like_scancode_info_surface() {
700 let options = TextDetectionOptions {
701 collect_info: true,
702 detect_packages: false,
703 detect_application_packages: false,
704 detect_system_packages: false,
705 detect_packages_in_compiled: false,
706 detect_copyrights: false,
707 detect_generated: false,
708 detect_emails: false,
709 detect_urls: false,
710 max_emails: 50,
711 max_urls: 50,
712 timeout_seconds: 120.0,
713 };
714 let scanned = scan_single_file("test.txt", "", &options);
715
716 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
717 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
718 assert_eq!(scanned.programming_language, None);
719 assert_eq!(scanned.is_binary, Some(false));
720 assert_eq!(scanned.is_text, Some(true));
721 assert_eq!(scanned.is_archive, Some(false));
722 assert_eq!(scanned.is_media, Some(false));
723 assert_eq!(scanned.is_source, Some(false));
724 assert_eq!(scanned.is_script, Some(false));
725 }
726
727 #[test]
728 fn scanner_treats_package_json_as_text_not_source() {
729 let options = TextDetectionOptions {
730 collect_info: true,
731 detect_packages: false,
732 detect_application_packages: false,
733 detect_system_packages: false,
734 detect_packages_in_compiled: false,
735 detect_copyrights: false,
736 detect_generated: false,
737 detect_emails: false,
738 detect_urls: false,
739 max_emails: 50,
740 max_urls: 50,
741 timeout_seconds: 120.0,
742 };
743 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
744
745 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
746 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
747 assert_eq!(scanned.programming_language, None);
748 assert_eq!(scanned.is_text, Some(true));
749 assert_eq!(scanned.is_source, Some(false));
750 assert_eq!(scanned.is_script, Some(false));
751 }
752
753 #[test]
754 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
755 let options = TextDetectionOptions {
756 collect_info: true,
757 detect_packages: false,
758 detect_application_packages: false,
759 detect_system_packages: false,
760 detect_packages_in_compiled: false,
761 detect_copyrights: false,
762 detect_generated: false,
763 detect_emails: false,
764 detect_urls: false,
765 max_emails: 50,
766 max_urls: 50,
767 timeout_seconds: 120.0,
768 };
769
770 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
771 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
772
773 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
774 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
775 assert_eq!(gradle.is_source, Some(true));
776 assert_eq!(gradle.is_script, Some(false));
777
778 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
779 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
780 assert_eq!(nix.is_source, Some(true));
781 assert_eq!(nix.is_script, Some(false));
782 }
783
784 #[test]
785 fn scanner_treats_gitmodules_as_text_not_source() {
786 let options = TextDetectionOptions {
787 collect_info: true,
788 detect_packages: false,
789 detect_application_packages: false,
790 detect_system_packages: false,
791 detect_packages_in_compiled: false,
792 detect_copyrights: false,
793 detect_generated: false,
794 detect_emails: false,
795 detect_urls: false,
796 max_emails: 50,
797 max_urls: 50,
798 timeout_seconds: 120.0,
799 };
800 let scanned = scan_file_at_relative_path(
801 ".gitmodules",
802 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
803 &options,
804 );
805
806 assert_eq!(scanned.programming_language, None);
807 assert_eq!(
808 scanned.file_type_label.as_deref(),
809 Some("Git configuration text")
810 );
811 assert_eq!(scanned.is_text, Some(true));
812 assert_eq!(scanned.is_source, Some(false));
813 assert_eq!(scanned.is_script, Some(false));
814 }
815
816 #[test]
817 fn scanner_treats_javascript_shebang_files_as_scripts() {
818 let options = TextDetectionOptions {
819 collect_info: true,
820 detect_packages: false,
821 detect_application_packages: false,
822 detect_system_packages: false,
823 detect_packages_in_compiled: false,
824 detect_copyrights: false,
825 detect_generated: false,
826 detect_emails: false,
827 detect_urls: false,
828 max_emails: 50,
829 max_urls: 50,
830 timeout_seconds: 120.0,
831 };
832 let scanned = scan_file_at_relative_path(
833 "bin/run",
834 b"#!/usr/bin/env node\nconsole.log('hello');\n",
835 &options,
836 );
837
838 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
839 assert_eq!(
840 scanned.file_type_label.as_deref(),
841 Some("javascript script, UTF-8 Unicode text executable")
842 );
843 assert_eq!(scanned.is_script, Some(true));
844 assert_eq!(scanned.is_source, Some(true));
845 }
846
847 #[test]
848 fn scanner_treats_dockerfile_as_source() {
849 let options = TextDetectionOptions {
850 collect_info: true,
851 detect_packages: false,
852 detect_application_packages: false,
853 detect_system_packages: false,
854 detect_packages_in_compiled: false,
855 detect_copyrights: false,
856 detect_generated: false,
857 detect_emails: false,
858 detect_urls: false,
859 max_emails: 50,
860 max_urls: 50,
861 timeout_seconds: 120.0,
862 };
863 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
864
865 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
866 assert_eq!(
867 scanned.file_type_label.as_deref(),
868 Some("UTF-8 Unicode text")
869 );
870 assert_eq!(scanned.is_source, Some(true));
871 assert_eq!(scanned.is_script, Some(false));
872 }
873
874 #[test]
875 fn scanner_treats_makefile_as_text_not_source() {
876 let options = TextDetectionOptions {
877 collect_info: true,
878 detect_packages: false,
879 detect_application_packages: false,
880 detect_system_packages: false,
881 detect_packages_in_compiled: false,
882 detect_copyrights: false,
883 detect_generated: false,
884 detect_emails: false,
885 detect_urls: false,
886 max_emails: 50,
887 max_urls: 50,
888 timeout_seconds: 120.0,
889 };
890 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
891
892 assert_eq!(scanned.programming_language, None);
893 assert_eq!(
894 scanned.file_type_label.as_deref(),
895 Some("UTF-8 Unicode text")
896 );
897 assert_eq!(scanned.is_text, Some(true));
898 assert_eq!(scanned.is_source, Some(false));
899 assert_eq!(scanned.is_script, Some(false));
900 }
901
902 #[test]
903 fn scanner_omits_info_surface_when_disabled() {
904 let options = TextDetectionOptions {
905 collect_info: false,
906 detect_packages: false,
907 detect_application_packages: false,
908 detect_system_packages: false,
909 detect_packages_in_compiled: false,
910 detect_copyrights: false,
911 detect_generated: false,
912 detect_emails: false,
913 detect_urls: false,
914 max_emails: 50,
915 max_urls: 50,
916 timeout_seconds: 120.0,
917 };
918 let scanned = scan_single_file(
919 "script.py",
920 "#!/usr/bin/env python3\nprint(\"hello\")\n",
921 &options,
922 );
923
924 assert!(scanned.sha1.is_none());
925 assert!(scanned.md5.is_none());
926 assert!(scanned.sha256.is_none());
927 assert!(scanned.sha1_git.is_none());
928 assert!(scanned.mime_type.is_none());
929 assert!(scanned.date.is_none());
930 assert!(scanned.programming_language.is_none());
931 assert!(scanned.is_binary.is_none());
932 assert!(scanned.is_text.is_none());
933 assert!(scanned.is_archive.is_none());
934 assert!(scanned.is_media.is_none());
935 assert!(scanned.is_script.is_none());
936 assert!(scanned.is_source.is_none());
937 }
938
939 #[test]
940 fn scanner_skips_package_parsing_when_disabled() {
941 let options = TextDetectionOptions {
942 collect_info: false,
943 detect_packages: false,
944 detect_application_packages: false,
945 detect_system_packages: false,
946 detect_packages_in_compiled: false,
947 detect_copyrights: false,
948 detect_generated: false,
949 detect_emails: false,
950 detect_urls: false,
951 max_emails: 50,
952 max_urls: 50,
953 timeout_seconds: 120.0,
954 };
955 let scanned = scan_single_file(
956 "package.json",
957 r#"{"name":"demo","version":"1.0.0"}"#,
958 &options,
959 );
960
961 assert!(
962 scanned.package_data.is_empty(),
963 "package_data: {:#?}",
964 scanned.package_data
965 );
966 }
967
968 #[test]
969 fn scanner_parses_package_manifests_when_enabled() {
970 let options = TextDetectionOptions {
971 collect_info: false,
972 detect_packages: true,
973 detect_application_packages: true,
974 detect_system_packages: false,
975 detect_packages_in_compiled: false,
976 detect_copyrights: false,
977 detect_generated: false,
978 detect_emails: false,
979 detect_urls: false,
980 max_emails: 50,
981 max_urls: 50,
982 timeout_seconds: 120.0,
983 };
984 let scanned = scan_single_file(
985 "package.json",
986 r#"{"name":"demo","version":"1.0.0"}"#,
987 &options,
988 );
989
990 assert_eq!(
991 scanned.package_data.len(),
992 1,
993 "package_data: {:#?}",
994 scanned.package_data
995 );
996 }
997
998 #[test]
999 fn scanner_skips_application_packages_when_only_system_packages_enabled() {
1000 let options = TextDetectionOptions {
1001 collect_info: false,
1002 detect_packages: true,
1003 detect_application_packages: false,
1004 detect_system_packages: true,
1005 detect_packages_in_compiled: false,
1006 detect_copyrights: false,
1007 detect_generated: false,
1008 detect_emails: false,
1009 detect_urls: false,
1010 max_emails: 50,
1011 max_urls: 50,
1012 timeout_seconds: 120.0,
1013 };
1014 let scanned = scan_single_file(
1015 "package.json",
1016 r#"{"name":"demo","version":"1.0.0"}"#,
1017 &options,
1018 );
1019
1020 assert!(
1021 scanned.package_data.is_empty(),
1022 "package_data: {:#?}",
1023 scanned.package_data
1024 );
1025 }
1026
1027 #[test]
1028 fn scanner_parses_system_package_files_when_enabled() {
1029 let options = TextDetectionOptions {
1030 collect_info: false,
1031 detect_packages: true,
1032 detect_application_packages: false,
1033 detect_system_packages: true,
1034 detect_packages_in_compiled: false,
1035 detect_copyrights: false,
1036 detect_generated: false,
1037 detect_emails: false,
1038 detect_urls: false,
1039 max_emails: 50,
1040 max_urls: 50,
1041 timeout_seconds: 120.0,
1042 };
1043 let scanned = scan_file_at_relative_path(
1044 "var/lib/dpkg/status",
1045 b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n",
1046 &options,
1047 );
1048
1049 assert!(
1050 !scanned.package_data.is_empty(),
1051 "package_data: {:#?}",
1052 scanned.package_data
1053 );
1054 }
1055
/// Builds a tiny Go binary and checks that package data is attached to it only
/// when `detect_packages_in_compiled` is set.
///
/// The test returns early (effectively skipping itself) when the `go` command
/// cannot be launched.
#[test]
fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
    let go_launchable = std::process::Command::new("go")
        .arg("version")
        .status()
        .is_ok();
    if !go_launchable {
        return;
    }

    // Lay out a minimal Go module and compile it into `<temp>/demo`.
    let temp_dir = TempDir::new().expect("create temp dir");
    let project_root = temp_dir.path();
    fs::write(
        project_root.join("go.mod"),
        "module example.com/demo\n\ngo 1.23.0\n",
    )
    .expect("write go.mod");
    fs::write(
        project_root.join("main.go"),
        "package main\nfunc main() {}\n",
    )
    .expect("write main.go");

    let binary_path = project_root.join("demo");
    let build_status = std::process::Command::new("go")
        .current_dir(project_root)
        .args(["build", "-o"])
        .arg(&binary_path)
        .status()
        .expect("run go build");
    assert!(build_status.success());

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(project_root, 0, &[]);

    // The two option sets differ only in `detect_packages_in_compiled`.
    let compiled_off = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let compiled_on = TextDetectionOptions {
        detect_packages_in_compiled: true,
        ..compiled_off.clone()
    };

    let scan_without_compiled = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &compiled_off,
    );
    let scan_with_compiled = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &compiled_on,
    );

    // Pick the entry for the built binary out of each result set.
    let without_compiled = scan_without_compiled
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");
    let with_compiled = scan_with_compiled
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");

    assert!(
        without_compiled.package_data.is_empty(),
        "package_data: {:#?}",
        without_compiled.package_data
    );
    assert!(!with_compiled.package_data.is_empty());
}
1148
/// A PE fixture yields exactly one `Winexe` package under an ordinary
/// application-package scan (with `detect_packages_in_compiled` left off), and
/// no package data when package detection is disabled.
#[test]
fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let dll_path = temp_dir.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);

    // Every detector off, then the same set with application packages on.
    let packages_off = TextDetectionOptions {
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let packages_on = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        ..packages_off.clone()
    };

    let scan_without_package = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &packages_off,
    );
    let scan_with_package = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &packages_on,
    );

    let without_package = scan_without_package
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");
    let with_package = scan_with_package
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");

    assert!(without_package.package_data.is_empty());
    assert_eq!(with_package.package_data.len(), 1);

    // The length check above guarantees index 0 exists.
    let package = &with_package.package_data[0];
    assert_eq!(package.package_type, Some(FilePackageType::Winexe));
    assert_eq!(
        package.datasource_id,
        Some(DatasourceId::WindowsExecutable)
    );
}
1227
/// Scanning the Lato font fixture with the embedded license engine must yield
/// a license expression that names OFL-1.1 (in either letter case).
#[test]
fn scanner_detects_license_from_font_metadata() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let font_path = temp_dir.path().join("Lato-Bold.ttf");
    let font_bytes = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
    fs::write(&font_path, font_bytes).expect("write font fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));

    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    // Entries are matched on the exact path of the copied fixture.
    let expected_path = font_path.to_string_lossy();
    let scanned = result
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    let expression = scanned.license_expression.as_deref();
    assert!(
        expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    let names_ofl = expression.is_some_and(|text| {
        ["OFL-1.1", "ofl-1.1"]
            .iter()
            .any(|key| text.contains(*key))
    });
    assert!(
        names_ofl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1270
/// Scanning the PE fixture with the embedded license engine must yield a
/// license expression that mentions LGPL (in either letter case).
#[test]
fn scanner_detects_license_from_windows_executable_metadata() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let dll_path = temp_dir.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));

    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    // Entries are matched on the exact path of the copied fixture.
    let expected_path = dll_path.to_string_lossy();
    let scanned = result
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    let expression = scanned.license_expression.as_deref();
    assert!(
        expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    let names_lgpl = expression.is_some_and(|text| {
        ["lgpl", "LGPL"].iter().any(|key| text.contains(*key))
    });
    assert!(
        names_lgpl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1314
/// A pair of HTML comments in a markdown file that reference CC BY 4.0 must be
/// reported as a `cc-by-4.0` license expression (either letter case accepted).
#[test]
fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
    let banner = "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n";

    let scanned = scan_single_file_with_license_engine(
        "navbar.md",
        banner,
        &TextDetectionOptions::default(),
    );

    let names_cc_by = scanned
        .license_expression
        .as_deref()
        .is_some_and(|text| {
            ["cc-by-4.0", "CC-BY-4.0"]
                .iter()
                .any(|key| text.contains(*key))
        });
    assert!(
        names_cc_by,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1334
/// A README consisting of a markdown link to the opensource.org MIT page must
/// be reported with a license expression mentioning MIT (either letter case).
#[test]
fn scanner_detects_mit_license_from_shields_badge_markdown() {
    let readme = "[](https://opensource.org/licenses/MIT)\n";

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    let names_mit = scanned
        .license_expression
        .as_deref()
        .is_some_and(|text| ["mit", "MIT"].iter().any(|key| text.contains(*key)));
    assert!(
        names_mit,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1354
/// A README sentence naming "the Apache License (Version 2.0)" must be reported
/// as an `apache-2.0` license expression (either letter case accepted).
#[test]
fn scanner_detects_apache_license_from_markdown_readme_phrase() {
    let readme =
        "This crate is distributed under the terms of the Apache License (Version 2.0).\n";

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    let names_apache = scanned
        .license_expression
        .as_deref()
        .is_some_and(|text| {
            ["apache-2.0", "Apache-2.0"]
                .iter()
                .any(|key| text.contains(*key))
        });
    assert!(
        names_apache,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1374
/// `is_source` stays `None` unless `collect_info` is enabled, in which case a
/// Rust source file is reported as `Some(true)`.
#[test]
fn scanner_sets_is_source_only_when_info_enabled() {
    let rust_source = "fn main() {}\n";

    // Every detector off; the second option set differs only in `collect_info`.
    let without_info = TextDetectionOptions {
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let with_info = TextDetectionOptions {
        collect_info: true,
        ..without_info.clone()
    };

    let scanned_without_info = scan_single_file("main.rs", rust_source, &without_info);
    let scanned_with_info = scan_single_file("main.rs", rust_source, &with_info);

    assert_eq!(scanned_without_info.is_source, None);
    assert_eq!(scanned_with_info.is_source, Some(true));
}
1402
/// With `collect_info` off, a directory entry carries none of the info fields
/// (date, type label, and the binary/text/archive/media/source/script flags).
#[test]
fn directory_omits_info_fields_when_info_disabled() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let nested_dir = temp_dir.path().join("nested");
    fs::create_dir_all(nested_dir).expect("create nested dir");

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let info_disabled = TextDetectionOptions {
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &info_disabled,
    );

    let directory = result
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    assert!(directory.date.is_none());
    assert!(directory.file_type_label.is_none());
    assert!(directory.is_binary.is_none());
    assert!(directory.is_text.is_none());
    assert!(directory.is_archive.is_none());
    assert!(directory.is_media.is_none());
    assert!(directory.is_source.is_none());
    assert!(directory.is_script.is_none());
}
1445
/// With `collect_info` on, an empty directory entry reports every boolean info
/// flag as `Some(false)` and zero file/dir/size counts, while `date` and
/// `file_type_label` remain unset.
#[test]
fn directory_includes_info_fields_when_info_enabled() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let nested_dir = temp_dir.path().join("nested");
    fs::create_dir_all(nested_dir).expect("create nested dir");

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let info_enabled = TextDetectionOptions {
        collect_info: true,
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &info_enabled,
    );

    let directory = result
        .files
        .iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    // Fields that stay unset for directories even with info collection on.
    assert!(directory.date.is_none());
    assert!(directory.file_type_label.is_none());

    // Classification flags are present and all false.
    assert_eq!(directory.is_binary, Some(false));
    assert_eq!(directory.is_text, Some(false));
    assert_eq!(directory.is_archive, Some(false));
    assert_eq!(directory.is_media, Some(false));
    assert_eq!(directory.is_source, Some(false));
    assert_eq!(directory.is_script, Some(false));

    // Aggregate counts for the empty directory.
    assert_eq!(directory.files_count, Some(0));
    assert_eq!(directory.dirs_count, Some(0));
    assert_eq!(directory.size_count, Some(0));
}
1491
/// `collect_paths` lists the scanned root itself among the collected
/// directories.
#[test]
fn collect_paths_includes_root_directory_entry() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).expect("create nested dir");
    fs::write(src_dir.join("main.rs"), "fn main() {}").expect("write nested file");

    let collected = collect_paths(root, 0, &[]);

    let root_listed = collected
        .directories
        .iter()
        .any(|(path, _)| path == root);
    assert!(root_listed);
}
1508
/// Pointing `collect_paths` at a single file yields exactly that file and no
/// directory entries.
#[test]
fn collect_paths_supports_single_file_input() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("main.rs");
    fs::write(&file_path, "fn main() {}\n").expect("write file");

    let collected = collect_paths(&file_path, 0, &[]);

    assert_eq!(collected.files.len(), 1);
    assert!(collected.directories.is_empty());

    // The length check above guarantees index 0 exists.
    let only_path = &collected.files[0].0;
    assert_eq!(*only_path, file_path);
}
1521
/// Processing with a memory limit of `1` must still return every collected
/// entry intact.
///
/// Besides the total entry count, the test checks that both written files are
/// individually present as `FileType::File` entries, so a run that returned
/// the right number of entries but dropped or duplicated one would be caught.
#[test]
fn process_collected_with_memory_limit_preserves_results_when_spilling() {
    let temp_dir = TempDir::new().expect("create temp dir");
    fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
    fs::write(temp_dir.path().join("b.txt"), "world").expect("write second file");

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let all_detectors_off = TextDetectionOptions {
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    // NOTE(review): a limit of 1 is presumably small enough to force spilling
    // to disk, as the test name suggests; confirm against the implementation.
    let result = process_collected_with_memory_limit(
        &collected,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &all_detectors_off,
        1,
    );

    // Presumably the root directory entry plus the two files.
    assert_eq!(result.files.len(), 3);

    // A matching count alone does not prove the results survived; each file
    // must also be found by name.
    for file_name in ["a.txt", "b.txt"] {
        let present = result
            .files
            .iter()
            .any(|entry| entry.file_type == FileType::File && entry.path.ends_with(file_name));
        assert!(present, "missing file entry for {}", file_name);
    }
}
1553
/// Passing `-1` as the memory limit must still return every collected entry
/// (two entries here for a directory holding a single file).
#[test]
fn process_collected_with_negative_one_uses_disk_only_mode() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let only_file = temp_dir.path().join("a.txt");
    fs::write(only_file, "hello").expect("write first file");

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let all_detectors_off = TextDetectionOptions {
        // `Default` enables copyright detection; this test keeps it off.
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    // NOTE(review): -1 presumably selects the disk-only mode named in the test
    // title; confirm against the implementation.
    let result = process_collected_with_memory_limit(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &all_detectors_off,
        -1,
    );

    let entry_count = result.files.len();
    assert_eq!(entry_count, 2);
}
1584}