1mod collect;
5pub(crate) mod process;
6
7use crate::license_detection::LicenseDetectionEngine;
8use crate::models::FileInfo;
9
/// Outcome of a processing run: the produced file entries plus an exclusion
/// tally.
///
/// NOTE(review): presumably this is what the `process_collected*` functions
/// re-exported from the `process` module return — confirm against that module.
pub struct ProcessResult {
    /// File entries produced by the run.
    pub files: Vec<FileInfo>,
    /// Number of excluded items. What exactly is counted is decided by the
    /// producer — TODO confirm whether this is paths skipped during collection.
    pub excluded_count: usize,
}
14
/// License-related scan switches.
///
/// Every field is folded into the string built by `scan_options_fingerprint`.
/// The derived `Default` yields all flags `false` and a `min_score` of `0`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
    /// Fingerprinted as `license_text`. Presumably asks for matched license
    /// text in the output — TODO confirm against the consumer.
    pub include_text: bool,
    /// Fingerprinted as `license_text_diagnostics`. Presumably adds
    /// diagnostics to the matched text — TODO confirm.
    pub include_text_diagnostics: bool,
    /// Fingerprinted as `license_diagnostics`. Presumably adds detection
    /// diagnostics — TODO confirm.
    pub include_diagnostics: bool,
    /// Fingerprinted as `unknown_licenses`. Presumably enables reporting of
    /// unknown licenses — TODO confirm.
    pub unknown_licenses: bool,
    /// Fingerprinted as `license_score`. Presumably the minimum score a match
    /// needs to be kept; the valid range is not visible here — TODO confirm.
    pub min_score: u8,
}
23
/// Switches and limits for the non-license parts of a scan.
///
/// Every field is folded into the string built by `scan_options_fingerprint`.
/// The `Default` implementation enables only `detect_copyrights`, sets both
/// `max_*` limits to 50 and the timeout to 120 seconds.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// When set, the tests in this file expect hashes, MIME type, date,
    /// programming language and the `is_*` classification flags to be filled
    /// in; when unset they expect all of those to be `None`.
    pub collect_info: bool,
    /// Package detection switch. The tests in this file enable it together
    /// with `detect_application_packages` to get `package_data` — how the two
    /// flags interact is not visible here, TODO confirm.
    pub detect_packages: bool,
    /// Presumably restricts/enables application-level package manifests —
    /// TODO confirm.
    pub detect_application_packages: bool,
    /// Presumably restricts/enables system package databases — TODO confirm.
    pub detect_system_packages: bool,
    /// Presumably enables package detection inside compiled binaries —
    /// TODO confirm.
    pub detect_packages_in_compiled: bool,
    /// Copyright detection switch; the only detector that is on by default.
    pub detect_copyrights: bool,
    /// When set, the tests in this file expect `is_generated` to be `Some(..)`;
    /// when unset they expect it to be `None`.
    pub detect_generated: bool,
    /// Email detection switch.
    pub detect_emails: bool,
    /// URL detection switch.
    pub detect_urls: bool,
    /// Defaults to 50. Presumably caps the number of reported emails —
    /// TODO confirm scope (per file?) and whether 0 means "unlimited".
    pub max_emails: usize,
    /// Defaults to 50. Presumably caps the number of reported URLs —
    /// TODO confirm scope (per file?) and whether 0 means "unlimited".
    pub max_urls: usize,
    /// Defaults to 120.0. Presumably a per-file processing timeout in
    /// seconds — TODO confirm.
    pub timeout_seconds: f64,
}
39
40impl Default for TextDetectionOptions {
41 fn default() -> Self {
42 Self {
43 collect_info: false,
44 detect_packages: false,
45 detect_application_packages: false,
46 detect_system_packages: false,
47 detect_packages_in_compiled: false,
48 detect_copyrights: true,
49 detect_generated: false,
50 detect_emails: false,
51 detect_urls: false,
52 max_emails: 50,
53 max_urls: 50,
54 timeout_seconds: 120.0,
55 }
56 }
57}
58
59pub fn scan_options_fingerprint(
60 text_options: &TextDetectionOptions,
61 license_options: LicenseScanOptions,
62 license_engine: Option<&LicenseDetectionEngine>,
63) -> String {
64 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
65 Some(engine) => {
66 let rules = &engine.index().rules_by_rid;
67 (
68 true,
69 rules.len(),
70 rules
71 .first()
72 .map(|rule| rule.identifier.as_str())
73 .unwrap_or(""),
74 rules
75 .last()
76 .map(|rule| rule.identifier.as_str())
77 .unwrap_or(""),
78 )
79 }
80 None => (false, 0, "", ""),
81 };
82
83 format!(
84 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
85 crate::version::BUILD_VERSION,
86 text_options.collect_info,
87 text_options.detect_packages,
88 text_options.detect_application_packages,
89 text_options.detect_system_packages,
90 text_options.detect_packages_in_compiled,
91 text_options.detect_copyrights,
92 text_options.detect_generated,
93 text_options.detect_emails,
94 text_options.detect_urls,
95 text_options.max_emails,
96 text_options.max_urls,
97 text_options.timeout_seconds,
98 license_enabled,
99 rules_count,
100 first_rule_id,
101 last_rule_id,
102 license_options.include_text,
103 license_options.include_text_diagnostics,
104 license_options.include_diagnostics,
105 license_options.unknown_licenses,
106 license_options.min_score,
107 )
108}
109
110pub use self::collect::{
111 CollectedPaths, CollectionFrontier, collect_paths, collect_selected_paths,
112};
113#[allow(unused_imports)]
114pub use self::process::{
115 MemoryMode, process_collected, process_collected_sequential,
116 process_collected_with_memory_limit, process_collected_with_memory_limit_sequential,
117};
118
119#[cfg(test)]
120mod tests {
121 use std::fs;
122 use std::path::PathBuf;
123 use std::sync::Arc;
124
125 use object::pe;
126 use tempfile::TempDir;
127
128 use crate::cache::build_collection_exclude_patterns;
129 use crate::license_detection::LicenseDetectionEngine;
130 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
131 use crate::progress::{ProgressMode, ScanProgress};
132
133 use super::{
134 CollectionFrontier, LicenseScanOptions, MemoryMode, TextDetectionOptions, collect_paths,
135 collect_selected_paths, process_collected, process_collected_with_memory_limit,
136 scan_options_fingerprint,
137 };
138
139 fn build_sparse_oversized_rpm_with_filename(
140 temp_dir: &TempDir,
141 package_name: &str,
142 filename: &str,
143 ) -> PathBuf {
144 let file_path = temp_dir.path().join(filename);
145 rpm::PackageBuilder::new(package_name, "1.0", "MIT", "x86_64", "Demo RPM package")
146 .release("1")
147 .build()
148 .expect("build rpm fixture")
149 .write_file(&file_path)
150 .expect("write rpm fixture");
151 fs::OpenOptions::new()
152 .write(true)
153 .open(&file_path)
154 .expect("open rpm fixture for sparse extension")
155 .set_len(100 * 1024 * 1024 + 1_048_576)
156 .expect("extend rpm fixture");
157 file_path
158 }
159
160 fn build_sparse_oversized_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
161 build_sparse_oversized_rpm_with_filename(
162 temp_dir,
163 name,
164 &format!("{name}-1.0-1.x86_64.rpm"),
165 )
166 }
167
168 fn build_sparse_oversized_pack_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
169 build_sparse_oversized_rpm_with_filename(
170 temp_dir,
171 name,
172 &format!("{name}-1.0-1.x86_64.pack"),
173 )
174 }
175
176 #[test]
177 fn default_options_keep_copyright_detection_enabled() {
178 let options = TextDetectionOptions::default();
179 assert!(!options.detect_packages);
180 assert!(options.detect_copyrights);
181 }
182
183 #[test]
184 fn test_scan_options_fingerprint_changes_with_license_score() {
185 let text_options = TextDetectionOptions::default();
186 let default_fingerprint = scan_options_fingerprint(
187 &text_options,
188 LicenseScanOptions {
189 min_score: 0,
190 ..LicenseScanOptions::default()
191 },
192 None,
193 );
194 let filtered_fingerprint = scan_options_fingerprint(
195 &text_options,
196 LicenseScanOptions {
197 min_score: 70,
198 ..LicenseScanOptions::default()
199 },
200 None,
201 );
202
203 assert_ne!(default_fingerprint, filtered_fingerprint);
204 }
205
206 fn scan_single_file(
207 file_name: &str,
208 content: &str,
209 options: &TextDetectionOptions,
210 ) -> crate::models::FileInfo {
211 let temp_dir = TempDir::new().expect("create temp dir");
212 let file_path = temp_dir.path().join(file_name);
213 fs::write(&file_path, content).expect("write test file");
214
215 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
216 let collected = collect_paths(temp_dir.path(), 0, &[]);
217 let result = process_collected(
218 &collected,
219 progress,
220 None,
221 LicenseScanOptions::default(),
222 options,
223 );
224
225 result
226 .files
227 .into_iter()
228 .find(|entry| {
229 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
230 })
231 .expect("scanned file entry")
232 }
233
234 fn scan_file_at_relative_path(
235 relative_path: &str,
236 content: &[u8],
237 options: &TextDetectionOptions,
238 ) -> crate::models::FileInfo {
239 let temp_dir = TempDir::new().expect("create temp dir");
240 let file_path = temp_dir.path().join(relative_path);
241 if let Some(parent) = file_path.parent() {
242 fs::create_dir_all(parent).expect("create parent dirs");
243 }
244 fs::write(&file_path, content).expect("write test file");
245
246 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
247 let collected = collect_paths(temp_dir.path(), 0, &[]);
248 let result = process_collected(
249 &collected,
250 progress,
251 None,
252 LicenseScanOptions::default(),
253 options,
254 );
255
256 result
257 .files
258 .into_iter()
259 .find(|entry| {
260 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
261 })
262 .expect("scanned file entry")
263 }
264
265 fn scan_single_file_with_license_engine(
266 file_name: &str,
267 content: &str,
268 options: &TextDetectionOptions,
269 ) -> crate::models::FileInfo {
270 let temp_dir = TempDir::new().expect("create temp dir");
271 let file_path = temp_dir.path().join(file_name);
272 fs::write(&file_path, content).expect("write test file");
273
274 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
275 let collected = collect_paths(temp_dir.path(), 0, &[]);
276 let engine =
277 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
278 let result = process_collected(
279 &collected,
280 progress,
281 Some(engine),
282 LicenseScanOptions::default(),
283 options,
284 );
285
286 result
287 .files
288 .into_iter()
289 .find(|entry| {
290 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
291 })
292 .expect("scanned file entry")
293 }
294
295 #[test]
296 fn scanner_reports_repeated_email_occurrences() {
297 let options = TextDetectionOptions {
298 collect_info: false,
299 detect_packages: false,
300 detect_application_packages: false,
301 detect_system_packages: false,
302 detect_packages_in_compiled: false,
303 detect_copyrights: false,
304 detect_generated: false,
305 detect_emails: true,
306 detect_urls: false,
307 max_emails: 50,
308 max_urls: 50,
309 timeout_seconds: 120.0,
310 };
311 let scanned = scan_single_file(
312 "contacts.txt",
313 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
314 &options,
315 );
316
317 let emails: Vec<(&str, usize)> = scanned
318 .emails
319 .iter()
320 .map(|email| (email.email.as_str(), email.start_line.get()))
321 .collect();
322
323 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
324 assert_eq!(
325 emails,
326 vec![
327 ("linux@3ware.com", 1),
328 ("linux@3ware.com", 2),
329 ("andre@suse.com", 3),
330 ("linux@3ware.com", 4),
331 ]
332 );
333 }
334
335 #[test]
336 fn scanner_skips_pem_certificate_text_detection() {
337 let options = TextDetectionOptions {
338 collect_info: false,
339 detect_packages: false,
340 detect_application_packages: false,
341 detect_system_packages: false,
342 detect_packages_in_compiled: false,
343 detect_copyrights: true,
344 detect_generated: false,
345 detect_emails: true,
346 detect_urls: true,
347 max_emails: 50,
348 max_urls: 50,
349 timeout_seconds: 120.0,
350 };
351 let pem_fixture = concat!(
352 "-----BEGIN CERTIFICATE-----\n",
353 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
354 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
355 "-----END CERTIFICATE-----\n",
356 "Certificate:\n",
357 " Data:\n",
358 " Signature Algorithm: sha1WithRSAEncryption\n",
359 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
360 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
361 " Contact: cert-owner@example.com\n",
362 );
363 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
364
365 assert!(
366 scanned.copyrights.is_empty(),
367 "copyrights: {:#?}",
368 scanned.copyrights
369 );
370 assert!(
371 scanned.holders.is_empty(),
372 "holders: {:#?}",
373 scanned.holders
374 );
375 assert!(
376 scanned.authors.is_empty(),
377 "authors: {:#?}",
378 scanned.authors
379 );
380 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
381 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
382 assert!(
383 scanned.license_detections.is_empty(),
384 "licenses: {:#?}",
385 scanned.license_detections
386 );
387 assert!(
388 scanned.license_clues.is_empty(),
389 "license clues: {:#?}",
390 scanned.license_clues
391 );
392 }
393
394 #[test]
395 fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
396 let options = TextDetectionOptions {
397 collect_info: false,
398 detect_packages: false,
399 detect_application_packages: false,
400 detect_system_packages: false,
401 detect_packages_in_compiled: false,
402 detect_copyrights: true,
403 detect_generated: false,
404 detect_emails: false,
405 detect_urls: true,
406 max_emails: 50,
407 max_urls: 50,
408 timeout_seconds: 120.0,
409 };
410 let fixture = concat!(
411 "/*\n",
412 "Copyright 2022 The Kubernetes Authors.\n\n",
413 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
414 "you may not use this file except in compliance with the License.\n",
415 "You may obtain a copy of the License at\n\n",
416 " http://www.apache.org/licenses/LICENSE-2.0\n",
417 "*/\n\n",
418 "package storage\n\n",
419 "const validCert = `\n",
420 "-----BEGIN CERTIFICATE-----\n",
421 "MIIDmTCCAoGgAwIBAgIUWQ==\n",
422 "-----END CERTIFICATE-----\n",
423 "`\n",
424 );
425 let temp_dir = TempDir::new().expect("create temp dir");
426 let file_path = temp_dir.path().join("storage_test.go");
427 fs::write(&file_path, fixture).expect("write fixture");
428
429 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
430 let collected = collect_paths(temp_dir.path(), 0, &[]);
431 let engine =
432 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
433 let result = process_collected(
434 &collected,
435 progress,
436 Some(engine),
437 LicenseScanOptions::default(),
438 &options,
439 );
440 let scanned = result
441 .files
442 .into_iter()
443 .find(|entry| {
444 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
445 })
446 .expect("scanned file entry");
447
448 assert!(
449 scanned
450 .copyrights
451 .iter()
452 .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors."),
453 "copyrights: {:#?}",
454 scanned.copyrights
455 );
456 assert!(
457 scanned
458 .holders
459 .iter()
460 .any(|h| h.holder == "The Kubernetes Authors"),
461 "holders: {:#?}",
462 scanned.holders
463 );
464 assert!(
465 scanned
466 .urls
467 .iter()
468 .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
469 "urls: {:#?}",
470 scanned.urls
471 );
472 assert_eq!(scanned.license_expression.as_deref(), Some("Apache-2.0"));
473 }
474
475 #[test]
476 fn scanner_detects_structured_credits_authors() {
477 let options = TextDetectionOptions {
478 collect_info: false,
479 detect_packages: false,
480 detect_application_packages: false,
481 detect_system_packages: false,
482 detect_packages_in_compiled: false,
483 detect_copyrights: true,
484 detect_generated: false,
485 detect_emails: false,
486 detect_urls: false,
487 max_emails: 50,
488 max_urls: 50,
489 timeout_seconds: 120.0,
490 };
491 let credits_fixture = concat!(
492 "N: Jack Lloyd\n",
493 "E: lloyd@randombit.net\n",
494 "W: http://www.randombit.net/\n",
495 );
496 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
497
498 let authors: Vec<(&str, usize, usize)> = scanned
499 .authors
500 .iter()
501 .map(|author| {
502 (
503 author.author.as_str(),
504 author.start_line.get(),
505 author.end_line.get(),
506 )
507 })
508 .collect();
509
510 assert_eq!(
511 authors,
512 vec![(
513 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
514 1,
515 3,
516 )]
517 );
518 assert!(scanned.copyrights.is_empty());
519 assert!(scanned.holders.is_empty());
520 }
521
522 #[test]
523 fn scanner_uses_or_for_alternative_license_header() {
524 let fixture =
525 include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");
526 let temp_dir = TempDir::new().expect("create temp dir");
527 let file_path = temp_dir.path().join("d2s.ipp");
528 fs::write(&file_path, fixture).expect("write fixture");
529
530 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
531 let collected = collect_paths(temp_dir.path(), 0, &[]);
532 let engine =
533 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
534 let result = process_collected(
535 &collected,
536 progress,
537 Some(engine),
538 LicenseScanOptions::default(),
539 &TextDetectionOptions::default(),
540 );
541 let scanned = result
542 .files
543 .into_iter()
544 .find(|entry| {
545 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
546 })
547 .expect("scanned file entry");
548
549 assert_eq!(
550 scanned.license_expression.as_deref(),
551 Some("Apache-2.0 OR BSL-1.0")
552 );
553 assert!(
554 scanned.license_clues.is_empty(),
555 "license clues: {:#?}",
556 scanned.license_clues
557 );
558 assert_eq!(
559 scanned.license_detections.len(),
560 1,
561 "detections: {:#?}",
562 scanned.license_detections
563 );
564
565 let detection = &scanned.license_detections[0];
566 assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");
567
568 let match_expressions: Vec<_> = detection
569 .matches
570 .iter()
571 .map(|m| m.license_expression_spdx.as_str())
572 .collect();
573 assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
574 }
575
576 #[test]
577 fn scanner_sets_generated_flag_when_enabled() {
578 let options = TextDetectionOptions {
579 collect_info: false,
580 detect_packages: false,
581 detect_application_packages: false,
582 detect_system_packages: false,
583 detect_packages_in_compiled: false,
584 detect_copyrights: false,
585 detect_generated: true,
586 detect_emails: false,
587 detect_urls: false,
588 max_emails: 50,
589 max_urls: 50,
590 timeout_seconds: 120.0,
591 };
592 let scanned = scan_single_file(
593 "generated.c",
594 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
595 &options,
596 );
597
598 assert_eq!(scanned.is_generated, Some(true));
599 }
600
601 #[test]
602 fn scanner_leaves_generated_flag_unset_when_disabled() {
603 let options = TextDetectionOptions {
604 collect_info: false,
605 detect_packages: false,
606 detect_application_packages: false,
607 detect_system_packages: false,
608 detect_packages_in_compiled: false,
609 detect_copyrights: false,
610 detect_generated: false,
611 detect_emails: false,
612 detect_urls: false,
613 max_emails: 50,
614 max_urls: 50,
615 timeout_seconds: 120.0,
616 };
617 let scanned = scan_single_file(
618 "generated.c",
619 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
620 &options,
621 );
622
623 assert_eq!(scanned.is_generated, None);
624 }
625
626 #[test]
627 fn scanner_populates_info_surface_when_enabled() {
628 let options = TextDetectionOptions {
629 collect_info: true,
630 detect_packages: false,
631 detect_application_packages: false,
632 detect_system_packages: false,
633 detect_packages_in_compiled: false,
634 detect_copyrights: false,
635 detect_generated: false,
636 detect_emails: false,
637 detect_urls: false,
638 max_emails: 50,
639 max_urls: 50,
640 timeout_seconds: 120.0,
641 };
642 let scanned = scan_single_file(
643 "script.py",
644 "#!/usr/bin/env python3\nprint(\"hello\")\n",
645 &options,
646 );
647
648 assert!(scanned.sha1.is_some());
649 assert!(scanned.md5.is_some());
650 assert!(scanned.sha256.is_some());
651 assert!(scanned.sha1_git.is_some());
652 assert!(scanned.mime_type.is_some());
653 assert!(scanned.date.is_some());
654 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
655 assert_eq!(scanned.is_text, Some(true));
656 assert_eq!(scanned.is_script, Some(true));
657 assert_eq!(scanned.is_source, Some(true));
658 }
659
660 #[test]
661 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
662 let options = TextDetectionOptions {
663 collect_info: true,
664 detect_packages: false,
665 detect_application_packages: false,
666 detect_system_packages: false,
667 detect_packages_in_compiled: false,
668 detect_copyrights: false,
669 detect_generated: false,
670 detect_emails: false,
671 detect_urls: false,
672 max_emails: 50,
673 max_urls: 50,
674 timeout_seconds: 120.0,
675 };
676 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
677 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
678
679 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
680 assert_eq!(
681 scanned.file_type_label.as_deref(),
682 Some("python script, text executable")
683 );
684 assert_eq!(scanned.is_binary, Some(false));
685 assert_eq!(scanned.is_text, Some(true));
686 assert_eq!(scanned.is_script, Some(true));
687 assert_eq!(scanned.is_source, Some(true));
688 }
689
690 #[test]
691 fn scanner_skips_findings_for_zip_like_archives() {
692 let options = TextDetectionOptions {
693 collect_info: true,
694 detect_packages: false,
695 detect_application_packages: false,
696 detect_system_packages: false,
697 detect_packages_in_compiled: false,
698 detect_copyrights: true,
699 detect_generated: false,
700 detect_emails: true,
701 detect_urls: true,
702 max_emails: 50,
703 max_urls: 50,
704 timeout_seconds: 120.0,
705 };
706 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
707 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
708
709 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
710 assert_eq!(scanned.is_archive, Some(true));
711 assert!(scanned.license_detections.is_empty());
712 assert!(scanned.copyrights.is_empty());
713 assert!(scanned.emails.is_empty());
714 assert!(scanned.urls.is_empty());
715 }
716
717 #[test]
718 fn scanner_treats_typescript_sources_as_text_not_video_media() {
719 let options = TextDetectionOptions {
720 collect_info: true,
721 detect_packages: false,
722 detect_application_packages: false,
723 detect_system_packages: false,
724 detect_packages_in_compiled: false,
725 detect_copyrights: false,
726 detect_generated: false,
727 detect_emails: false,
728 detect_urls: false,
729 max_emails: 50,
730 max_urls: 50,
731 timeout_seconds: 120.0,
732 };
733 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
734
735 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
736 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
737 assert_eq!(
738 scanned.file_type_label.as_deref(),
739 Some("TypeScript source, UTF-8 Unicode text")
740 );
741 assert_eq!(scanned.is_text, Some(true));
742 assert_eq!(scanned.is_media, Some(false));
743 assert_eq!(scanned.is_script, Some(false));
744 assert_eq!(scanned.is_source, Some(true));
745 }
746
747 #[test]
748 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
749 let options = TextDetectionOptions {
750 collect_info: true,
751 detect_packages: false,
752 detect_application_packages: false,
753 detect_system_packages: false,
754 detect_packages_in_compiled: false,
755 detect_copyrights: false,
756 detect_generated: false,
757 detect_emails: false,
758 detect_urls: false,
759 max_emails: 50,
760 max_urls: 50,
761 timeout_seconds: 120.0,
762 };
763 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
764
765 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
766 assert_eq!(
767 scanned.file_type_label.as_deref(),
768 Some("TypeScript source, UTF-8 Unicode text")
769 );
770 assert_eq!(scanned.is_text, Some(true));
771 assert_eq!(scanned.is_media, Some(false));
772 assert_eq!(scanned.is_script, Some(false));
773 assert_eq!(scanned.is_source, Some(true));
774 }
775
776 #[test]
777 fn scanner_treats_empty_files_like_scancode_info_surface() {
778 let options = TextDetectionOptions {
779 collect_info: true,
780 detect_packages: false,
781 detect_application_packages: false,
782 detect_system_packages: false,
783 detect_packages_in_compiled: false,
784 detect_copyrights: false,
785 detect_generated: false,
786 detect_emails: false,
787 detect_urls: false,
788 max_emails: 50,
789 max_urls: 50,
790 timeout_seconds: 120.0,
791 };
792 let scanned = scan_single_file("test.txt", "", &options);
793
794 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
795 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
796 assert_eq!(scanned.programming_language, None);
797 assert_eq!(scanned.is_binary, Some(false));
798 assert_eq!(scanned.is_text, Some(true));
799 assert_eq!(scanned.is_archive, Some(false));
800 assert_eq!(scanned.is_media, Some(false));
801 assert_eq!(scanned.is_source, Some(false));
802 assert_eq!(scanned.is_script, Some(false));
803 }
804
805 #[test]
806 fn scanner_treats_package_json_as_text_not_source() {
807 let options = TextDetectionOptions {
808 collect_info: true,
809 detect_packages: false,
810 detect_application_packages: false,
811 detect_system_packages: false,
812 detect_packages_in_compiled: false,
813 detect_copyrights: false,
814 detect_generated: false,
815 detect_emails: false,
816 detect_urls: false,
817 max_emails: 50,
818 max_urls: 50,
819 timeout_seconds: 120.0,
820 };
821 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
822
823 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
824 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
825 assert_eq!(scanned.programming_language, None);
826 assert_eq!(scanned.is_text, Some(true));
827 assert_eq!(scanned.is_source, Some(false));
828 assert_eq!(scanned.is_script, Some(false));
829 }
830
831 #[test]
832 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
833 let options = TextDetectionOptions {
834 collect_info: true,
835 detect_packages: false,
836 detect_application_packages: false,
837 detect_system_packages: false,
838 detect_packages_in_compiled: false,
839 detect_copyrights: false,
840 detect_generated: false,
841 detect_emails: false,
842 detect_urls: false,
843 max_emails: 50,
844 max_urls: 50,
845 timeout_seconds: 120.0,
846 };
847
848 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
849 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
850
851 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
852 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
853 assert_eq!(gradle.is_source, Some(true));
854 assert_eq!(gradle.is_script, Some(false));
855
856 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
857 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
858 assert_eq!(nix.is_source, Some(true));
859 assert_eq!(nix.is_script, Some(false));
860 }
861
862 #[test]
863 fn scanner_treats_gitmodules_as_text_not_source() {
864 let options = TextDetectionOptions {
865 collect_info: true,
866 detect_packages: false,
867 detect_application_packages: false,
868 detect_system_packages: false,
869 detect_packages_in_compiled: false,
870 detect_copyrights: false,
871 detect_generated: false,
872 detect_emails: false,
873 detect_urls: false,
874 max_emails: 50,
875 max_urls: 50,
876 timeout_seconds: 120.0,
877 };
878 let scanned = scan_file_at_relative_path(
879 ".gitmodules",
880 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
881 &options,
882 );
883
884 assert_eq!(scanned.programming_language, None);
885 assert_eq!(
886 scanned.file_type_label.as_deref(),
887 Some("Git configuration text")
888 );
889 assert_eq!(scanned.is_text, Some(true));
890 assert_eq!(scanned.is_source, Some(false));
891 assert_eq!(scanned.is_script, Some(false));
892 }
893
894 #[test]
895 fn scanner_treats_javascript_shebang_files_as_scripts() {
896 let options = TextDetectionOptions {
897 collect_info: true,
898 detect_packages: false,
899 detect_application_packages: false,
900 detect_system_packages: false,
901 detect_packages_in_compiled: false,
902 detect_copyrights: false,
903 detect_generated: false,
904 detect_emails: false,
905 detect_urls: false,
906 max_emails: 50,
907 max_urls: 50,
908 timeout_seconds: 120.0,
909 };
910 let scanned = scan_file_at_relative_path(
911 "bin/run",
912 b"#!/usr/bin/env node\nconsole.log('hello');\n",
913 &options,
914 );
915
916 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
917 assert_eq!(
918 scanned.file_type_label.as_deref(),
919 Some("javascript script, UTF-8 Unicode text executable")
920 );
921 assert_eq!(scanned.is_script, Some(true));
922 assert_eq!(scanned.is_source, Some(true));
923 }
924
925 #[test]
926 fn scanner_treats_dockerfile_as_source() {
927 let options = TextDetectionOptions {
928 collect_info: true,
929 detect_packages: false,
930 detect_application_packages: false,
931 detect_system_packages: false,
932 detect_packages_in_compiled: false,
933 detect_copyrights: false,
934 detect_generated: false,
935 detect_emails: false,
936 detect_urls: false,
937 max_emails: 50,
938 max_urls: 50,
939 timeout_seconds: 120.0,
940 };
941 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
942
943 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
944 assert_eq!(
945 scanned.file_type_label.as_deref(),
946 Some("Dockerfile source, UTF-8 Unicode text")
947 );
948 assert_eq!(scanned.is_source, Some(true));
949 assert_eq!(scanned.is_script, Some(false));
950 }
951
952 #[test]
953 fn scanner_treats_makefile_as_text_not_source() {
954 let options = TextDetectionOptions {
955 collect_info: true,
956 detect_packages: false,
957 detect_application_packages: false,
958 detect_system_packages: false,
959 detect_packages_in_compiled: false,
960 detect_copyrights: false,
961 detect_generated: false,
962 detect_emails: false,
963 detect_urls: false,
964 max_emails: 50,
965 max_urls: 50,
966 timeout_seconds: 120.0,
967 };
968 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
969
970 assert_eq!(scanned.programming_language, None);
971 assert_eq!(
972 scanned.file_type_label.as_deref(),
973 Some("UTF-8 Unicode text")
974 );
975 assert_eq!(scanned.is_text, Some(true));
976 assert_eq!(scanned.is_source, Some(false));
977 assert_eq!(scanned.is_script, Some(false));
978 }
979
980 #[test]
981 fn scanner_omits_info_surface_when_disabled() {
982 let options = TextDetectionOptions {
983 collect_info: false,
984 detect_packages: false,
985 detect_application_packages: false,
986 detect_system_packages: false,
987 detect_packages_in_compiled: false,
988 detect_copyrights: false,
989 detect_generated: false,
990 detect_emails: false,
991 detect_urls: false,
992 max_emails: 50,
993 max_urls: 50,
994 timeout_seconds: 120.0,
995 };
996 let scanned = scan_single_file(
997 "script.py",
998 "#!/usr/bin/env python3\nprint(\"hello\")\n",
999 &options,
1000 );
1001
1002 assert!(scanned.sha1.is_none());
1003 assert!(scanned.md5.is_none());
1004 assert!(scanned.sha256.is_none());
1005 assert!(scanned.sha1_git.is_none());
1006 assert!(scanned.mime_type.is_none());
1007 assert!(scanned.date.is_none());
1008 assert!(scanned.programming_language.is_none());
1009 assert!(scanned.is_binary.is_none());
1010 assert!(scanned.is_text.is_none());
1011 assert!(scanned.is_archive.is_none());
1012 assert!(scanned.is_media.is_none());
1013 assert!(scanned.is_script.is_none());
1014 assert!(scanned.is_source.is_none());
1015 }
1016
1017 #[test]
1018 fn scanner_skips_package_parsing_when_disabled() {
1019 let options = TextDetectionOptions {
1020 collect_info: false,
1021 detect_packages: false,
1022 detect_application_packages: false,
1023 detect_system_packages: false,
1024 detect_packages_in_compiled: false,
1025 detect_copyrights: false,
1026 detect_generated: false,
1027 detect_emails: false,
1028 detect_urls: false,
1029 max_emails: 50,
1030 max_urls: 50,
1031 timeout_seconds: 120.0,
1032 };
1033 let scanned = scan_single_file(
1034 "package.json",
1035 r#"{"name":"demo","version":"1.0.0"}"#,
1036 &options,
1037 );
1038
1039 assert!(
1040 scanned.package_data.is_empty(),
1041 "package_data: {:#?}",
1042 scanned.package_data
1043 );
1044 }
1045
1046 #[test]
1047 fn scanner_parses_package_manifests_when_enabled() {
1048 let options = TextDetectionOptions {
1049 collect_info: false,
1050 detect_packages: true,
1051 detect_application_packages: true,
1052 detect_system_packages: false,
1053 detect_packages_in_compiled: false,
1054 detect_copyrights: false,
1055 detect_generated: false,
1056 detect_emails: false,
1057 detect_urls: false,
1058 max_emails: 50,
1059 max_urls: 50,
1060 timeout_seconds: 120.0,
1061 };
1062 let scanned = scan_single_file(
1063 "package.json",
1064 r#"{"name":"demo","version":"1.0.0"}"#,
1065 &options,
1066 );
1067
1068 assert_eq!(
1069 scanned.package_data.len(),
1070 1,
1071 "package_data: {:#?}",
1072 scanned.package_data
1073 );
1074 }
1075
/// Scans the fixture produced by `build_sparse_oversized_rpm` with only
/// package detection enabled and expects a single `RpmArchive` package record
/// with no scan errors.
#[test]
fn scanner_parses_oversized_rpm_in_package_only_mode_without_size_warning() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = build_sparse_oversized_rpm(&temp_dir, "oversized-demo");
    let expected_path = file_path.to_string_lossy();

    // Package detection only; every other detector stays off.
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.scan_errors.is_empty(),
        "scan_errors: {:#?}",
        scanned.scan_errors
    );
    assert_eq!(
        scanned.package_data.len(),
        1,
        "package_data: {:#?}",
        scanned.package_data
    );

    let package = &scanned.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-demo"));
    assert_eq!(package.version.as_deref(), Some("1.0-1"));
}
1133
/// Same fixture as the package-only variant, but with `collect_info` enabled:
/// the RPM must still parse cleanly and the info fields (hashes, MIME type,
/// file-type flags) must be populated.
#[test]
fn scanner_parses_oversized_rpm_with_info_without_timeout_or_size_warning() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = build_sparse_oversized_rpm(&temp_dir, "oversized-info-demo");
    let expected_path = file_path.to_string_lossy();

    let options = TextDetectionOptions {
        collect_info: true,
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.scan_errors.is_empty(),
        "scan_errors: {:#?}",
        scanned.scan_errors
    );
    assert_eq!(
        scanned.package_data.len(),
        1,
        "package_data: {:#?}",
        scanned.package_data
    );

    let package = &scanned.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-info-demo"));

    // Checksums are expected to be present when info collection is on.
    assert!(scanned.sha1.is_some());
    assert!(scanned.md5.is_some());
    assert!(scanned.sha256.is_some());
    assert!(scanned.sha1_git.is_some());

    // File classification reported for the RPM.
    assert_eq!(scanned.mime_type.as_deref(), Some("application/x-rpm"));
    assert_eq!(scanned.file_type_label.as_deref(), Some("RPM package"));
    assert_eq!(scanned.is_binary, Some(true));
    assert_eq!(scanned.is_text, Some(false));
    assert_eq!(scanned.is_archive, Some(true));
}
1199
/// Scans the fixture produced by `build_sparse_oversized_pack_rpm` with only
/// package detection enabled and expects one `RpmArchive` package record and
/// no scan errors.
#[test]
fn scanner_parses_oversized_pack_rpm_in_package_only_mode_without_size_warning() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = build_sparse_oversized_pack_rpm(&temp_dir, "oversized-pack-demo");
    let expected_path = file_path.to_string_lossy();

    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.scan_errors.is_empty(),
        "scan_errors: {:#?}",
        scanned.scan_errors
    );
    assert_eq!(
        scanned.package_data.len(),
        1,
        "package_data: {:#?}",
        scanned.package_data
    );

    let package = &scanned.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-pack-demo"));
}
1256
/// Pack-RPM fixture scanned with both package detection and `collect_info`
/// enabled: the package must parse without scan errors and the info fields
/// must be filled in.
#[test]
fn scanner_parses_oversized_pack_rpm_with_info_without_timeout_or_size_warning() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = build_sparse_oversized_pack_rpm(&temp_dir, "oversized-pack-info-demo");
    let expected_path = file_path.to_string_lossy();

    let options = TextDetectionOptions {
        collect_info: true,
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.scan_errors.is_empty(),
        "scan_errors: {:#?}",
        scanned.scan_errors
    );
    assert_eq!(
        scanned.package_data.len(),
        1,
        "package_data: {:#?}",
        scanned.package_data
    );

    let package = &scanned.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-pack-info-demo"));

    // Checksums are expected to be present when info collection is on.
    assert!(scanned.sha1.is_some());
    assert!(scanned.md5.is_some());
    assert!(scanned.sha256.is_some());
    assert!(scanned.sha1_git.is_some());

    // File classification reported for the RPM.
    assert_eq!(scanned.mime_type.as_deref(), Some("application/x-rpm"));
    assert_eq!(scanned.file_type_label.as_deref(), Some("RPM package"));
    assert_eq!(scanned.is_binary, Some(true));
    assert_eq!(scanned.is_text, Some(false));
    assert_eq!(scanned.is_archive, Some(true));
}
1322
/// When only system-package detection is enabled, an application manifest
/// (`package.json`) must not produce package data.
#[test]
fn scanner_skips_application_packages_when_only_system_packages_enabled() {
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo","version":"1.0.0"}"#;

    let scanned = scan_single_file("package.json", manifest, &options);

    assert!(
        scanned.package_data.is_empty(),
        "package_data: {:#?}",
        scanned.package_data
    );
}
1351
/// With system-package detection enabled, a dpkg status file placed at
/// `var/lib/dpkg/status` must yield at least one package record.
#[test]
fn scanner_parses_system_package_files_when_enabled() {
    let options = TextDetectionOptions {
        detect_packages: true,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let dpkg_status =
        b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n";

    let scanned = scan_file_at_relative_path("var/lib/dpkg/status", dpkg_status, &options);

    assert!(
        !scanned.package_data.is_empty(),
        "package_data: {:#?}",
        scanned.package_data
    );
}
1380
/// Builds a tiny Go program (when the `go` command can be spawned) and checks
/// that package data is extracted from the resulting binary only when
/// `detect_packages_in_compiled` is set.
#[test]
fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
    // Return early, passing the test, when the `go` command cannot be spawned.
    let go_available = std::process::Command::new("go")
        .arg("version")
        .status()
        .is_ok();
    if !go_available {
        return;
    }

    let temp_dir = TempDir::new().expect("create temp dir");
    let project_dir = temp_dir.path();
    fs::write(
        project_dir.join("go.mod"),
        "module example.com/demo\n\ngo 1.23.0\n",
    )
    .expect("write go.mod");
    fs::write(
        project_dir.join("main.go"),
        "package main\nfunc main() {}\n",
    )
    .expect("write main.go");

    let file_path = project_dir.join("demo");
    let build_status = std::process::Command::new("go")
        .current_dir(project_dir)
        .args(["build", "-o"])
        .arg(&file_path)
        .status()
        .expect("run go build");
    assert!(build_status.success());

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(project_dir, 0, &[]);

    // The two runs differ only in `detect_packages_in_compiled`.
    let plain_options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let compiled_options = TextDetectionOptions {
        detect_packages_in_compiled: true,
        ..plain_options.clone()
    };

    let without_compiled = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &plain_options,
    );
    let with_compiled = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &compiled_options,
    );

    let without_compiled = without_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");
    let with_compiled = with_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");

    assert!(
        without_compiled.package_data.is_empty(),
        "package_data: {:#?}",
        without_compiled.package_data
    );
    assert!(!with_compiled.package_data.is_empty());
}
1473
/// Scans a PE fixture twice: with package detection off (no package data
/// expected) and on (exactly one `Winexe` / `WindowsExecutable` record
/// expected, without needing `detect_packages_in_compiled`).
#[test]
fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("libiconv2.dll");
    let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&file_path, fixture).expect("write PE fixture");

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);

    // Baseline: everything off. Second run: ordinary package detection on.
    let disabled_options = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let enabled_options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        ..disabled_options.clone()
    };

    let without_package = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &disabled_options,
    );
    let with_package = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &enabled_options,
    );

    let without_package = without_package
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");
    let with_package = with_package
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");

    assert!(without_package.package_data.is_empty());
    assert_eq!(with_package.package_data.len(), 1);

    let package = &with_package.package_data[0];
    assert_eq!(package.package_type, Some(FilePackageType::Winexe));
    assert_eq!(package.datasource_id, Some(DatasourceId::WindowsExecutable));
}
1552
/// Builds a synthetic file from the PE fixture (zero-padded to at least
/// 70,000 bytes, then suffixed with the `Nullsoft.NSIS.exehead` marker) and
/// expects both an `NsisInstaller` and a `WindowsExecutable` package record.
#[test]
fn scanner_keeps_nsis_and_windows_executable_package_data_together() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("nsis-with-version.exe");

    let mut fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    // Pad with zeros up to 70,000 bytes; longer fixtures are left untouched.
    let padded_len = fixture.len().max(70_000);
    fixture.resize(padded_len, 0);
    fixture.extend_from_slice(b"Nullsoft.NSIS.exehead");
    fs::write(&file_path, fixture).expect("write synthetic NSIS PE fixture");

    let options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| {
            entry.file_type == FileType::File && entry.path.ends_with("/nsis-with-version.exe")
        })
        .expect("compiled artifact present");

    assert_eq!(
        scanned.package_data.len(),
        2,
        "package_data: {:#?}",
        scanned.package_data
    );

    // True when any package record carries the given datasource id.
    let has_datasource = |wanted: DatasourceId| {
        let wanted = Some(wanted);
        scanned
            .package_data
            .iter()
            .any(|pkg| pkg.datasource_id == wanted)
    };
    assert!(has_datasource(DatasourceId::NsisInstaller));
    assert!(has_datasource(DatasourceId::WindowsExecutable));
}
1615
/// Scans the Lato font fixture with the embedded license engine and default
/// text options, expecting a license expression that mentions OFL-1.1.
#[test]
fn scanner_detects_license_from_font_metadata() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("Lato-Bold.ttf");
    let fixture = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
    fs::write(&file_path, fixture).expect("write font fixture");
    let expected_path = file_path.to_string_lossy();

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.license_expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    // Accept either casing of the OFL identifier.
    let mentions_ofl = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("OFL-1.1") || expression.contains("ofl-1.1")
    );
    assert!(
        mentions_ofl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1658
/// Scans the `libiconv2.dll` PE fixture with the embedded license engine and
/// default text options, expecting a license expression that mentions LGPL.
#[test]
fn scanner_detects_license_from_windows_executable_metadata() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("libiconv2.dll");
    let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&file_path, fixture).expect("write PE fixture");
    let expected_path = file_path.to_string_lossy();

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    assert!(
        scanned.license_expression.is_some(),
        "license detections: {:#?}",
        scanned.license_detections
    );

    // Accept either casing of the LGPL identifier.
    let mentions_lgpl = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("lgpl") || expression.contains("LGPL")
    );
    assert!(
        mentions_lgpl,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1702
/// Writes a hand-assembled minimal PE image whose security data directory
/// points at a blob carrying a UTF-16LE notice, scans it with the embedded
/// license engine, and expects a `proprietary-license` expression.
#[test]
fn scanner_detects_license_from_windows_executable_security_notice() {
    /// Copies `data` into `image` starting at byte offset `at`.
    fn put(image: &mut [u8], at: usize, data: &[u8]) {
        image[at..at + data.len()].copy_from_slice(data);
    }

    /// Assembles the synthetic PE bytes with `notice` embedded as UTF-16LE in
    /// the blob referenced by the security data directory.
    fn synthetic_pe_with_security_notice(notice: &str) -> Vec<u8> {
        let cert_payload: Vec<u8> = notice
            .encode_utf16()
            .flat_map(u16::to_le_bytes)
            .collect();

        // Blob layout: u32 length (8-byte header + payload), two u16 header
        // fields, the payload, then zero padding to a multiple of 8 bytes.
        let cert_len = (8 + cert_payload.len()) as u32;
        let mut cert = Vec::new();
        cert.extend_from_slice(&cert_len.to_le_bytes());
        cert.extend_from_slice(&0x0200u16.to_le_bytes());
        cert.extend_from_slice(&0x0002u16.to_le_bytes());
        cert.extend_from_slice(&cert_payload);
        let padded_len = cert.len().next_multiple_of(8);
        cert.resize(padded_len, 0);

        let offset = 0x200usize;
        let size = cert.len();
        let optional_header_size = 224usize;
        let pe_header_offset = 0x80usize;
        let nt_headers_offset = pe_header_offset + 4;
        let optional_header_offset = nt_headers_offset + 20;
        let data_directory_offset = optional_header_offset + 96;
        let security_directory_offset =
            data_directory_offset + pe::IMAGE_DIRECTORY_ENTRY_SECURITY * 8;

        let mut image = vec![0u8; offset + size];

        // DOS stub: magic plus the pointer to the PE signature.
        put(&mut image, 0, b"MZ");
        put(&mut image, 0x3c, &(pe_header_offset as u32).to_le_bytes());
        put(&mut image, pe_header_offset, b"PE\0\0");

        // Header following the signature: first u16 field and, at +16, the
        // optional header size.
        put(&mut image, nt_headers_offset, &0x014cu16.to_le_bytes());
        put(
            &mut image,
            nt_headers_offset + 16,
            &(optional_header_size as u16).to_le_bytes(),
        );

        // Optional header: magic, the value 16 at +92, and the security
        // directory entry (offset, size) pointing at the blob.
        put(&mut image, optional_header_offset, &0x010bu16.to_le_bytes());
        put(&mut image, optional_header_offset + 92, &16u32.to_le_bytes());
        put(
            &mut image,
            security_directory_offset,
            &(offset as u32).to_le_bytes(),
        );
        put(
            &mut image,
            security_directory_offset + 4,
            &(size as u32).to_le_bytes(),
        );
        put(&mut image, offset, &cert);

        image
    }

    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("signed.dll");
    let fixture = synthetic_pe_with_security_notice(
        "use of this Certificate constitutes acceptance of the DigiCert CP/CPS and the Relying Party Agreement which limit liability and are incorporated herein by reference.",
    );
    fs::write(&file_path, fixture).expect("write PE fixture");
    let expected_path = file_path.to_string_lossy();

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );

    let scanned = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
        .expect("scanned file entry");

    let is_proprietary = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("proprietary-license")
    );
    assert!(
        is_proprietary,
        "license expression: {:?}, detections: {:#?}",
        scanned.license_expression,
        scanned.license_detections
    );
}
1790
/// A Markdown file whose HTML comments name CC BY 4.0 and link to the license
/// must be reported with a CC-BY-4.0 license expression.
#[test]
fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
    let banner = "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n";

    let scanned = scan_single_file_with_license_engine(
        "navbar.md",
        banner,
        &TextDetectionOptions::default(),
    );

    // Accept either casing of the identifier.
    let mentions_cc_by = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("cc-by-4.0") || expression.contains("CC-BY-4.0")
    );
    assert!(
        mentions_cc_by,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1810
/// A README consisting of a single Markdown link to the MIT license page must
/// be reported with an MIT license expression.
#[test]
fn scanner_detects_mit_license_from_shields_badge_markdown() {
    let readme = "[](https://opensource.org/licenses/MIT)\n";

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    // Accept either casing of the identifier.
    let mentions_mit = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("mit") || expression.contains("MIT")
    );
    assert!(
        mentions_mit,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1830
/// A README sentence naming "the Apache License (Version 2.0)" must be
/// reported with an Apache-2.0 license expression.
#[test]
fn scanner_detects_apache_license_from_markdown_readme_phrase() {
    let readme =
        "This crate is distributed under the terms of the Apache License (Version 2.0).\n";

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    // Accept either casing of the identifier.
    let mentions_apache = matches!(
        scanned.license_expression.as_deref(),
        Some(expression) if expression.contains("apache-2.0") || expression.contains("Apache-2.0")
    );
    assert!(
        mentions_apache,
        "license expression: {:?}",
        scanned.license_expression
    );
}
1850
/// For a README with an "either of Apache-2.0 / MIT" notice followed by a
/// contribution clause that mentions Apache-2.0, the file-level expression
/// must be the OR of both licenses and no detection may be plain `Apache-2.0`.
#[test]
fn scanner_prefers_dual_license_readme_expression_over_supplemental_mentions() {
    let readme = concat!(
        "## License\n\n",
        "Licensed under either of:\n\n",
        " * [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n",
        " * [MIT license](https://opensource.org/licenses/MIT)\n\n",
        "at your option.\n\n",
        "### Contribution\n\n",
        "Unless you explicitly state otherwise, any contribution intentionally submitted\n",
        "for inclusion in the work by you, as defined in the Apache-2.0 license, shall be\n",
        "dual licensed as above, without any additional terms or conditions.\n",
    );

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    // Either operand order is acceptable.
    let expression = scanned.license_expression.as_deref();
    let is_dual_license =
        expression == Some("Apache-2.0 OR MIT") || expression == Some("MIT OR Apache-2.0");
    assert!(
        is_dual_license,
        "license expression: {:?}",
        scanned.license_expression
    );

    let no_standalone_apache = scanned
        .license_detections
        .iter()
        .all(|detection| detection.license_expression_spdx != "Apache-2.0");
    assert!(
        no_standalone_apache,
        "detections: {:?}",
        scanned.license_detections
    );
}
1886
/// Same dual-license README plus a badge reference line containing
/// `Apache2.0/MIT`: no detection may carry the `Apache-2.0 AND MIT` expression.
#[test]
fn scanner_drops_redundant_conjunctive_readme_detection_when_or_notice_exists() {
    let readme = concat!(
        "## License\n\n",
        "Licensed under either of:\n\n",
        " * [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n",
        " * [MIT license](https://opensource.org/licenses/MIT)\n\n",
        "at your option.\n\n",
        "### Contribution\n\n",
        "Unless you explicitly state otherwise, any contribution intentionally submitted\n",
        "for inclusion in the work by you, as defined in the Apache-2.0 license, shall be\n",
        "dual licensed as above, without any additional terms or conditions.\n\n",
        "[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg\n",
    );

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );

    let no_conjunctive_detection = scanned
        .license_detections
        .iter()
        .all(|detection| detection.license_expression_spdx != "Apache-2.0 AND MIT");
    assert!(no_conjunctive_detection);
}
1913
/// For a one-line "dual-licensed under MIT and Apache 2.0" README, the result
/// must be an OR expression, must include an OR detection and an `MIT`
/// detection, and must not include the unknown-license-reference placeholder.
#[test]
fn scanner_drops_unknown_placeholder_from_dual_license_readme_notice() {
    let readme = concat!(
        "## License\n\n",
        "This project is dual-licensed under MIT and Apache 2.0.\n",
    );

    let scanned = scan_single_file_with_license_engine(
        "README.md",
        readme,
        &TextDetectionOptions::default(),
    );
    let detections = &scanned.license_detections;

    // Either operand order is acceptable for the file-level expression.
    let expression = scanned.license_expression.as_deref();
    let is_dual_license =
        expression == Some("Apache-2.0 OR MIT") || expression == Some("MIT OR Apache-2.0");
    assert!(
        is_dual_license,
        "license expression: {:?}",
        scanned.license_expression
    );

    let has_or_detection = detections.iter().any(|detection| {
        let spdx = &detection.license_expression_spdx;
        spdx.contains("Apache-2.0 OR MIT") || spdx.contains("MIT OR Apache-2.0")
    });
    assert!(has_or_detection);

    let no_unknown_placeholder = detections.iter().all(|detection| {
        detection.license_expression_spdx != "LicenseRef-scancode-unknown-license-reference"
    });
    assert!(no_unknown_placeholder);

    let has_mit_detection = detections
        .iter()
        .any(|detection| detection.license_expression_spdx == "MIT");
    assert!(
        has_mit_detection,
        "detections: {:?}",
        scanned.license_detections
    );
}
1953
/// `is_source` must be `None` when `collect_info` is off and `Some(true)` for
/// a Rust source file when it is on.
#[test]
fn scanner_sets_is_source_only_when_info_enabled() {
    // All detectors off; the two option sets differ only in `collect_info`.
    let without_info = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let with_info = TextDetectionOptions {
        collect_info: true,
        ..without_info.clone()
    };
    let source = "fn main() {}\n";

    let scanned_without_info = scan_single_file("main.rs", source, &without_info);
    let scanned_with_info = scan_single_file("main.rs", source, &with_info);

    assert_eq!(scanned_without_info.is_source, None);
    assert_eq!(scanned_with_info.is_source, Some(true));
}
1981
/// With `collect_info` disabled, a directory entry must leave its date, type
/// label and classification flags unset.
#[test]
fn directory_omits_info_fields_when_info_disabled() {
    let temp_dir = TempDir::new().expect("create temp dir");
    fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");

    let options = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let directory = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    assert!(directory.date.is_none());
    assert!(directory.file_type_label.is_none());
    assert!(directory.is_binary.is_none());
    assert!(directory.is_text.is_none());
    assert!(directory.is_archive.is_none());
    assert!(directory.is_media.is_none());
    assert!(directory.is_source.is_none());
    assert!(directory.is_script.is_none());
}
2024
/// With `collect_info` enabled, an empty directory entry must report `false`
/// for every classification flag and zero for its file, directory and size
/// counts, while date and type label stay unset.
#[test]
fn directory_includes_info_fields_when_info_enabled() {
    let temp_dir = TempDir::new().expect("create temp dir");
    fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");

    let options = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let result = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &options,
    );

    let directory = result
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
        .expect("directory entry");

    // Still unset for directories even with info collection on.
    assert!(directory.date.is_none());
    assert!(directory.file_type_label.is_none());

    // Classification flags are present and all false.
    assert_eq!(directory.is_binary, Some(false));
    assert_eq!(directory.is_text, Some(false));
    assert_eq!(directory.is_archive, Some(false));
    assert_eq!(directory.is_media, Some(false));
    assert_eq!(directory.is_source, Some(false));
    assert_eq!(directory.is_script, Some(false));

    // The directory is empty, so every aggregate count is zero.
    assert_eq!(directory.files_count, Some(0));
    assert_eq!(directory.dirs_count, Some(0));
    assert_eq!(directory.size_count, Some(0));
}
2070
/// `collect_paths` on a directory must list the root directory itself among
/// the collected directories.
#[test]
fn collect_paths_includes_root_directory_entry() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();
    let src_dir = root.join("src");
    fs::create_dir_all(&src_dir).expect("create nested dir");
    fs::write(src_dir.join("main.rs"), "fn main() {}").expect("write nested file");

    let collected = collect_paths(root, 0, &[]);

    let root_listed = collected
        .directories
        .iter()
        .any(|(path, _)| path == root);
    assert!(root_listed);
}
2087
/// Pointing `collect_paths` at a single file must yield exactly that file and
/// no directories.
#[test]
fn collect_paths_supports_single_file_input() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("main.rs");
    fs::write(&file_path, "fn main() {}\n").expect("write file");

    let collected = collect_paths(&file_path, 0, &[]);

    assert!(collected.directories.is_empty());
    assert_eq!(collected.files.len(), 1);
    let (only_path, _) = &collected.files[0];
    assert_eq!(*only_path, file_path);
}
2100
/// Selecting only `selected/` must not touch the sibling `blocked/` directory:
/// `blocked/` is made inaccessible (mode `0o000`), and the collection is
/// expected to finish without errors and without any file under `blocked/`.
///
/// The directory's permissions are restored by a drop guard, so the temporary
/// tree stays removable even if the code under test panics.
#[cfg(unix)]
#[test]
fn collect_selected_paths_does_not_walk_unselected_siblings() {
    use std::os::unix::fs::PermissionsExt;

    /// Puts the saved permissions back on `path` when dropped.
    struct PermissionGuard {
        path: PathBuf,
        original: fs::Permissions,
    }

    impl Drop for PermissionGuard {
        fn drop(&mut self) {
            // Best effort only: panicking here while already unwinding would
            // abort the test process.
            let _ = fs::set_permissions(&self.path, self.original.clone());
        }
    }

    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();
    fs::create_dir_all(root.join("selected/docs")).expect("create selected dir");
    fs::create_dir_all(root.join("blocked/secret")).expect("create blocked dir");
    fs::write(root.join("selected/docs/guide.md"), "# guide\n").expect("write guide");

    let blocked = root.join("blocked");
    let original = fs::metadata(&blocked)
        .expect("blocked metadata")
        .permissions();
    let mut locked = original.clone();
    locked.set_mode(0o000);
    fs::set_permissions(&blocked, locked).expect("remove blocked permissions");
    // Declared after `temp_dir`, so on unwind it is dropped (and permissions
    // are restored) before the temporary directory is removed.
    let restore_guard = PermissionGuard {
        path: blocked.clone(),
        original,
    };

    let collected = collect_selected_paths(
        root,
        &[CollectionFrontier {
            path: PathBuf::from("selected"),
            recurse: true,
        }],
        0,
        &[],
    );

    // Restore access before asserting, mirroring the normal-path ordering.
    drop(restore_guard);

    assert!(
        collected.collection_errors.is_empty(),
        "{:#?}",
        collected.collection_errors
    );
    assert!(
        collected
            .files
            .iter()
            .any(|(path, _)| path == &root.join("selected/docs/guide.md"))
    );
    assert!(
        collected
            .files
            .iter()
            .all(|(path, _): &(PathBuf, fs::Metadata)| !path.starts_with(&blocked))
    );
}
2153
/// A file that is selected explicitly must still be dropped when the exclude
/// patterns built for the scan root apply to it: here `.git/config` is
/// requested directly and is expected to be counted as excluded instead of
/// collected.
#[test]
fn collect_selected_paths_respects_excluded_ancestor_directories() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();

    fs::create_dir_all(root.join(".git")).expect("create git dir");
    let git_config_body = "[core]\nrepositoryformatversion = 0\n";
    fs::write(root.join(".git/config"), git_config_body).expect("write git config");

    let cache_dir = root.join(".provenant-cache");
    let exclude_patterns = build_collection_exclude_patterns(root, &cache_dir);

    // Ask for the single file only; no recursion is needed for a file target.
    let frontier = [CollectionFrontier {
        path: PathBuf::from(".git/config"),
        recurse: false,
    }];
    let collected = collect_selected_paths(root, &frontier, 0, &exclude_patterns);

    assert!(collected.files.is_empty());
    // Any directory that is reported must be the scan root itself.
    let only_root_dirs = collected.directories.iter().all(|(dir, _)| dir == root);
    assert!(only_root_dirs);
    assert_eq!(collected.excluded_count, 1);
}
2181
/// Processing with `MemoryMode::Limit(1)` must still return every entry.
///
/// Two files are written, yet three results are expected; the extra entry is
/// presumably the root directory itself (confirm against `collect_paths`).
/// Judging by the test name, the limit of `1` is meant to force results to be
/// spilled out of memory.
#[test]
fn process_collected_with_memory_limit_preserves_results_when_spilling() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();
    fs::write(root.join("a.txt"), "hello").expect("write first file");
    fs::write(root.join("b.txt"), "world").expect("write second file");

    // Every detector is switched off: only the result count matters here.
    let text_options = TextDetectionOptions {
        collect_info: false,
        detect_packages: false,
        detect_application_packages: false,
        detect_system_packages: false,
        detect_packages_in_compiled: false,
        detect_copyrights: false,
        detect_generated: false,
        detect_emails: false,
        detect_urls: false,
        max_emails: 50,
        max_urls: 50,
        timeout_seconds: 120.0,
    };

    let collected = collect_paths(root, 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));

    let result = process_collected_with_memory_limit(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &text_options,
        MemoryMode::Limit(1),
    );

    assert_eq!(result.files.len(), 3);
}
2213
/// Processing with `MemoryMode::StreamUnlimited` must return every entry.
///
/// One file is written and two results are expected; the extra entry is
/// presumably the root directory itself (confirm against `collect_paths`).
// NOTE(review): the test name talks about "negative one" / disk-only mode while
// the call passes `MemoryMode::StreamUnlimited`; presumably that variant is
// what a `-1` setting maps to — confirm against the `MemoryMode` definition.
#[test]
fn process_collected_with_negative_one_uses_disk_only_mode() {
    let temp_dir = TempDir::new().expect("create temp dir");
    let root = temp_dir.path();
    fs::write(root.join("a.txt"), "hello").expect("write first file");

    // Every detector is switched off: only the result count matters here.
    let text_options = TextDetectionOptions {
        collect_info: false,
        detect_packages: false,
        detect_application_packages: false,
        detect_system_packages: false,
        detect_packages_in_compiled: false,
        detect_copyrights: false,
        detect_generated: false,
        detect_emails: false,
        detect_urls: false,
        max_emails: 50,
        max_urls: 50,
        timeout_seconds: 120.0,
    };

    let collected = collect_paths(root, 0, &[]);
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));

    let result = process_collected_with_memory_limit(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &text_options,
        MemoryMode::StreamUnlimited,
    );

    assert_eq!(result.files.len(), 2);
}
2244}