1mod collect;
5pub(crate) mod process;
6
7use crate::license_detection::LicenseDetectionEngine;
8use crate::models::FileInfo;
9
10pub struct ProcessResult {
11 pub files: Vec<FileInfo>,
12 pub excluded_count: usize,
13}
14
/// Switches for the license scan.
///
/// Every field is written into the string built by
/// [`scan_options_fingerprint`]. The derived `Default` leaves all flags
/// `false` and `min_score` at `0`.
#[derive(Debug, Clone, Copy, Default)]
pub struct LicenseScanOptions {
    /// Fingerprinted as `license_text`.
    /// NOTE(review): presumably asks for matched text in the output — confirm
    /// against the license scanner.
    pub include_text: bool,
    /// Fingerprinted as `license_text_diagnostics`.
    pub include_text_diagnostics: bool,
    /// Fingerprinted as `license_diagnostics`.
    pub include_diagnostics: bool,
    /// Fingerprinted as `unknown_licenses`.
    pub unknown_licenses: bool,
    /// Fingerprinted as `license_score`.
    /// NOTE(review): looks like a minimum match score threshold — confirm the
    /// valid range with the consumer.
    pub min_score: u8,
}
23
/// Per-scan switches and limits for the non-license detectors.
///
/// Every field is written into the string built by
/// [`scan_options_fingerprint`].
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Fingerprinted as `info`; the tests in this file show that file
    /// metadata (hashes, MIME type, language, `is_*` flags) is only populated
    /// when this is `true`.
    pub collect_info: bool,
    /// Fingerprinted as `packages`.
    pub detect_packages: bool,
    /// Fingerprinted as `app_packages`.
    pub detect_application_packages: bool,
    /// Fingerprinted as `system_packages`.
    pub detect_system_packages: bool,
    /// Fingerprinted as `compiled_packages`.
    pub detect_packages_in_compiled: bool,
    /// Fingerprinted as `copyrights`.
    pub detect_copyrights: bool,
    /// Fingerprinted as `generated`.
    pub detect_generated: bool,
    /// Fingerprinted as `emails`.
    pub detect_emails: bool,
    /// Fingerprinted as `urls`.
    pub detect_urls: bool,
    /// Fingerprinted as `max_emails`.
    /// NOTE(review): presumably a per-file cap on reported emails — confirm.
    pub max_emails: usize,
    /// Fingerprinted as `max_urls`.
    /// NOTE(review): presumably a per-file cap on reported URLs — confirm.
    pub max_urls: usize,
    /// Fingerprinted as `timeout` with six decimal places.
    /// NOTE(review): presumably a per-file time budget in seconds — confirm.
    pub timeout_seconds: f64,
}
39
40impl Default for TextDetectionOptions {
41 fn default() -> Self {
42 Self {
43 collect_info: false,
44 detect_packages: false,
45 detect_application_packages: false,
46 detect_system_packages: false,
47 detect_packages_in_compiled: false,
48 detect_copyrights: true,
49 detect_generated: false,
50 detect_emails: false,
51 detect_urls: false,
52 max_emails: 50,
53 max_urls: 50,
54 timeout_seconds: 120.0,
55 }
56 }
57}
58
59pub fn scan_options_fingerprint(
60 text_options: &TextDetectionOptions,
61 license_options: LicenseScanOptions,
62 license_engine: Option<&LicenseDetectionEngine>,
63) -> String {
64 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
65 Some(engine) => {
66 let rules = &engine.index().rules_by_rid;
67 (
68 true,
69 rules.len(),
70 rules
71 .first()
72 .map(|rule| rule.identifier.as_str())
73 .unwrap_or(""),
74 rules
75 .last()
76 .map(|rule| rule.identifier.as_str())
77 .unwrap_or(""),
78 )
79 }
80 None => (false, 0, "", ""),
81 };
82
83 format!(
84 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
85 crate::version::BUILD_VERSION,
86 text_options.collect_info,
87 text_options.detect_packages,
88 text_options.detect_application_packages,
89 text_options.detect_system_packages,
90 text_options.detect_packages_in_compiled,
91 text_options.detect_copyrights,
92 text_options.detect_generated,
93 text_options.detect_emails,
94 text_options.detect_urls,
95 text_options.max_emails,
96 text_options.max_urls,
97 text_options.timeout_seconds,
98 license_enabled,
99 rules_count,
100 first_rule_id,
101 last_rule_id,
102 license_options.include_text,
103 license_options.include_text_diagnostics,
104 license_options.include_diagnostics,
105 license_options.unknown_licenses,
106 license_options.min_score,
107 )
108}
109
110pub use self::collect::{
111 CollectedPaths, CollectionFrontier, collect_paths, collect_selected_paths,
112};
113#[allow(unused_imports)]
114pub use self::process::{
115 MemoryMode, process_collected, process_collected_sequential,
116 process_collected_with_memory_limit, process_collected_with_memory_limit_sequential,
117};
118
119#[cfg(test)]
120mod tests {
121 use std::fs;
122 use std::path::PathBuf;
123 use std::sync::Arc;
124
125 use object::pe;
126 use tempfile::TempDir;
127
128 use crate::cache::build_collection_exclude_patterns;
129 use crate::license_detection::LicenseDetectionEngine;
130 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
131 use crate::progress::{ProgressMode, ScanProgress};
132
133 use super::{
134 CollectionFrontier, LicenseScanOptions, MemoryMode, TextDetectionOptions, collect_paths,
135 collect_selected_paths, process_collected, process_collected_with_memory_limit,
136 scan_options_fingerprint,
137 };
138
139 fn build_sparse_oversized_rpm_with_filename(
140 temp_dir: &TempDir,
141 package_name: &str,
142 filename: &str,
143 ) -> PathBuf {
144 let file_path = temp_dir.path().join(filename);
145 rpm::PackageBuilder::new(package_name, "1.0", "MIT", "x86_64", "Demo RPM package")
146 .release("1")
147 .build()
148 .expect("build rpm fixture")
149 .write_file(&file_path)
150 .expect("write rpm fixture");
151 fs::OpenOptions::new()
152 .write(true)
153 .open(&file_path)
154 .expect("open rpm fixture for sparse extension")
155 .set_len(100 * 1024 * 1024 + 1_048_576)
156 .expect("extend rpm fixture");
157 file_path
158 }
159
160 fn build_sparse_oversized_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
161 build_sparse_oversized_rpm_with_filename(
162 temp_dir,
163 name,
164 &format!("{name}-1.0-1.x86_64.rpm"),
165 )
166 }
167
168 fn build_sparse_oversized_pack_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
169 build_sparse_oversized_rpm_with_filename(
170 temp_dir,
171 name,
172 &format!("{name}-1.0-1.x86_64.pack"),
173 )
174 }
175
176 #[test]
177 fn default_options_keep_copyright_detection_enabled() {
178 let options = TextDetectionOptions::default();
179 assert!(!options.detect_packages);
180 assert!(options.detect_copyrights);
181 }
182
183 #[test]
184 fn test_scan_options_fingerprint_changes_with_license_score() {
185 let text_options = TextDetectionOptions::default();
186 let default_fingerprint = scan_options_fingerprint(
187 &text_options,
188 LicenseScanOptions {
189 min_score: 0,
190 ..LicenseScanOptions::default()
191 },
192 None,
193 );
194 let filtered_fingerprint = scan_options_fingerprint(
195 &text_options,
196 LicenseScanOptions {
197 min_score: 70,
198 ..LicenseScanOptions::default()
199 },
200 None,
201 );
202
203 assert_ne!(default_fingerprint, filtered_fingerprint);
204 }
205
206 fn scan_single_file(
207 file_name: &str,
208 content: &str,
209 options: &TextDetectionOptions,
210 ) -> crate::models::FileInfo {
211 let temp_dir = TempDir::new().expect("create temp dir");
212 let file_path = temp_dir.path().join(file_name);
213 fs::write(&file_path, content).expect("write test file");
214
215 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
216 let collected = collect_paths(temp_dir.path(), 0, &[]);
217 let result = process_collected(
218 &collected,
219 progress,
220 None,
221 LicenseScanOptions::default(),
222 options,
223 );
224
225 result
226 .files
227 .into_iter()
228 .find(|entry| {
229 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
230 })
231 .expect("scanned file entry")
232 }
233
234 fn scan_file_at_relative_path(
235 relative_path: &str,
236 content: &[u8],
237 options: &TextDetectionOptions,
238 ) -> crate::models::FileInfo {
239 let temp_dir = TempDir::new().expect("create temp dir");
240 let file_path = temp_dir.path().join(relative_path);
241 if let Some(parent) = file_path.parent() {
242 fs::create_dir_all(parent).expect("create parent dirs");
243 }
244 fs::write(&file_path, content).expect("write test file");
245
246 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
247 let collected = collect_paths(temp_dir.path(), 0, &[]);
248 let result = process_collected(
249 &collected,
250 progress,
251 None,
252 LicenseScanOptions::default(),
253 options,
254 );
255
256 result
257 .files
258 .into_iter()
259 .find(|entry| {
260 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
261 })
262 .expect("scanned file entry")
263 }
264
265 fn scan_single_file_with_license_engine(
266 file_name: &str,
267 content: &str,
268 options: &TextDetectionOptions,
269 ) -> crate::models::FileInfo {
270 let temp_dir = TempDir::new().expect("create temp dir");
271 let file_path = temp_dir.path().join(file_name);
272 fs::write(&file_path, content).expect("write test file");
273
274 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
275 let collected = collect_paths(temp_dir.path(), 0, &[]);
276 let engine =
277 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
278 let result = process_collected(
279 &collected,
280 progress,
281 Some(engine),
282 LicenseScanOptions::default(),
283 options,
284 );
285
286 result
287 .files
288 .into_iter()
289 .find(|entry| {
290 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
291 })
292 .expect("scanned file entry")
293 }
294
295 #[test]
296 fn scanner_reports_repeated_email_occurrences() {
297 let options = TextDetectionOptions {
298 collect_info: false,
299 detect_packages: false,
300 detect_application_packages: false,
301 detect_system_packages: false,
302 detect_packages_in_compiled: false,
303 detect_copyrights: false,
304 detect_generated: false,
305 detect_emails: true,
306 detect_urls: false,
307 max_emails: 50,
308 max_urls: 50,
309 timeout_seconds: 120.0,
310 };
311 let scanned = scan_single_file(
312 "contacts.txt",
313 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
314 &options,
315 );
316
317 let emails: Vec<(&str, usize)> = scanned
318 .emails
319 .iter()
320 .map(|email| (email.email.as_str(), email.start_line.get()))
321 .collect();
322
323 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
324 assert_eq!(
325 emails,
326 vec![
327 ("linux@3ware.com", 1),
328 ("linux@3ware.com", 2),
329 ("andre@suse.com", 3),
330 ("linux@3ware.com", 4),
331 ]
332 );
333 }
334
335 #[test]
336 fn scanner_skips_pem_certificate_text_detection() {
337 let options = TextDetectionOptions {
338 collect_info: false,
339 detect_packages: false,
340 detect_application_packages: false,
341 detect_system_packages: false,
342 detect_packages_in_compiled: false,
343 detect_copyrights: true,
344 detect_generated: false,
345 detect_emails: true,
346 detect_urls: true,
347 max_emails: 50,
348 max_urls: 50,
349 timeout_seconds: 120.0,
350 };
351 let pem_fixture = concat!(
352 "-----BEGIN CERTIFICATE-----\n",
353 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
354 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
355 "-----END CERTIFICATE-----\n",
356 "Certificate:\n",
357 " Data:\n",
358 " Signature Algorithm: sha1WithRSAEncryption\n",
359 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
360 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
361 " Contact: cert-owner@example.com\n",
362 );
363 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
364
365 assert!(
366 scanned.copyrights.is_empty(),
367 "copyrights: {:#?}",
368 scanned.copyrights
369 );
370 assert!(
371 scanned.holders.is_empty(),
372 "holders: {:#?}",
373 scanned.holders
374 );
375 assert!(
376 scanned.authors.is_empty(),
377 "authors: {:#?}",
378 scanned.authors
379 );
380 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
381 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
382 assert!(
383 scanned.license_detections.is_empty(),
384 "licenses: {:#?}",
385 scanned.license_detections
386 );
387 assert!(
388 scanned.license_clues.is_empty(),
389 "license clues: {:#?}",
390 scanned.license_clues
391 );
392 }
393
394 #[test]
395 fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
396 let options = TextDetectionOptions {
397 collect_info: false,
398 detect_packages: false,
399 detect_application_packages: false,
400 detect_system_packages: false,
401 detect_packages_in_compiled: false,
402 detect_copyrights: true,
403 detect_generated: false,
404 detect_emails: false,
405 detect_urls: true,
406 max_emails: 50,
407 max_urls: 50,
408 timeout_seconds: 120.0,
409 };
410 let fixture = concat!(
411 "/*\n",
412 "Copyright 2022 The Kubernetes Authors.\n\n",
413 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
414 "you may not use this file except in compliance with the License.\n",
415 "You may obtain a copy of the License at\n\n",
416 " http://www.apache.org/licenses/LICENSE-2.0\n",
417 "*/\n\n",
418 "package storage\n\n",
419 "const validCert = `\n",
420 "-----BEGIN CERTIFICATE-----\n",
421 "MIIDmTCCAoGgAwIBAgIUWQ==\n",
422 "-----END CERTIFICATE-----\n",
423 "`\n",
424 );
425 let temp_dir = TempDir::new().expect("create temp dir");
426 let file_path = temp_dir.path().join("storage_test.go");
427 fs::write(&file_path, fixture).expect("write fixture");
428
429 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
430 let collected = collect_paths(temp_dir.path(), 0, &[]);
431 let engine =
432 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
433 let result = process_collected(
434 &collected,
435 progress,
436 Some(engine),
437 LicenseScanOptions::default(),
438 &options,
439 );
440 let scanned = result
441 .files
442 .into_iter()
443 .find(|entry| {
444 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
445 })
446 .expect("scanned file entry");
447
448 assert!(
449 scanned
450 .copyrights
451 .iter()
452 .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors."),
453 "copyrights: {:#?}",
454 scanned.copyrights
455 );
456 assert!(
457 scanned
458 .holders
459 .iter()
460 .any(|h| h.holder == "The Kubernetes Authors"),
461 "holders: {:#?}",
462 scanned.holders
463 );
464 assert!(
465 scanned
466 .urls
467 .iter()
468 .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
469 "urls: {:#?}",
470 scanned.urls
471 );
472 assert_eq!(
473 scanned.detected_license_expression.as_deref(),
474 Some("Apache-2.0")
475 );
476 }
477
478 #[test]
479 fn scanner_detects_structured_credits_authors() {
480 let options = TextDetectionOptions {
481 collect_info: false,
482 detect_packages: false,
483 detect_application_packages: false,
484 detect_system_packages: false,
485 detect_packages_in_compiled: false,
486 detect_copyrights: true,
487 detect_generated: false,
488 detect_emails: false,
489 detect_urls: false,
490 max_emails: 50,
491 max_urls: 50,
492 timeout_seconds: 120.0,
493 };
494 let credits_fixture = concat!(
495 "N: Jack Lloyd\n",
496 "E: lloyd@randombit.net\n",
497 "W: http://www.randombit.net/\n",
498 );
499 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
500
501 let authors: Vec<(&str, usize, usize)> = scanned
502 .authors
503 .iter()
504 .map(|author| {
505 (
506 author.author.as_str(),
507 author.start_line.get(),
508 author.end_line.get(),
509 )
510 })
511 .collect();
512
513 assert_eq!(
514 authors,
515 vec![(
516 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
517 1,
518 3,
519 )]
520 );
521 assert!(scanned.copyrights.is_empty());
522 assert!(scanned.holders.is_empty());
523 }
524
525 #[test]
526 fn scanner_uses_or_for_alternative_license_header() {
527 let fixture =
528 include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");
529 let temp_dir = TempDir::new().expect("create temp dir");
530 let file_path = temp_dir.path().join("d2s.ipp");
531 fs::write(&file_path, fixture).expect("write fixture");
532
533 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
534 let collected = collect_paths(temp_dir.path(), 0, &[]);
535 let engine =
536 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
537 let result = process_collected(
538 &collected,
539 progress,
540 Some(engine),
541 LicenseScanOptions::default(),
542 &TextDetectionOptions::default(),
543 );
544 let scanned = result
545 .files
546 .into_iter()
547 .find(|entry| {
548 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
549 })
550 .expect("scanned file entry");
551
552 assert_eq!(
553 scanned.detected_license_expression.as_deref(),
554 Some("Apache-2.0 OR BSL-1.0")
555 );
556 assert!(
557 scanned.license_clues.is_empty(),
558 "license clues: {:#?}",
559 scanned.license_clues
560 );
561 assert_eq!(
562 scanned.license_detections.len(),
563 1,
564 "detections: {:#?}",
565 scanned.license_detections
566 );
567
568 let detection = &scanned.license_detections[0];
569 assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");
570
571 let match_expressions: Vec<_> = detection
572 .matches
573 .iter()
574 .map(|m| m.license_expression_spdx.as_str())
575 .collect();
576 assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
577 }
578
579 #[test]
580 fn scanner_sets_generated_flag_when_enabled() {
581 let options = TextDetectionOptions {
582 collect_info: false,
583 detect_packages: false,
584 detect_application_packages: false,
585 detect_system_packages: false,
586 detect_packages_in_compiled: false,
587 detect_copyrights: false,
588 detect_generated: true,
589 detect_emails: false,
590 detect_urls: false,
591 max_emails: 50,
592 max_urls: 50,
593 timeout_seconds: 120.0,
594 };
595 let scanned = scan_single_file(
596 "generated.c",
597 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
598 &options,
599 );
600
601 assert_eq!(scanned.is_generated, Some(true));
602 }
603
604 #[test]
605 fn scanner_leaves_generated_flag_unset_when_disabled() {
606 let options = TextDetectionOptions {
607 collect_info: false,
608 detect_packages: false,
609 detect_application_packages: false,
610 detect_system_packages: false,
611 detect_packages_in_compiled: false,
612 detect_copyrights: false,
613 detect_generated: false,
614 detect_emails: false,
615 detect_urls: false,
616 max_emails: 50,
617 max_urls: 50,
618 timeout_seconds: 120.0,
619 };
620 let scanned = scan_single_file(
621 "generated.c",
622 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
623 &options,
624 );
625
626 assert_eq!(scanned.is_generated, None);
627 }
628
629 #[test]
630 fn scanner_populates_info_surface_when_enabled() {
631 let options = TextDetectionOptions {
632 collect_info: true,
633 detect_packages: false,
634 detect_application_packages: false,
635 detect_system_packages: false,
636 detect_packages_in_compiled: false,
637 detect_copyrights: false,
638 detect_generated: false,
639 detect_emails: false,
640 detect_urls: false,
641 max_emails: 50,
642 max_urls: 50,
643 timeout_seconds: 120.0,
644 };
645 let scanned = scan_single_file(
646 "script.py",
647 "#!/usr/bin/env python3\nprint(\"hello\")\n",
648 &options,
649 );
650
651 assert!(scanned.sha1.is_some());
652 assert!(scanned.md5.is_some());
653 assert!(scanned.sha256.is_some());
654 assert!(scanned.sha1_git.is_some());
655 assert!(scanned.mime_type.is_some());
656 assert!(scanned.date.is_some());
657 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
658 assert_eq!(scanned.is_text, Some(true));
659 assert_eq!(scanned.is_script, Some(true));
660 assert_eq!(scanned.is_source, Some(true));
661 }
662
663 #[test]
664 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
665 let options = TextDetectionOptions {
666 collect_info: true,
667 detect_packages: false,
668 detect_application_packages: false,
669 detect_system_packages: false,
670 detect_packages_in_compiled: false,
671 detect_copyrights: false,
672 detect_generated: false,
673 detect_emails: false,
674 detect_urls: false,
675 max_emails: 50,
676 max_urls: 50,
677 timeout_seconds: 120.0,
678 };
679 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
680 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
681
682 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
683 assert_eq!(
684 scanned.file_type_label.as_deref(),
685 Some("python script, text executable")
686 );
687 assert_eq!(scanned.is_binary, Some(false));
688 assert_eq!(scanned.is_text, Some(true));
689 assert_eq!(scanned.is_script, Some(true));
690 assert_eq!(scanned.is_source, Some(true));
691 }
692
693 #[test]
694 fn scanner_skips_findings_for_zip_like_archives() {
695 let options = TextDetectionOptions {
696 collect_info: true,
697 detect_packages: false,
698 detect_application_packages: false,
699 detect_system_packages: false,
700 detect_packages_in_compiled: false,
701 detect_copyrights: true,
702 detect_generated: false,
703 detect_emails: true,
704 detect_urls: true,
705 max_emails: 50,
706 max_urls: 50,
707 timeout_seconds: 120.0,
708 };
709 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
710 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
711
712 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
713 assert_eq!(scanned.is_archive, Some(true));
714 assert!(scanned.license_detections.is_empty());
715 assert!(scanned.copyrights.is_empty());
716 assert!(scanned.emails.is_empty());
717 assert!(scanned.urls.is_empty());
718 }
719
720 #[test]
721 fn scanner_treats_typescript_sources_as_text_not_video_media() {
722 let options = TextDetectionOptions {
723 collect_info: true,
724 detect_packages: false,
725 detect_application_packages: false,
726 detect_system_packages: false,
727 detect_packages_in_compiled: false,
728 detect_copyrights: false,
729 detect_generated: false,
730 detect_emails: false,
731 detect_urls: false,
732 max_emails: 50,
733 max_urls: 50,
734 timeout_seconds: 120.0,
735 };
736 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
737
738 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
739 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
740 assert_eq!(
741 scanned.file_type_label.as_deref(),
742 Some("TypeScript source, UTF-8 Unicode text")
743 );
744 assert_eq!(scanned.is_text, Some(true));
745 assert_eq!(scanned.is_media, Some(false));
746 assert_eq!(scanned.is_script, Some(false));
747 assert_eq!(scanned.is_source, Some(true));
748 }
749
750 #[test]
751 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
752 let options = TextDetectionOptions {
753 collect_info: true,
754 detect_packages: false,
755 detect_application_packages: false,
756 detect_system_packages: false,
757 detect_packages_in_compiled: false,
758 detect_copyrights: false,
759 detect_generated: false,
760 detect_emails: false,
761 detect_urls: false,
762 max_emails: 50,
763 max_urls: 50,
764 timeout_seconds: 120.0,
765 };
766 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
767
768 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
769 assert_eq!(
770 scanned.file_type_label.as_deref(),
771 Some("TypeScript source, UTF-8 Unicode text")
772 );
773 assert_eq!(scanned.is_text, Some(true));
774 assert_eq!(scanned.is_media, Some(false));
775 assert_eq!(scanned.is_script, Some(false));
776 assert_eq!(scanned.is_source, Some(true));
777 }
778
779 #[test]
780 fn scanner_treats_empty_files_like_scancode_info_surface() {
781 let options = TextDetectionOptions {
782 collect_info: true,
783 detect_packages: false,
784 detect_application_packages: false,
785 detect_system_packages: false,
786 detect_packages_in_compiled: false,
787 detect_copyrights: false,
788 detect_generated: false,
789 detect_emails: false,
790 detect_urls: false,
791 max_emails: 50,
792 max_urls: 50,
793 timeout_seconds: 120.0,
794 };
795 let scanned = scan_single_file("test.txt", "", &options);
796
797 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
798 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
799 assert_eq!(scanned.programming_language, None);
800 assert_eq!(scanned.is_binary, Some(false));
801 assert_eq!(scanned.is_text, Some(true));
802 assert_eq!(scanned.is_archive, Some(false));
803 assert_eq!(scanned.is_media, Some(false));
804 assert_eq!(scanned.is_source, Some(false));
805 assert_eq!(scanned.is_script, Some(false));
806 }
807
808 #[test]
809 fn scanner_treats_package_json_as_text_not_source() {
810 let options = TextDetectionOptions {
811 collect_info: true,
812 detect_packages: false,
813 detect_application_packages: false,
814 detect_system_packages: false,
815 detect_packages_in_compiled: false,
816 detect_copyrights: false,
817 detect_generated: false,
818 detect_emails: false,
819 detect_urls: false,
820 max_emails: 50,
821 max_urls: 50,
822 timeout_seconds: 120.0,
823 };
824 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
825
826 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
827 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
828 assert_eq!(scanned.programming_language, None);
829 assert_eq!(scanned.is_text, Some(true));
830 assert_eq!(scanned.is_source, Some(false));
831 assert_eq!(scanned.is_script, Some(false));
832 }
833
834 #[test]
835 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
836 let options = TextDetectionOptions {
837 collect_info: true,
838 detect_packages: false,
839 detect_application_packages: false,
840 detect_system_packages: false,
841 detect_packages_in_compiled: false,
842 detect_copyrights: false,
843 detect_generated: false,
844 detect_emails: false,
845 detect_urls: false,
846 max_emails: 50,
847 max_urls: 50,
848 timeout_seconds: 120.0,
849 };
850
851 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
852 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
853
854 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
855 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
856 assert_eq!(gradle.is_source, Some(true));
857 assert_eq!(gradle.is_script, Some(false));
858
859 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
860 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
861 assert_eq!(nix.is_source, Some(true));
862 assert_eq!(nix.is_script, Some(false));
863 }
864
865 #[test]
866 fn scanner_treats_gitmodules_as_text_not_source() {
867 let options = TextDetectionOptions {
868 collect_info: true,
869 detect_packages: false,
870 detect_application_packages: false,
871 detect_system_packages: false,
872 detect_packages_in_compiled: false,
873 detect_copyrights: false,
874 detect_generated: false,
875 detect_emails: false,
876 detect_urls: false,
877 max_emails: 50,
878 max_urls: 50,
879 timeout_seconds: 120.0,
880 };
881 let scanned = scan_file_at_relative_path(
882 ".gitmodules",
883 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
884 &options,
885 );
886
887 assert_eq!(scanned.programming_language, None);
888 assert_eq!(
889 scanned.file_type_label.as_deref(),
890 Some("Git configuration text")
891 );
892 assert_eq!(scanned.is_text, Some(true));
893 assert_eq!(scanned.is_source, Some(false));
894 assert_eq!(scanned.is_script, Some(false));
895 }
896
897 #[test]
898 fn scanner_treats_javascript_shebang_files_as_scripts() {
899 let options = TextDetectionOptions {
900 collect_info: true,
901 detect_packages: false,
902 detect_application_packages: false,
903 detect_system_packages: false,
904 detect_packages_in_compiled: false,
905 detect_copyrights: false,
906 detect_generated: false,
907 detect_emails: false,
908 detect_urls: false,
909 max_emails: 50,
910 max_urls: 50,
911 timeout_seconds: 120.0,
912 };
913 let scanned = scan_file_at_relative_path(
914 "bin/run",
915 b"#!/usr/bin/env node\nconsole.log('hello');\n",
916 &options,
917 );
918
919 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
920 assert_eq!(
921 scanned.file_type_label.as_deref(),
922 Some("javascript script, UTF-8 Unicode text executable")
923 );
924 assert_eq!(scanned.is_script, Some(true));
925 assert_eq!(scanned.is_source, Some(true));
926 }
927
928 #[test]
929 fn scanner_treats_dockerfile_as_source() {
930 let options = TextDetectionOptions {
931 collect_info: true,
932 detect_packages: false,
933 detect_application_packages: false,
934 detect_system_packages: false,
935 detect_packages_in_compiled: false,
936 detect_copyrights: false,
937 detect_generated: false,
938 detect_emails: false,
939 detect_urls: false,
940 max_emails: 50,
941 max_urls: 50,
942 timeout_seconds: 120.0,
943 };
944 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
945
946 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
947 assert_eq!(
948 scanned.file_type_label.as_deref(),
949 Some("Dockerfile source, UTF-8 Unicode text")
950 );
951 assert_eq!(scanned.is_source, Some(true));
952 assert_eq!(scanned.is_script, Some(false));
953 }
954
955 #[test]
956 fn scanner_treats_makefile_as_text_not_source() {
957 let options = TextDetectionOptions {
958 collect_info: true,
959 detect_packages: false,
960 detect_application_packages: false,
961 detect_system_packages: false,
962 detect_packages_in_compiled: false,
963 detect_copyrights: false,
964 detect_generated: false,
965 detect_emails: false,
966 detect_urls: false,
967 max_emails: 50,
968 max_urls: 50,
969 timeout_seconds: 120.0,
970 };
971 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
972
973 assert_eq!(scanned.programming_language, None);
974 assert_eq!(
975 scanned.file_type_label.as_deref(),
976 Some("UTF-8 Unicode text")
977 );
978 assert_eq!(scanned.is_text, Some(true));
979 assert_eq!(scanned.is_source, Some(false));
980 assert_eq!(scanned.is_script, Some(false));
981 }
982
983 #[test]
984 fn scanner_omits_info_surface_when_disabled() {
985 let options = TextDetectionOptions {
986 collect_info: false,
987 detect_packages: false,
988 detect_application_packages: false,
989 detect_system_packages: false,
990 detect_packages_in_compiled: false,
991 detect_copyrights: false,
992 detect_generated: false,
993 detect_emails: false,
994 detect_urls: false,
995 max_emails: 50,
996 max_urls: 50,
997 timeout_seconds: 120.0,
998 };
999 let scanned = scan_single_file(
1000 "script.py",
1001 "#!/usr/bin/env python3\nprint(\"hello\")\n",
1002 &options,
1003 );
1004
1005 assert!(scanned.sha1.is_none());
1006 assert!(scanned.md5.is_none());
1007 assert!(scanned.sha256.is_none());
1008 assert!(scanned.sha1_git.is_none());
1009 assert!(scanned.mime_type.is_none());
1010 assert!(scanned.date.is_none());
1011 assert!(scanned.programming_language.is_none());
1012 assert!(scanned.is_binary.is_none());
1013 assert!(scanned.is_text.is_none());
1014 assert!(scanned.is_archive.is_none());
1015 assert!(scanned.is_media.is_none());
1016 assert!(scanned.is_script.is_none());
1017 assert!(scanned.is_source.is_none());
1018 }
1019
1020 #[test]
1021 fn scanner_skips_package_parsing_when_disabled() {
1022 let options = TextDetectionOptions {
1023 collect_info: false,
1024 detect_packages: false,
1025 detect_application_packages: false,
1026 detect_system_packages: false,
1027 detect_packages_in_compiled: false,
1028 detect_copyrights: false,
1029 detect_generated: false,
1030 detect_emails: false,
1031 detect_urls: false,
1032 max_emails: 50,
1033 max_urls: 50,
1034 timeout_seconds: 120.0,
1035 };
1036 let scanned = scan_single_file(
1037 "package.json",
1038 r#"{"name":"demo","version":"1.0.0"}"#,
1039 &options,
1040 );
1041
1042 assert!(
1043 scanned.package_data.is_empty(),
1044 "package_data: {:#?}",
1045 scanned.package_data
1046 );
1047 }
1048
1049 #[test]
1050 fn scanner_parses_package_manifests_when_enabled() {
1051 let options = TextDetectionOptions {
1052 collect_info: false,
1053 detect_packages: true,
1054 detect_application_packages: true,
1055 detect_system_packages: false,
1056 detect_packages_in_compiled: false,
1057 detect_copyrights: false,
1058 detect_generated: false,
1059 detect_emails: false,
1060 detect_urls: false,
1061 max_emails: 50,
1062 max_urls: 50,
1063 timeout_seconds: 120.0,
1064 };
1065 let scanned = scan_single_file(
1066 "package.json",
1067 r#"{"name":"demo","version":"1.0.0"}"#,
1068 &options,
1069 );
1070
1071 assert_eq!(
1072 scanned.package_data.len(),
1073 1,
1074 "package_data: {:#?}",
1075 scanned.package_data
1076 );
1077 }
1078
/// In package-only mode the RPM built by `build_sparse_oversized_rpm` is parsed
/// into one package record and no scan diagnostics are emitted.
#[test]
fn scanner_parses_oversized_rpm_in_package_only_mode_without_size_warning() {
    let workspace = TempDir::new().expect("create temp dir");
    let rpm_path = build_sparse_oversized_rpm(&workspace, "oversized-demo");
    let package_only = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let outcome = process_collected(
        &inventory,
        reporter,
        None,
        LicenseScanOptions::default(),
        &package_only,
    );

    // Locate the entry for the RPM itself among the scanned files.
    let expected_path = rpm_path.to_string_lossy();
    let rpm_entry = outcome
        .files
        .into_iter()
        .find(|candidate| {
            candidate.file_type == FileType::File && candidate.path == expected_path
        })
        .expect("scanned file entry");

    assert!(
        rpm_entry.scan_diagnostics.is_empty(),
        "scan_diagnostics: {:#?}",
        rpm_entry.scan_diagnostics
    );
    assert_eq!(
        rpm_entry.package_data.len(),
        1,
        "package_data: {:#?}",
        rpm_entry.package_data
    );

    let package = &rpm_entry.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-demo"));
    assert_eq!(package.version.as_deref(), Some("1.0-1"));
}
1136
/// With info collection enabled, the RPM built by `build_sparse_oversized_rpm`
/// still parses cleanly and additionally carries hashes and file-type info.
#[test]
fn scanner_parses_oversized_rpm_with_info_without_timeout_or_size_warning() {
    let workspace = TempDir::new().expect("create temp dir");
    let rpm_path = build_sparse_oversized_rpm(&workspace, "oversized-info-demo");
    let packages_with_info = TextDetectionOptions {
        collect_info: true,
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let outcome = process_collected(
        &inventory,
        reporter,
        None,
        LicenseScanOptions::default(),
        &packages_with_info,
    );

    let expected_path = rpm_path.to_string_lossy();
    let rpm_entry = outcome
        .files
        .into_iter()
        .find(|candidate| {
            candidate.file_type == FileType::File && candidate.path == expected_path
        })
        .expect("scanned file entry");

    // Package parsing: one RPM record, no diagnostics.
    assert!(
        rpm_entry.scan_diagnostics.is_empty(),
        "scan_diagnostics: {:#?}",
        rpm_entry.scan_diagnostics
    );
    assert_eq!(
        rpm_entry.package_data.len(),
        1,
        "package_data: {:#?}",
        rpm_entry.package_data
    );
    let package = &rpm_entry.package_data[0];
    assert_eq!(package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(package.name.as_deref(), Some("oversized-info-demo"));

    // Info collection: all checksums are present.
    assert!(rpm_entry.sha1.is_some());
    assert!(rpm_entry.md5.is_some());
    assert!(rpm_entry.sha256.is_some());
    assert!(rpm_entry.sha1_git.is_some());

    // Info collection: file classification.
    assert_eq!(rpm_entry.mime_type.as_deref(), Some("application/x-rpm"));
    assert_eq!(rpm_entry.file_type_label.as_deref(), Some("RPM package"));
    assert_eq!(rpm_entry.is_binary, Some(true));
    assert_eq!(rpm_entry.is_text, Some(false));
    assert_eq!(rpm_entry.is_archive, Some(true));
}
1202
/// In package-only mode the RPM built by `build_sparse_oversized_pack_rpm` is
/// parsed into one package record and no scan diagnostics are emitted.
#[test]
fn scanner_parses_oversized_pack_rpm_in_package_only_mode_without_size_warning() {
    let scratch = TempDir::new().expect("create temp dir");
    let pack_rpm = build_sparse_oversized_pack_rpm(&scratch, "oversized-pack-demo");
    let scan_options = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let quiet_progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let paths = collect_paths(scratch.path(), 0, &[]);
    let processed = process_collected(
        &paths,
        quiet_progress,
        None,
        LicenseScanOptions::default(),
        &scan_options,
    );

    let wanted = pack_rpm.to_string_lossy();
    let file_info = processed
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path == wanted)
        .expect("scanned file entry");

    assert!(
        file_info.scan_diagnostics.is_empty(),
        "scan_diagnostics: {:#?}",
        file_info.scan_diagnostics
    );
    assert_eq!(
        file_info.package_data.len(),
        1,
        "package_data: {:#?}",
        file_info.package_data
    );

    let only_package = &file_info.package_data[0];
    assert_eq!(only_package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(only_package.name.as_deref(), Some("oversized-pack-demo"));
}
1259
/// With info collection enabled, the RPM built by
/// `build_sparse_oversized_pack_rpm` parses cleanly and also reports hashes and
/// file-type info.
#[test]
fn scanner_parses_oversized_pack_rpm_with_info_without_timeout_or_size_warning() {
    let scratch = TempDir::new().expect("create temp dir");
    let pack_rpm = build_sparse_oversized_pack_rpm(&scratch, "oversized-pack-info-demo");
    let scan_options = TextDetectionOptions {
        collect_info: true,
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };

    let quiet_progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let paths = collect_paths(scratch.path(), 0, &[]);
    let processed = process_collected(
        &paths,
        quiet_progress,
        None,
        LicenseScanOptions::default(),
        &scan_options,
    );

    let wanted = pack_rpm.to_string_lossy();
    let file_info = processed
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path == wanted)
        .expect("scanned file entry");

    // Package expectations.
    assert!(
        file_info.scan_diagnostics.is_empty(),
        "scan_diagnostics: {:#?}",
        file_info.scan_diagnostics
    );
    assert_eq!(
        file_info.package_data.len(),
        1,
        "package_data: {:#?}",
        file_info.package_data
    );
    let only_package = &file_info.package_data[0];
    assert_eq!(only_package.datasource_id, Some(DatasourceId::RpmArchive));
    assert_eq!(
        only_package.name.as_deref(),
        Some("oversized-pack-info-demo")
    );

    // Checksum expectations.
    assert!(file_info.sha1.is_some());
    assert!(file_info.md5.is_some());
    assert!(file_info.sha256.is_some());
    assert!(file_info.sha1_git.is_some());

    // Classification expectations.
    assert_eq!(file_info.mime_type.as_deref(), Some("application/x-rpm"));
    assert_eq!(file_info.file_type_label.as_deref(), Some("RPM package"));
    assert_eq!(file_info.is_binary, Some(true));
    assert_eq!(file_info.is_text, Some(false));
    assert_eq!(file_info.is_archive, Some(true));
}
1325
/// An application manifest is ignored when only system-package detection is on.
#[test]
fn scanner_skips_application_packages_when_only_system_packages_enabled() {
    let system_packages_only = TextDetectionOptions {
        detect_packages: true,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let manifest = r#"{"name":"demo","version":"1.0.0"}"#;

    let file_info = scan_single_file("package.json", manifest, &system_packages_only);

    assert!(
        file_info.package_data.is_empty(),
        "package_data: {:#?}",
        file_info.package_data
    );
}
1354
/// A dpkg status file at `var/lib/dpkg/status` yields package data when
/// system-package detection is enabled.
#[test]
fn scanner_parses_system_package_files_when_enabled() {
    let system_packages_only = TextDetectionOptions {
        detect_packages: true,
        detect_system_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let dpkg_status =
        b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n";

    let file_info =
        scan_file_at_relative_path("var/lib/dpkg/status", dpkg_status, &system_packages_only);

    assert!(
        !file_info.package_data.is_empty(),
        "package_data: {:#?}",
        file_info.package_data
    );
}
1383
/// A compiled Go binary yields package data only when
/// `detect_packages_in_compiled` is set.
///
/// The test builds a tiny Go program, so it returns early (skips) when no
/// working `go` toolchain can be invoked.
#[test]
fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
    // `output()` captures the probe's stdout/stderr so "go version ..." does not
    // leak into the test log, and a non-zero exit is treated like a missing
    // toolchain instead of failing later during `go build`.
    let go_available = std::process::Command::new("go")
        .arg("version")
        .output()
        .is_ok_and(|probe| probe.status.success());
    if !go_available {
        eprintln!("skipping: no working `go` toolchain found");
        return;
    }

    // Minimal Go module with an empty `main`.
    let temp_dir = TempDir::new().expect("create temp dir");
    fs::write(
        temp_dir.path().join("go.mod"),
        "module example.com/demo\n\ngo 1.23.0\n",
    )
    .expect("write go.mod");
    fs::write(
        temp_dir.path().join("main.go"),
        "package main\nfunc main() {}\n",
    )
    .expect("write main.go");

    // Build the binary as `<temp_dir>/demo`.
    let file_path = temp_dir.path().join("demo");
    let status = std::process::Command::new("go")
        .current_dir(temp_dir.path())
        .args(["build", "-o"])
        .arg(&file_path)
        .status()
        .expect("run go build");
    assert!(status.success());

    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);

    // The two runs differ only in `detect_packages_in_compiled`.
    let compiled_off = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let compiled_on = TextDetectionOptions {
        detect_packages_in_compiled: true,
        ..compiled_off.clone()
    };

    let without_compiled = process_collected(
        &collected,
        Arc::clone(&progress),
        None,
        LicenseScanOptions::default(),
        &compiled_off,
    );
    let with_compiled = process_collected(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &compiled_on,
    );

    let without_compiled = without_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");
    let with_compiled = with_compiled
        .files
        .into_iter()
        .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
        .expect("compiled artifact present");

    assert!(
        without_compiled.package_data.is_empty(),
        "package_data: {:#?}",
        without_compiled.package_data
    );
    assert!(!with_compiled.package_data.is_empty());
}
1476
/// A PE fixture yields a single `Winexe` package under a regular package scan,
/// and nothing at all when package detection is off.
#[test]
fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
    let workspace = TempDir::new().expect("create temp dir");
    let dll_path = workspace.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);

    // Baseline run with every detector disabled, then a normal package scan.
    let packages_off = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let packages_on = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        ..packages_off.clone()
    };

    let baseline_run = process_collected(
        &inventory,
        Arc::clone(&reporter),
        None,
        LicenseScanOptions::default(),
        &packages_off,
    );
    let package_run = process_collected(
        &inventory,
        reporter,
        None,
        LicenseScanOptions::default(),
        &packages_on,
    );

    let baseline_entry = baseline_run
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");
    let package_entry = package_run
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path.ends_with("/libiconv2.dll"))
        .expect("compiled artifact present");

    assert!(baseline_entry.package_data.is_empty());
    assert_eq!(package_entry.package_data.len(), 1);

    let package = &package_entry.package_data[0];
    assert_eq!(package.package_type, Some(FilePackageType::Winexe));
    assert_eq!(
        package.datasource_id,
        Some(DatasourceId::WindowsExecutable)
    );
}
1555
/// A PE file carrying the NSIS marker string reports both an NSIS installer
/// package and a Windows executable package.
#[test]
fn scanner_keeps_nsis_and_windows_executable_package_data_together() {
    let workspace = TempDir::new().expect("create temp dir");
    let installer_path = workspace.path().join("nsis-with-version.exe");

    // Start from the PE fixture, zero-pad it to at least 70_000 bytes, then
    // append the NSIS marker.
    let mut installer_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    let padded_len = installer_bytes.len().max(70_000);
    installer_bytes.resize(padded_len, 0);
    installer_bytes.extend_from_slice(b"Nullsoft.NSIS.exehead");
    fs::write(&installer_path, installer_bytes).expect("write synthetic NSIS PE fixture");

    let package_scan = TextDetectionOptions {
        detect_packages: true,
        detect_application_packages: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let outcome = process_collected(
        &inventory,
        reporter,
        None,
        LicenseScanOptions::default(),
        &package_scan,
    );

    let installer_entry = outcome
        .files
        .into_iter()
        .find(|item| {
            item.file_type == FileType::File && item.path.ends_with("/nsis-with-version.exe")
        })
        .expect("compiled artifact present");

    assert_eq!(
        installer_entry.package_data.len(),
        2,
        "package_data: {:#?}",
        installer_entry.package_data
    );

    let has_datasource = |wanted: DatasourceId| {
        installer_entry
            .package_data
            .iter()
            .any(|pkg| pkg.datasource_id.as_ref() == Some(&wanted))
    };
    assert!(has_datasource(DatasourceId::NsisInstaller));
    assert!(has_datasource(DatasourceId::WindowsExecutable));
}
1618
/// Scanning the Lato font fixture with the embedded license engine produces an
/// OFL-1.1 license expression.
#[test]
fn scanner_detects_license_from_font_metadata() {
    let workspace = TempDir::new().expect("create temp dir");
    let font_path = workspace.path().join("Lato-Bold.ttf");
    let font_bytes = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
    fs::write(&font_path, font_bytes).expect("write font fixture");

    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let license_engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let default_options = TextDetectionOptions::default();
    let outcome = process_collected(
        &inventory,
        reporter,
        Some(license_engine),
        LicenseScanOptions::default(),
        &default_options,
    );

    let expected_path = font_path.to_string_lossy();
    let font_entry = outcome
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path == expected_path)
        .expect("scanned file entry");

    assert!(
        font_entry.detected_license_expression.is_some(),
        "license detections: {:#?}",
        font_entry.license_detections
    );

    // Either casing of the OFL key is accepted.
    let expression = font_entry.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some(found) if found.contains("OFL-1.1") || found.contains("ofl-1.1")
        ),
        "license expression: {:?}",
        font_entry.detected_license_expression
    );
}
1661
/// Scanning the `libiconv2.dll` PE fixture with the embedded license engine
/// produces an LGPL license expression.
#[test]
fn scanner_detects_license_from_windows_executable_metadata() {
    let workspace = TempDir::new().expect("create temp dir");
    let dll_path = workspace.path().join("libiconv2.dll");
    let dll_bytes = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
        .expect("read PE fixture");
    fs::write(&dll_path, dll_bytes).expect("write PE fixture");

    let reporter = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let license_engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let default_options = TextDetectionOptions::default();
    let outcome = process_collected(
        &inventory,
        reporter,
        Some(license_engine),
        LicenseScanOptions::default(),
        &default_options,
    );

    let expected_path = dll_path.to_string_lossy();
    let dll_entry = outcome
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::File && item.path == expected_path)
        .expect("scanned file entry");

    assert!(
        dll_entry.detected_license_expression.is_some(),
        "license detections: {:#?}",
        dll_entry.license_detections
    );

    // Either casing of the LGPL key is accepted.
    let expression = dll_entry.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some(found) if found.contains("lgpl") || found.contains("LGPL")
        ),
        "license expression: {:?}",
        dll_entry.detected_license_expression
    );
}
1705
/// A notice stored in the security data directory of a synthetic PE file is
/// detected as a `proprietary-license` expression.
#[test]
fn scanner_detects_license_from_windows_executable_security_notice() {
    /// Builds a minimal, section-less PE image whose security data directory
    /// points at a certificate blob containing `notice` encoded as UTF-16LE.
    fn synthetic_pe_with_security_notice(notice: &str) -> Vec<u8> {
        // Notice text as little-endian UTF-16 code units.
        let cert_payload = notice
            .encode_utf16()
            .flat_map(|unit| unit.to_le_bytes())
            .collect::<Vec<_>>();
        // Declared length covers the 8-byte header written below plus the payload.
        let cert_len = (8 + cert_payload.len()) as u32;
        let mut cert = Vec::new();
        // Header: u32 length, then two u16 fields.
        // NOTE(review): presumably the WIN_CERTIFICATE revision (0x0200) and
        // certificate type (0x0002) — confirm against the PE format spec.
        cert.extend_from_slice(&cert_len.to_le_bytes());
        cert.extend_from_slice(&0x0200u16.to_le_bytes());
        cert.extend_from_slice(&0x0002u16.to_le_bytes());
        cert.extend_from_slice(&cert_payload);
        // Zero-pad the blob to a multiple of 8 bytes.
        while !cert.len().is_multiple_of(8) {
            cert.push(0);
        }

        // File layout: certificate blob at 0x200, PE signature at 0x80, followed
        // by a 20-byte header and then the optional header.
        let offset = 0x200usize;
        let size = cert.len();
        let optional_header_size = 224usize;
        let pe_header_offset = 0x80usize;
        let nt_headers_offset = pe_header_offset + 4;
        let optional_header_offset = nt_headers_offset + 20;
        // Data directories start 96 bytes into the optional header; each entry
        // is 8 bytes (a u32 offset and a u32 size, as written below).
        let data_directory_offset = optional_header_offset + 96;
        let security_directory_offset =
            data_directory_offset + pe::IMAGE_DIRECTORY_ENTRY_SECURITY * 8;
        let total_len = offset + size;
        let mut bytes = vec![0u8; total_len];

        // DOS stub: "MZ" magic and, at 0x3c, the offset of the PE signature.
        bytes[0..2].copy_from_slice(b"MZ");
        bytes[0x3c..0x40].copy_from_slice(&(pe_header_offset as u32).to_le_bytes());
        bytes[pe_header_offset..pe_header_offset + 4].copy_from_slice(b"PE\0\0");

        // Header after the signature: first u16 is 0x014c (presumably the i386
        // machine type — confirm), and the u16 at +16 holds the optional header size.
        bytes[nt_headers_offset..nt_headers_offset + 2]
            .copy_from_slice(&0x014cu16.to_le_bytes());
        bytes[nt_headers_offset + 16..nt_headers_offset + 18]
            .copy_from_slice(&(optional_header_size as u16).to_le_bytes());

        // Optional header: magic 0x010b (presumably PE32 — confirm) and, at +92,
        // the value 16 (presumably the number of data-directory entries).
        bytes[optional_header_offset..optional_header_offset + 2]
            .copy_from_slice(&0x010bu16.to_le_bytes());
        bytes[optional_header_offset + 92..optional_header_offset + 96]
            .copy_from_slice(&16u32.to_le_bytes());
        // Security directory entry: file offset and size of the certificate blob.
        bytes[security_directory_offset..security_directory_offset + 4]
            .copy_from_slice(&(offset as u32).to_le_bytes());
        bytes[security_directory_offset + 4..security_directory_offset + 8]
            .copy_from_slice(&(size as u32).to_le_bytes());
        bytes[offset..offset + size].copy_from_slice(&cert);

        bytes
    }

    // Write the synthetic PE into a fresh temporary directory.
    let temp_dir = TempDir::new().expect("create temp dir");
    let file_path = temp_dir.path().join("signed.dll");
    let fixture = synthetic_pe_with_security_notice(
        "use of this Certificate constitutes acceptance of the DigiCert CP/CPS and the Relying Party Agreement which limit liability and are incorporated herein by reference.",
    );
    fs::write(&file_path, fixture).expect("write PE fixture");

    // Scan with the embedded license engine and default options.
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
    let collected = collect_paths(temp_dir.path(), 0, &[]);
    let engine =
        Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
    let result = process_collected(
        &collected,
        progress,
        Some(engine),
        LicenseScanOptions::default(),
        &TextDetectionOptions::default(),
    );
    let scanned = result
        .files
        .into_iter()
        .find(|entry| {
            entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
        })
        .expect("scanned file entry");

    // The notice text must surface as a proprietary-license detection.
    assert!(
        scanned
            .detected_license_expression
            .as_deref()
            .is_some_and(|expression| expression.contains("proprietary-license")),
        "license expression: {:?}, detections: {:#?}",
        scanned.detected_license_expression,
        scanned.license_detections
    );
}
1793
/// A CC BY 4.0 banner inside markdown comments is detected as a license.
#[test]
fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
    let banner = "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n";
    let default_options = TextDetectionOptions::default();

    let file_info = scan_single_file_with_license_engine("navbar.md", banner, &default_options);

    // Either casing of the key is accepted.
    let expression = file_info.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some(found) if found.contains("cc-by-4.0") || found.contains("CC-BY-4.0")
        ),
        "license expression: {:?}",
        file_info.detected_license_expression
    );
}
1813
/// A markdown link pointing at the opensource.org MIT page is detected as MIT.
#[test]
fn scanner_detects_mit_license_from_shields_badge_markdown() {
    let badge_line = "[](https://opensource.org/licenses/MIT)\n";
    let default_options = TextDetectionOptions::default();

    let file_info =
        scan_single_file_with_license_engine("README.md", badge_line, &default_options);

    // Either casing of the key is accepted.
    let expression = file_info.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some(found) if found.contains("mit") || found.contains("MIT")
        ),
        "license expression: {:?}",
        file_info.detected_license_expression
    );
}
1833
/// A README sentence naming "Apache License (Version 2.0)" is detected as
/// Apache-2.0.
#[test]
fn scanner_detects_apache_license_from_markdown_readme_phrase() {
    let readme_line =
        "This crate is distributed under the terms of the Apache License (Version 2.0).\n";
    let default_options = TextDetectionOptions::default();

    let file_info =
        scan_single_file_with_license_engine("README.md", readme_line, &default_options);

    // Either casing of the key is accepted.
    let expression = file_info.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some(found) if found.contains("apache-2.0") || found.contains("Apache-2.0")
        ),
        "license expression: {:?}",
        file_info.detected_license_expression
    );
}
1853
/// A dual-license README section resolves to the `Apache-2.0 OR MIT` choice and
/// no detection is a bare `Apache-2.0`.
#[test]
fn scanner_prefers_dual_license_readme_expression_over_supplemental_mentions() {
    let readme = concat!(
        "## License\n\n",
        "Licensed under either of:\n\n",
        " * [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n",
        " * [MIT license](https://opensource.org/licenses/MIT)\n\n",
        "at your option.\n\n",
        "### Contribution\n\n",
        "Unless you explicitly state otherwise, any contribution intentionally submitted\n",
        "for inclusion in the work by you, as defined in the Apache-2.0 license, shall be\n",
        "dual licensed as above, without any additional terms or conditions.\n",
    );
    let default_options = TextDetectionOptions::default();

    let file_info = scan_single_file_with_license_engine("README.md", readme, &default_options);

    // Both operand orders of the OR expression are accepted.
    let expression = file_info.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some("Apache-2.0 OR MIT" | "MIT OR Apache-2.0")
        ),
        "license expression: {:?}",
        file_info.detected_license_expression
    );

    // No detection may be the standalone Apache-2.0 expression.
    let no_bare_apache = file_info
        .license_detections
        .iter()
        .all(|found| found.license_expression_spdx != "Apache-2.0");
    assert!(
        no_bare_apache,
        "detections: {:?}",
        file_info.license_detections
    );
}
1889
/// A README with a dual-license notice plus an `Apache2.0/MIT` badge reference
/// produces no `Apache-2.0 AND MIT` detection.
#[test]
fn scanner_drops_redundant_conjunctive_readme_detection_when_or_notice_exists() {
    let readme = concat!(
        "## License\n\n",
        "Licensed under either of:\n\n",
        " * [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n",
        " * [MIT license](https://opensource.org/licenses/MIT)\n\n",
        "at your option.\n\n",
        "### Contribution\n\n",
        "Unless you explicitly state otherwise, any contribution intentionally submitted\n",
        "for inclusion in the work by you, as defined in the Apache-2.0 license, shall be\n",
        "dual licensed as above, without any additional terms or conditions.\n\n",
        "[license-image]: https://img.shields.io/badge/license-Apache2.0/MIT-blue.svg\n",
    );
    let default_options = TextDetectionOptions::default();

    let file_info = scan_single_file_with_license_engine("README.md", readme, &default_options);

    let no_conjunctive_detection = file_info
        .license_detections
        .iter()
        .all(|found| found.license_expression_spdx != "Apache-2.0 AND MIT");
    assert!(no_conjunctive_detection);
}
1916
/// A one-line "dual-licensed under MIT and Apache 2.0" notice resolves to the
/// OR expression and leaves no unknown-license-reference detection behind.
#[test]
fn scanner_drops_unknown_placeholder_from_dual_license_readme_notice() {
    let readme = concat!(
        "## License\n\n",
        "This project is dual-licensed under MIT and Apache 2.0.\n",
    );
    let default_options = TextDetectionOptions::default();

    let file_info = scan_single_file_with_license_engine("README.md", readme, &default_options);
    let detections = &file_info.license_detections;

    // File-level expression: either operand order is accepted.
    let expression = file_info.detected_license_expression.as_deref();
    assert!(
        matches!(
            expression,
            Some("Apache-2.0 OR MIT" | "MIT OR Apache-2.0")
        ),
        "license expression: {:?}",
        file_info.detected_license_expression
    );

    // At least one detection mentions the OR expression.
    let has_dual_detection = detections.iter().any(|found| {
        let spdx = &found.license_expression_spdx;
        spdx.contains("Apache-2.0 OR MIT") || spdx.contains("MIT OR Apache-2.0")
    });
    assert!(has_dual_detection);

    // The unknown-license placeholder must be gone.
    let no_unknown_placeholder = detections.iter().all(|found| {
        found.license_expression_spdx != "LicenseRef-scancode-unknown-license-reference"
    });
    assert!(no_unknown_placeholder);

    // A standalone MIT detection is expected alongside the OR detection.
    let has_mit_detection = detections
        .iter()
        .any(|found| found.license_expression_spdx == "MIT");
    assert!(
        has_mit_detection,
        "detections: {:?}",
        file_info.license_detections
    );
}
1956
/// `is_source` is populated for a Rust source file only when `collect_info` is
/// enabled.
#[test]
fn scanner_sets_is_source_only_when_info_enabled() {
    // Everything off, then the same configuration with info collection on.
    let info_off = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let info_on = TextDetectionOptions {
        collect_info: true,
        ..info_off.clone()
    };
    let rust_source = "fn main() {}\n";

    let plain_scan = scan_single_file("main.rs", rust_source, &info_off);
    let info_scan = scan_single_file("main.rs", rust_source, &info_on);

    assert_eq!(plain_scan.is_source, None);
    assert_eq!(info_scan.is_source, Some(true));
}
1984
/// Directory entries carry no info fields when `collect_info` is off.
#[test]
fn directory_omits_info_fields_when_info_disabled() {
    let workspace = TempDir::new().expect("create temp dir");
    fs::create_dir_all(workspace.path().join("nested")).expect("create nested dir");

    let info_off = TextDetectionOptions {
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let outcome = process_collected(
        &inventory,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &info_off,
    );

    let nested_dir = outcome
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::Directory && item.path.ends_with("nested"))
        .expect("directory entry");

    assert!(nested_dir.date.is_none());
    assert!(nested_dir.file_type_label.is_none());

    // Every classification flag stays unset.
    assert_eq!(nested_dir.is_binary, None);
    assert_eq!(nested_dir.is_text, None);
    assert_eq!(nested_dir.is_archive, None);
    assert_eq!(nested_dir.is_media, None);
    assert_eq!(nested_dir.is_source, None);
    assert_eq!(nested_dir.is_script, None);
}
2027
/// With `collect_info` on, an empty directory reports `false` classification
/// flags and zero counts, while `date` and `file_type_label` stay unset.
#[test]
fn directory_includes_info_fields_when_info_enabled() {
    let workspace = TempDir::new().expect("create temp dir");
    fs::create_dir_all(workspace.path().join("nested")).expect("create nested dir");

    let info_only = TextDetectionOptions {
        collect_info: true,
        detect_copyrights: false,
        ..TextDetectionOptions::default()
    };
    let inventory = collect_paths(workspace.path(), 0, &[]);
    let outcome = process_collected(
        &inventory,
        Arc::new(ScanProgress::new(ProgressMode::Quiet)),
        None,
        LicenseScanOptions::default(),
        &info_only,
    );

    let nested_dir = outcome
        .files
        .into_iter()
        .find(|item| item.file_type == FileType::Directory && item.path.ends_with("nested"))
        .expect("directory entry");

    // Fields that remain unset for directories.
    assert!(nested_dir.date.is_none());
    assert!(nested_dir.file_type_label.is_none());

    // Classification flags are present and all false.
    assert_eq!(nested_dir.is_binary, Some(false));
    assert_eq!(nested_dir.is_text, Some(false));
    assert_eq!(nested_dir.is_archive, Some(false));
    assert_eq!(nested_dir.is_media, Some(false));
    assert_eq!(nested_dir.is_source, Some(false));
    assert_eq!(nested_dir.is_script, Some(false));

    // The directory is empty, so every aggregate count is zero.
    assert_eq!(nested_dir.files_count, Some(0));
    assert_eq!(nested_dir.dirs_count, Some(0));
    assert_eq!(nested_dir.size_count, Some(0));
}
2073
/// The scan root itself appears among the collected directories.
#[test]
fn collect_paths_includes_root_directory_entry() {
    let workspace = TempDir::new().expect("create temp dir");
    let root = workspace.path();
    let source_dir = root.join("src");
    fs::create_dir_all(&source_dir).expect("create nested dir");
    fs::write(source_dir.join("main.rs"), "fn main() {}").expect("write nested file");

    let inventory = collect_paths(root, 0, &[]);

    let root_listed = inventory
        .directories
        .iter()
        .any(|(dir_path, _)| dir_path == root);
    assert!(root_listed);
}
2090
/// Pointing `collect_paths` at a single file yields exactly that file and no
/// directories.
#[test]
fn collect_paths_supports_single_file_input() {
    let workspace = TempDir::new().expect("create temp dir");
    let source_file = workspace.path().join("main.rs");
    fs::write(&source_file, "fn main() {}\n").expect("write file");

    let inventory = collect_paths(&source_file, 0, &[]);

    let file_count = inventory.files.len();
    assert_eq!(file_count, 1);
    assert!(inventory.directories.is_empty());
    assert_eq!(inventory.files[0].0, source_file);
}
2103
/// Collecting only the `selected` frontier reports no collection errors and no
/// files under a sibling directory whose permissions were set to `0o000`.
#[cfg(unix)]
#[test]
fn collect_selected_paths_does_not_walk_unselected_siblings() {
    use std::os::unix::fs::PermissionsExt;

    let sandbox = TempDir::new().expect("create temp dir");
    let root = sandbox.path();
    fs::create_dir_all(root.join("selected/docs")).expect("create selected dir");
    fs::create_dir_all(root.join("blocked/secret")).expect("create blocked dir");
    fs::write(root.join("selected/docs/guide.md"), "# guide\n").expect("write guide");

    // Strip every permission bit from the sibling directory.
    let blocked = root.join("blocked");
    let mut locked = fs::metadata(&blocked)
        .expect("blocked metadata")
        .permissions();
    locked.set_mode(0o000);
    fs::set_permissions(&blocked, locked).expect("remove blocked permissions");

    let frontier = [CollectionFrontier {
        path: PathBuf::from("selected"),
        recurse: true,
    }];
    let collected = collect_selected_paths(root, &frontier, 0, &[]);

    // Put the permissions back before asserting.
    let mut unlocked = fs::metadata(&blocked)
        .expect("blocked metadata")
        .permissions();
    unlocked.set_mode(0o755);
    fs::set_permissions(&blocked, unlocked).expect("restore blocked permissions");

    assert!(
        collected.collection_errors.is_empty(),
        "{:#?}",
        collected.collection_errors
    );

    let guide = root.join("selected/docs/guide.md");
    let guide_collected = collected.files.iter().any(|(path, _)| path == &guide);
    assert!(guide_collected);

    let touched_blocked = collected
        .files
        .iter()
        .any(|(path, _): &(PathBuf, fs::Metadata)| path.starts_with(&blocked));
    assert!(!touched_blocked);
}
2156
/// An explicitly selected file below `.git` must not be collected when the
/// exclude patterns built for the scan root are applied: no files are
/// returned, only the root may appear as a directory, and one exclusion is
/// counted.
#[test]
fn collect_selected_paths_respects_excluded_ancestor_directories() {
    let workspace = TempDir::new().expect("create temp dir");
    let scan_root = workspace.path();

    fs::create_dir_all(scan_root.join(".git")).expect("create git dir");
    let git_config_body = "[core]\nrepositoryformatversion = 0\n";
    fs::write(scan_root.join(".git/config"), git_config_body).expect("write git config");

    let cache_dir = scan_root.join(".provenant-cache");
    let exclude_patterns = build_collection_exclude_patterns(scan_root, &cache_dir);

    // Select the file directly, without recursion.
    let frontier = [CollectionFrontier {
        path: PathBuf::from(".git/config"),
        recurse: false,
    }];
    let collected = collect_selected_paths(scan_root, &frontier, 0, &exclude_patterns);

    assert!(collected.files.is_empty());
    let only_root_listed = collected
        .directories
        .iter()
        .all(|(dir, _)| dir == scan_root);
    assert!(only_root_listed);
    assert_eq!(collected.excluded_count, 1);
}
2184
/// Processing two files under `MemoryMode::Limit(1)` must still return every
/// entry.
///
/// NOTE(review): the expected count of 3 is presumably the root directory plus
/// the two files; confirm against `process_collected_with_memory_limit`.
#[test]
fn process_collected_with_memory_limit_preserves_results_when_spilling() {
    let workspace = TempDir::new().expect("create temp dir");
    let scan_root = workspace.path();
    fs::write(scan_root.join("a.txt"), "hello").expect("write first file");
    fs::write(scan_root.join("b.txt"), "world").expect("write second file");

    let collected = collect_paths(scan_root, 0, &[]);

    // Every detector is switched off; only the memory mode is under test.
    let text_options = TextDetectionOptions {
        collect_info: false,
        detect_packages: false,
        detect_application_packages: false,
        detect_system_packages: false,
        detect_packages_in_compiled: false,
        detect_copyrights: false,
        detect_generated: false,
        detect_emails: false,
        detect_urls: false,
        max_emails: 50,
        max_urls: 50,
        timeout_seconds: 120.0,
    };
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));

    let result = process_collected_with_memory_limit(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &text_options,
        MemoryMode::Limit(1),
    );

    assert_eq!(result.files.len(), 3);
}
2216
/// Processing a single file under `MemoryMode::StreamUnlimited` must return
/// every entry.
///
/// NOTE(review): the expected count of 2 is presumably the root directory plus
/// the one file; confirm against `process_collected_with_memory_limit`.
#[test]
fn process_collected_with_negative_one_uses_disk_only_mode() {
    let workspace = TempDir::new().expect("create temp dir");
    let scan_root = workspace.path();
    fs::write(scan_root.join("a.txt"), "hello").expect("write first file");

    let collected = collect_paths(scan_root, 0, &[]);

    // Every detector is switched off; only the memory mode is under test.
    let text_options = TextDetectionOptions {
        collect_info: false,
        detect_packages: false,
        detect_application_packages: false,
        detect_system_packages: false,
        detect_packages_in_compiled: false,
        detect_copyrights: false,
        detect_generated: false,
        detect_emails: false,
        detect_urls: false,
        max_emails: 50,
        max_urls: 50,
        timeout_seconds: 120.0,
    };
    let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));

    let result = process_collected_with_memory_limit(
        &collected,
        progress,
        None,
        LicenseScanOptions::default(),
        &text_options,
        MemoryMode::StreamUnlimited,
    );

    assert_eq!(result.files.len(), 2);
}
2247}