1mod collect;
5mod process;
6
7use crate::license_detection::LicenseDetectionEngine;
8use crate::models::FileInfo;
9
/// Aggregate outcome of a processing run.
///
/// NOTE(review): presumably the return type of the `process_collected*`
/// entry points re-exported below — confirm in the `process` module.
10pub struct ProcessResult {
    /// Per-entry scan records; the tests in this file search this list by
    /// `file_type` and `path` to locate the entry for a fixture file.
11 pub files: Vec<FileInfo>,
    /// NOTE(review): presumably the number of paths excluded from the scan —
    /// confirm where this counter is populated.
12 pub excluded_count: usize,
13}
14
/// License-scan settings, passed by value into the scan pipeline.
///
/// Every field is written into the string built by `scan_options_fingerprint`
/// under its own key, so two values that compare unequal also produce
/// different fingerprints. `PartialEq`/`Eq` are derived so callers and tests
/// can compare option sets directly instead of comparing fingerprint strings.
///
/// `Default` yields all flags `false` and `min_score == 0`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct LicenseScanOptions {
    /// Recorded as `license_text` in the fingerprint.
    pub include_text: bool,
    /// Recorded as `license_text_diagnostics` in the fingerprint.
    pub include_text_diagnostics: bool,
    /// Recorded as `license_diagnostics` in the fingerprint.
    pub include_diagnostics: bool,
    /// Recorded as `unknown_licenses` in the fingerprint.
    pub unknown_licenses: bool,
    /// Recorded as `license_score` in the fingerprint.
    /// NOTE(review): presumably a minimum match score threshold — confirm
    /// the accepted range against the license engine.
    pub min_score: u8,
}
23
/// Switches and limits for the per-file text detection passes.
///
/// Every field is written into the string built by `scan_options_fingerprint`.
/// `PartialEq` is derived so option sets can be compared directly; `Eq` is
/// intentionally absent because `timeout_seconds` is an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct TextDetectionOptions {
    /// When `false`, the tests in this file expect hashes, MIME type, date,
    /// language and the `is_*` classification flags to stay unset.
    pub collect_info: bool,
    /// Master switch for package manifest parsing.
    pub detect_packages: bool,
    /// Recorded as `app_packages` in the fingerprint.
    pub detect_application_packages: bool,
    /// Recorded as `system_packages` in the fingerprint.
    pub detect_system_packages: bool,
    /// Recorded as `compiled_packages` in the fingerprint.
    pub detect_packages_in_compiled: bool,
    /// Copyright/holder/author detection; the only flag enabled by default.
    pub detect_copyrights: bool,
    /// When `false`, `is_generated` is left as `None` on scanned files.
    pub detect_generated: bool,
    /// Email detection.
    pub detect_emails: bool,
    /// URL detection.
    pub detect_urls: bool,
    /// NOTE(review): presumably the per-file cap on reported emails — confirm.
    pub max_emails: usize,
    /// NOTE(review): presumably the per-file cap on reported URLs — confirm.
    pub max_urls: usize,
    /// NOTE(review): presumably a per-file time budget in seconds — confirm.
    pub timeout_seconds: f64,
}

impl Default for TextDetectionOptions {
    /// Copyright detection on, every other detector off, limits of 50 emails
    /// and 50 URLs, and a 120 second timeout.
    fn default() -> Self {
        Self {
            // Detectors that are opt-in.
            collect_info: false,
            detect_packages: false,
            detect_application_packages: false,
            detect_system_packages: false,
            detect_packages_in_compiled: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,
            // The single detector that is on unless disabled explicitly.
            detect_copyrights: true,
            // Limits.
            max_emails: 50,
            max_urls: 50,
            timeout_seconds: 120.0,
        }
    }
}
58
59pub fn scan_options_fingerprint(
60 text_options: &TextDetectionOptions,
61 license_options: LicenseScanOptions,
62 license_engine: Option<&LicenseDetectionEngine>,
63) -> String {
64 let (license_enabled, rules_count, first_rule_id, last_rule_id) = match license_engine {
65 Some(engine) => {
66 let rules = &engine.index().rules_by_rid;
67 (
68 true,
69 rules.len(),
70 rules
71 .first()
72 .map(|rule| rule.identifier.as_str())
73 .unwrap_or(""),
74 rules
75 .last()
76 .map(|rule| rule.identifier.as_str())
77 .unwrap_or(""),
78 )
79 }
80 None => (false, 0, "", ""),
81 };
82
83 format!(
84 "tool_version={};info={};packages={};app_packages={};system_packages={};compiled_packages={};copyrights={};generated={};emails={};urls={};max_emails={};max_urls={};timeout={:.6};license_enabled={};rules_count={};first_rule_id={};last_rule_id={};license_text={};license_text_diagnostics={};license_diagnostics={};unknown_licenses={};license_score={}",
85 crate::version::BUILD_VERSION,
86 text_options.collect_info,
87 text_options.detect_packages,
88 text_options.detect_application_packages,
89 text_options.detect_system_packages,
90 text_options.detect_packages_in_compiled,
91 text_options.detect_copyrights,
92 text_options.detect_generated,
93 text_options.detect_emails,
94 text_options.detect_urls,
95 text_options.max_emails,
96 text_options.max_urls,
97 text_options.timeout_seconds,
98 license_enabled,
99 rules_count,
100 first_rule_id,
101 last_rule_id,
102 license_options.include_text,
103 license_options.include_text_diagnostics,
104 license_options.include_diagnostics,
105 license_options.unknown_licenses,
106 license_options.min_score,
107 )
108}
109
110pub use self::collect::{CollectedPaths, collect_paths};
111#[allow(unused_imports)]
112pub use self::process::{
113 MemoryMode, process_collected, process_collected_sequential,
114 process_collected_with_memory_limit, process_collected_with_memory_limit_sequential,
115};
116
117#[cfg(test)]
118mod tests {
119 use std::fs;
120 use std::path::PathBuf;
121 use std::sync::Arc;
122
123 use tempfile::TempDir;
124
125 use crate::license_detection::LicenseDetectionEngine;
126 use crate::models::{DatasourceId, FileType, PackageType as FilePackageType};
127 use crate::progress::{ProgressMode, ScanProgress};
128
129 use super::{
130 LicenseScanOptions, MemoryMode, TextDetectionOptions, collect_paths, process_collected,
131 process_collected_with_memory_limit, scan_options_fingerprint,
132 };
133
134 fn build_sparse_oversized_rpm_with_filename(
135 temp_dir: &TempDir,
136 package_name: &str,
137 filename: &str,
138 ) -> PathBuf {
139 let file_path = temp_dir.path().join(filename);
140 rpm::PackageBuilder::new(package_name, "1.0", "MIT", "x86_64", "Demo RPM package")
141 .release("1")
142 .build()
143 .expect("build rpm fixture")
144 .write_file(&file_path)
145 .expect("write rpm fixture");
146 fs::OpenOptions::new()
147 .write(true)
148 .open(&file_path)
149 .expect("open rpm fixture for sparse extension")
150 .set_len(100 * 1024 * 1024 + 1_048_576)
151 .expect("extend rpm fixture");
152 file_path
153 }
154
155 fn build_sparse_oversized_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
156 build_sparse_oversized_rpm_with_filename(
157 temp_dir,
158 name,
159 &format!("{name}-1.0-1.x86_64.rpm"),
160 )
161 }
162
163 fn build_sparse_oversized_pack_rpm(temp_dir: &TempDir, name: &str) -> PathBuf {
164 build_sparse_oversized_rpm_with_filename(
165 temp_dir,
166 name,
167 &format!("{name}-1.0-1.x86_64.pack"),
168 )
169 }
170
171 #[test]
172 fn default_options_keep_copyright_detection_enabled() {
173 let options = TextDetectionOptions::default();
174 assert!(!options.detect_packages);
175 assert!(options.detect_copyrights);
176 }
177
178 #[test]
179 fn test_scan_options_fingerprint_changes_with_license_score() {
180 let text_options = TextDetectionOptions::default();
181 let default_fingerprint = scan_options_fingerprint(
182 &text_options,
183 LicenseScanOptions {
184 min_score: 0,
185 ..LicenseScanOptions::default()
186 },
187 None,
188 );
189 let filtered_fingerprint = scan_options_fingerprint(
190 &text_options,
191 LicenseScanOptions {
192 min_score: 70,
193 ..LicenseScanOptions::default()
194 },
195 None,
196 );
197
198 assert_ne!(default_fingerprint, filtered_fingerprint);
199 }
200
201 fn scan_single_file(
202 file_name: &str,
203 content: &str,
204 options: &TextDetectionOptions,
205 ) -> crate::models::FileInfo {
206 let temp_dir = TempDir::new().expect("create temp dir");
207 let file_path = temp_dir.path().join(file_name);
208 fs::write(&file_path, content).expect("write test file");
209
210 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
211 let collected = collect_paths(temp_dir.path(), 0, &[]);
212 let result = process_collected(
213 &collected,
214 progress,
215 None,
216 LicenseScanOptions::default(),
217 options,
218 );
219
220 result
221 .files
222 .into_iter()
223 .find(|entry| {
224 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
225 })
226 .expect("scanned file entry")
227 }
228
229 fn scan_file_at_relative_path(
230 relative_path: &str,
231 content: &[u8],
232 options: &TextDetectionOptions,
233 ) -> crate::models::FileInfo {
234 let temp_dir = TempDir::new().expect("create temp dir");
235 let file_path = temp_dir.path().join(relative_path);
236 if let Some(parent) = file_path.parent() {
237 fs::create_dir_all(parent).expect("create parent dirs");
238 }
239 fs::write(&file_path, content).expect("write test file");
240
241 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
242 let collected = collect_paths(temp_dir.path(), 0, &[]);
243 let result = process_collected(
244 &collected,
245 progress,
246 None,
247 LicenseScanOptions::default(),
248 options,
249 );
250
251 result
252 .files
253 .into_iter()
254 .find(|entry| {
255 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
256 })
257 .expect("scanned file entry")
258 }
259
260 fn scan_single_file_with_license_engine(
261 file_name: &str,
262 content: &str,
263 options: &TextDetectionOptions,
264 ) -> crate::models::FileInfo {
265 let temp_dir = TempDir::new().expect("create temp dir");
266 let file_path = temp_dir.path().join(file_name);
267 fs::write(&file_path, content).expect("write test file");
268
269 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
270 let collected = collect_paths(temp_dir.path(), 0, &[]);
271 let engine =
272 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
273 let result = process_collected(
274 &collected,
275 progress,
276 Some(engine),
277 LicenseScanOptions::default(),
278 options,
279 );
280
281 result
282 .files
283 .into_iter()
284 .find(|entry| {
285 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
286 })
287 .expect("scanned file entry")
288 }
289
290 #[test]
291 fn scanner_reports_repeated_email_occurrences() {
292 let options = TextDetectionOptions {
293 collect_info: false,
294 detect_packages: false,
295 detect_application_packages: false,
296 detect_system_packages: false,
297 detect_packages_in_compiled: false,
298 detect_copyrights: false,
299 detect_generated: false,
300 detect_emails: true,
301 detect_urls: false,
302 max_emails: 50,
303 max_urls: 50,
304 timeout_seconds: 120.0,
305 };
306 let scanned = scan_single_file(
307 "contacts.txt",
308 "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
309 &options,
310 );
311
312 let emails: Vec<(&str, usize)> = scanned
313 .emails
314 .iter()
315 .map(|email| (email.email.as_str(), email.start_line.get()))
316 .collect();
317
318 assert_eq!(emails.len(), 4, "emails: {emails:#?}");
319 assert_eq!(
320 emails,
321 vec![
322 ("linux@3ware.com", 1),
323 ("linux@3ware.com", 2),
324 ("andre@suse.com", 3),
325 ("linux@3ware.com", 4),
326 ]
327 );
328 }
329
330 #[test]
331 fn scanner_skips_pem_certificate_text_detection() {
332 let options = TextDetectionOptions {
333 collect_info: false,
334 detect_packages: false,
335 detect_application_packages: false,
336 detect_system_packages: false,
337 detect_packages_in_compiled: false,
338 detect_copyrights: true,
339 detect_generated: false,
340 detect_emails: true,
341 detect_urls: true,
342 max_emails: 50,
343 max_urls: 50,
344 timeout_seconds: 120.0,
345 };
346 let pem_fixture = concat!(
347 "-----BEGIN CERTIFICATE-----\n",
348 "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
349 "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
350 "-----END CERTIFICATE-----\n",
351 "Certificate:\n",
352 " Data:\n",
353 " Signature Algorithm: sha1WithRSAEncryption\n",
354 " Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
355 " Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
356 " Contact: cert-owner@example.com\n",
357 );
358 let scanned = scan_single_file("cert.pem", pem_fixture, &options);
359
360 assert!(
361 scanned.copyrights.is_empty(),
362 "copyrights: {:#?}",
363 scanned.copyrights
364 );
365 assert!(
366 scanned.holders.is_empty(),
367 "holders: {:#?}",
368 scanned.holders
369 );
370 assert!(
371 scanned.authors.is_empty(),
372 "authors: {:#?}",
373 scanned.authors
374 );
375 assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
376 assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
377 assert!(
378 scanned.license_detections.is_empty(),
379 "licenses: {:#?}",
380 scanned.license_detections
381 );
382 assert!(
383 scanned.license_clues.is_empty(),
384 "license clues: {:#?}",
385 scanned.license_clues
386 );
387 }
388
389 #[test]
390 fn scanner_keeps_source_headers_when_pem_blocks_are_embedded() {
391 let options = TextDetectionOptions {
392 collect_info: false,
393 detect_packages: false,
394 detect_application_packages: false,
395 detect_system_packages: false,
396 detect_packages_in_compiled: false,
397 detect_copyrights: true,
398 detect_generated: false,
399 detect_emails: false,
400 detect_urls: true,
401 max_emails: 50,
402 max_urls: 50,
403 timeout_seconds: 120.0,
404 };
405 let fixture = concat!(
406 "/*\n",
407 "Copyright 2022 The Kubernetes Authors.\n\n",
408 "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
409 "you may not use this file except in compliance with the License.\n",
410 "You may obtain a copy of the License at\n\n",
411 " http://www.apache.org/licenses/LICENSE-2.0\n",
412 "*/\n\n",
413 "package storage\n\n",
414 "const validCert = `\n",
415 "-----BEGIN CERTIFICATE-----\n",
416 "MIIDmTCCAoGgAwIBAgIUWQ==\n",
417 "-----END CERTIFICATE-----\n",
418 "`\n",
419 );
420 let temp_dir = TempDir::new().expect("create temp dir");
421 let file_path = temp_dir.path().join("storage_test.go");
422 fs::write(&file_path, fixture).expect("write fixture");
423
424 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
425 let collected = collect_paths(temp_dir.path(), 0, &[]);
426 let engine =
427 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
428 let result = process_collected(
429 &collected,
430 progress,
431 Some(engine),
432 LicenseScanOptions::default(),
433 &options,
434 );
435 let scanned = result
436 .files
437 .into_iter()
438 .find(|entry| {
439 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
440 })
441 .expect("scanned file entry");
442
443 assert!(
444 scanned
445 .copyrights
446 .iter()
447 .any(|c| c.copyright == "Copyright 2022 The Kubernetes Authors"),
448 "copyrights: {:#?}",
449 scanned.copyrights
450 );
451 assert!(
452 scanned
453 .holders
454 .iter()
455 .any(|h| h.holder == "The Kubernetes Authors"),
456 "holders: {:#?}",
457 scanned.holders
458 );
459 assert!(
460 scanned
461 .urls
462 .iter()
463 .any(|u| u.url == "http://www.apache.org/licenses/LICENSE-2.0"),
464 "urls: {:#?}",
465 scanned.urls
466 );
467 assert_eq!(scanned.license_expression.as_deref(), Some("Apache-2.0"));
468 }
469
470 #[test]
471 fn scanner_detects_structured_credits_authors() {
472 let options = TextDetectionOptions {
473 collect_info: false,
474 detect_packages: false,
475 detect_application_packages: false,
476 detect_system_packages: false,
477 detect_packages_in_compiled: false,
478 detect_copyrights: true,
479 detect_generated: false,
480 detect_emails: false,
481 detect_urls: false,
482 max_emails: 50,
483 max_urls: 50,
484 timeout_seconds: 120.0,
485 };
486 let credits_fixture = concat!(
487 "N: Jack Lloyd\n",
488 "E: lloyd@randombit.net\n",
489 "W: http://www.randombit.net/\n",
490 );
491 let scanned = scan_single_file("CREDITS", credits_fixture, &options);
492
493 let authors: Vec<(&str, usize, usize)> = scanned
494 .authors
495 .iter()
496 .map(|author| {
497 (
498 author.author.as_str(),
499 author.start_line.get(),
500 author.end_line.get(),
501 )
502 })
503 .collect();
504
505 assert_eq!(
506 authors,
507 vec![(
508 "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
509 1,
510 3,
511 )]
512 );
513 assert!(scanned.copyrights.is_empty());
514 assert!(scanned.holders.is_empty());
515 }
516
517 #[test]
518 fn scanner_uses_or_for_alternative_license_header() {
519 let fixture =
520 include_str!("../../testdata/license-golden/datadriven/external/boost-json-d2s.ipp");
521 let temp_dir = TempDir::new().expect("create temp dir");
522 let file_path = temp_dir.path().join("d2s.ipp");
523 fs::write(&file_path, fixture).expect("write fixture");
524
525 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
526 let collected = collect_paths(temp_dir.path(), 0, &[]);
527 let engine =
528 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
529 let result = process_collected(
530 &collected,
531 progress,
532 Some(engine),
533 LicenseScanOptions::default(),
534 &TextDetectionOptions::default(),
535 );
536 let scanned = result
537 .files
538 .into_iter()
539 .find(|entry| {
540 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
541 })
542 .expect("scanned file entry");
543
544 assert_eq!(
545 scanned.license_expression.as_deref(),
546 Some("Apache-2.0 OR BSL-1.0")
547 );
548 assert!(
549 scanned.license_clues.is_empty(),
550 "license clues: {:#?}",
551 scanned.license_clues
552 );
553 assert_eq!(
554 scanned.license_detections.len(),
555 1,
556 "detections: {:#?}",
557 scanned.license_detections
558 );
559
560 let detection = &scanned.license_detections[0];
561 assert_eq!(detection.license_expression_spdx, "Apache-2.0 OR BSL-1.0");
562
563 let match_expressions: Vec<_> = detection
564 .matches
565 .iter()
566 .map(|m| m.license_expression_spdx.as_str())
567 .collect();
568 assert_eq!(match_expressions, vec!["Apache-2.0", "BSL-1.0"]);
569 }
570
571 #[test]
572 fn scanner_sets_generated_flag_when_enabled() {
573 let options = TextDetectionOptions {
574 collect_info: false,
575 detect_packages: false,
576 detect_application_packages: false,
577 detect_system_packages: false,
578 detect_packages_in_compiled: false,
579 detect_copyrights: false,
580 detect_generated: true,
581 detect_emails: false,
582 detect_urls: false,
583 max_emails: 50,
584 max_urls: 50,
585 timeout_seconds: 120.0,
586 };
587 let scanned = scan_single_file(
588 "generated.c",
589 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
590 &options,
591 );
592
593 assert_eq!(scanned.is_generated, Some(true));
594 }
595
596 #[test]
597 fn scanner_leaves_generated_flag_unset_when_disabled() {
598 let options = TextDetectionOptions {
599 collect_info: false,
600 detect_packages: false,
601 detect_application_packages: false,
602 detect_system_packages: false,
603 detect_packages_in_compiled: false,
604 detect_copyrights: false,
605 detect_generated: false,
606 detect_emails: false,
607 detect_urls: false,
608 max_emails: 50,
609 max_urls: 50,
610 timeout_seconds: 120.0,
611 };
612 let scanned = scan_single_file(
613 "generated.c",
614 "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
615 &options,
616 );
617
618 assert_eq!(scanned.is_generated, None);
619 }
620
621 #[test]
622 fn scanner_populates_info_surface_when_enabled() {
623 let options = TextDetectionOptions {
624 collect_info: true,
625 detect_packages: false,
626 detect_application_packages: false,
627 detect_system_packages: false,
628 detect_packages_in_compiled: false,
629 detect_copyrights: false,
630 detect_generated: false,
631 detect_emails: false,
632 detect_urls: false,
633 max_emails: 50,
634 max_urls: 50,
635 timeout_seconds: 120.0,
636 };
637 let scanned = scan_single_file(
638 "script.py",
639 "#!/usr/bin/env python3\nprint(\"hello\")\n",
640 &options,
641 );
642
643 assert!(scanned.sha1.is_some());
644 assert!(scanned.md5.is_some());
645 assert!(scanned.sha256.is_some());
646 assert!(scanned.sha1_git.is_some());
647 assert!(scanned.mime_type.is_some());
648 assert!(scanned.date.is_some());
649 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
650 assert_eq!(scanned.is_text, Some(true));
651 assert_eq!(scanned.is_script, Some(true));
652 assert_eq!(scanned.is_source, Some(true));
653 }
654
655 #[test]
656 fn scanner_treats_latin1_python_sources_as_textual_scripts() {
657 let options = TextDetectionOptions {
658 collect_info: true,
659 detect_packages: false,
660 detect_application_packages: false,
661 detect_system_packages: false,
662 detect_packages_in_compiled: false,
663 detect_copyrights: false,
664 detect_generated: false,
665 detect_emails: false,
666 detect_urls: false,
667 max_emails: 50,
668 max_urls: 50,
669 timeout_seconds: 120.0,
670 };
671 let latin1_python = b"# coding: latin-1\nprint(\"caf\xe9\")\n# comment padding\n";
672 let scanned = scan_file_at_relative_path("script.py", latin1_python, &options);
673
674 assert_eq!(scanned.programming_language.as_deref(), Some("Python"));
675 assert_eq!(
676 scanned.file_type_label.as_deref(),
677 Some("python script, text executable")
678 );
679 assert_eq!(scanned.is_binary, Some(false));
680 assert_eq!(scanned.is_text, Some(true));
681 assert_eq!(scanned.is_script, Some(true));
682 assert_eq!(scanned.is_source, Some(true));
683 }
684
685 #[test]
686 fn scanner_skips_findings_for_zip_like_archives() {
687 let options = TextDetectionOptions {
688 collect_info: true,
689 detect_packages: false,
690 detect_application_packages: false,
691 detect_system_packages: false,
692 detect_packages_in_compiled: false,
693 detect_copyrights: true,
694 detect_generated: false,
695 detect_emails: true,
696 detect_urls: true,
697 max_emails: 50,
698 max_urls: 50,
699 timeout_seconds: 120.0,
700 };
701 let archive_like = b"PK\x03\x04\x14\x00\x00\x00\x08\x00MIT License\ncontact@example.com\nhttps://example.com\n";
702 let scanned = scan_file_at_relative_path("demo.whl", archive_like, &options);
703
704 assert_eq!(scanned.mime_type.as_deref(), Some("application/zip"));
705 assert_eq!(scanned.is_archive, Some(true));
706 assert!(scanned.license_detections.is_empty());
707 assert!(scanned.copyrights.is_empty());
708 assert!(scanned.emails.is_empty());
709 assert!(scanned.urls.is_empty());
710 }
711
712 #[test]
713 fn scanner_treats_typescript_sources_as_text_not_video_media() {
714 let options = TextDetectionOptions {
715 collect_info: true,
716 detect_packages: false,
717 detect_application_packages: false,
718 detect_system_packages: false,
719 detect_packages_in_compiled: false,
720 detect_copyrights: false,
721 detect_generated: false,
722 detect_emails: false,
723 detect_urls: false,
724 max_emails: 50,
725 max_urls: 50,
726 timeout_seconds: 120.0,
727 };
728 let scanned = scan_single_file("main.ts", "export const answer: number = 42;\n", &options);
729
730 assert_eq!(scanned.programming_language.as_deref(), Some("TypeScript"));
731 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
732 assert_eq!(
733 scanned.file_type_label.as_deref(),
734 Some("UTF-8 Unicode text")
735 );
736 assert_eq!(scanned.is_text, Some(true));
737 assert_eq!(scanned.is_media, Some(false));
738 assert_eq!(scanned.is_script, Some(false));
739 assert_eq!(scanned.is_source, Some(true));
740 }
741
742 #[test]
743 fn scanner_normalizes_sparse_ts_files_away_from_video_mime() {
744 let options = TextDetectionOptions {
745 collect_info: true,
746 detect_packages: false,
747 detect_application_packages: false,
748 detect_system_packages: false,
749 detect_packages_in_compiled: false,
750 detect_copyrights: false,
751 detect_generated: false,
752 detect_emails: false,
753 detect_urls: false,
754 max_emails: 50,
755 max_urls: 50,
756 timeout_seconds: 120.0,
757 };
758 let scanned = scan_single_file("main.ts", "// comment-only TypeScript fixture\n", &options);
759
760 assert_eq!(scanned.mime_type.as_deref(), Some("text/plain"));
761 assert_eq!(
762 scanned.file_type_label.as_deref(),
763 Some("UTF-8 Unicode text")
764 );
765 assert_eq!(scanned.is_text, Some(true));
766 assert_eq!(scanned.is_media, Some(false));
767 assert_eq!(scanned.is_script, Some(false));
768 assert_eq!(scanned.is_source, Some(true));
769 }
770
771 #[test]
772 fn scanner_treats_empty_files_like_scancode_info_surface() {
773 let options = TextDetectionOptions {
774 collect_info: true,
775 detect_packages: false,
776 detect_application_packages: false,
777 detect_system_packages: false,
778 detect_packages_in_compiled: false,
779 detect_copyrights: false,
780 detect_generated: false,
781 detect_emails: false,
782 detect_urls: false,
783 max_emails: 50,
784 max_urls: 50,
785 timeout_seconds: 120.0,
786 };
787 let scanned = scan_single_file("test.txt", "", &options);
788
789 assert_eq!(scanned.mime_type.as_deref(), Some("inode/x-empty"));
790 assert_eq!(scanned.file_type_label.as_deref(), Some("empty"));
791 assert_eq!(scanned.programming_language, None);
792 assert_eq!(scanned.is_binary, Some(false));
793 assert_eq!(scanned.is_text, Some(true));
794 assert_eq!(scanned.is_archive, Some(false));
795 assert_eq!(scanned.is_media, Some(false));
796 assert_eq!(scanned.is_source, Some(false));
797 assert_eq!(scanned.is_script, Some(false));
798 }
799
800 #[test]
801 fn scanner_treats_package_json_as_text_not_source() {
802 let options = TextDetectionOptions {
803 collect_info: true,
804 detect_packages: false,
805 detect_application_packages: false,
806 detect_system_packages: false,
807 detect_packages_in_compiled: false,
808 detect_copyrights: false,
809 detect_generated: false,
810 detect_emails: false,
811 detect_urls: false,
812 max_emails: 50,
813 max_urls: 50,
814 timeout_seconds: 120.0,
815 };
816 let scanned = scan_single_file("package.json", r#"{"name":"demo"}"#, &options);
817
818 assert_eq!(scanned.mime_type.as_deref(), Some("application/json"));
819 assert_eq!(scanned.file_type_label.as_deref(), Some("JSON text data"));
820 assert_eq!(scanned.programming_language, None);
821 assert_eq!(scanned.is_text, Some(true));
822 assert_eq!(scanned.is_source, Some(false));
823 assert_eq!(scanned.is_script, Some(false));
824 }
825
826 #[test]
827 fn scanner_classifies_gradle_and_nix_manifests_as_source() {
828 let options = TextDetectionOptions {
829 collect_info: true,
830 detect_packages: false,
831 detect_application_packages: false,
832 detect_system_packages: false,
833 detect_packages_in_compiled: false,
834 detect_copyrights: false,
835 detect_generated: false,
836 detect_emails: false,
837 detect_urls: false,
838 max_emails: 50,
839 max_urls: 50,
840 timeout_seconds: 120.0,
841 };
842
843 let gradle = scan_single_file("build.gradle", "plugins { id 'java' }\n", &options);
844 let nix = scan_single_file("flake.nix", "{ inputs, ... }: {}\n", &options);
845
846 assert_eq!(gradle.programming_language.as_deref(), Some("Groovy"));
847 assert_eq!(gradle.mime_type.as_deref(), Some("text/plain"));
848 assert_eq!(gradle.is_source, Some(true));
849 assert_eq!(gradle.is_script, Some(false));
850
851 assert_eq!(nix.programming_language.as_deref(), Some("Nix"));
852 assert_eq!(nix.mime_type.as_deref(), Some("text/plain"));
853 assert_eq!(nix.is_source, Some(true));
854 assert_eq!(nix.is_script, Some(false));
855 }
856
857 #[test]
858 fn scanner_treats_gitmodules_as_text_not_source() {
859 let options = TextDetectionOptions {
860 collect_info: true,
861 detect_packages: false,
862 detect_application_packages: false,
863 detect_system_packages: false,
864 detect_packages_in_compiled: false,
865 detect_copyrights: false,
866 detect_generated: false,
867 detect_emails: false,
868 detect_urls: false,
869 max_emails: 50,
870 max_urls: 50,
871 timeout_seconds: 120.0,
872 };
873 let scanned = scan_file_at_relative_path(
874 ".gitmodules",
875 b"[submodule \"demo\"]\n\tpath = vendor/demo\n",
876 &options,
877 );
878
879 assert_eq!(scanned.programming_language, None);
880 assert_eq!(
881 scanned.file_type_label.as_deref(),
882 Some("Git configuration text")
883 );
884 assert_eq!(scanned.is_text, Some(true));
885 assert_eq!(scanned.is_source, Some(false));
886 assert_eq!(scanned.is_script, Some(false));
887 }
888
889 #[test]
890 fn scanner_treats_javascript_shebang_files_as_scripts() {
891 let options = TextDetectionOptions {
892 collect_info: true,
893 detect_packages: false,
894 detect_application_packages: false,
895 detect_system_packages: false,
896 detect_packages_in_compiled: false,
897 detect_copyrights: false,
898 detect_generated: false,
899 detect_emails: false,
900 detect_urls: false,
901 max_emails: 50,
902 max_urls: 50,
903 timeout_seconds: 120.0,
904 };
905 let scanned = scan_file_at_relative_path(
906 "bin/run",
907 b"#!/usr/bin/env node\nconsole.log('hello');\n",
908 &options,
909 );
910
911 assert_eq!(scanned.programming_language.as_deref(), Some("JavaScript"));
912 assert_eq!(
913 scanned.file_type_label.as_deref(),
914 Some("javascript script, UTF-8 Unicode text executable")
915 );
916 assert_eq!(scanned.is_script, Some(true));
917 assert_eq!(scanned.is_source, Some(true));
918 }
919
920 #[test]
921 fn scanner_treats_dockerfile_as_source() {
922 let options = TextDetectionOptions {
923 collect_info: true,
924 detect_packages: false,
925 detect_application_packages: false,
926 detect_system_packages: false,
927 detect_packages_in_compiled: false,
928 detect_copyrights: false,
929 detect_generated: false,
930 detect_emails: false,
931 detect_urls: false,
932 max_emails: 50,
933 max_urls: 50,
934 timeout_seconds: 120.0,
935 };
936 let scanned = scan_single_file("Dockerfile", "FROM scratch\n", &options);
937
938 assert_eq!(scanned.programming_language.as_deref(), Some("Dockerfile"));
939 assert_eq!(
940 scanned.file_type_label.as_deref(),
941 Some("UTF-8 Unicode text")
942 );
943 assert_eq!(scanned.is_source, Some(true));
944 assert_eq!(scanned.is_script, Some(false));
945 }
946
947 #[test]
948 fn scanner_treats_makefile_as_text_not_source() {
949 let options = TextDetectionOptions {
950 collect_info: true,
951 detect_packages: false,
952 detect_application_packages: false,
953 detect_system_packages: false,
954 detect_packages_in_compiled: false,
955 detect_copyrights: false,
956 detect_generated: false,
957 detect_emails: false,
958 detect_urls: false,
959 max_emails: 50,
960 max_urls: 50,
961 timeout_seconds: 120.0,
962 };
963 let scanned = scan_single_file("Makefile", "all:\n\techo hi\n", &options);
964
965 assert_eq!(scanned.programming_language, None);
966 assert_eq!(
967 scanned.file_type_label.as_deref(),
968 Some("UTF-8 Unicode text")
969 );
970 assert_eq!(scanned.is_text, Some(true));
971 assert_eq!(scanned.is_source, Some(false));
972 assert_eq!(scanned.is_script, Some(false));
973 }
974
975 #[test]
976 fn scanner_omits_info_surface_when_disabled() {
977 let options = TextDetectionOptions {
978 collect_info: false,
979 detect_packages: false,
980 detect_application_packages: false,
981 detect_system_packages: false,
982 detect_packages_in_compiled: false,
983 detect_copyrights: false,
984 detect_generated: false,
985 detect_emails: false,
986 detect_urls: false,
987 max_emails: 50,
988 max_urls: 50,
989 timeout_seconds: 120.0,
990 };
991 let scanned = scan_single_file(
992 "script.py",
993 "#!/usr/bin/env python3\nprint(\"hello\")\n",
994 &options,
995 );
996
997 assert!(scanned.sha1.is_none());
998 assert!(scanned.md5.is_none());
999 assert!(scanned.sha256.is_none());
1000 assert!(scanned.sha1_git.is_none());
1001 assert!(scanned.mime_type.is_none());
1002 assert!(scanned.date.is_none());
1003 assert!(scanned.programming_language.is_none());
1004 assert!(scanned.is_binary.is_none());
1005 assert!(scanned.is_text.is_none());
1006 assert!(scanned.is_archive.is_none());
1007 assert!(scanned.is_media.is_none());
1008 assert!(scanned.is_script.is_none());
1009 assert!(scanned.is_source.is_none());
1010 }
1011
1012 #[test]
1013 fn scanner_skips_package_parsing_when_disabled() {
1014 let options = TextDetectionOptions {
1015 collect_info: false,
1016 detect_packages: false,
1017 detect_application_packages: false,
1018 detect_system_packages: false,
1019 detect_packages_in_compiled: false,
1020 detect_copyrights: false,
1021 detect_generated: false,
1022 detect_emails: false,
1023 detect_urls: false,
1024 max_emails: 50,
1025 max_urls: 50,
1026 timeout_seconds: 120.0,
1027 };
1028 let scanned = scan_single_file(
1029 "package.json",
1030 r#"{"name":"demo","version":"1.0.0"}"#,
1031 &options,
1032 );
1033
1034 assert!(
1035 scanned.package_data.is_empty(),
1036 "package_data: {:#?}",
1037 scanned.package_data
1038 );
1039 }
1040
1041 #[test]
1042 fn scanner_parses_package_manifests_when_enabled() {
1043 let options = TextDetectionOptions {
1044 collect_info: false,
1045 detect_packages: true,
1046 detect_application_packages: true,
1047 detect_system_packages: false,
1048 detect_packages_in_compiled: false,
1049 detect_copyrights: false,
1050 detect_generated: false,
1051 detect_emails: false,
1052 detect_urls: false,
1053 max_emails: 50,
1054 max_urls: 50,
1055 timeout_seconds: 120.0,
1056 };
1057 let scanned = scan_single_file(
1058 "package.json",
1059 r#"{"name":"demo","version":"1.0.0"}"#,
1060 &options,
1061 );
1062
1063 assert_eq!(
1064 scanned.package_data.len(),
1065 1,
1066 "package_data: {:#?}",
1067 scanned.package_data
1068 );
1069 }
1070
1071 #[test]
1072 fn scanner_parses_oversized_rpm_in_package_only_mode_without_size_warning() {
1073 let temp_dir = TempDir::new().expect("create temp dir");
1074 let file_path = build_sparse_oversized_rpm(&temp_dir, "oversized-demo");
1075
1076 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1077 let collected = collect_paths(temp_dir.path(), 0, &[]);
1078 let result = process_collected(
1079 &collected,
1080 progress,
1081 None,
1082 LicenseScanOptions::default(),
1083 &TextDetectionOptions {
1084 collect_info: false,
1085 detect_packages: true,
1086 detect_application_packages: true,
1087 detect_system_packages: false,
1088 detect_packages_in_compiled: false,
1089 detect_copyrights: false,
1090 detect_generated: false,
1091 detect_emails: false,
1092 detect_urls: false,
1093 max_emails: 50,
1094 max_urls: 50,
1095 timeout_seconds: 120.0,
1096 },
1097 );
1098
1099 let scanned = result
1100 .files
1101 .into_iter()
1102 .find(|entry| {
1103 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1104 })
1105 .expect("scanned file entry");
1106
1107 assert!(
1108 scanned.scan_errors.is_empty(),
1109 "scan_errors: {:#?}",
1110 scanned.scan_errors
1111 );
1112 assert_eq!(
1113 scanned.package_data.len(),
1114 1,
1115 "package_data: {:#?}",
1116 scanned.package_data
1117 );
1118 assert_eq!(
1119 scanned.package_data[0].datasource_id,
1120 Some(DatasourceId::RpmArchive)
1121 );
1122 assert_eq!(
1123 scanned.package_data[0].name.as_deref(),
1124 Some("oversized-demo")
1125 );
1126 assert_eq!(scanned.package_data[0].version.as_deref(), Some("1.0-1"));
1127 }
1128
1129 #[test]
1130 fn scanner_parses_oversized_rpm_with_info_without_timeout_or_size_warning() {
1131 let temp_dir = TempDir::new().expect("create temp dir");
1132 let file_path = build_sparse_oversized_rpm(&temp_dir, "oversized-info-demo");
1133
1134 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1135 let collected = collect_paths(temp_dir.path(), 0, &[]);
1136 let result = process_collected(
1137 &collected,
1138 progress,
1139 None,
1140 LicenseScanOptions::default(),
1141 &TextDetectionOptions {
1142 collect_info: true,
1143 detect_packages: true,
1144 detect_application_packages: true,
1145 detect_system_packages: false,
1146 detect_packages_in_compiled: false,
1147 detect_copyrights: false,
1148 detect_generated: false,
1149 detect_emails: false,
1150 detect_urls: false,
1151 max_emails: 50,
1152 max_urls: 50,
1153 timeout_seconds: 120.0,
1154 },
1155 );
1156
1157 let scanned = result
1158 .files
1159 .into_iter()
1160 .find(|entry| {
1161 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1162 })
1163 .expect("scanned file entry");
1164
1165 assert!(
1166 scanned.scan_errors.is_empty(),
1167 "scan_errors: {:#?}",
1168 scanned.scan_errors
1169 );
1170 assert_eq!(
1171 scanned.package_data.len(),
1172 1,
1173 "package_data: {:#?}",
1174 scanned.package_data
1175 );
1176 assert_eq!(
1177 scanned.package_data[0].datasource_id,
1178 Some(DatasourceId::RpmArchive)
1179 );
1180 assert_eq!(
1181 scanned.package_data[0].name.as_deref(),
1182 Some("oversized-info-demo")
1183 );
1184 assert!(scanned.sha1.is_some());
1185 assert!(scanned.md5.is_some());
1186 assert!(scanned.sha256.is_some());
1187 assert!(scanned.sha1_git.is_some());
1188 assert_eq!(scanned.mime_type.as_deref(), Some("application/x-rpm"));
1189 assert_eq!(scanned.file_type_label.as_deref(), Some("RPM package"));
1190 assert_eq!(scanned.is_binary, Some(true));
1191 assert_eq!(scanned.is_text, Some(false));
1192 assert_eq!(scanned.is_archive, Some(true));
1193 }
1194
1195 #[test]
1196 fn scanner_parses_oversized_pack_rpm_in_package_only_mode_without_size_warning() {
1197 let temp_dir = TempDir::new().expect("create temp dir");
1198 let file_path = build_sparse_oversized_pack_rpm(&temp_dir, "oversized-pack-demo");
1199
1200 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1201 let collected = collect_paths(temp_dir.path(), 0, &[]);
1202 let result = process_collected(
1203 &collected,
1204 progress,
1205 None,
1206 LicenseScanOptions::default(),
1207 &TextDetectionOptions {
1208 collect_info: false,
1209 detect_packages: true,
1210 detect_application_packages: true,
1211 detect_system_packages: false,
1212 detect_packages_in_compiled: false,
1213 detect_copyrights: false,
1214 detect_generated: false,
1215 detect_emails: false,
1216 detect_urls: false,
1217 max_emails: 50,
1218 max_urls: 50,
1219 timeout_seconds: 120.0,
1220 },
1221 );
1222
1223 let scanned = result
1224 .files
1225 .into_iter()
1226 .find(|entry| {
1227 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1228 })
1229 .expect("scanned file entry");
1230
1231 assert!(
1232 scanned.scan_errors.is_empty(),
1233 "scan_errors: {:#?}",
1234 scanned.scan_errors
1235 );
1236 assert_eq!(
1237 scanned.package_data.len(),
1238 1,
1239 "package_data: {:#?}",
1240 scanned.package_data
1241 );
1242 assert_eq!(
1243 scanned.package_data[0].datasource_id,
1244 Some(DatasourceId::RpmArchive)
1245 );
1246 assert_eq!(
1247 scanned.package_data[0].name.as_deref(),
1248 Some("oversized-pack-demo")
1249 );
1250 }
1251
1252 #[test]
1253 fn scanner_parses_oversized_pack_rpm_with_info_without_timeout_or_size_warning() {
1254 let temp_dir = TempDir::new().expect("create temp dir");
1255 let file_path = build_sparse_oversized_pack_rpm(&temp_dir, "oversized-pack-info-demo");
1256
1257 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1258 let collected = collect_paths(temp_dir.path(), 0, &[]);
1259 let result = process_collected(
1260 &collected,
1261 progress,
1262 None,
1263 LicenseScanOptions::default(),
1264 &TextDetectionOptions {
1265 collect_info: true,
1266 detect_packages: true,
1267 detect_application_packages: true,
1268 detect_system_packages: false,
1269 detect_packages_in_compiled: false,
1270 detect_copyrights: false,
1271 detect_generated: false,
1272 detect_emails: false,
1273 detect_urls: false,
1274 max_emails: 50,
1275 max_urls: 50,
1276 timeout_seconds: 120.0,
1277 },
1278 );
1279
1280 let scanned = result
1281 .files
1282 .into_iter()
1283 .find(|entry| {
1284 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1285 })
1286 .expect("scanned file entry");
1287
1288 assert!(
1289 scanned.scan_errors.is_empty(),
1290 "scan_errors: {:#?}",
1291 scanned.scan_errors
1292 );
1293 assert_eq!(
1294 scanned.package_data.len(),
1295 1,
1296 "package_data: {:#?}",
1297 scanned.package_data
1298 );
1299 assert_eq!(
1300 scanned.package_data[0].datasource_id,
1301 Some(DatasourceId::RpmArchive)
1302 );
1303 assert_eq!(
1304 scanned.package_data[0].name.as_deref(),
1305 Some("oversized-pack-info-demo")
1306 );
1307 assert!(scanned.sha1.is_some());
1308 assert!(scanned.md5.is_some());
1309 assert!(scanned.sha256.is_some());
1310 assert!(scanned.sha1_git.is_some());
1311 assert_eq!(scanned.mime_type.as_deref(), Some("application/x-rpm"));
1312 assert_eq!(scanned.file_type_label.as_deref(), Some("RPM package"));
1313 assert_eq!(scanned.is_binary, Some(true));
1314 assert_eq!(scanned.is_text, Some(false));
1315 assert_eq!(scanned.is_archive, Some(true));
1316 }
1317
1318 #[test]
1319 fn scanner_skips_application_packages_when_only_system_packages_enabled() {
1320 let options = TextDetectionOptions {
1321 collect_info: false,
1322 detect_packages: true,
1323 detect_application_packages: false,
1324 detect_system_packages: true,
1325 detect_packages_in_compiled: false,
1326 detect_copyrights: false,
1327 detect_generated: false,
1328 detect_emails: false,
1329 detect_urls: false,
1330 max_emails: 50,
1331 max_urls: 50,
1332 timeout_seconds: 120.0,
1333 };
1334 let scanned = scan_single_file(
1335 "package.json",
1336 r#"{"name":"demo","version":"1.0.0"}"#,
1337 &options,
1338 );
1339
1340 assert!(
1341 scanned.package_data.is_empty(),
1342 "package_data: {:#?}",
1343 scanned.package_data
1344 );
1345 }
1346
1347 #[test]
1348 fn scanner_parses_system_package_files_when_enabled() {
1349 let options = TextDetectionOptions {
1350 collect_info: false,
1351 detect_packages: true,
1352 detect_application_packages: false,
1353 detect_system_packages: true,
1354 detect_packages_in_compiled: false,
1355 detect_copyrights: false,
1356 detect_generated: false,
1357 detect_emails: false,
1358 detect_urls: false,
1359 max_emails: 50,
1360 max_urls: 50,
1361 timeout_seconds: 120.0,
1362 };
1363 let scanned = scan_file_at_relative_path(
1364 "var/lib/dpkg/status",
1365 b"Package: demo\nVersion: 1.0\nArchitecture: all\nDescription: demo package\n\n",
1366 &options,
1367 );
1368
1369 assert!(
1370 !scanned.package_data.is_empty(),
1371 "package_data: {:#?}",
1372 scanned.package_data
1373 );
1374 }
1375
1376 #[test]
1377 fn scanner_only_parses_compiled_packages_when_package_in_compiled_is_enabled() {
1378 if std::process::Command::new("go")
1379 .arg("version")
1380 .status()
1381 .is_err()
1382 {
1383 return;
1384 }
1385
1386 let temp_dir = TempDir::new().expect("create temp dir");
1387 fs::write(
1388 temp_dir.path().join("go.mod"),
1389 "module example.com/demo\n\ngo 1.23.0\n",
1390 )
1391 .expect("write go.mod");
1392 fs::write(
1393 temp_dir.path().join("main.go"),
1394 "package main\nfunc main() {}\n",
1395 )
1396 .expect("write main.go");
1397 let file_path = temp_dir.path().join("demo");
1398 let status = std::process::Command::new("go")
1399 .current_dir(temp_dir.path())
1400 .args(["build", "-o"])
1401 .arg(&file_path)
1402 .status()
1403 .expect("run go build");
1404 assert!(status.success());
1405
1406 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1407 let collected = collect_paths(temp_dir.path(), 0, &[]);
1408
1409 let without_compiled = process_collected(
1410 &collected,
1411 Arc::clone(&progress),
1412 None,
1413 LicenseScanOptions::default(),
1414 &TextDetectionOptions {
1415 collect_info: false,
1416 detect_packages: true,
1417 detect_application_packages: true,
1418 detect_system_packages: false,
1419 detect_packages_in_compiled: false,
1420 detect_copyrights: false,
1421 detect_generated: false,
1422 detect_emails: false,
1423 detect_urls: false,
1424 max_emails: 50,
1425 max_urls: 50,
1426 timeout_seconds: 120.0,
1427 },
1428 );
1429 let with_compiled = process_collected(
1430 &collected,
1431 progress,
1432 None,
1433 LicenseScanOptions::default(),
1434 &TextDetectionOptions {
1435 collect_info: false,
1436 detect_packages: true,
1437 detect_application_packages: true,
1438 detect_system_packages: false,
1439 detect_packages_in_compiled: true,
1440 detect_copyrights: false,
1441 detect_generated: false,
1442 detect_emails: false,
1443 detect_urls: false,
1444 max_emails: 50,
1445 max_urls: 50,
1446 timeout_seconds: 120.0,
1447 },
1448 );
1449
1450 let without_compiled = without_compiled
1451 .files
1452 .into_iter()
1453 .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
1454 .expect("compiled artifact present");
1455 let with_compiled = with_compiled
1456 .files
1457 .into_iter()
1458 .find(|entry| entry.file_type == FileType::File && entry.path.ends_with("/demo"))
1459 .expect("compiled artifact present");
1460
1461 assert!(
1462 without_compiled.package_data.is_empty(),
1463 "package_data: {:#?}",
1464 without_compiled.package_data
1465 );
1466 assert!(!with_compiled.package_data.is_empty());
1467 }
1468
1469 #[test]
1470 fn scanner_parses_windows_executable_packages_under_normal_package_scan() {
1471 let temp_dir = TempDir::new().expect("create temp dir");
1472 let file_path = temp_dir.path().join("libiconv2.dll");
1473 let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
1474 .expect("read PE fixture");
1475 fs::write(&file_path, fixture).expect("write PE fixture");
1476
1477 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1478 let collected = collect_paths(temp_dir.path(), 0, &[]);
1479
1480 let without_package = process_collected(
1481 &collected,
1482 Arc::clone(&progress),
1483 None,
1484 LicenseScanOptions::default(),
1485 &TextDetectionOptions {
1486 collect_info: false,
1487 detect_packages: false,
1488 detect_application_packages: false,
1489 detect_system_packages: false,
1490 detect_packages_in_compiled: false,
1491 detect_copyrights: false,
1492 detect_generated: false,
1493 detect_emails: false,
1494 detect_urls: false,
1495 max_emails: 50,
1496 max_urls: 50,
1497 timeout_seconds: 120.0,
1498 },
1499 );
1500 let with_package = process_collected(
1501 &collected,
1502 progress,
1503 None,
1504 LicenseScanOptions::default(),
1505 &TextDetectionOptions {
1506 collect_info: false,
1507 detect_packages: true,
1508 detect_application_packages: true,
1509 detect_system_packages: false,
1510 detect_packages_in_compiled: false,
1511 detect_copyrights: false,
1512 detect_generated: false,
1513 detect_emails: false,
1514 detect_urls: false,
1515 max_emails: 50,
1516 max_urls: 50,
1517 timeout_seconds: 120.0,
1518 },
1519 );
1520
1521 let without_package = without_package
1522 .files
1523 .into_iter()
1524 .find(|entry| {
1525 entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll")
1526 })
1527 .expect("compiled artifact present");
1528 let with_package = with_package
1529 .files
1530 .into_iter()
1531 .find(|entry| {
1532 entry.file_type == FileType::File && entry.path.ends_with("/libiconv2.dll")
1533 })
1534 .expect("compiled artifact present");
1535
1536 assert!(without_package.package_data.is_empty());
1537 assert_eq!(with_package.package_data.len(), 1);
1538 assert_eq!(
1539 with_package.package_data[0].package_type,
1540 Some(FilePackageType::Winexe)
1541 );
1542 assert_eq!(
1543 with_package.package_data[0].datasource_id,
1544 Some(DatasourceId::WindowsExecutable)
1545 );
1546 }
1547
1548 #[test]
1549 fn scanner_detects_license_from_font_metadata() {
1550 let temp_dir = TempDir::new().expect("create temp dir");
1551 let file_path = temp_dir.path().join("Lato-Bold.ttf");
1552 let fixture = fs::read("testdata/font-fixtures/Lato-Bold.ttf").expect("read font fixture");
1553 fs::write(&file_path, fixture).expect("write font fixture");
1554
1555 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1556 let collected = collect_paths(temp_dir.path(), 0, &[]);
1557 let engine =
1558 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
1559 let result = process_collected(
1560 &collected,
1561 progress,
1562 Some(engine),
1563 LicenseScanOptions::default(),
1564 &TextDetectionOptions::default(),
1565 );
1566 let scanned = result
1567 .files
1568 .into_iter()
1569 .find(|entry| {
1570 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1571 })
1572 .expect("scanned file entry");
1573
1574 assert!(
1575 scanned.license_expression.is_some(),
1576 "license detections: {:#?}",
1577 scanned.license_detections
1578 );
1579 assert!(
1580 scanned
1581 .license_expression
1582 .as_deref()
1583 .is_some_and(
1584 |expression| expression.contains("OFL-1.1") || expression.contains("ofl-1.1")
1585 ),
1586 "license expression: {:?}",
1587 scanned.license_expression
1588 );
1589 }
1590
1591 #[test]
1592 fn scanner_detects_license_from_windows_executable_metadata() {
1593 let temp_dir = TempDir::new().expect("create temp dir");
1594 let file_path = temp_dir.path().join("libiconv2.dll");
1595 let fixture = fs::read("testdata/compiled-binary-golden/win_pe/libiconv2.dll")
1596 .expect("read PE fixture");
1597 fs::write(&file_path, fixture).expect("write PE fixture");
1598
1599 let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
1600 let collected = collect_paths(temp_dir.path(), 0, &[]);
1601 let engine =
1602 Arc::new(LicenseDetectionEngine::from_embedded().expect("initialize license engine"));
1603 let result = process_collected(
1604 &collected,
1605 progress,
1606 Some(engine),
1607 LicenseScanOptions::default(),
1608 &TextDetectionOptions::default(),
1609 );
1610 let scanned = result
1611 .files
1612 .into_iter()
1613 .find(|entry| {
1614 entry.file_type == FileType::File && entry.path == file_path.to_string_lossy()
1615 })
1616 .expect("scanned file entry");
1617
1618 assert!(
1619 scanned.license_expression.is_some(),
1620 "license detections: {:#?}",
1621 scanned.license_detections
1622 );
1623 assert!(
1624 scanned
1625 .license_expression
1626 .as_deref()
1627 .is_some_and(|expression| {
1628 expression.contains("lgpl") || expression.contains("LGPL")
1629 }),
1630 "license expression: {:?}",
1631 scanned.license_expression
1632 );
1633 }
1634
1635 #[test]
1636 fn scanner_detects_cc_by_license_from_markdown_comment_banner() {
1637 let scanned = scan_single_file_with_license_engine(
1638 "navbar.md",
1639 "<!-- Documentation licensed under CC BY 4.0 -->\n<!-- License available at https://creativecommons.org/licenses/by/4.0/ -->\n",
1640 &TextDetectionOptions::default(),
1641 );
1642
1643 assert!(
1644 scanned
1645 .license_expression
1646 .as_deref()
1647 .is_some_and(|expression| {
1648 expression.contains("cc-by-4.0") || expression.contains("CC-BY-4.0")
1649 }),
1650 "license expression: {:?}",
1651 scanned.license_expression
1652 );
1653 }
1654
1655 #[test]
1656 fn scanner_detects_mit_license_from_shields_badge_markdown() {
1657 let scanned = scan_single_file_with_license_engine(
1658 "README.md",
1659 "[](https://opensource.org/licenses/MIT)\n",
1660 &TextDetectionOptions::default(),
1661 );
1662
1663 assert!(
1664 scanned
1665 .license_expression
1666 .as_deref()
1667 .is_some_and(|expression| {
1668 expression.contains("mit") || expression.contains("MIT")
1669 }),
1670 "license expression: {:?}",
1671 scanned.license_expression
1672 );
1673 }
1674
1675 #[test]
1676 fn scanner_detects_apache_license_from_markdown_readme_phrase() {
1677 let scanned = scan_single_file_with_license_engine(
1678 "README.md",
1679 "This crate is distributed under the terms of the Apache License (Version 2.0).\n",
1680 &TextDetectionOptions::default(),
1681 );
1682
1683 assert!(
1684 scanned
1685 .license_expression
1686 .as_deref()
1687 .is_some_and(|expression| {
1688 expression.contains("apache-2.0") || expression.contains("Apache-2.0")
1689 }),
1690 "license expression: {:?}",
1691 scanned.license_expression
1692 );
1693 }
1694
1695 #[test]
1696 fn scanner_sets_is_source_only_when_info_enabled() {
1697 let without_info = TextDetectionOptions {
1698 collect_info: false,
1699 detect_packages: false,
1700 detect_application_packages: false,
1701 detect_system_packages: false,
1702 detect_packages_in_compiled: false,
1703 detect_copyrights: false,
1704 detect_generated: false,
1705 detect_emails: false,
1706 detect_urls: false,
1707 max_emails: 50,
1708 max_urls: 50,
1709 timeout_seconds: 120.0,
1710 };
1711 let with_info = TextDetectionOptions {
1712 collect_info: true,
1713 ..without_info.clone()
1714 };
1715
1716 let scanned_without_info = scan_single_file("main.rs", "fn main() {}\n", &without_info);
1717 let scanned_with_info = scan_single_file("main.rs", "fn main() {}\n", &with_info);
1718
1719 assert_eq!(scanned_without_info.is_source, None);
1720 assert_eq!(scanned_with_info.is_source, Some(true));
1721 }
1722
1723 #[test]
1724 fn directory_omits_info_fields_when_info_disabled() {
1725 let temp_dir = TempDir::new().expect("create temp dir");
1726 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
1727
1728 let collected = collect_paths(temp_dir.path(), 0, &[]);
1729 let result = process_collected(
1730 &collected,
1731 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1732 None,
1733 LicenseScanOptions::default(),
1734 &TextDetectionOptions {
1735 collect_info: false,
1736 detect_packages: false,
1737 detect_application_packages: false,
1738 detect_system_packages: false,
1739 detect_packages_in_compiled: false,
1740 detect_copyrights: false,
1741 detect_generated: false,
1742 detect_emails: false,
1743 detect_urls: false,
1744 max_emails: 50,
1745 max_urls: 50,
1746 timeout_seconds: 120.0,
1747 },
1748 );
1749
1750 let directory = result
1751 .files
1752 .into_iter()
1753 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1754 .expect("directory entry");
1755
1756 assert!(directory.date.is_none());
1757 assert!(directory.file_type_label.is_none());
1758 assert!(directory.is_binary.is_none());
1759 assert!(directory.is_text.is_none());
1760 assert!(directory.is_archive.is_none());
1761 assert!(directory.is_media.is_none());
1762 assert!(directory.is_source.is_none());
1763 assert!(directory.is_script.is_none());
1764 }
1765
1766 #[test]
1767 fn directory_includes_info_fields_when_info_enabled() {
1768 let temp_dir = TempDir::new().expect("create temp dir");
1769 fs::create_dir_all(temp_dir.path().join("nested")).expect("create nested dir");
1770
1771 let collected = collect_paths(temp_dir.path(), 0, &[]);
1772 let result = process_collected(
1773 &collected,
1774 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1775 None,
1776 LicenseScanOptions::default(),
1777 &TextDetectionOptions {
1778 collect_info: true,
1779 detect_packages: false,
1780 detect_application_packages: false,
1781 detect_system_packages: false,
1782 detect_packages_in_compiled: false,
1783 detect_copyrights: false,
1784 detect_generated: false,
1785 detect_emails: false,
1786 detect_urls: false,
1787 max_emails: 50,
1788 max_urls: 50,
1789 timeout_seconds: 120.0,
1790 },
1791 );
1792
1793 let directory = result
1794 .files
1795 .into_iter()
1796 .find(|entry| entry.file_type == FileType::Directory && entry.path.ends_with("nested"))
1797 .expect("directory entry");
1798
1799 assert!(directory.date.is_none());
1800 assert!(directory.file_type_label.is_none());
1801 assert_eq!(directory.is_binary, Some(false));
1802 assert_eq!(directory.is_text, Some(false));
1803 assert_eq!(directory.is_archive, Some(false));
1804 assert_eq!(directory.is_media, Some(false));
1805 assert_eq!(directory.is_source, Some(false));
1806 assert_eq!(directory.is_script, Some(false));
1807 assert_eq!(directory.files_count, Some(0));
1808 assert_eq!(directory.dirs_count, Some(0));
1809 assert_eq!(directory.size_count, Some(0));
1810 }
1811
1812 #[test]
1813 fn collect_paths_includes_root_directory_entry() {
1814 let temp_dir = TempDir::new().expect("create temp dir");
1815 fs::create_dir_all(temp_dir.path().join("src")).expect("create nested dir");
1816 fs::write(temp_dir.path().join("src").join("main.rs"), "fn main() {}")
1817 .expect("write nested file");
1818
1819 let collected = collect_paths(temp_dir.path(), 0, &[]);
1820
1821 assert!(
1822 collected
1823 .directories
1824 .iter()
1825 .any(|(path, _)| path == temp_dir.path())
1826 );
1827 }
1828
1829 #[test]
1830 fn collect_paths_supports_single_file_input() {
1831 let temp_dir = TempDir::new().expect("create temp dir");
1832 let file_path = temp_dir.path().join("main.rs");
1833 fs::write(&file_path, "fn main() {}\n").expect("write file");
1834
1835 let collected = collect_paths(&file_path, 0, &[]);
1836
1837 assert_eq!(collected.files.len(), 1);
1838 assert!(collected.directories.is_empty());
1839 assert_eq!(collected.files[0].0, file_path);
1840 }
1841
1842 #[test]
1843 fn process_collected_with_memory_limit_preserves_results_when_spilling() {
1844 let temp_dir = TempDir::new().expect("create temp dir");
1845 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1846 fs::write(temp_dir.path().join("b.txt"), "world").expect("write second file");
1847
1848 let collected = collect_paths(temp_dir.path(), 0, &[]);
1849 let result = process_collected_with_memory_limit(
1850 &collected,
1851 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1852 None,
1853 LicenseScanOptions::default(),
1854 &TextDetectionOptions {
1855 collect_info: false,
1856 detect_packages: false,
1857 detect_application_packages: false,
1858 detect_system_packages: false,
1859 detect_packages_in_compiled: false,
1860 detect_copyrights: false,
1861 detect_generated: false,
1862 detect_emails: false,
1863 detect_urls: false,
1864 max_emails: 50,
1865 max_urls: 50,
1866 timeout_seconds: 120.0,
1867 },
1868 MemoryMode::Limit(1),
1869 );
1870
1871 assert_eq!(result.files.len(), 3);
1872 }
1873
1874 #[test]
1875 fn process_collected_with_negative_one_uses_disk_only_mode() {
1876 let temp_dir = TempDir::new().expect("create temp dir");
1877 fs::write(temp_dir.path().join("a.txt"), "hello").expect("write first file");
1878
1879 let collected = collect_paths(temp_dir.path(), 0, &[]);
1880 let result = process_collected_with_memory_limit(
1881 &collected,
1882 Arc::new(ScanProgress::new(ProgressMode::Quiet)),
1883 None,
1884 LicenseScanOptions::default(),
1885 &TextDetectionOptions {
1886 collect_info: false,
1887 detect_packages: false,
1888 detect_application_packages: false,
1889 detect_system_packages: false,
1890 detect_packages_in_compiled: false,
1891 detect_copyrights: false,
1892 detect_generated: false,
1893 detect_emails: false,
1894 detect_urls: false,
1895 max_emails: 50,
1896 max_urls: 50,
1897 timeout_seconds: 120.0,
1898 },
1899 MemoryMode::StreamUnlimited,
1900 );
1901
1902 assert_eq!(result.files.len(), 2);
1903 }
1904}