1mod collect;
2mod process;
3
4use std::path::PathBuf;
5
6use crate::models::FileInfo;
7
8pub struct ProcessResult {
9 pub files: Vec<FileInfo>,
10 pub excluded_count: usize,
11}
12
/// Switches and limits controlling which detectors run on scanned files.
///
/// Each `detect_*` flag enables one detector. The remaining fields are limits
/// and locations passed along with the flags.
#[derive(Debug, Clone)]
pub struct TextDetectionOptions {
    /// Parse package manifests (for example `package.json`) into package data.
    pub detect_packages: bool,
    /// Run copyright detection. The credits-file test in this module also
    /// obtains `authors` with only this flag enabled.
    pub detect_copyrights: bool,
    /// Populate the `is_generated` flag on scanned files.
    pub detect_generated: bool,
    /// Collect email addresses found in file text.
    pub detect_emails: bool,
    /// Collect URLs found in file text.
    pub detect_urls: bool,
    /// NOTE(review): presumably a cap on reported emails per file — the
    /// enforcement (and any special meaning of 0) is not visible here; confirm.
    pub max_emails: usize,
    /// NOTE(review): presumably a cap on reported URLs per file — confirm
    /// against the detector that consumes it.
    pub max_urls: usize,
    /// NOTE(review): presumably a processing time budget in seconds — scope
    /// (per file vs. per scan) is not visible here; confirm.
    pub timeout_seconds: f64,
    /// Optional directory for a scan cache; `None` leaves it unset.
    pub scan_cache_dir: Option<PathBuf>,
}

impl Default for TextDetectionOptions {
    /// Returns the baseline configuration: copyright detection is the only
    /// detector switched on, both match limits are 50, the timeout is 120
    /// seconds, and no cache directory is set.
    fn default() -> Self {
        Self {
            detect_packages: false,
            // The one detector that is on unless a caller turns it off.
            detect_copyrights: true,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,
            max_emails: 50,
            max_urls: 50,
            timeout_seconds: 120.0,
            scan_cache_dir: None,
        }
    }
}

42#[allow(unused_imports)]
43pub use self::collect::{CollectedPaths, collect_paths};
44pub use self::process::process_collected;
45
#[cfg(test)]
mod tests {
    use std::fs;
    use std::sync::Arc;

    use tempfile::TempDir;

    use crate::models::{FileInfo, FileType};
    use crate::progress::{ProgressMode, ScanProgress};

    use super::{TextDetectionOptions, collect_paths, process_collected};

    /// Baseline options with every detector switched off.
    ///
    /// Spelled out field by field (rather than derived from `Default`) so the
    /// tests below stay independent of the production defaults; each test
    /// opts in to the detectors it exercises via struct-update syntax.
    fn detection_disabled() -> TextDetectionOptions {
        TextDetectionOptions {
            detect_packages: false,
            detect_copyrights: false,
            detect_generated: false,
            detect_emails: false,
            detect_urls: false,
            max_emails: 50,
            max_urls: 50,
            timeout_seconds: 120.0,
            scan_cache_dir: None,
        }
    }

    /// Writes `content` to `file_name` inside a fresh temporary directory,
    /// runs collection and processing over that directory, and returns the
    /// entry recorded for the written file.
    ///
    /// Panics if the temp dir or file cannot be created, or if no matching
    /// file entry is present in the result.
    fn scan_single_file(
        file_name: &str,
        content: &str,
        options: &TextDetectionOptions,
    ) -> FileInfo {
        // `temp_dir` must stay alive until processing is done; it is removed
        // when it drops at the end of this function.
        let temp_dir = TempDir::new().expect("create temp dir");
        let file_path = temp_dir.path().join(file_name);
        fs::write(&file_path, content).expect("write test file");

        let progress = Arc::new(ScanProgress::new(ProgressMode::Quiet));
        let collected = collect_paths(temp_dir.path(), 0, &[]);
        let result = process_collected(&collected, progress, None, false, options);

        // Computed once so the closure does not rebuild it per entry.
        let expected_path = file_path.to_string_lossy();
        result
            .files
            .into_iter()
            .find(|entry| entry.file_type == FileType::File && entry.path == expected_path)
            .expect("scanned file entry")
    }

    #[test]
    fn default_options_keep_copyright_detection_enabled() {
        let options = TextDetectionOptions::default();
        assert!(!options.detect_packages);
        assert!(options.detect_copyrights);
    }

    #[test]
    fn scanner_reports_repeated_email_occurrences() {
        let options = TextDetectionOptions {
            detect_emails: true,
            ..detection_disabled()
        };
        let scanned = scan_single_file(
            "contacts.txt",
            "linux@3ware.com\nlinux@3ware.com\nandre@suse.com\nlinux@3ware.com\n",
            &options,
        );

        let emails: Vec<(&str, usize)> = scanned
            .emails
            .iter()
            .map(|email| (email.email.as_str(), email.start_line))
            .collect();

        // Every occurrence is expected, including repeats of the same address.
        assert_eq!(emails.len(), 4, "emails: {emails:#?}");
        assert_eq!(
            emails,
            vec![
                ("linux@3ware.com", 1),
                ("linux@3ware.com", 2),
                ("andre@suse.com", 3),
                ("linux@3ware.com", 4),
            ]
        );
    }

    #[test]
    fn scanner_skips_pem_certificate_text_detection() {
        let options = TextDetectionOptions {
            detect_copyrights: true,
            detect_emails: true,
            detect_urls: true,
            ..detection_disabled()
        };
        // The decoded certificate dump contains copyright-looking text and an
        // email address, none of which should be reported for a PEM file.
        let pem_fixture = concat!(
            "-----BEGIN CERTIFICATE-----\n",
            "MIID8TCCAtmgAwIBAgIQQT1yx/RrH4FDffHSKFTfmjANBgkqhkiG9w0BAQUFADCB\n",
            "ijELMAkGA1UEBhMCQ0gxEDAOBgNVBAoTB1dJU2VLZXkxGzAZBgNVBAsTEkNvcHly\n",
            "-----END CERTIFICATE-----\n",
            "Certificate:\n",
            "    Data:\n",
            "        Signature Algorithm: sha1WithRSAEncryption\n",
            "        Issuer: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
            "        Subject: C=CH, O=WISeKey, OU=Copyright (c) 2005, OU=OISTE Foundation Endorsed\n",
            "        Contact: cert-owner@example.com\n",
        );
        let scanned = scan_single_file("cert.pem", pem_fixture, &options);

        assert!(
            scanned.copyrights.is_empty(),
            "copyrights: {:#?}",
            scanned.copyrights
        );
        assert!(
            scanned.holders.is_empty(),
            "holders: {:#?}",
            scanned.holders
        );
        assert!(
            scanned.authors.is_empty(),
            "authors: {:#?}",
            scanned.authors
        );
        assert!(scanned.emails.is_empty(), "emails: {:#?}", scanned.emails);
        assert!(scanned.urls.is_empty(), "urls: {:#?}", scanned.urls);
        assert!(
            scanned.license_detections.is_empty(),
            "licenses: {:#?}",
            scanned.license_detections
        );
    }

    #[test]
    fn scanner_detects_structured_credits_authors() {
        let options = TextDetectionOptions {
            detect_copyrights: true,
            ..detection_disabled()
        };
        let credits_fixture = concat!(
            "N: Jack Lloyd\n",
            "E: lloyd@randombit.net\n",
            "W: http://www.randombit.net/\n",
        );
        let scanned = scan_single_file("CREDITS", credits_fixture, &options);

        let authors: Vec<(&str, usize, usize)> = scanned
            .authors
            .iter()
            .map(|author| (author.author.as_str(), author.start_line, author.end_line))
            .collect();

        // The three N:/E:/W: lines are expected to merge into one author entry.
        assert_eq!(
            authors,
            vec![(
                "Jack Lloyd lloyd@randombit.net http://www.randombit.net/",
                1,
                3,
            )]
        );
        assert!(scanned.copyrights.is_empty());
        assert!(scanned.holders.is_empty());
    }

    #[test]
    fn scanner_sets_generated_flag_when_enabled() {
        let options = TextDetectionOptions {
            detect_generated: true,
            ..detection_disabled()
        };
        let scanned = scan_single_file(
            "generated.c",
            "/* DO NOT EDIT THIS FILE - it is machine generated */\n",
            &options,
        );

        assert_eq!(scanned.is_generated, Some(true));
    }

    #[test]
    fn scanner_skips_package_parsing_when_disabled() {
        let options = detection_disabled();
        let scanned = scan_single_file(
            "package.json",
            r#"{"name":"demo","version":"1.0.0"}"#,
            &options,
        );

        assert!(
            scanned.package_data.is_empty(),
            "package_data: {:#?}",
            scanned.package_data
        );
    }

    #[test]
    fn scanner_parses_package_manifests_when_enabled() {
        let options = TextDetectionOptions {
            detect_packages: true,
            ..detection_disabled()
        };
        let scanned = scan_single_file(
            "package.json",
            r#"{"name":"demo","version":"1.0.0"}"#,
            &options,
        );

        assert_eq!(
            scanned.package_data.len(),
            1,
            "package_data: {:#?}",
            scanned.package_data
        );
    }
}