1use anyhow::{Result, anyhow};
5use serde_json::{Map as JsonMap, Value as JsonValue};
6use std::path::{Path, PathBuf};
7
8use crate::app::request::{InputMode, ScanRequest};
9use crate::app::scan_pipeline::execute_request;
10use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
11use crate::progress::ProgressMode;
12use crate::scanner::MemoryMode;
13use crate::{Output, ProcessMode};
14
/// Selects whether license detection runs and where its dataset comes from.
///
/// The variant is translated into the request's `license` flag and
/// `license_dataset_path` when a scan request is built:
///
/// * [`LicenseSource::Disabled`] leaves license detection off.
/// * [`LicenseSource::Embedded`] turns it on without a dataset path.
/// * [`LicenseSource::Directory`] turns it on and forwards the given path.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare configured
/// sources directly instead of pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseSource {
    /// License detection is not requested.
    Disabled,
    /// License detection is requested and no dataset path is supplied.
    // NOTE(review): presumably this selects a dataset bundled with the
    // binary; confirm against the license detection module.
    Embedded,
    /// License detection is requested using the dataset at this path.
    Directory(PathBuf),
}
25
/// Options accepted by the in-process scan entry points (`scan_path` and
/// `scan_paths`).
///
/// The options are converted into a `ScanRequest` and checked for
/// inconsistent combinations before the scan is executed. Field docs below
/// note the request field each option feeds and any validation rule that
/// applies to it.
#[derive(Debug, Clone)]
pub struct ScanOptions {
    /// Progress reporting mode. Also drives the request's `quiet` and
    /// `verbose` flags (set for `ProgressMode::Quiet` and
    /// `ProgressMode::Verbose` respectively).
    pub progress_mode: ProgressMode,
    /// Forwarded unchanged as the request's `process_mode`.
    pub process_mode: ProcessMode,
    /// Timeout in seconds, forwarded to the request.
    // NOTE(review): whether this applies per file or per scan is not visible
    // in this file.
    pub timeout_seconds: f64,
    /// Forwarded as the request's `max_depth`.
    // NOTE(review): the default is 0, which presumably means "no limit";
    // confirm against the scanner.
    pub max_depth: usize,
    /// Forwarded as the request's `max_in_memory`.
    pub max_in_memory: MemoryMode,
    /// Sets the request's `info` flag. Required by `mark_source`.
    pub collect_info: bool,
    /// Whether license detection runs and which dataset it uses.
    pub detect_license: LicenseSource,
    /// Sets the request's `package` flag. Rejected together with
    /// `package_only`.
    pub detect_packages: bool,
    /// Sets the request's `system_package` flag. Rejected together with
    /// `package_only`.
    pub detect_system_packages: bool,
    /// Sets the request's `package_in_compiled` flag.
    pub detect_packages_in_compiled: bool,
    /// Forwarded as the request's `package_only`. Validation rejects it in
    /// combination with license detection, `summary`, `detect_packages`, or
    /// `detect_system_packages`.
    pub package_only: bool,
    /// Forwarded as the request's `no_assemble`.
    pub no_assemble: bool,
    /// Sets the request's `copyright` flag.
    pub detect_copyrights: bool,
    /// Sets the request's `email` flag.
    pub detect_emails: bool,
    /// Sets the request's `url` flag.
    pub detect_urls: bool,
    /// Sets the request's `generated` flag.
    pub detect_generated: bool,
    /// Forwarded as the request's `max_email`.
    pub max_emails: usize,
    /// Forwarded as the request's `max_url`.
    pub max_urls: usize,
    /// Forwarded as the request's `include` list.
    // NOTE(review): presumably path patterns; the matching syntax is defined
    // elsewhere.
    pub include: Vec<String>,
    /// Forwarded as the request's `exclude` list.
    pub exclude: Vec<String>,
    /// When `true`, the input paths are inserted as a JSON array under the
    /// `"input"` key of the request's output header options.
    pub include_input_header: bool,
    /// Optional cache directory, forwarded as a lossily converted string.
    pub cache_dir: Option<PathBuf>,
    /// Forwarded as the request's `cache_clear`.
    pub cache_clear: bool,
    /// Forwarded as the request's `incremental`.
    pub incremental: bool,
    /// Forwarded as the request's `reindex`.
    pub reindex: bool,
    /// Forwarded as the request's `no_license_index_cache`.
    pub no_license_index_cache: bool,
    /// Forwarded as the request's `license_text`. Requires license detection.
    pub license_text: bool,
    /// Forwarded as the request's `license_text_diagnostics`. Requires
    /// `license_text`.
    pub license_text_diagnostics: bool,
    /// Forwarded as the request's `license_diagnostics`. Requires license
    /// detection.
    pub license_diagnostics: bool,
    /// Forwarded as the request's `unknown_licenses`. Requires license
    /// detection.
    pub unknown_licenses: bool,
    /// Forwarded as the request's `license_score`. Validation rejects values
    /// above 100.
    pub license_score: u8,
    /// Forwarded as the request's `filter_clues`.
    pub filter_clues: bool,
    /// Forwarded as the request's `ignore_author` list.
    pub ignore_author_patterns: Vec<String>,
    /// Forwarded as the request's `ignore_copyright_holder` list.
    pub ignore_copyright_holder_patterns: Vec<String>,
    /// Forwarded as the request's `only_findings`.
    pub only_findings: bool,
    /// Forwarded as the request's `mark_source`. Requires `collect_info`.
    pub mark_source: bool,
    /// Forwarded as the request's `classify`. Required by `summary`,
    /// `license_clarity_score`, and `tallies_key_files`.
    pub classify: bool,
    /// Forwarded as the request's `summary`. Requires `classify` and cannot
    /// be combined with `package_only`.
    pub summary: bool,
    /// Forwarded as the request's `license_clarity_score`. Requires
    /// `classify`.
    pub license_clarity_score: bool,
    /// Forwarded as the request's `license_references`. Requires license
    /// detection.
    pub license_references: bool,
    /// Forwarded as the request's `license_url_template`. Any value other
    /// than `DEFAULT_LICENSEDB_URL_TEMPLATE` requires license detection.
    pub license_url_template: String,
    /// Optional license policy path, forwarded as a lossily converted string.
    pub license_policy: Option<PathBuf>,
    /// Forwarded as the request's `tallies`. Required by `tallies_key_files`
    /// and `tallies_by_facet`.
    pub tallies: bool,
    /// Forwarded as the request's `tallies_key_files`. Requires both
    /// `tallies` and `classify`.
    pub tallies_key_files: bool,
    /// Forwarded as the request's `tallies_with_details`.
    pub tallies_with_details: bool,
    /// Forwarded as the request's `facet` list. Must be non-empty when
    /// `tallies_by_facet` is set.
    pub facets: Vec<String>,
    /// Forwarded as the request's `tallies_by_facet`. Requires `tallies` and
    /// at least one entry in `facets`.
    pub tallies_by_facet: bool,
    /// Forwarded as the request's `strip_root`. Mutually exclusive with
    /// `full_root`.
    pub strip_root: bool,
    /// Forwarded as the request's `full_root`. Mutually exclusive with
    /// `strip_root`.
    pub full_root: bool,
    /// Base entries copied into the request's output header options.
    pub header_options: JsonMap<String, JsonValue>,
}
84
85impl Default for ScanOptions {
86 fn default() -> Self {
87 Self {
88 progress_mode: ProgressMode::Quiet,
89 process_mode: ProcessMode::default(),
90 timeout_seconds: 120.0,
91 max_depth: 0,
92 max_in_memory: MemoryMode::Limit(10_000),
93 collect_info: false,
94 detect_license: LicenseSource::Disabled,
95 detect_packages: false,
96 detect_system_packages: false,
97 detect_packages_in_compiled: false,
98 package_only: false,
99 no_assemble: false,
100 detect_copyrights: false,
101 detect_emails: false,
102 detect_urls: false,
103 detect_generated: false,
104 max_emails: 50,
105 max_urls: 50,
106 include: Vec::new(),
107 exclude: Vec::new(),
108 include_input_header: false,
109 cache_dir: None,
110 cache_clear: false,
111 incremental: false,
112 reindex: false,
113 no_license_index_cache: false,
114 license_text: false,
115 license_text_diagnostics: false,
116 license_diagnostics: false,
117 unknown_licenses: false,
118 license_score: 0,
119 filter_clues: false,
120 ignore_author_patterns: Vec::new(),
121 ignore_copyright_holder_patterns: Vec::new(),
122 only_findings: false,
123 mark_source: false,
124 classify: false,
125 summary: false,
126 license_clarity_score: false,
127 license_references: false,
128 license_url_template: DEFAULT_LICENSEDB_URL_TEMPLATE.to_string(),
129 license_policy: None,
130 tallies: false,
131 tallies_key_files: false,
132 tallies_with_details: false,
133 facets: Vec::new(),
134 tallies_by_facet: false,
135 strip_root: false,
136 full_root: false,
137 header_options: JsonMap::new(),
138 }
139 }
140}
141
142pub fn scan_path(path: impl AsRef<Path>, options: &ScanOptions) -> Result<Output> {
163 scan_paths([path.as_ref()], options)
164}
165
166pub fn scan_paths<'a>(
194 paths: impl IntoIterator<Item = &'a Path>,
195 options: &ScanOptions,
196) -> Result<Output> {
197 let input_paths: Vec<String> = paths
198 .into_iter()
199 .map(|path| path.to_string_lossy().to_string())
200 .collect();
201
202 if input_paths.is_empty() {
203 return Err(anyhow!("At least one input path is required"));
204 }
205
206 let request = request_for_native_paths(input_paths, options);
207 validate_workflow_request(&request)?;
208
209 execute_request(&request).map(|executed| executed.output)
210}
211
212fn request_for_native_paths(input_paths: Vec<String>, options: &ScanOptions) -> ScanRequest {
213 let mut header_options = options.header_options.clone();
214 if options.include_input_header {
215 header_options.insert(
216 "input".to_string(),
217 JsonValue::Array(input_paths.iter().cloned().map(JsonValue::String).collect()),
218 );
219 }
220
221 let (license, license_dataset_path) = match &options.detect_license {
222 LicenseSource::Disabled => (false, None),
223 LicenseSource::Embedded => (true, None),
224 LicenseSource::Directory(path) => (true, Some(path.to_string_lossy().to_string())),
225 };
226
227 ScanRequest {
228 input_paths,
229 input_mode: InputMode::Native,
230 output_targets: Vec::new(),
231 output_header_options: header_options,
232 progress_mode: options.progress_mode,
233 process_mode: options.process_mode,
234 timeout_seconds: options.timeout_seconds,
235 quiet: matches!(options.progress_mode, ProgressMode::Quiet),
236 verbose: matches!(options.progress_mode, ProgressMode::Verbose),
237 strip_root: options.strip_root,
238 full_root: options.full_root,
239 include: options.include.clone(),
240 exclude: options.exclude.clone(),
241 paths_files: Vec::new(),
242 respect_process_cache_env: false,
243 cache_dir: options
244 .cache_dir
245 .as_ref()
246 .map(|path| path.to_string_lossy().to_string()),
247 cache_clear: options.cache_clear,
248 incremental: options.incremental,
249 max_depth: options.max_depth,
250 max_in_memory: options.max_in_memory,
251 info: options.collect_info,
252 package: options.detect_packages,
253 system_package: options.detect_system_packages,
254 package_in_compiled: options.detect_packages_in_compiled,
255 package_only: options.package_only,
256 no_assemble: options.no_assemble,
257 license_dataset_path,
258 reindex: options.reindex,
259 no_license_index_cache: options.no_license_index_cache,
260 license_text: options.license_text,
261 license_text_diagnostics: options.license_text_diagnostics,
262 license_diagnostics: options.license_diagnostics,
263 unknown_licenses: options.unknown_licenses,
264 license_score: options.license_score,
265 license_url_template: options.license_url_template.clone(),
266 filter_clues: options.filter_clues,
267 ignore_author: options.ignore_author_patterns.clone(),
268 ignore_copyright_holder: options.ignore_copyright_holder_patterns.clone(),
269 only_findings: options.only_findings,
270 mark_source: options.mark_source,
271 classify: options.classify,
272 summary: options.summary,
273 license_clarity_score: options.license_clarity_score,
274 license_references: options.license_references,
275 license_policy: options
276 .license_policy
277 .as_ref()
278 .map(|path| path.to_string_lossy().to_string()),
279 tallies: options.tallies,
280 tallies_key_files: options.tallies_key_files,
281 tallies_with_details: options.tallies_with_details,
282 facet: options.facets.clone(),
283 tallies_by_facet: options.tallies_by_facet,
284 generated: options.detect_generated,
285 license,
286 copyright: options.detect_copyrights,
287 email: options.detect_emails,
288 max_email: options.max_emails,
289 url: options.detect_urls,
290 max_url: options.max_urls,
291 }
292}
293
294fn validate_workflow_request(request: &ScanRequest) -> Result<()> {
295 let license_enabled = request.license;
296
297 if request.strip_root && request.full_root {
298 return Err(anyhow!("strip_root and full_root are mutually exclusive"));
299 }
300
301 if request.license_text && !license_enabled {
302 return Err(anyhow!("license_text requires detect_license"));
303 }
304
305 if request.license_text_diagnostics && !request.license_text {
306 return Err(anyhow!("license_text_diagnostics requires license_text"));
307 }
308
309 if request.license_diagnostics && !license_enabled {
310 return Err(anyhow!("license_diagnostics requires detect_license"));
311 }
312
313 if request.unknown_licenses && !license_enabled {
314 return Err(anyhow!("unknown_licenses requires detect_license"));
315 }
316
317 if request.license_references && !license_enabled {
318 return Err(anyhow!("license_references requires detect_license"));
319 }
320
321 if request.license_url_template != DEFAULT_LICENSEDB_URL_TEMPLATE && !license_enabled {
322 return Err(anyhow!("license_url_template requires detect_license"));
323 }
324
325 if request.package_only && license_enabled {
326 return Err(anyhow!(
327 "package_only cannot be combined with detect_license"
328 ));
329 }
330
331 if request.package_only && request.summary {
332 return Err(anyhow!("package_only cannot be combined with summary"));
333 }
334
335 if request.package_only && request.package {
336 return Err(anyhow!(
337 "package_only cannot be combined with detect_packages"
338 ));
339 }
340
341 if request.package_only && request.system_package {
342 return Err(anyhow!(
343 "package_only cannot be combined with detect_system_packages"
344 ));
345 }
346
347 if request.summary && !request.classify {
348 return Err(anyhow!("summary requires classify"));
349 }
350
351 if request.license_clarity_score && !request.classify {
352 return Err(anyhow!("license_clarity_score requires classify"));
353 }
354
355 if request.tallies_key_files && !(request.tallies && request.classify) {
356 return Err(anyhow!("tallies_key_files requires tallies and classify"));
357 }
358
359 if request.tallies_by_facet && request.facet.is_empty() {
360 return Err(anyhow!(
361 "tallies_by_facet requires at least one facet definition"
362 ));
363 }
364
365 if request.tallies_by_facet && !request.tallies {
366 return Err(anyhow!("tallies_by_facet requires tallies"));
367 }
368
369 if request.mark_source && !request.info {
370 return Err(anyhow!("mark_source requires collect_info"));
371 }
372
373 if request.license_score > 100 {
374 return Err(anyhow!("license_score must be between 0 and 100"));
375 }
376
377 Ok(())
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Builds the request the facade produces for a single `src` input.
    fn request_for_src(options: &ScanOptions) -> ScanRequest {
        request_for_native_paths(vec!["src".to_string()], options)
    }

    /// Validates the request built from `options` and returns the message of
    /// the rejection that is expected to occur.
    fn rejection_message(options: &ScanOptions) -> String {
        validate_workflow_request(&request_for_src(options))
            .expect_err("validation should fail")
            .to_string()
    }

    #[test]
    fn scan_path_requires_at_least_one_input() {
        let no_paths = std::iter::empty::<&Path>();
        assert!(scan_paths(no_paths, &ScanOptions::default()).is_err());
    }

    #[test]
    fn workflow_request_populates_input_header() {
        let mut options = ScanOptions::default();
        options.include_input_header = true;

        let request = request_for_src(&options);
        assert!(request.output_header_options.contains_key("input"));
    }

    #[test]
    fn workflow_validation_rejects_license_dependent_flags_without_license() {
        let mut options = ScanOptions::default();
        options.license_references = true;

        let message = rejection_message(&options);
        assert!(message.contains("license_references requires detect_license"));
    }

    #[test]
    fn workflow_validation_rejects_package_only_with_regular_package_modes() {
        let mut options = ScanOptions::default();
        options.package_only = true;
        options.detect_packages = true;

        let message = rejection_message(&options);
        assert!(message.contains("package_only cannot be combined with detect_packages"));
    }

    #[test]
    fn workflow_validation_rejects_classify_dependent_flags_without_classify() {
        let mut options = ScanOptions::default();
        options.summary = true;

        let message = rejection_message(&options);
        assert!(message.contains("summary requires classify"));
    }

    #[test]
    fn scan_path_runs_a_basic_in_process_scan() {
        let temp_dir = tempfile::TempDir::new().expect("create temp dir");
        let fixture = temp_dir.path().join("README.txt");
        fs::write(fixture, "hello from workflow facade\n").expect("write fixture file");

        let mut options = ScanOptions::default();
        options.collect_info = true;
        options.include_input_header = true;

        let output = scan_path(temp_dir.path(), &options).expect("workflow scan should succeed");

        // Exactly one header, carrying the recorded input paths, and at
        // least one scanned file.
        assert_eq!(output.headers.len(), 1);
        assert!(!output.files.is_empty());
        assert!(output.headers[0].options.contains_key("input"));
    }

    #[test]
    fn scan_paths_supports_multiple_absolute_inputs() {
        let temp_dir = tempfile::TempDir::new().expect("create temp dir");
        let left = temp_dir.path().join("left");
        let right = temp_dir.path().join("right");
        fs::create_dir_all(&left).expect("create left dir");
        fs::create_dir_all(&right).expect("create right dir");
        fs::write(left.join("one.txt"), "left\n").expect("write left fixture");
        fs::write(right.join("two.txt"), "right\n").expect("write right fixture");

        let inputs = [left.as_path(), right.as_path()];
        let output = scan_paths(inputs, &ScanOptions::default())
            .expect("workflow scan should succeed for multiple absolute inputs");

        // A file from each input root must appear in the combined output.
        for expected in ["one.txt", "two.txt"] {
            assert!(
                output
                    .files
                    .iter()
                    .any(|file| file.path.ends_with(expected))
            );
        }
    }
}