1use serde_json::{Map as JsonMap, Value as JsonValue};
5use std::path::{Path, PathBuf};
6
7use crate::app::request::{InputMode, ScanRequest};
8use crate::app::scan_pipeline::execute_request;
9use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
10use crate::progress::ProgressMode;
11use crate::scanner::MemoryMode;
12use crate::{Output, ProcessMode};
13
14#[derive(Debug, thiserror::Error)]
15pub enum WorkflowError {
16 #[error("{0}")]
17 InvalidOptions(String),
18 #[error(transparent)]
19 Pipeline(#[from] anyhow::Error),
20}
21
/// Selects whether license detection runs and where its dataset comes from.
#[derive(Clone, Debug)]
pub enum LicenseSource {
    /// License detection is switched off; no dataset path is passed to the request.
    Disabled,
    /// License detection is switched on without an explicit dataset path
    /// (NOTE(review): presumably the dataset bundled with the crate is used — confirm).
    Embedded,
    /// License detection is switched on and the given directory is passed to the request
    /// as the license dataset path.
    Directory(PathBuf),
}
32
33#[derive(Debug, Clone)]
39pub struct ScanOptions {
40 pub progress_mode: ProgressMode,
41 pub process_mode: ProcessMode,
42 pub timeout_seconds: f64,
43 pub max_depth: usize,
44 pub max_in_memory: MemoryMode,
45 pub collect_info: bool,
46 pub detect_license: LicenseSource,
47 pub detect_packages: bool,
48 pub detect_system_packages: bool,
49 pub detect_packages_in_compiled: bool,
50 pub package_only: bool,
51 pub no_assemble: bool,
52 pub detect_copyrights: bool,
53 pub detect_emails: bool,
54 pub detect_urls: bool,
55 pub detect_generated: bool,
56 pub max_emails: usize,
57 pub max_urls: usize,
58 pub include: Vec<String>,
59 pub exclude: Vec<String>,
60 pub include_input_header: bool,
61 pub cache_dir: Option<PathBuf>,
62 pub cache_clear: bool,
63 pub incremental: bool,
64 pub reindex: bool,
65 pub no_license_index_cache: bool,
66 pub license_text: bool,
67 pub license_text_diagnostics: bool,
68 pub license_diagnostics: bool,
69 pub unknown_licenses: bool,
70 pub license_score: u8,
71 pub filter_clues: bool,
72 pub ignore_author_patterns: Vec<String>,
73 pub ignore_copyright_holder_patterns: Vec<String>,
74 pub only_findings: bool,
75 pub mark_source: bool,
76 pub classify: bool,
77 pub summary: bool,
78 pub license_clarity_score: bool,
79 pub license_references: bool,
80 pub license_url_template: String,
81 pub license_policy: Option<PathBuf>,
82 pub tallies: bool,
83 pub tallies_key_files: bool,
84 pub tallies_with_details: bool,
85 pub facets: Vec<String>,
86 pub tallies_by_facet: bool,
87 pub strip_root: bool,
88 pub full_root: bool,
89 pub header_options: JsonMap<String, JsonValue>,
90}
91
92impl Default for ScanOptions {
93 fn default() -> Self {
94 Self {
95 progress_mode: ProgressMode::Quiet,
96 process_mode: ProcessMode::default(),
97 timeout_seconds: 120.0,
98 max_depth: 0,
99 max_in_memory: MemoryMode::Limit(10_000),
100 collect_info: false,
101 detect_license: LicenseSource::Disabled,
102 detect_packages: false,
103 detect_system_packages: false,
104 detect_packages_in_compiled: false,
105 package_only: false,
106 no_assemble: false,
107 detect_copyrights: false,
108 detect_emails: false,
109 detect_urls: false,
110 detect_generated: false,
111 max_emails: 50,
112 max_urls: 50,
113 include: Vec::new(),
114 exclude: Vec::new(),
115 include_input_header: false,
116 cache_dir: None,
117 cache_clear: false,
118 incremental: false,
119 reindex: false,
120 no_license_index_cache: false,
121 license_text: false,
122 license_text_diagnostics: false,
123 license_diagnostics: false,
124 unknown_licenses: false,
125 license_score: 0,
126 filter_clues: false,
127 ignore_author_patterns: Vec::new(),
128 ignore_copyright_holder_patterns: Vec::new(),
129 only_findings: false,
130 mark_source: false,
131 classify: false,
132 summary: false,
133 license_clarity_score: false,
134 license_references: false,
135 license_url_template: DEFAULT_LICENSEDB_URL_TEMPLATE.to_string(),
136 license_policy: None,
137 tallies: false,
138 tallies_key_files: false,
139 tallies_with_details: false,
140 facets: Vec::new(),
141 tallies_by_facet: false,
142 strip_root: false,
143 full_root: false,
144 header_options: JsonMap::new(),
145 }
146 }
147}
148
149pub fn scan_path(path: impl AsRef<Path>, options: &ScanOptions) -> Result<Output, WorkflowError> {
167 scan_paths([path.as_ref()], options)
168}
169
170pub fn scan_paths<'a>(
196 paths: impl IntoIterator<Item = &'a Path>,
197 options: &ScanOptions,
198) -> Result<Output, WorkflowError> {
199 let input_paths: Vec<String> = paths
200 .into_iter()
201 .map(|path| path.to_string_lossy().to_string())
202 .collect();
203
204 if input_paths.is_empty() {
205 return Err(WorkflowError::InvalidOptions(
206 "At least one input path is required".to_string(),
207 ));
208 }
209
210 let request = request_for_native_paths(input_paths, options);
211 validate_workflow_request(&request)?;
212
213 execute_request(&request)
214 .map(|executed| executed.output)
215 .map_err(WorkflowError::Pipeline)
216}
217
218fn request_for_native_paths(input_paths: Vec<String>, options: &ScanOptions) -> ScanRequest {
219 let mut header_options = options.header_options.clone();
220 if options.include_input_header {
221 header_options.insert(
222 "input".to_string(),
223 JsonValue::Array(input_paths.iter().cloned().map(JsonValue::String).collect()),
224 );
225 }
226
227 let (license, license_dataset_path) = match &options.detect_license {
228 LicenseSource::Disabled => (false, None),
229 LicenseSource::Embedded => (true, None),
230 LicenseSource::Directory(path) => (true, Some(path.to_string_lossy().to_string())),
231 };
232
233 ScanRequest {
234 input_paths,
235 input_mode: InputMode::Native,
236 output_targets: Vec::new(),
237 output_header_options: header_options,
238 progress_mode: options.progress_mode,
239 process_mode: options.process_mode,
240 timeout_seconds: options.timeout_seconds,
241 quiet: matches!(options.progress_mode, ProgressMode::Quiet),
242 verbose: matches!(options.progress_mode, ProgressMode::Verbose),
243 strip_root: options.strip_root,
244 full_root: options.full_root,
245 include: options.include.clone(),
246 exclude: options.exclude.clone(),
247 paths_files: Vec::new(),
248 respect_process_cache_env: false,
249 cache_dir: options
250 .cache_dir
251 .as_ref()
252 .map(|path| path.to_string_lossy().to_string()),
253 cache_clear: options.cache_clear,
254 incremental: options.incremental,
255 max_depth: options.max_depth,
256 max_in_memory: options.max_in_memory,
257 info: options.collect_info,
258 package: options.detect_packages,
259 system_package: options.detect_system_packages,
260 package_in_compiled: options.detect_packages_in_compiled,
261 package_only: options.package_only,
262 no_assemble: options.no_assemble,
263 license_dataset_path,
264 reindex: options.reindex,
265 no_license_index_cache: options.no_license_index_cache,
266 license_text: options.license_text,
267 license_text_diagnostics: options.license_text_diagnostics,
268 license_diagnostics: options.license_diagnostics,
269 unknown_licenses: options.unknown_licenses,
270 license_score: options.license_score,
271 license_url_template: options.license_url_template.clone(),
272 filter_clues: options.filter_clues,
273 ignore_author: options.ignore_author_patterns.clone(),
274 ignore_copyright_holder: options.ignore_copyright_holder_patterns.clone(),
275 only_findings: options.only_findings,
276 mark_source: options.mark_source,
277 classify: options.classify,
278 summary: options.summary,
279 license_clarity_score: options.license_clarity_score,
280 license_references: options.license_references,
281 license_policy: options
282 .license_policy
283 .as_ref()
284 .map(|path| path.to_string_lossy().to_string()),
285 tallies: options.tallies,
286 tallies_key_files: options.tallies_key_files,
287 tallies_with_details: options.tallies_with_details,
288 facet: options.facets.clone(),
289 tallies_by_facet: options.tallies_by_facet,
290 generated: options.detect_generated,
291 license,
292 copyright: options.detect_copyrights,
293 email: options.detect_emails,
294 max_email: options.max_emails,
295 url: options.detect_urls,
296 max_url: options.max_urls,
297 }
298}
299
300fn validate_workflow_request(request: &ScanRequest) -> Result<(), WorkflowError> {
301 let license_enabled = request.license;
302
303 if request.strip_root && request.full_root {
304 return Err(WorkflowError::InvalidOptions(
305 "strip_root and full_root are mutually exclusive".to_string(),
306 ));
307 }
308
309 if request.license_text && !license_enabled {
310 return Err(WorkflowError::InvalidOptions(
311 "license_text requires detect_license".to_string(),
312 ));
313 }
314
315 if request.license_text_diagnostics && !request.license_text {
316 return Err(WorkflowError::InvalidOptions(
317 "license_text_diagnostics requires license_text".to_string(),
318 ));
319 }
320
321 if request.license_diagnostics && !license_enabled {
322 return Err(WorkflowError::InvalidOptions(
323 "license_diagnostics requires detect_license".to_string(),
324 ));
325 }
326
327 if request.unknown_licenses && !license_enabled {
328 return Err(WorkflowError::InvalidOptions(
329 "unknown_licenses requires detect_license".to_string(),
330 ));
331 }
332
333 if request.license_references && !license_enabled {
334 return Err(WorkflowError::InvalidOptions(
335 "license_references requires detect_license".to_string(),
336 ));
337 }
338
339 if request.license_url_template != DEFAULT_LICENSEDB_URL_TEMPLATE && !license_enabled {
340 return Err(WorkflowError::InvalidOptions(
341 "license_url_template requires detect_license".to_string(),
342 ));
343 }
344
345 if request.package_only && license_enabled {
346 return Err(WorkflowError::InvalidOptions(
347 "package_only cannot be combined with detect_license".to_string(),
348 ));
349 }
350
351 if request.package_only && request.summary {
352 return Err(WorkflowError::InvalidOptions(
353 "package_only cannot be combined with summary".to_string(),
354 ));
355 }
356
357 if request.package_only && request.package {
358 return Err(WorkflowError::InvalidOptions(
359 "package_only cannot be combined with detect_packages".to_string(),
360 ));
361 }
362
363 if request.package_only && request.system_package {
364 return Err(WorkflowError::InvalidOptions(
365 "package_only cannot be combined with detect_system_packages".to_string(),
366 ));
367 }
368
369 if request.summary && !request.classify {
370 return Err(WorkflowError::InvalidOptions(
371 "summary requires classify".to_string(),
372 ));
373 }
374
375 if request.license_clarity_score && !request.classify {
376 return Err(WorkflowError::InvalidOptions(
377 "license_clarity_score requires classify".to_string(),
378 ));
379 }
380
381 if request.tallies_key_files && !(request.tallies && request.classify) {
382 return Err(WorkflowError::InvalidOptions(
383 "tallies_key_files requires tallies and classify".to_string(),
384 ));
385 }
386
387 if request.tallies_by_facet && request.facet.is_empty() {
388 return Err(WorkflowError::InvalidOptions(
389 "tallies_by_facet requires at least one facet definition".to_string(),
390 ));
391 }
392
393 if request.tallies_by_facet && !request.tallies {
394 return Err(WorkflowError::InvalidOptions(
395 "tallies_by_facet requires tallies".to_string(),
396 ));
397 }
398
399 if request.mark_source && !request.info {
400 return Err(WorkflowError::InvalidOptions(
401 "mark_source requires collect_info".to_string(),
402 ));
403 }
404
405 if request.license_score > 100 {
406 return Err(WorkflowError::InvalidOptions(
407 "license_score must be between 0 and 100".to_string(),
408 ));
409 }
410
411 Ok(())
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Builds a request for the fixed input `src`, asserts that validation rejects it with
    /// `InvalidOptions`, and returns the rendered error message.
    fn invalid_options_message(options: &ScanOptions) -> String {
        let request = request_for_native_paths(vec!["src".to_string()], options);
        let error = validate_workflow_request(&request).expect_err("validation should fail");
        assert!(matches!(error, WorkflowError::InvalidOptions(_)));
        error.to_string()
    }

    #[test]
    fn scan_path_requires_at_least_one_input() {
        let no_inputs: Vec<&Path> = Vec::new();
        let outcome = scan_paths(no_inputs, &ScanOptions::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn workflow_request_populates_input_header() {
        let mut options = ScanOptions::default();
        options.include_input_header = true;

        let request = request_for_native_paths(vec!["src".to_string()], &options);
        assert!(request.output_header_options.contains_key("input"));
    }

    #[test]
    fn workflow_validation_rejects_license_dependent_flags_without_license() {
        let message = invalid_options_message(&ScanOptions {
            license_references: true,
            ..ScanOptions::default()
        });
        assert!(message.contains("license_references requires detect_license"));
    }

    #[test]
    fn workflow_validation_rejects_package_only_with_regular_package_modes() {
        let message = invalid_options_message(&ScanOptions {
            package_only: true,
            detect_packages: true,
            ..ScanOptions::default()
        });
        assert!(message.contains("package_only cannot be combined with detect_packages"));
    }

    #[test]
    fn workflow_validation_rejects_classify_dependent_flags_without_classify() {
        let message = invalid_options_message(&ScanOptions {
            summary: true,
            ..ScanOptions::default()
        });
        assert!(message.contains("summary requires classify"));
    }

    #[test]
    fn scan_path_runs_a_basic_in_process_scan() {
        let temp_dir = tempfile::TempDir::new().expect("create temp dir");
        let fixture = temp_dir.path().join("README.txt");
        fs::write(fixture, "hello from workflow facade\n").expect("write fixture file");

        let options = ScanOptions {
            collect_info: true,
            include_input_header: true,
            ..ScanOptions::default()
        };

        let output = scan_path(temp_dir.path(), &options).expect("workflow scan should succeed");

        assert_eq!(output.headers.len(), 1);
        assert!(!output.files.is_empty());
        assert!(output.headers[0].options.contains_key("input"));
    }

    #[test]
    fn scan_paths_supports_multiple_absolute_inputs() {
        let temp_dir = tempfile::TempDir::new().expect("create temp dir");
        let root = temp_dir.path();
        let (left, right) = (root.join("left"), root.join("right"));

        fs::create_dir_all(&left).expect("create left dir");
        fs::create_dir_all(&right).expect("create right dir");
        fs::write(left.join("one.txt"), "left\n").expect("write left fixture");
        fs::write(right.join("two.txt"), "right\n").expect("write right fixture");

        let output = scan_paths([left.as_path(), right.as_path()], &ScanOptions::default())
            .expect("workflow scan should succeed for multiple absolute inputs");

        // A file from each input root must appear in the combined output.
        for expected_name in ["one.txt", "two.txt"] {
            assert!(
                output
                    .files
                    .iter()
                    .any(|file| file.path.ends_with(expected_name))
            );
        }
    }
}