1use anyhow::{Result, anyhow};
5use serde_json::{Map as JsonMap, Value as JsonValue};
6use std::path::{Path, PathBuf};
7
8use crate::app::request::{InputMode, ScanRequest};
9use crate::app::scan_pipeline::execute_request;
10use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
11use crate::progress::ProgressMode;
12use crate::scanner::MemoryMode;
13use crate::{Output, ProcessMode};
14
/// Selects whether license detection runs and where its license data comes from.
///
/// The variant is translated into the scan request's `license` flag and
/// optional `license_dataset_path` when a request is built.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LicenseSource {
    /// License detection is turned off (`license = false`, no dataset path).
    Disabled,
    /// License detection is on with no explicit dataset path.
    ///
    /// NOTE(review): presumably this selects the dataset bundled with the
    /// crate — confirm against the scan pipeline.
    Embedded,
    /// License detection is on and the given directory is passed along as the
    /// dataset path (converted to a string lossily for non-UTF-8 paths).
    Directory(PathBuf),
}
25
#[derive(Debug, Clone)]
/// Options for an in-process scan started through [`scan_path`] or [`scan_paths`].
///
/// Every field is copied into the internal scan request, and the resulting
/// request is validated before the scan runs. Start from
/// `ScanOptions::default()` and override individual fields with struct-update
/// syntax.
pub struct ScanOptions {
    /// Progress reporting mode. `Quiet` and `Verbose` also set the request's
    /// `quiet` and `verbose` flags respectively. Defaults to `Quiet`.
    pub progress_mode: ProgressMode,
    /// Process mode forwarded unchanged to the scan request.
    pub process_mode: ProcessMode,
    /// Timeout in seconds forwarded to the scan request. Defaults to `120.0`.
    pub timeout_seconds: f64,
    /// Maximum depth forwarded to the scan request. Defaults to `0`.
    /// NOTE(review): presumably `0` means "no limit" — confirm.
    pub max_depth: usize,
    /// In-memory limit policy. Defaults to `MemoryMode::Limit(10_000)`.
    pub max_in_memory: MemoryMode,
    /// Sets the request's `info` option. Required by `mark_source`.
    pub collect_info: bool,
    /// Whether license detection runs and which data source it uses.
    /// Defaults to `LicenseSource::Disabled`.
    pub detect_license: LicenseSource,
    /// Sets the request's `package` option. Rejected together with `package_only`.
    pub detect_packages: bool,
    /// Sets the request's `system_package` option. Rejected together with
    /// `package_only`.
    pub detect_system_packages: bool,
    /// Sets the request's `package_in_compiled` option.
    pub detect_packages_in_compiled: bool,
    /// Sets the request's `package_only` option. Cannot be combined with
    /// `detect_license`, `summary`, `detect_packages`, or
    /// `detect_system_packages`.
    pub package_only: bool,
    /// Sets the request's `no_assemble` option.
    pub no_assemble: bool,
    /// Sets the request's `copyright` option.
    pub detect_copyrights: bool,
    /// Sets the request's `email` option.
    pub detect_emails: bool,
    /// Sets the request's `url` option.
    pub detect_urls: bool,
    /// Sets the request's `generated` option.
    pub detect_generated: bool,
    /// Forwarded as the request's `max_email`. Defaults to `50`.
    pub max_emails: usize,
    /// Forwarded as the request's `max_url`. Defaults to `50`.
    pub max_urls: usize,
    /// Include filters forwarded to the request.
    /// NOTE(review): presumably path patterns — confirm the expected syntax.
    pub include: Vec<String>,
    /// Exclude filters forwarded to the request.
    /// NOTE(review): presumably path patterns — confirm the expected syntax.
    pub exclude: Vec<String>,
    /// When `true`, the input paths are stored as a JSON array under the
    /// `"input"` key of the output header options.
    pub include_input_header: bool,
    /// Optional cache directory, passed to the request as a string
    /// (lossy conversion for non-UTF-8 paths).
    pub cache_dir: Option<PathBuf>,
    /// Sets the request's `cache_clear` option.
    pub cache_clear: bool,
    /// Sets the request's `incremental` option.
    pub incremental: bool,
    /// Sets the request's `reindex` option.
    pub reindex: bool,
    /// Sets the request's `no_license_index_cache` option.
    pub no_license_index_cache: bool,
    /// Sets the request's `license_text` option. Requires `detect_license`.
    pub license_text: bool,
    /// Sets the request's `license_text_diagnostics` option. Requires
    /// `license_text`.
    pub license_text_diagnostics: bool,
    /// Sets the request's `license_diagnostics` option. Requires
    /// `detect_license`.
    pub license_diagnostics: bool,
    /// Sets the request's `unknown_licenses` option. Requires `detect_license`.
    pub unknown_licenses: bool,
    /// License score forwarded to the request; validation rejects values
    /// above `100`. Defaults to `0`.
    pub license_score: u8,
    /// Sets the request's `filter_clues` option.
    pub filter_clues: bool,
    /// Forwarded as the request's `ignore_author` list.
    pub ignore_author_patterns: Vec<String>,
    /// Forwarded as the request's `ignore_copyright_holder` list.
    pub ignore_copyright_holder_patterns: Vec<String>,
    /// Sets the request's `only_findings` option.
    pub only_findings: bool,
    /// Sets the request's `mark_source` option. Requires `collect_info`.
    pub mark_source: bool,
    /// Sets the request's `classify` option. Required by `summary`,
    /// `license_clarity_score`, and `tallies_key_files`.
    pub classify: bool,
    /// Sets the request's `summary` option. Requires `classify` and cannot be
    /// combined with `package_only`.
    pub summary: bool,
    /// Sets the request's `license_clarity_score` option. Requires `classify`.
    pub license_clarity_score: bool,
    /// Sets the request's `license_references` option. Requires
    /// `detect_license`.
    pub license_references: bool,
    /// License URL template. Defaults to `DEFAULT_LICENSEDB_URL_TEMPLATE`;
    /// any other value requires `detect_license`.
    pub license_url_template: String,
    /// Optional license policy path, passed to the request as a string
    /// (lossy conversion for non-UTF-8 paths).
    pub license_policy: Option<PathBuf>,
    /// Sets the request's `tallies` option.
    pub tallies: bool,
    /// Sets the request's `tallies_key_files` option. Requires both `tallies`
    /// and `classify`.
    pub tallies_key_files: bool,
    /// Sets the request's `tallies_with_details` option.
    pub tallies_with_details: bool,
    /// Facet definitions forwarded as the request's `facet` list. Must be
    /// non-empty when `tallies_by_facet` is set.
    pub facets: Vec<String>,
    /// Sets the request's `tallies_by_facet` option. Requires `tallies` and at
    /// least one entry in `facets`.
    pub tallies_by_facet: bool,
    /// Sets the request's `strip_root` option. Mutually exclusive with
    /// `full_root`.
    pub strip_root: bool,
    /// Sets the request's `full_root` option. Mutually exclusive with
    /// `strip_root`.
    pub full_root: bool,
    /// Extra entries copied into the output header options. An existing
    /// `"input"` entry is overwritten when `include_input_header` is set.
    pub header_options: JsonMap<String, JsonValue>,
}
84
85impl Default for ScanOptions {
86 fn default() -> Self {
87 Self {
88 progress_mode: ProgressMode::Quiet,
89 process_mode: ProcessMode::default(),
90 timeout_seconds: 120.0,
91 max_depth: 0,
92 max_in_memory: MemoryMode::Limit(10_000),
93 collect_info: false,
94 detect_license: LicenseSource::Disabled,
95 detect_packages: false,
96 detect_system_packages: false,
97 detect_packages_in_compiled: false,
98 package_only: false,
99 no_assemble: false,
100 detect_copyrights: false,
101 detect_emails: false,
102 detect_urls: false,
103 detect_generated: false,
104 max_emails: 50,
105 max_urls: 50,
106 include: Vec::new(),
107 exclude: Vec::new(),
108 include_input_header: false,
109 cache_dir: None,
110 cache_clear: false,
111 incremental: false,
112 reindex: false,
113 no_license_index_cache: false,
114 license_text: false,
115 license_text_diagnostics: false,
116 license_diagnostics: false,
117 unknown_licenses: false,
118 license_score: 0,
119 filter_clues: false,
120 ignore_author_patterns: Vec::new(),
121 ignore_copyright_holder_patterns: Vec::new(),
122 only_findings: false,
123 mark_source: false,
124 classify: false,
125 summary: false,
126 license_clarity_score: false,
127 license_references: false,
128 license_url_template: DEFAULT_LICENSEDB_URL_TEMPLATE.to_string(),
129 license_policy: None,
130 tallies: false,
131 tallies_key_files: false,
132 tallies_with_details: false,
133 facets: Vec::new(),
134 tallies_by_facet: false,
135 strip_root: false,
136 full_root: false,
137 header_options: JsonMap::new(),
138 }
139 }
140}
141
142pub fn scan_path(path: impl AsRef<Path>, options: &ScanOptions) -> Result<Output> {
160 scan_paths([path.as_ref()], options)
161}
162
163pub fn scan_paths<'a>(
189 paths: impl IntoIterator<Item = &'a Path>,
190 options: &ScanOptions,
191) -> Result<Output> {
192 let input_paths: Vec<String> = paths
193 .into_iter()
194 .map(|path| path.to_string_lossy().to_string())
195 .collect();
196
197 if input_paths.is_empty() {
198 return Err(anyhow!("At least one input path is required"));
199 }
200
201 let request = request_for_native_paths(input_paths, options);
202 validate_workflow_request(&request)?;
203
204 execute_request(&request).map(|executed| executed.output)
205}
206
207fn request_for_native_paths(input_paths: Vec<String>, options: &ScanOptions) -> ScanRequest {
208 let mut header_options = options.header_options.clone();
209 if options.include_input_header {
210 header_options.insert(
211 "input".to_string(),
212 JsonValue::Array(input_paths.iter().cloned().map(JsonValue::String).collect()),
213 );
214 }
215
216 let (license, license_dataset_path) = match &options.detect_license {
217 LicenseSource::Disabled => (false, None),
218 LicenseSource::Embedded => (true, None),
219 LicenseSource::Directory(path) => (true, Some(path.to_string_lossy().to_string())),
220 };
221
222 ScanRequest {
223 input_paths,
224 input_mode: InputMode::Native,
225 output_targets: Vec::new(),
226 output_header_options: header_options,
227 progress_mode: options.progress_mode,
228 process_mode: options.process_mode,
229 timeout_seconds: options.timeout_seconds,
230 quiet: matches!(options.progress_mode, ProgressMode::Quiet),
231 verbose: matches!(options.progress_mode, ProgressMode::Verbose),
232 strip_root: options.strip_root,
233 full_root: options.full_root,
234 include: options.include.clone(),
235 exclude: options.exclude.clone(),
236 paths_files: Vec::new(),
237 respect_process_cache_env: false,
238 cache_dir: options
239 .cache_dir
240 .as_ref()
241 .map(|path| path.to_string_lossy().to_string()),
242 cache_clear: options.cache_clear,
243 incremental: options.incremental,
244 max_depth: options.max_depth,
245 max_in_memory: options.max_in_memory,
246 info: options.collect_info,
247 package: options.detect_packages,
248 system_package: options.detect_system_packages,
249 package_in_compiled: options.detect_packages_in_compiled,
250 package_only: options.package_only,
251 no_assemble: options.no_assemble,
252 license_dataset_path,
253 reindex: options.reindex,
254 no_license_index_cache: options.no_license_index_cache,
255 license_text: options.license_text,
256 license_text_diagnostics: options.license_text_diagnostics,
257 license_diagnostics: options.license_diagnostics,
258 unknown_licenses: options.unknown_licenses,
259 license_score: options.license_score,
260 license_url_template: options.license_url_template.clone(),
261 filter_clues: options.filter_clues,
262 ignore_author: options.ignore_author_patterns.clone(),
263 ignore_copyright_holder: options.ignore_copyright_holder_patterns.clone(),
264 only_findings: options.only_findings,
265 mark_source: options.mark_source,
266 classify: options.classify,
267 summary: options.summary,
268 license_clarity_score: options.license_clarity_score,
269 license_references: options.license_references,
270 license_policy: options
271 .license_policy
272 .as_ref()
273 .map(|path| path.to_string_lossy().to_string()),
274 tallies: options.tallies,
275 tallies_key_files: options.tallies_key_files,
276 tallies_with_details: options.tallies_with_details,
277 facet: options.facets.clone(),
278 tallies_by_facet: options.tallies_by_facet,
279 generated: options.detect_generated,
280 license,
281 copyright: options.detect_copyrights,
282 email: options.detect_emails,
283 max_email: options.max_emails,
284 url: options.detect_urls,
285 max_url: options.max_urls,
286 }
287}
288
289fn validate_workflow_request(request: &ScanRequest) -> Result<()> {
290 let license_enabled = request.license;
291
292 if request.strip_root && request.full_root {
293 return Err(anyhow!("strip_root and full_root are mutually exclusive"));
294 }
295
296 if request.license_text && !license_enabled {
297 return Err(anyhow!("license_text requires detect_license"));
298 }
299
300 if request.license_text_diagnostics && !request.license_text {
301 return Err(anyhow!("license_text_diagnostics requires license_text"));
302 }
303
304 if request.license_diagnostics && !license_enabled {
305 return Err(anyhow!("license_diagnostics requires detect_license"));
306 }
307
308 if request.unknown_licenses && !license_enabled {
309 return Err(anyhow!("unknown_licenses requires detect_license"));
310 }
311
312 if request.license_references && !license_enabled {
313 return Err(anyhow!("license_references requires detect_license"));
314 }
315
316 if request.license_url_template != DEFAULT_LICENSEDB_URL_TEMPLATE && !license_enabled {
317 return Err(anyhow!("license_url_template requires detect_license"));
318 }
319
320 if request.package_only && license_enabled {
321 return Err(anyhow!(
322 "package_only cannot be combined with detect_license"
323 ));
324 }
325
326 if request.package_only && request.summary {
327 return Err(anyhow!("package_only cannot be combined with summary"));
328 }
329
330 if request.package_only && request.package {
331 return Err(anyhow!(
332 "package_only cannot be combined with detect_packages"
333 ));
334 }
335
336 if request.package_only && request.system_package {
337 return Err(anyhow!(
338 "package_only cannot be combined with detect_system_packages"
339 ));
340 }
341
342 if request.summary && !request.classify {
343 return Err(anyhow!("summary requires classify"));
344 }
345
346 if request.license_clarity_score && !request.classify {
347 return Err(anyhow!("license_clarity_score requires classify"));
348 }
349
350 if request.tallies_key_files && !(request.tallies && request.classify) {
351 return Err(anyhow!("tallies_key_files requires tallies and classify"));
352 }
353
354 if request.tallies_by_facet && request.facet.is_empty() {
355 return Err(anyhow!(
356 "tallies_by_facet requires at least one facet definition"
357 ));
358 }
359
360 if request.tallies_by_facet && !request.tallies {
361 return Err(anyhow!("tallies_by_facet requires tallies"));
362 }
363
364 if request.mark_source && !request.info {
365 return Err(anyhow!("mark_source requires collect_info"));
366 }
367
368 if request.license_score > 100 {
369 return Err(anyhow!("license_score must be between 0 and 100"));
370 }
371
372 Ok(())
373}
374
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    // Builds a request for the fixed input "src" from `options`, expects
    // validation to fail, and returns the rendered error message.
    fn validation_error_for(options: &ScanOptions) -> String {
        let request = request_for_native_paths(vec!["src".to_string()], options);
        validate_workflow_request(&request)
            .expect_err("validation should fail")
            .to_string()
    }

    #[test]
    fn scan_path_requires_at_least_one_input() {
        let no_inputs = std::iter::empty::<&Path>();
        let outcome = scan_paths(no_inputs, &ScanOptions::default());
        assert!(outcome.is_err());
    }

    #[test]
    fn workflow_request_populates_input_header() {
        let with_header = ScanOptions {
            include_input_header: true,
            ..Default::default()
        };
        let built = request_for_native_paths(vec!["src".to_string()], &with_header);
        assert!(built.output_header_options.contains_key("input"));
    }

    #[test]
    fn workflow_validation_rejects_license_dependent_flags_without_license() {
        let message = validation_error_for(&ScanOptions {
            license_references: true,
            ..Default::default()
        });
        assert!(message.contains("license_references requires detect_license"));
    }

    #[test]
    fn workflow_validation_rejects_package_only_with_regular_package_modes() {
        let message = validation_error_for(&ScanOptions {
            package_only: true,
            detect_packages: true,
            ..Default::default()
        });
        assert!(message.contains("package_only cannot be combined with detect_packages"));
    }

    #[test]
    fn workflow_validation_rejects_classify_dependent_flags_without_classify() {
        let message = validation_error_for(&ScanOptions {
            summary: true,
            ..Default::default()
        });
        assert!(message.contains("summary requires classify"));
    }

    #[test]
    fn scan_path_runs_a_basic_in_process_scan() {
        let workspace = tempfile::TempDir::new().expect("create temp dir");
        let readme = workspace.path().join("README.txt");
        fs::write(readme, "hello from workflow facade\n").expect("write fixture file");

        let options = ScanOptions {
            collect_info: true,
            include_input_header: true,
            ..Default::default()
        };

        let output = scan_path(workspace.path(), &options).expect("workflow scan should succeed");

        assert_eq!(output.headers.len(), 1);
        assert!(!output.files.is_empty());
        assert!(output.headers[0].options.contains_key("input"));
    }

    #[test]
    fn scan_paths_supports_multiple_absolute_inputs() {
        let workspace = tempfile::TempDir::new().expect("create temp dir");
        let left = workspace.path().join("left");
        let right = workspace.path().join("right");
        fs::create_dir_all(&left).expect("create left dir");
        fs::create_dir_all(&right).expect("create right dir");
        fs::write(left.join("one.txt"), "left\n").expect("write left fixture");
        fs::write(right.join("two.txt"), "right\n").expect("write right fixture");

        let output = scan_paths([left.as_path(), right.as_path()], &ScanOptions::default())
            .expect("workflow scan should succeed for multiple absolute inputs");

        // Both roots must contribute their fixture file to the combined output.
        for expected_name in ["one.txt", "two.txt"] {
            assert!(
                output
                    .files
                    .iter()
                    .any(|file| file.path.ends_with(expected_name))
            );
        }
    }
}