Skip to main content

provenant/
cli.rs

1use clap::{ArgGroup, Parser};
2use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue};
3use std::fs;
4use std::path::Path;
5use yaml_serde::Value as YamlValue;
6
7use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
8use crate::output::OutputFormat;
9
/// Help footer text explaining how to surface the `pdf_oxide` parser logs,
/// which the message itself says are suppressed by default.
const PDF_OXIDE_LOG_HELP: &str = concat!(
    "Troubleshooting PDF parser logs:\n",
    "  Provenant suppresses noisy pdf_oxide logs by default.\n",
    "  To inspect raw pdf_oxide logs for debugging, rerun with RUST_LOG=pdf_oxide=warn (or =error).",
);
11
12fn parse_license_policy_arg(value: &str) -> Result<String, String> {
13    let policy_path = Path::new(value);
14    let metadata = fs::metadata(policy_path).map_err(|err| {
15        format!(
16            "Failed to read license policy file {:?}: {err}",
17            policy_path
18        )
19    })?;
20    if !metadata.is_file() {
21        return Err(format!(
22            "License policy path {:?} is not a regular file",
23            policy_path
24        ));
25    }
26
27    let policy_text = fs::read_to_string(policy_path).map_err(|err| {
28        format!(
29            "Failed to read license policy file {:?}: {err}",
30            policy_path
31        )
32    })?;
33    if policy_text.trim().is_empty() {
34        return Err(format!("License policy file {:?} is empty", policy_path));
35    }
36
37    let policy_value: YamlValue = yaml_serde::from_str(&policy_text).map_err(|err| {
38        format!(
39            "Failed to parse license policy file {:?}: {err}",
40            policy_path
41        )
42    })?;
43    let has_license_policies = policy_value
44        .as_mapping()
45        .and_then(|mapping| mapping.get(YamlValue::String("license_policies".to_string())))
46        .is_some();
47    if !has_license_policies {
48        return Err(format!(
49            "License policy file {:?} is missing a 'license_policies' attribute",
50            policy_path
51        ));
52    }
53
54    Ok(value.to_string())
55}
56
57#[derive(Parser, Debug)]
58#[command(
59    author = "The Provenant contributors",
60    version = crate::version::BUILD_VERSION,
61    long_version = crate::version::build_long_version(),
62    after_help = PDF_OXIDE_LOG_HELP,
63    about,
64    long_about = None,
65    group(
66        ArgGroup::new("output")
67            .required(true)
68            .multiple(true)
69            .args([
70                "output_json",
71                "output_json_pp",
72                "output_json_lines",
73                "output_yaml",
74                "output_debian",
75                "output_html",
76                "output_spdx_tv",
77                "output_spdx_rdf",
78                "output_cyclonedx",
79                "output_cyclonedx_xml",
80                "custom_output",
81                "show_attribution"
82            ])
83    )
84)]
85pub struct Cli {
86    /// File or directory paths to scan
87    #[arg(required = false)]
88    pub dir_path: Vec<String>,
89
90    /// Write scan output as compact JSON to FILE
91    #[arg(long = "json", value_name = "FILE", allow_hyphen_values = true)]
92    pub output_json: Option<String>,
93
94    /// Write scan output as pretty-printed JSON to FILE
95    #[arg(long = "json-pp", value_name = "FILE", allow_hyphen_values = true)]
96    pub output_json_pp: Option<String>,
97
98    /// Write scan output as JSON Lines to FILE
99    #[arg(long = "json-lines", value_name = "FILE", allow_hyphen_values = true)]
100    pub output_json_lines: Option<String>,
101
102    /// Write scan output as YAML to FILE
103    #[arg(long = "yaml", value_name = "FILE", allow_hyphen_values = true)]
104    pub output_yaml: Option<String>,
105
106    /// Write scan output in machine-readable Debian copyright format to FILE (requires --license, --copyright, and --license-text)
107    #[arg(
108        long = "debian",
109        value_name = "FILE",
110        allow_hyphen_values = true,
111        requires_all = ["copyright", "license", "license_text"]
112    )]
113    pub output_debian: Option<String>,
114
115    /// Write scan output as HTML report to FILE
116    #[arg(long = "html", value_name = "FILE", allow_hyphen_values = true)]
117    pub output_html: Option<String>,
118
119    /// Write scan output as SPDX tag/value to FILE
120    #[arg(long = "spdx-tv", value_name = "FILE", allow_hyphen_values = true)]
121    pub output_spdx_tv: Option<String>,
122
123    /// Write scan output as SPDX RDF/XML to FILE
124    #[arg(long = "spdx-rdf", value_name = "FILE", allow_hyphen_values = true)]
125    pub output_spdx_rdf: Option<String>,
126
127    /// Write scan output as CycloneDX JSON to FILE
128    #[arg(long = "cyclonedx", value_name = "FILE", allow_hyphen_values = true)]
129    pub output_cyclonedx: Option<String>,
130
131    /// Write scan output as CycloneDX XML to FILE
132    #[arg(
133        long = "cyclonedx-xml",
134        value_name = "FILE",
135        allow_hyphen_values = true
136    )]
137    pub output_cyclonedx_xml: Option<String>,
138
139    /// Write scan output to FILE formatted with the custom template
140    #[arg(
141        long = "custom-output",
142        value_name = "FILE",
143        requires = "custom_template",
144        allow_hyphen_values = true
145    )]
146    pub custom_output: Option<String>,
147
148    /// Use this template FILE with --custom-output
149    #[arg(
150        long = "custom-template",
151        value_name = "FILE",
152        requires = "custom_output"
153    )]
154    pub custom_template: Option<String>,
155
156    /// Maximum recursion depth (0 means no depth limit)
157    #[arg(short, long, default_value = "0")]
158    pub max_depth: usize,
159
160    #[arg(short = 'n', long, default_value_t = default_processes(), allow_hyphen_values = true)]
161    pub processes: i32,
162
163    #[arg(long, default_value_t = 120.0)]
164    pub timeout: f64,
165
166    #[arg(short, long, conflicts_with = "verbose")]
167    pub quiet: bool,
168
169    #[arg(short, long, conflicts_with = "quiet")]
170    pub verbose: bool,
171
172    #[arg(long, conflicts_with = "full_root")]
173    pub strip_root: bool,
174
175    #[arg(long, conflicts_with = "strip_root")]
176    pub full_root: bool,
177
178    /// Exclude patterns (ScanCode-compatible alias: --ignore)
179    #[arg(long = "exclude", visible_alias = "ignore", value_delimiter = ',')]
180    pub exclude: Vec<String>,
181
182    #[arg(long, value_delimiter = ',')]
183    pub include: Vec<String>,
184
185    #[arg(long = "cache-dir", value_name = "PATH")]
186    pub cache_dir: Option<String>,
187
188    #[arg(long = "cache-clear")]
189    pub cache_clear: bool,
190
191    #[arg(long = "incremental")]
192    pub incremental: bool,
193
194    /// Maximum number of file and directory scan details kept in memory.
195    /// Use 0 for unlimited memory or -1 for disk-only spill during the scan.
196    #[arg(
197        long = "max-in-memory",
198        value_name = "INT",
199        default_value_t = 10000,
200        value_parser = parse_max_in_memory,
201        allow_hyphen_values = true
202    )]
203    pub max_in_memory: i64,
204
205    /// Collect file information such as checksums, type hints, and source/script flags.
206    #[arg(short = 'i', long)]
207    pub info: bool,
208
209    /// Load one or more existing ScanCode-style JSON scans instead of rescanning inputs.
210    #[arg(long)]
211    pub from_json: bool,
212
213    /// Scan input for application package and dependency manifests, lockfiles and related data
214    #[arg(short = 'p', long)]
215    pub package: bool,
216
217    /// Scan input for installed system package databases (RPM, dpkg, apk, etc.)
218    #[arg(long = "system-package")]
219    pub system_package: bool,
220
221    /// Scan supported compiled Go and Rust binaries for embedded package metadata.
222    #[arg(long = "package-in-compiled")]
223    pub package_in_compiled: bool,
224
225    /// Scan for system and application package data and skip license/copyright detection and top-level package creation.
226    #[arg(
227        long = "package-only",
228        conflicts_with_all = ["license", "summary", "package", "system_package"]
229    )]
230    pub package_only: bool,
231
232    /// Disable package assembly (merging related manifest/lockfiles into packages)
233    #[arg(long)]
234    pub no_assemble: bool,
235
236    /// Path to license rules directory containing .LICENSE and .RULE files.
237    /// If not specified, uses the built-in embedded license index.
238    #[arg(long, value_name = "PATH", requires = "license")]
239    pub license_rules_path: Option<String>,
240
241    /// Include matched text in license detection output
242    #[arg(long = "license-text", requires = "license")]
243    pub license_text: bool,
244
245    #[arg(long = "license-text-diagnostics", requires = "license_text")]
246    pub license_text_diagnostics: bool,
247
248    #[arg(long = "license-diagnostics", requires = "license")]
249    pub license_diagnostics: bool,
250
251    #[arg(long = "unknown-licenses", requires = "license")]
252    pub unknown_licenses: bool,
253
254    #[arg(
255        long = "license-score",
256        default_value_t = 0,
257        requires = "license",
258        value_parser = clap::value_parser!(u8).range(0..=100)
259    )]
260    pub license_score: u8,
261
262    #[arg(
263        long = "license-url-template",
264        default_value = DEFAULT_LICENSEDB_URL_TEMPLATE,
265        requires = "license"
266    )]
267    pub license_url_template: String,
268
269    #[arg(long)]
270    pub filter_clues: bool,
271
272    #[arg(
273        long = "ignore-author",
274        value_name = "PATTERN",
275        help = "Ignore a file and all its findings if an author matches the regex PATTERN"
276    )]
277    pub ignore_author: Vec<String>,
278
279    #[arg(
280        long = "ignore-copyright-holder",
281        value_name = "PATTERN",
282        help = "Ignore a file and all its findings if a copyright holder matches the regex PATTERN"
283    )]
284    pub ignore_copyright_holder: Vec<String>,
285
286    #[arg(long)]
287    pub only_findings: bool,
288
289    #[arg(long, requires = "info")]
290    pub mark_source: bool,
291
292    #[arg(long)]
293    pub classify: bool,
294
295    #[arg(long, requires = "classify")]
296    pub summary: bool,
297
298    #[arg(long = "license-clarity-score", requires = "classify")]
299    pub license_clarity_score: bool,
300
301    #[arg(long = "license-references", requires = "license")]
302    pub license_references: bool,
303
304    /// Evaluate file license detections against a YAML license policy file.
305    #[arg(
306        long = "license-policy",
307        value_name = "FILE",
308        value_parser = parse_license_policy_arg
309    )]
310    pub license_policy: Option<String>,
311
312    #[arg(long)]
313    pub tallies: bool,
314
315    #[arg(long = "tallies-key-files", requires_all = ["tallies", "classify"])]
316    pub tallies_key_files: bool,
317
318    #[arg(long = "tallies-with-details")]
319    pub tallies_with_details: bool,
320
321    #[arg(long = "facet", value_name = "<facet>=<pattern>")]
322    pub facet: Vec<String>,
323
324    #[arg(long = "tallies-by-facet", requires_all = ["facet", "tallies"])]
325    pub tallies_by_facet: bool,
326
327    #[arg(long)]
328    pub generated: bool,
329
330    /// Scan input for licenses
331    #[arg(short = 'l', long)]
332    pub license: bool,
333
334    #[arg(short = 'c', long)]
335    pub copyright: bool,
336
337    /// Scan input for email addresses
338    #[arg(short = 'e', long)]
339    pub email: bool,
340
341    /// Report only up to INT emails found in a file. Use 0 for no limit.
342    #[arg(long, default_value_t = 50, requires = "email")]
343    pub max_email: usize,
344
345    /// Scan input for URLs
346    #[arg(short = 'u', long)]
347    pub url: bool,
348
349    /// Report only up to INT URLs found in a file. Use 0 for no limit.
350    #[arg(long, default_value_t = 50, requires = "url")]
351    pub max_url: usize,
352
353    /// Show attribution notices for embedded license detection data
354    #[arg(
355        long,
356        conflicts_with_all = [
357            "output_json",
358            "output_json_pp",
359            "output_json_lines",
360            "output_yaml",
361            "output_debian",
362            "output_html",
363            "output_spdx_tv",
364            "output_spdx_rdf",
365            "output_cyclonedx",
366            "output_cyclonedx_xml",
367            "custom_output"
368        ]
369    )]
370    pub show_attribution: bool,
371}
372
/// Default worker count for `--processes`.
///
/// Returns one less than the parallelism reported by
/// [`std::thread::available_parallelism`], but never less than 1. When the
/// parallelism cannot be determined, 1 is returned.
///
/// The `usize` count is converted with a checked conversion that saturates at
/// `i32::MAX`, so an out-of-range value can never wrap into a negative count.
fn default_processes() -> i32 {
    let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
    // Leave one unit of parallelism free, while always keeping one worker.
    let workers = cpus.saturating_sub(1).max(1);
    i32::try_from(workers).unwrap_or(i32::MAX)
}
377
/// Clap value parser for `--max-in-memory`.
///
/// Accepts any `i64` that is `-1` or greater.
///
/// # Errors
///
/// Returns a message when `value` is not a valid `i64` or is below `-1`.
fn parse_max_in_memory(value: &str) -> Result<i64, String> {
    match value.parse::<i64>() {
        Err(_) => Err(format!("invalid integer value: {value}")),
        Ok(limit) if limit < -1 => {
            Err("--max-in-memory must be -1, 0, or a positive integer".to_string())
        }
        Ok(limit) => Ok(limit),
    }
}
387
388#[derive(Debug, Clone)]
389pub struct OutputTarget {
390    pub format: OutputFormat,
391    pub file: String,
392    pub custom_template: Option<String>,
393}
394
395impl Cli {
396    pub fn output_targets(&self) -> Vec<OutputTarget> {
397        let mut targets = Vec::new();
398
399        if let Some(file) = &self.output_json {
400            targets.push(OutputTarget {
401                format: OutputFormat::Json,
402                file: file.clone(),
403                custom_template: None,
404            });
405        }
406
407        if let Some(file) = &self.output_json_pp {
408            targets.push(OutputTarget {
409                format: OutputFormat::JsonPretty,
410                file: file.clone(),
411                custom_template: None,
412            });
413        }
414
415        if let Some(file) = &self.output_json_lines {
416            targets.push(OutputTarget {
417                format: OutputFormat::JsonLines,
418                file: file.clone(),
419                custom_template: None,
420            });
421        }
422
423        if let Some(file) = &self.output_yaml {
424            targets.push(OutputTarget {
425                format: OutputFormat::Yaml,
426                file: file.clone(),
427                custom_template: None,
428            });
429        }
430
431        if let Some(file) = &self.output_debian {
432            targets.push(OutputTarget {
433                format: OutputFormat::Debian,
434                file: file.clone(),
435                custom_template: None,
436            });
437        }
438
439        if let Some(file) = &self.output_html {
440            targets.push(OutputTarget {
441                format: OutputFormat::Html,
442                file: file.clone(),
443                custom_template: None,
444            });
445        }
446
447        if let Some(file) = &self.output_spdx_tv {
448            targets.push(OutputTarget {
449                format: OutputFormat::SpdxTv,
450                file: file.clone(),
451                custom_template: None,
452            });
453        }
454
455        if let Some(file) = &self.output_spdx_rdf {
456            targets.push(OutputTarget {
457                format: OutputFormat::SpdxRdf,
458                file: file.clone(),
459                custom_template: None,
460            });
461        }
462
463        if let Some(file) = &self.output_cyclonedx {
464            targets.push(OutputTarget {
465                format: OutputFormat::CycloneDxJson,
466                file: file.clone(),
467                custom_template: None,
468            });
469        }
470
471        if let Some(file) = &self.output_cyclonedx_xml {
472            targets.push(OutputTarget {
473                format: OutputFormat::CycloneDxXml,
474                file: file.clone(),
475                custom_template: None,
476            });
477        }
478
479        if let Some(file) = &self.custom_output {
480            targets.push(OutputTarget {
481                format: OutputFormat::CustomTemplate,
482                file: file.clone(),
483                custom_template: self.custom_template.clone(),
484            });
485        }
486
487        targets
488    }
489
490    pub fn output_header_options(&self) -> JsonMap<String, JsonValue> {
491        let mut options = JsonMap::new();
492        if !self.dir_path.is_empty() {
493            options.insert(
494                "input".to_string(),
495                JsonValue::Array(
496                    self.dir_path
497                        .iter()
498                        .cloned()
499                        .map(JsonValue::String)
500                        .collect(),
501                ),
502            );
503        }
504
505        let mut flags = Vec::new();
506
507        push_string_option(&mut flags, "--cache-dir", self.cache_dir.as_ref());
508        push_bool_option(&mut flags, "--cache-clear", self.cache_clear);
509        push_bool_option(&mut flags, "--classify", self.classify);
510        push_string_option(&mut flags, "--custom-output", self.custom_output.as_ref());
511        push_string_option(
512            &mut flags,
513            "--custom-template",
514            self.custom_template.as_ref(),
515        );
516        push_bool_option(&mut flags, "--copyright", self.copyright);
517        push_string_option(&mut flags, "--cyclonedx", self.output_cyclonedx.as_ref());
518        push_string_option(
519            &mut flags,
520            "--cyclonedx-xml",
521            self.output_cyclonedx_xml.as_ref(),
522        );
523        push_string_option(&mut flags, "--debian", self.output_debian.as_ref());
524        push_bool_option(&mut flags, "--email", self.email);
525        push_array_option(&mut flags, "--facet", &self.facet);
526        push_bool_option(&mut flags, "--filter-clues", self.filter_clues);
527        push_bool_option(&mut flags, "--from-json", self.from_json);
528        push_bool_option(&mut flags, "--full-root", self.full_root);
529        push_bool_option(&mut flags, "--generated", self.generated);
530        push_string_option(&mut flags, "--html", self.output_html.as_ref());
531        push_array_option(&mut flags, "--ignore", &self.exclude);
532        push_array_option(&mut flags, "--ignore-author", &self.ignore_author);
533        push_array_option(
534            &mut flags,
535            "--ignore-copyright-holder",
536            &self.ignore_copyright_holder,
537        );
538        push_bool_option(&mut flags, "--incremental", self.incremental);
539        push_array_option(&mut flags, "--include", &self.include);
540        push_bool_option(&mut flags, "--info", self.info);
541        push_string_option(&mut flags, "--json", self.output_json.as_ref());
542        push_string_option(&mut flags, "--json-lines", self.output_json_lines.as_ref());
543        push_string_option(&mut flags, "--json-pp", self.output_json_pp.as_ref());
544        push_bool_option(&mut flags, "--license", self.license);
545        push_bool_option(
546            &mut flags,
547            "--license-clarity-score",
548            self.license_clarity_score,
549        );
550        push_bool_option(
551            &mut flags,
552            "--license-diagnostics",
553            self.license_diagnostics,
554        );
555        push_string_option(&mut flags, "--license-policy", self.license_policy.as_ref());
556        push_bool_option(&mut flags, "--license-references", self.license_references);
557        push_non_default_u8_option(&mut flags, "--license-score", self.license_score, 0);
558        push_bool_option(&mut flags, "--license-text", self.license_text);
559        push_bool_option(
560            &mut flags,
561            "--license-text-diagnostics",
562            self.license_text_diagnostics,
563        );
564        push_non_default_string_option(
565            &mut flags,
566            "--license-url-template",
567            &self.license_url_template,
568            DEFAULT_LICENSEDB_URL_TEMPLATE,
569        );
570        push_non_default_usize_option(&mut flags, "--max-depth", self.max_depth, 0);
571        push_non_default_i64_option(&mut flags, "--max-in-memory", self.max_in_memory, 10000);
572        if self.email {
573            push_non_default_usize_option(&mut flags, "--max-email", self.max_email, 50);
574        }
575        if self.url {
576            push_non_default_usize_option(&mut flags, "--max-url", self.max_url, 50);
577        }
578        push_bool_option(&mut flags, "--mark-source", self.mark_source);
579        push_bool_option(&mut flags, "--no-assemble", self.no_assemble);
580        push_bool_option(&mut flags, "--only-findings", self.only_findings);
581        push_bool_option(&mut flags, "--package", self.package);
582        push_bool_option(
583            &mut flags,
584            "--package-in-compiled",
585            self.package_in_compiled,
586        );
587        push_bool_option(&mut flags, "--package-only", self.package_only);
588        push_non_default_i32_option(
589            &mut flags,
590            "--processes",
591            self.processes,
592            default_processes(),
593        );
594        push_bool_option(&mut flags, "--quiet", self.quiet);
595        push_string_option(&mut flags, "--spdx-rdf", self.output_spdx_rdf.as_ref());
596        push_string_option(&mut flags, "--spdx-tv", self.output_spdx_tv.as_ref());
597        push_bool_option(&mut flags, "--strip-root", self.strip_root);
598        push_bool_option(&mut flags, "--summary", self.summary);
599        push_bool_option(&mut flags, "--system-package", self.system_package);
600        push_bool_option(&mut flags, "--tallies", self.tallies);
601        push_bool_option(&mut flags, "--tallies-by-facet", self.tallies_by_facet);
602        push_bool_option(&mut flags, "--tallies-key-files", self.tallies_key_files);
603        push_bool_option(
604            &mut flags,
605            "--tallies-with-details",
606            self.tallies_with_details,
607        );
608        push_non_default_f64_option(&mut flags, "--timeout", self.timeout, 120.0);
609        push_bool_option(&mut flags, "--unknown-licenses", self.unknown_licenses);
610        push_bool_option(&mut flags, "--url", self.url);
611        push_bool_option(&mut flags, "--verbose", self.verbose);
612        push_string_option(&mut flags, "--yaml", self.output_yaml.as_ref());
613
614        flags.sort_by(|left, right| left.0.cmp(&right.0));
615        for (key, value) in flags {
616            options.insert(key, value);
617        }
618
619        options
620    }
621}
622
623fn push_bool_option(options: &mut Vec<(String, JsonValue)>, key: &str, enabled: bool) {
624    if enabled {
625        options.push((key.to_string(), JsonValue::Bool(true)));
626    }
627}
628
629fn push_string_option(options: &mut Vec<(String, JsonValue)>, key: &str, value: Option<&String>) {
630    if let Some(value) = value {
631        options.push((key.to_string(), JsonValue::String(value.clone())));
632    }
633}
634
635fn push_non_default_string_option(
636    options: &mut Vec<(String, JsonValue)>,
637    key: &str,
638    value: &str,
639    default: &str,
640) {
641    if value != default {
642        options.push((key.to_string(), JsonValue::String(value.to_string())));
643    }
644}
645
646fn push_array_option(options: &mut Vec<(String, JsonValue)>, key: &str, values: &[String]) {
647    if !values.is_empty() {
648        options.push((
649            key.to_string(),
650            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect()),
651        ));
652    }
653}
654
655fn push_non_default_usize_option(
656    options: &mut Vec<(String, JsonValue)>,
657    key: &str,
658    value: usize,
659    default: usize,
660) {
661    if value != default {
662        options.push((key.to_string(), JsonValue::Number(value.into())));
663    }
664}
665
666fn push_non_default_u8_option(
667    options: &mut Vec<(String, JsonValue)>,
668    key: &str,
669    value: u8,
670    default: u8,
671) {
672    if value != default {
673        options.push((key.to_string(), JsonValue::Number(value.into())));
674    }
675}
676
677fn push_non_default_i32_option(
678    options: &mut Vec<(String, JsonValue)>,
679    key: &str,
680    value: i32,
681    default: i32,
682) {
683    if value != default {
684        options.push((key.to_string(), JsonValue::Number(value.into())));
685    }
686}
687
688fn push_non_default_i64_option(
689    options: &mut Vec<(String, JsonValue)>,
690    key: &str,
691    value: i64,
692    default: i64,
693) {
694    if value != default {
695        options.push((key.to_string(), JsonValue::Number(value.into())));
696    }
697}
698
699fn push_non_default_f64_option(
700    options: &mut Vec<(String, JsonValue)>,
701    key: &str,
702    value: f64,
703    default: f64,
704) {
705    if (value - default).abs() > f64::EPSILON
706        && let Some(number) = JsonNumber::from_f64(value)
707    {
708        options.push((key.to_string(), JsonValue::Number(number)));
709    }
710}
711
712#[cfg(test)]
713mod tests {
714    use super::*;
715    use clap::CommandFactory;
716
717    #[test]
718    fn test_requires_at_least_one_output_option() {
719        let parsed = Cli::try_parse_from(["provenant", "samples"]);
720        assert!(parsed.is_err());
721    }
722
723    #[test]
724    fn test_parses_json_pretty_output_option() {
725        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
726            .expect("cli parse should succeed");
727
728        assert_eq!(parsed.output_json_pp.as_deref(), Some("scan.json"));
729        assert_eq!(parsed.output_targets().len(), 1);
730        assert_eq!(parsed.output_targets()[0].format, OutputFormat::JsonPretty);
731    }
732
733    #[test]
734    fn test_allows_multiple_output_options_in_one_run() {
735        let parsed = Cli::try_parse_from([
736            "provenant",
737            "--json",
738            "scan.json",
739            "--html",
740            "report.html",
741            "samples",
742        ])
743        .expect("cli parse should allow multiple outputs");
744
745        assert_eq!(parsed.output_targets().len(), 2);
746        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Json);
747        assert_eq!(parsed.output_targets()[1].format, OutputFormat::Html);
748    }
749
750    #[test]
751    fn test_show_attribution_conflicts_with_output_flags() {
752        let parsed = Cli::try_parse_from([
753            "provenant",
754            "--show-attribution",
755            "--json",
756            "scan.json",
757            "samples",
758        ]);
759        assert!(parsed.is_err());
760    }
761
762    #[test]
763    fn test_output_header_options_use_scancode_style_keys() {
764        let parsed = Cli::try_parse_from([
765            "provenant",
766            "--json-pp",
767            "scan.json",
768            "--license",
769            "--package",
770            "--strip-root",
771            "--ignore",
772            "*.git*",
773            "--ignore",
774            "target/*",
775            "samples",
776        ])
777        .expect("cli parse should succeed");
778
779        let options = parsed.output_header_options();
780
781        assert_eq!(
782            options.get("input"),
783            Some(&JsonValue::Array(vec![JsonValue::String(
784                "samples".to_string()
785            )]))
786        );
787        assert_eq!(
788            options.get("--json-pp"),
789            Some(&JsonValue::String("scan.json".to_string()))
790        );
791        assert_eq!(options.get("--license"), Some(&JsonValue::Bool(true)));
792        assert_eq!(options.get("--package"), Some(&JsonValue::Bool(true)));
793        assert_eq!(options.get("--strip-root"), Some(&JsonValue::Bool(true)));
794        assert_eq!(
795            options.get("--ignore"),
796            Some(&JsonValue::Array(vec![
797                JsonValue::String("*.git*".to_string()),
798                JsonValue::String("target/*".to_string()),
799            ]))
800        );
801    }
802
803    #[test]
804    fn test_output_header_options_skip_defaults_and_include_non_defaults() {
805        let default_options =
806            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
807                .expect("default cli parse should succeed")
808                .output_header_options();
809        assert!(!default_options.contains_key("--timeout"));
810        assert!(!default_options.contains_key("--processes"));
811
812        let custom_options = Cli::try_parse_from([
813            "provenant",
814            "--json-pp",
815            "scan.json",
816            "--timeout",
817            "30",
818            "--processes",
819            "4",
820            "samples",
821        ])
822        .expect("custom cli parse should succeed")
823        .output_header_options();
824
825        assert_eq!(
826            custom_options.get("--timeout"),
827            Some(&JsonValue::Number(
828                JsonNumber::from_f64(30.0).expect("valid number")
829            ))
830        );
831        assert_eq!(
832            custom_options.get("--processes"),
833            Some(&JsonValue::Number(4.into()))
834        );
835    }
836
837    #[test]
838    fn test_allows_stdout_dash_as_output_target() {
839        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "-", "samples"])
840            .expect("cli parse should allow stdout dash output target");
841
842        assert_eq!(parsed.output_json_pp.as_deref(), Some("-"));
843    }
844
845    #[test]
846    fn test_debian_requires_license_copyright_and_license_text() {
847        let missing_license_text = Cli::try_parse_from([
848            "provenant",
849            "--debian",
850            "scan.copyright",
851            "--license",
852            "--copyright",
853            "samples",
854        ]);
855        assert!(missing_license_text.is_err());
856
857        let parsed = Cli::try_parse_from([
858            "provenant",
859            "--debian",
860            "scan.copyright",
861            "--license",
862            "--copyright",
863            "--license-text",
864            "samples",
865        ])
866        .expect("cli parse should accept debian output");
867
868        assert_eq!(parsed.output_targets().len(), 1);
869        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Debian);
870        assert_eq!(parsed.output_debian.as_deref(), Some("scan.copyright"));
871    }
872
873    #[test]
874    fn test_debian_help_mentions_required_companion_flags() {
875        let command = Cli::command();
876        let debian_arg = command
877            .get_arguments()
878            .find(|arg| arg.get_long() == Some("debian"))
879            .expect("debian arg should exist");
880
881        let help = debian_arg
882            .get_help()
883            .expect("debian arg should have help text")
884            .to_string();
885
886        assert!(help.contains("requires --license, --copyright, and --license-text"));
887    }
888
889    #[test]
890    fn test_help_mentions_pdf_oxide_rust_log_escape_hatch() {
891        let help = Cli::command().render_help().to_string();
892
893        assert!(help.contains("RUST_LOG=pdf_oxide=warn"));
894        assert!(help.contains("suppresses noisy pdf_oxide logs by default"));
895    }
896
/// A policy file containing a `license_policies` key is accepted, and the
/// path given on the command line is stored unchanged.
#[test]
fn test_parses_license_policy_flag() {
    let scratch = tempfile::tempdir().expect("temp dir");
    let policy_file = scratch.path().join("policy.yml");
    std::fs::write(&policy_file, "license_policies: []\n").expect("policy written");
    let policy_arg = policy_file.to_str().expect("utf8 path");

    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-policy",
        policy_arg,
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should accept license-policy");

    assert_eq!(cli.license_policy.as_deref(), Some(policy_arg));
}
918
/// A policy file whose only top-level key is not `license_policies` makes parsing fail.
#[test]
fn test_rejects_invalid_license_policy_flag_value() {
    let scratch = tempfile::tempdir().expect("temp dir");
    let policy_file = scratch.path().join("policy.yml");
    std::fs::write(&policy_file, "not_license_policies: []\n").expect("policy written");
    let policy_arg = policy_file.to_str().expect("utf8 path");

    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-policy",
        policy_arg,
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
936
/// `--custom-output` alone and `--custom-template` alone are both rejected.
#[test]
fn test_custom_template_and_output_must_be_paired() {
    let output_only = ["provenant", "--custom-output", "result.txt", "samples"];
    assert!(Cli::try_parse_from(output_only).is_err());

    let template_only = ["provenant", "--custom-template", "tpl.tera", "samples"];
    assert!(Cli::try_parse_from(template_only).is_err());
}
947
/// `-n 4` and `--timeout 30` populate `processes` and `timeout`.
#[test]
fn test_parses_processes_and_timeout_options() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "-n",
        "4",
        "--timeout",
        "30",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert_eq!(cli.processes, 4);
    assert_eq!(cli.timeout, 30.0);
}
965
/// Passing `--strip-root` together with `--full-root` is a parse error.
#[test]
fn test_strip_root_conflicts_with_full_root() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--strip-root",
        "--full-root",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
978
/// A comma-separated `--include` value is split into individual patterns, and
/// `--only-findings` / `--filter-clues` set their flags.
#[test]
fn test_parses_include_and_only_findings_and_filter_clues() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--include",
        "src/**,Cargo.toml",
        "--only-findings",
        "--filter-clues",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert_eq!(cli.include, vec!["src/**", "Cargo.toml"]);
    assert!(cli.only_findings);
    assert!(cli.filter_clues);
}
997
/// Repeated `--ignore-author` values accumulate in order, and
/// `--ignore-copyright-holder` is collected separately.
#[test]
fn test_parses_ignore_author_and_holder_filters() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--ignore-author",
        "Jane.*",
        "--ignore-author",
        ".*Bot$",
        "--ignore-copyright-holder",
        "Example Corp",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert_eq!(cli.ignore_author, vec!["Jane.*", ".*Bot$"]);
    assert_eq!(cli.ignore_copyright_holder, vec!["Example Corp"]);
}
1017
/// `--ignore` feeds the `exclude` field, splitting its value on commas.
#[test]
fn test_parses_ignore_alias_for_exclude_patterns() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--ignore",
        "*.git*,target/*",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should accept --ignore alias");

    assert_eq!(cli.exclude, vec!["*.git*", "target/*"]);
}
1032
/// Passing `--quiet` together with `--verbose` is a parse error.
#[test]
fn test_quiet_conflicts_with_verbose() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--quiet",
        "--verbose",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1045
/// `--from-json`, `--info` and `--mark-source` parse together, with the
/// trailing positional captured in `dir_path`.
#[test]
fn test_parses_from_json_and_mark_source() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--from-json",
        "--info",
        "--mark-source",
        "sample-scan.json",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.from_json);
    assert!(cli.info);
    assert_eq!(cli.dir_path, vec!["sample-scan.json"]);
    assert!(cli.mark_source);
}
1064
/// `--mark-source` without `--info` is a parse error.
#[test]
fn test_mark_source_requires_info() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--mark-source",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1077
/// `--classify`, `--tallies`, repeated `--facet` definitions and
/// `--tallies-by-facet` all parse together.
#[test]
fn test_parses_classify_facet_and_tallies_by_facet() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--classify",
        "--tallies",
        "--facet",
        "dev=*.c",
        "--facet",
        "tests=*/tests/*",
        "--tallies-by-facet",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.classify);
    assert!(cli.tallies);
    assert_eq!(cli.facet, vec!["dev=*.c", "tests=*/tests/*"]);
    assert!(cli.tallies_by_facet);
}
1100
/// `--tallies-by-facet` with no `--facet` definitions is a parse error.
#[test]
fn test_tallies_by_facet_requires_facet_definitions() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--tallies-by-facet",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1113
/// `--summary` without `--classify` is a parse error.
#[test]
fn test_summary_requires_classify() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--summary",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1126
/// `--tallies-key-files` given with neither `--tallies` nor `--classify` is a parse error.
#[test]
fn test_tallies_key_files_requires_tallies_and_classify() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--tallies-key-files",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1139
/// The summary, clarity-score, tallies and generated flags all parse together
/// and each sets its own field.
#[test]
fn test_parses_summary_tallies_and_generated_flags() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--classify",
        "--summary",
        "--license-clarity-score",
        "--tallies",
        "--tallies-key-files",
        "--tallies-with-details",
        "--generated",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.classify);
    assert!(cli.summary);
    assert!(cli.license_clarity_score);
    assert!(cli.tallies);
    assert!(cli.tallies_key_files);
    assert!(cli.tallies_with_details);
    assert!(cli.generated);
}
1165
/// `--copyright` sets the `copyright` field.
#[test]
fn test_parses_copyright_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--copyright",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.copyright);
}
1179
/// With no package flag on the command line, `package` stays `false`.
#[test]
fn test_package_flag_defaults_to_disabled() {
    let args = ["provenant", "--json-pp", "scan.json", "samples"];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(!cli.package);
}
1187
/// `--system-package` sets the `system_package` field.
#[test]
fn test_parses_system_package_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--system-package",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.system_package);
}
1201
/// `--package-in-compiled` sets the `package_in_compiled` field.
#[test]
fn test_parses_package_in_compiled_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-in-compiled",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.package_in_compiled);
}
1215
/// `--package-only` sets the `package_only` field.
#[test]
fn test_parses_package_only_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-only",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.package_only);
}
1229
/// `--package-only` is rejected when combined with `--license` or with `--package`.
#[test]
fn test_package_only_conflicts_with_upstream_incompatible_flags() {
    // Same command line each time; only the conflicting flag varies.
    for conflicting_flag in ["--license", "--package"] {
        let outcome = Cli::try_parse_from([
            "provenant",
            "--json-pp",
            "scan.json",
            "--package-only",
            conflicting_flag,
            "samples",
        ]);
        assert!(outcome.is_err());
    }
}
1252
/// `--package` sets the `package` field.
#[test]
fn test_parses_package_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.package);
}
1266
/// The short form `-p` sets the `package` field.
#[test]
fn test_package_short_flag() {
    let args = ["provenant", "--json-pp", "scan.json", "-p", "samples"];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.package);
}
1274
/// `--license` sets the `license` field.
#[test]
fn test_parses_license_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.license);
}
1288
/// The short form `-l` sets the `license` field.
#[test]
fn test_license_short_flag() {
    let args = ["provenant", "--json-pp", "scan.json", "-l", "samples"];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.license);
}
1296
/// `--license-text` without `--license` is a parse error.
#[test]
fn test_license_text_requires_license() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-text",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1308
/// A command line containing `--include-text` fails to parse, even alongside `--license`.
#[test]
fn test_include_text_is_rejected() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--include-text",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1322
/// `--license-text-diagnostics` with `--license` but without `--license-text` is a parse error.
#[test]
fn test_license_text_diagnostics_requires_license_text() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-text-diagnostics",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1336
/// The license text/diagnostics flags parse together; the score and URL
/// template, which are not passed here, keep their defaults.
#[test]
fn test_parses_license_text_and_diagnostics_flags() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-text",
        "--license-text-diagnostics",
        "--license-diagnostics",
        "--unknown-licenses",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    // Flags given explicitly on the command line.
    assert!(cli.license_text);
    assert!(cli.license_text_diagnostics);
    assert!(cli.license_diagnostics);
    assert!(cli.unknown_licenses);

    // Options left at their defaults.
    assert_eq!(cli.license_score, 0);
    assert_eq!(cli.license_url_template, DEFAULT_LICENSEDB_URL_TEMPLATE);
}
1359
/// `--license-score` without `--license` is a parse error.
#[test]
fn test_license_score_requires_license() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-score",
        "70",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1373
/// `--license-url-template` without `--license` is a parse error.
#[test]
fn test_license_url_template_requires_license() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-url-template",
        "https://example.com/licenses/{}/",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1387
/// With `--license` present, `--license-score` and `--license-url-template`
/// are accepted and stored as given.
#[test]
fn test_parses_license_score_and_url_template_flags() {
    let url_template = "https://example.com/licenses/{}/";
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-score",
        "70",
        "--license-url-template",
        url_template,
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert_eq!(cli.license_score, 70);
    assert_eq!(cli.license_url_template, url_template);
}
1409
/// A `--license-score` of 101 is rejected even when `--license` is present.
#[test]
fn test_rejects_license_score_above_range() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-score",
        "101",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1424
/// `--license-references` without `--license` is a parse error.
#[test]
fn test_license_references_requires_license() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-references",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1437
/// `--license-references` combined with `--license` sets `license_references`.
#[test]
fn test_parses_license_references_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-references",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert!(cli.license_references);
}
1452
/// `--include-text` is not accepted: a command line using it alongside
/// `--license` fails to parse.
#[test]
fn test_include_text_alias_is_not_supported() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--include-text",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1466
/// The short flags `-c`, `-e` and `-u` set `copyright`, `email` and `url`.
#[test]
fn test_parses_short_scan_flags() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "-c",
        "-e",
        "-u",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should support short scan flags");

    assert!(cli.copyright);
    assert!(cli.email);
    assert!(cli.url);
}
1484
/// `-n 0` and `-n -1` are both accepted and stored verbatim in `processes`.
#[test]
fn test_parses_processes_compat_values_zero_and_minus_one() {
    let zero_args = ["provenant", "--json-pp", "scan.json", "-n", "0", "samples"];
    let with_zero =
        Cli::try_parse_from(zero_args).expect("cli parse should accept processes=0");
    assert_eq!(with_zero.processes, 0);

    let minus_one_args = ["provenant", "--json-pp", "scan.json", "-n", "-1", "samples"];
    let with_minus_one =
        Cli::try_parse_from(minus_one_args).expect("cli parse should accept processes=-1");
    assert_eq!(with_minus_one.processes, -1);
}
1497
/// `--cache-dir`, `--cache-clear` and `--max-in-memory` are stored as given,
/// while `incremental` stays off when its flag is absent.
#[test]
fn test_parses_cache_flags() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--cache-dir",
        "/tmp/sc-cache",
        "--cache-clear",
        "--max-in-memory",
        "5000",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should accept cache flags");

    assert_eq!(cli.cache_dir.as_deref(), Some("/tmp/sc-cache"));
    assert!(cli.cache_clear);
    assert!(!cli.incremental);
    assert_eq!(cli.max_in_memory, 5000);
}
1518
/// `--incremental` sets the `incremental` field.
#[test]
fn test_parses_incremental_flag() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--incremental",
        "samples",
    ];
    let cli = Cli::try_parse_from(args).expect("cli parse should accept incremental flag");

    assert!(cli.incremental);
}
1532
/// `max_in_memory` is 10000 when the flag is absent, and the values `-1` and
/// `0` are accepted and stored as given.
#[test]
fn test_max_in_memory_defaults_and_special_values() {
    // Flag omitted entirely.
    let baseline_args = ["provenant", "--json-pp", "scan.json", "samples"];
    let baseline =
        Cli::try_parse_from(baseline_args).expect("default max-in-memory should parse");
    assert_eq!(baseline.max_in_memory, 10000);

    // Explicit -1.
    let minus_one_args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--max-in-memory",
        "-1",
        "samples",
    ];
    let with_minus_one = Cli::try_parse_from(minus_one_args).expect("-1 should parse");
    assert_eq!(with_minus_one.max_in_memory, -1);

    // Explicit 0.
    let zero_args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--max-in-memory",
        "0",
        "samples",
    ];
    let with_zero = Cli::try_parse_from(zero_args).expect("0 should parse");
    assert_eq!(with_zero.max_in_memory, 0);
}
1562
/// A `--max-in-memory` value of `-2` is a parse error.
#[test]
fn test_max_in_memory_rejects_values_below_negative_one() {
    let args = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--max-in-memory",
        "-2",
        "samples",
    ];

    assert!(Cli::try_parse_from(args).is_err());
}
1576
/// With no depth option on the command line, `max_depth` is 0.
#[test]
fn test_max_depth_default_matches_reference_behavior() {
    let args = ["provenant", "--json-pp", "scan.json", "samples"];
    let cli = Cli::try_parse_from(args).expect("cli parse should succeed");

    assert_eq!(cli.max_depth, 0);
}
1584}