// provenant — cli.rs

1use clap::{ArgGroup, Parser};
2use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue};
3use std::fs;
4use std::path::Path;
5use yaml_serde::Value as YamlValue;
6
7use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
8use crate::output::OutputFormat;
9
/// Help epilogue passed to clap as `after_help` on [`Cli`]; it tells users how to
/// re-enable the `pdf_oxide` log output through `RUST_LOG`.
const PDF_OXIDE_LOG_HELP: &str = "Troubleshooting PDF parser logs:\n  Provenant suppresses noisy pdf_oxide logs by default.\n  To inspect raw pdf_oxide logs for debugging, rerun with RUST_LOG=pdf_oxide=warn (or =error).";
11
12fn parse_license_policy_arg(value: &str) -> Result<String, String> {
13    let policy_path = Path::new(value);
14    let metadata = fs::metadata(policy_path).map_err(|err| {
15        format!(
16            "Failed to read license policy file {:?}: {err}",
17            policy_path
18        )
19    })?;
20    if !metadata.is_file() {
21        return Err(format!(
22            "License policy path {:?} is not a regular file",
23            policy_path
24        ));
25    }
26
27    let policy_text = fs::read_to_string(policy_path).map_err(|err| {
28        format!(
29            "Failed to read license policy file {:?}: {err}",
30            policy_path
31        )
32    })?;
33    if policy_text.trim().is_empty() {
34        return Err(format!("License policy file {:?} is empty", policy_path));
35    }
36
37    let policy_value: YamlValue = yaml_serde::from_str(&policy_text).map_err(|err| {
38        format!(
39            "Failed to parse license policy file {:?}: {err}",
40            policy_path
41        )
42    })?;
43    let has_license_policies = policy_value
44        .as_mapping()
45        .and_then(|mapping| mapping.get(YamlValue::String("license_policies".to_string())))
46        .is_some();
47    if !has_license_policies {
48        return Err(format!(
49            "License policy file {:?} is missing a 'license_policies' attribute",
50            policy_path
51        ));
52    }
53
54    Ok(value.to_string())
55}
56
57#[derive(Parser, Debug)]
58#[command(
59    author = "The Provenant contributors",
60    version = env!("CARGO_PKG_VERSION"),
61    long_version = concat!(
62        env!("CARGO_PKG_VERSION"),
63        "\n",
64        "License detection uses data from ScanCode Toolkit (CC-BY-4.0). See NOTICE file or --show-attribution option."
65    ),
66    after_help = PDF_OXIDE_LOG_HELP,
67    about,
68    long_about = None,
69    group(
70        ArgGroup::new("output")
71            .required(true)
72            .args([
73                "output_json",
74                "output_json_pp",
75                "output_json_lines",
76                "output_yaml",
77                "output_debian",
78                "output_html",
79                "output_spdx_tv",
80                "output_spdx_rdf",
81                "output_cyclonedx",
82                "output_cyclonedx_xml",
83                "custom_output",
84                "show_attribution"
85            ])
86    )
87)]
88pub struct Cli {
89    /// File or directory paths to scan
90    #[arg(required = false)]
91    pub dir_path: Vec<String>,
92
93    /// Write scan output as compact JSON to FILE
94    #[arg(long = "json", value_name = "FILE", allow_hyphen_values = true)]
95    pub output_json: Option<String>,
96
97    /// Write scan output as pretty-printed JSON to FILE
98    #[arg(long = "json-pp", value_name = "FILE", allow_hyphen_values = true)]
99    pub output_json_pp: Option<String>,
100
101    /// Write scan output as JSON Lines to FILE
102    #[arg(long = "json-lines", value_name = "FILE", allow_hyphen_values = true)]
103    pub output_json_lines: Option<String>,
104
105    /// Write scan output as YAML to FILE
106    #[arg(long = "yaml", value_name = "FILE", allow_hyphen_values = true)]
107    pub output_yaml: Option<String>,
108
109    /// Write scan output in machine-readable Debian copyright format to FILE (requires --license, --copyright, and --license-text)
110    #[arg(
111        long = "debian",
112        value_name = "FILE",
113        allow_hyphen_values = true,
114        requires_all = ["copyright", "license", "license_text"]
115    )]
116    pub output_debian: Option<String>,
117
118    /// Write scan output as HTML report to FILE
119    #[arg(long = "html", value_name = "FILE", allow_hyphen_values = true)]
120    pub output_html: Option<String>,
121
122    /// Write scan output as SPDX tag/value to FILE
123    #[arg(long = "spdx-tv", value_name = "FILE", allow_hyphen_values = true)]
124    pub output_spdx_tv: Option<String>,
125
126    /// Write scan output as SPDX RDF/XML to FILE
127    #[arg(long = "spdx-rdf", value_name = "FILE", allow_hyphen_values = true)]
128    pub output_spdx_rdf: Option<String>,
129
130    /// Write scan output as CycloneDX JSON to FILE
131    #[arg(long = "cyclonedx", value_name = "FILE", allow_hyphen_values = true)]
132    pub output_cyclonedx: Option<String>,
133
134    /// Write scan output as CycloneDX XML to FILE
135    #[arg(
136        long = "cyclonedx-xml",
137        value_name = "FILE",
138        allow_hyphen_values = true
139    )]
140    pub output_cyclonedx_xml: Option<String>,
141
142    /// Write scan output to FILE formatted with the custom template
143    #[arg(
144        long = "custom-output",
145        value_name = "FILE",
146        requires = "custom_template",
147        allow_hyphen_values = true
148    )]
149    pub custom_output: Option<String>,
150
151    /// Use this template FILE with --custom-output
152    #[arg(
153        long = "custom-template",
154        value_name = "FILE",
155        requires = "custom_output"
156    )]
157    pub custom_template: Option<String>,
158
159    /// Maximum recursion depth (0 means no depth limit)
160    #[arg(short, long, default_value = "0")]
161    pub max_depth: usize,
162
163    #[arg(short = 'n', long, default_value_t = default_processes(), allow_hyphen_values = true)]
164    pub processes: i32,
165
166    #[arg(long, default_value_t = 120.0)]
167    pub timeout: f64,
168
169    #[arg(short, long, conflicts_with = "verbose")]
170    pub quiet: bool,
171
172    #[arg(short, long, conflicts_with = "quiet")]
173    pub verbose: bool,
174
175    #[arg(long, conflicts_with = "full_root")]
176    pub strip_root: bool,
177
178    #[arg(long, conflicts_with = "strip_root")]
179    pub full_root: bool,
180
181    /// Exclude patterns (ScanCode-compatible alias: --ignore)
182    #[arg(long = "exclude", visible_alias = "ignore", value_delimiter = ',')]
183    pub exclude: Vec<String>,
184
185    #[arg(long, value_delimiter = ',')]
186    pub include: Vec<String>,
187
188    #[arg(long = "cache-dir", value_name = "PATH")]
189    pub cache_dir: Option<String>,
190
191    #[arg(long = "cache-clear")]
192    pub cache_clear: bool,
193
194    #[arg(long = "incremental")]
195    pub incremental: bool,
196
197    /// Maximum number of file and directory scan details kept in memory.
198    /// Use 0 for unlimited memory or -1 for disk-only spill during the scan.
199    #[arg(
200        long = "max-in-memory",
201        value_name = "INT",
202        default_value_t = 10000,
203        value_parser = parse_max_in_memory,
204        allow_hyphen_values = true
205    )]
206    pub max_in_memory: i64,
207
208    /// Collect file information such as checksums, type hints, and source/script flags.
209    #[arg(short = 'i', long)]
210    pub info: bool,
211
212    /// Load one or more existing ScanCode-style JSON scans instead of rescanning inputs.
213    #[arg(long)]
214    pub from_json: bool,
215
216    /// Scan input for application package and dependency manifests, lockfiles and related data
217    #[arg(short = 'p', long)]
218    pub package: bool,
219
220    /// Scan input for installed system package databases (RPM, dpkg, apk, etc.)
221    #[arg(long = "system-package")]
222    pub system_package: bool,
223
224    /// Scan supported compiled Go and Rust binaries for embedded package metadata.
225    #[arg(long = "package-in-compiled")]
226    pub package_in_compiled: bool,
227
228    /// Scan for system and application package data and skip license/copyright detection and top-level package creation.
229    #[arg(
230        long = "package-only",
231        conflicts_with_all = ["license", "summary", "package", "system_package"]
232    )]
233    pub package_only: bool,
234
235    /// Disable package assembly (merging related manifest/lockfiles into packages)
236    #[arg(long)]
237    pub no_assemble: bool,
238
239    /// Path to license rules directory containing .LICENSE and .RULE files.
240    /// If not specified, uses the built-in embedded license index.
241    #[arg(long, value_name = "PATH", requires = "license")]
242    pub license_rules_path: Option<String>,
243
244    /// Include matched text in license detection output
245    #[arg(long = "license-text", requires = "license")]
246    pub license_text: bool,
247
248    #[arg(long = "license-text-diagnostics", requires = "license_text")]
249    pub license_text_diagnostics: bool,
250
251    #[arg(long = "license-diagnostics", requires = "license")]
252    pub license_diagnostics: bool,
253
254    #[arg(long = "unknown-licenses", requires = "license")]
255    pub unknown_licenses: bool,
256
257    #[arg(
258        long = "license-score",
259        default_value_t = 0,
260        requires = "license",
261        value_parser = clap::value_parser!(u8).range(0..=100)
262    )]
263    pub license_score: u8,
264
265    #[arg(
266        long = "license-url-template",
267        default_value = DEFAULT_LICENSEDB_URL_TEMPLATE,
268        requires = "license"
269    )]
270    pub license_url_template: String,
271
272    #[arg(long)]
273    pub filter_clues: bool,
274
275    #[arg(
276        long = "ignore-author",
277        value_name = "PATTERN",
278        help = "Ignore a file and all its findings if an author matches the regex PATTERN"
279    )]
280    pub ignore_author: Vec<String>,
281
282    #[arg(
283        long = "ignore-copyright-holder",
284        value_name = "PATTERN",
285        help = "Ignore a file and all its findings if a copyright holder matches the regex PATTERN"
286    )]
287    pub ignore_copyright_holder: Vec<String>,
288
289    #[arg(long)]
290    pub only_findings: bool,
291
292    #[arg(long, requires = "info")]
293    pub mark_source: bool,
294
295    #[arg(long)]
296    pub classify: bool,
297
298    #[arg(long, requires = "classify")]
299    pub summary: bool,
300
301    #[arg(long = "license-clarity-score", requires = "classify")]
302    pub license_clarity_score: bool,
303
304    #[arg(long = "license-references", requires = "license")]
305    pub license_references: bool,
306
307    /// Evaluate file license detections against a YAML license policy file.
308    #[arg(
309        long = "license-policy",
310        value_name = "FILE",
311        value_parser = parse_license_policy_arg
312    )]
313    pub license_policy: Option<String>,
314
315    #[arg(long)]
316    pub tallies: bool,
317
318    #[arg(long = "tallies-key-files", requires_all = ["tallies", "classify"])]
319    pub tallies_key_files: bool,
320
321    #[arg(long = "tallies-with-details")]
322    pub tallies_with_details: bool,
323
324    #[arg(long = "facet", value_name = "<facet>=<pattern>")]
325    pub facet: Vec<String>,
326
327    #[arg(long = "tallies-by-facet", requires_all = ["facet", "tallies"])]
328    pub tallies_by_facet: bool,
329
330    #[arg(long)]
331    pub generated: bool,
332
333    /// Scan input for licenses
334    #[arg(short = 'l', long)]
335    pub license: bool,
336
337    #[arg(short = 'c', long)]
338    pub copyright: bool,
339
340    /// Scan input for email addresses
341    #[arg(short = 'e', long)]
342    pub email: bool,
343
344    /// Report only up to INT emails found in a file. Use 0 for no limit.
345    #[arg(long, default_value_t = 50, requires = "email")]
346    pub max_email: usize,
347
348    /// Scan input for URLs
349    #[arg(short = 'u', long)]
350    pub url: bool,
351
352    /// Report only up to INT URLs found in a file. Use 0 for no limit.
353    #[arg(long, default_value_t = 50, requires = "url")]
354    pub max_url: usize,
355
356    /// Show attribution notices for embedded license detection data
357    #[arg(long)]
358    pub show_attribution: bool,
359}
360
/// Default for `--processes`: one fewer than the parallelism reported by the
/// standard library, and never less than one.
///
/// Falls back to a single worker when the available parallelism cannot be
/// determined. The count is converted with `try_from` and saturates at
/// `i32::MAX` rather than wrapping through an `as` cast.
fn default_processes() -> i32 {
    let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
    let workers = cpus.saturating_sub(1).max(1);
    i32::try_from(workers).unwrap_or(i32::MAX)
}
365
/// Value parser for `--max-in-memory`.
///
/// Accepts any `i64` that is `-1` or greater and returns it unchanged.
///
/// # Errors
///
/// Returns a message when `value` is not an integer or is below `-1`.
fn parse_max_in_memory(value: &str) -> Result<i64, String> {
    match value.parse::<i64>() {
        Ok(parsed) if parsed >= -1 => Ok(parsed),
        Ok(_) => Err("--max-in-memory must be -1, 0, or a positive integer".to_string()),
        Err(_) => Err(format!("invalid integer value: {value}")),
    }
}
375
376#[derive(Debug, Clone)]
377pub struct OutputTarget {
378    pub format: OutputFormat,
379    pub file: String,
380    pub custom_template: Option<String>,
381}
382
383impl Cli {
384    pub fn output_targets(&self) -> Vec<OutputTarget> {
385        let mut targets = Vec::new();
386
387        if let Some(file) = &self.output_json {
388            targets.push(OutputTarget {
389                format: OutputFormat::Json,
390                file: file.clone(),
391                custom_template: None,
392            });
393        }
394
395        if let Some(file) = &self.output_json_pp {
396            targets.push(OutputTarget {
397                format: OutputFormat::JsonPretty,
398                file: file.clone(),
399                custom_template: None,
400            });
401        }
402
403        if let Some(file) = &self.output_json_lines {
404            targets.push(OutputTarget {
405                format: OutputFormat::JsonLines,
406                file: file.clone(),
407                custom_template: None,
408            });
409        }
410
411        if let Some(file) = &self.output_yaml {
412            targets.push(OutputTarget {
413                format: OutputFormat::Yaml,
414                file: file.clone(),
415                custom_template: None,
416            });
417        }
418
419        if let Some(file) = &self.output_debian {
420            targets.push(OutputTarget {
421                format: OutputFormat::Debian,
422                file: file.clone(),
423                custom_template: None,
424            });
425        }
426
427        if let Some(file) = &self.output_html {
428            targets.push(OutputTarget {
429                format: OutputFormat::Html,
430                file: file.clone(),
431                custom_template: None,
432            });
433        }
434
435        if let Some(file) = &self.output_spdx_tv {
436            targets.push(OutputTarget {
437                format: OutputFormat::SpdxTv,
438                file: file.clone(),
439                custom_template: None,
440            });
441        }
442
443        if let Some(file) = &self.output_spdx_rdf {
444            targets.push(OutputTarget {
445                format: OutputFormat::SpdxRdf,
446                file: file.clone(),
447                custom_template: None,
448            });
449        }
450
451        if let Some(file) = &self.output_cyclonedx {
452            targets.push(OutputTarget {
453                format: OutputFormat::CycloneDxJson,
454                file: file.clone(),
455                custom_template: None,
456            });
457        }
458
459        if let Some(file) = &self.output_cyclonedx_xml {
460            targets.push(OutputTarget {
461                format: OutputFormat::CycloneDxXml,
462                file: file.clone(),
463                custom_template: None,
464            });
465        }
466
467        if let Some(file) = &self.custom_output {
468            targets.push(OutputTarget {
469                format: OutputFormat::CustomTemplate,
470                file: file.clone(),
471                custom_template: self.custom_template.clone(),
472            });
473        }
474
475        targets
476    }
477
478    pub fn output_header_options(&self) -> JsonMap<String, JsonValue> {
479        let mut options = JsonMap::new();
480        if !self.dir_path.is_empty() {
481            options.insert(
482                "input".to_string(),
483                JsonValue::Array(
484                    self.dir_path
485                        .iter()
486                        .cloned()
487                        .map(JsonValue::String)
488                        .collect(),
489                ),
490            );
491        }
492
493        let mut flags = Vec::new();
494
495        push_string_option(&mut flags, "--cache-dir", self.cache_dir.as_ref());
496        push_bool_option(&mut flags, "--cache-clear", self.cache_clear);
497        push_bool_option(&mut flags, "--classify", self.classify);
498        push_string_option(&mut flags, "--custom-output", self.custom_output.as_ref());
499        push_string_option(
500            &mut flags,
501            "--custom-template",
502            self.custom_template.as_ref(),
503        );
504        push_bool_option(&mut flags, "--copyright", self.copyright);
505        push_string_option(&mut flags, "--cyclonedx", self.output_cyclonedx.as_ref());
506        push_string_option(
507            &mut flags,
508            "--cyclonedx-xml",
509            self.output_cyclonedx_xml.as_ref(),
510        );
511        push_string_option(&mut flags, "--debian", self.output_debian.as_ref());
512        push_bool_option(&mut flags, "--email", self.email);
513        push_array_option(&mut flags, "--facet", &self.facet);
514        push_bool_option(&mut flags, "--filter-clues", self.filter_clues);
515        push_bool_option(&mut flags, "--from-json", self.from_json);
516        push_bool_option(&mut flags, "--full-root", self.full_root);
517        push_bool_option(&mut flags, "--generated", self.generated);
518        push_string_option(&mut flags, "--html", self.output_html.as_ref());
519        push_array_option(&mut flags, "--ignore", &self.exclude);
520        push_array_option(&mut flags, "--ignore-author", &self.ignore_author);
521        push_array_option(
522            &mut flags,
523            "--ignore-copyright-holder",
524            &self.ignore_copyright_holder,
525        );
526        push_bool_option(&mut flags, "--incremental", self.incremental);
527        push_array_option(&mut flags, "--include", &self.include);
528        push_bool_option(&mut flags, "--info", self.info);
529        push_string_option(&mut flags, "--json", self.output_json.as_ref());
530        push_string_option(&mut flags, "--json-lines", self.output_json_lines.as_ref());
531        push_string_option(&mut flags, "--json-pp", self.output_json_pp.as_ref());
532        push_bool_option(&mut flags, "--license", self.license);
533        push_bool_option(
534            &mut flags,
535            "--license-clarity-score",
536            self.license_clarity_score,
537        );
538        push_bool_option(
539            &mut flags,
540            "--license-diagnostics",
541            self.license_diagnostics,
542        );
543        push_string_option(&mut flags, "--license-policy", self.license_policy.as_ref());
544        push_bool_option(&mut flags, "--license-references", self.license_references);
545        push_non_default_u8_option(&mut flags, "--license-score", self.license_score, 0);
546        push_bool_option(&mut flags, "--license-text", self.license_text);
547        push_bool_option(
548            &mut flags,
549            "--license-text-diagnostics",
550            self.license_text_diagnostics,
551        );
552        push_non_default_string_option(
553            &mut flags,
554            "--license-url-template",
555            &self.license_url_template,
556            DEFAULT_LICENSEDB_URL_TEMPLATE,
557        );
558        push_non_default_usize_option(&mut flags, "--max-depth", self.max_depth, 0);
559        push_non_default_i64_option(&mut flags, "--max-in-memory", self.max_in_memory, 10000);
560        if self.email {
561            push_non_default_usize_option(&mut flags, "--max-email", self.max_email, 50);
562        }
563        if self.url {
564            push_non_default_usize_option(&mut flags, "--max-url", self.max_url, 50);
565        }
566        push_bool_option(&mut flags, "--mark-source", self.mark_source);
567        push_bool_option(&mut flags, "--no-assemble", self.no_assemble);
568        push_bool_option(&mut flags, "--only-findings", self.only_findings);
569        push_bool_option(&mut flags, "--package", self.package);
570        push_bool_option(
571            &mut flags,
572            "--package-in-compiled",
573            self.package_in_compiled,
574        );
575        push_bool_option(&mut flags, "--package-only", self.package_only);
576        push_non_default_i32_option(
577            &mut flags,
578            "--processes",
579            self.processes,
580            default_processes(),
581        );
582        push_bool_option(&mut flags, "--quiet", self.quiet);
583        push_string_option(&mut flags, "--spdx-rdf", self.output_spdx_rdf.as_ref());
584        push_string_option(&mut flags, "--spdx-tv", self.output_spdx_tv.as_ref());
585        push_bool_option(&mut flags, "--strip-root", self.strip_root);
586        push_bool_option(&mut flags, "--summary", self.summary);
587        push_bool_option(&mut flags, "--system-package", self.system_package);
588        push_bool_option(&mut flags, "--tallies", self.tallies);
589        push_bool_option(&mut flags, "--tallies-by-facet", self.tallies_by_facet);
590        push_bool_option(&mut flags, "--tallies-key-files", self.tallies_key_files);
591        push_bool_option(
592            &mut flags,
593            "--tallies-with-details",
594            self.tallies_with_details,
595        );
596        push_non_default_f64_option(&mut flags, "--timeout", self.timeout, 120.0);
597        push_bool_option(&mut flags, "--unknown-licenses", self.unknown_licenses);
598        push_bool_option(&mut flags, "--url", self.url);
599        push_bool_option(&mut flags, "--verbose", self.verbose);
600        push_string_option(&mut flags, "--yaml", self.output_yaml.as_ref());
601
602        flags.sort_by(|left, right| left.0.cmp(&right.0));
603        for (key, value) in flags {
604            options.insert(key, value);
605        }
606
607        options
608    }
609}
610
611fn push_bool_option(options: &mut Vec<(String, JsonValue)>, key: &str, enabled: bool) {
612    if enabled {
613        options.push((key.to_string(), JsonValue::Bool(true)));
614    }
615}
616
617fn push_string_option(options: &mut Vec<(String, JsonValue)>, key: &str, value: Option<&String>) {
618    if let Some(value) = value {
619        options.push((key.to_string(), JsonValue::String(value.clone())));
620    }
621}
622
623fn push_non_default_string_option(
624    options: &mut Vec<(String, JsonValue)>,
625    key: &str,
626    value: &str,
627    default: &str,
628) {
629    if value != default {
630        options.push((key.to_string(), JsonValue::String(value.to_string())));
631    }
632}
633
634fn push_array_option(options: &mut Vec<(String, JsonValue)>, key: &str, values: &[String]) {
635    if !values.is_empty() {
636        options.push((
637            key.to_string(),
638            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect()),
639        ));
640    }
641}
642
643fn push_non_default_usize_option(
644    options: &mut Vec<(String, JsonValue)>,
645    key: &str,
646    value: usize,
647    default: usize,
648) {
649    if value != default {
650        options.push((key.to_string(), JsonValue::Number(value.into())));
651    }
652}
653
654fn push_non_default_u8_option(
655    options: &mut Vec<(String, JsonValue)>,
656    key: &str,
657    value: u8,
658    default: u8,
659) {
660    if value != default {
661        options.push((key.to_string(), JsonValue::Number(value.into())));
662    }
663}
664
665fn push_non_default_i32_option(
666    options: &mut Vec<(String, JsonValue)>,
667    key: &str,
668    value: i32,
669    default: i32,
670) {
671    if value != default {
672        options.push((key.to_string(), JsonValue::Number(value.into())));
673    }
674}
675
676fn push_non_default_i64_option(
677    options: &mut Vec<(String, JsonValue)>,
678    key: &str,
679    value: i64,
680    default: i64,
681) {
682    if value != default {
683        options.push((key.to_string(), JsonValue::Number(value.into())));
684    }
685}
686
687fn push_non_default_f64_option(
688    options: &mut Vec<(String, JsonValue)>,
689    key: &str,
690    value: f64,
691    default: f64,
692) {
693    if (value - default).abs() > f64::EPSILON
694        && let Some(number) = JsonNumber::from_f64(value)
695    {
696        options.push((key.to_string(), JsonValue::Number(number)));
697    }
698}
699
700#[cfg(test)]
701mod tests {
702    use super::*;
703    use clap::CommandFactory;
704
705    #[test]
706    fn test_requires_at_least_one_output_option() {
707        let parsed = Cli::try_parse_from(["provenant", "samples"]);
708        assert!(parsed.is_err());
709    }
710
711    #[test]
712    fn test_parses_json_pretty_output_option() {
713        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
714            .expect("cli parse should succeed");
715
716        assert_eq!(parsed.output_json_pp.as_deref(), Some("scan.json"));
717        assert_eq!(parsed.output_targets().len(), 1);
718        assert_eq!(parsed.output_targets()[0].format, OutputFormat::JsonPretty);
719    }
720
721    #[test]
722    fn test_output_header_options_use_scancode_style_keys() {
723        let parsed = Cli::try_parse_from([
724            "provenant",
725            "--json-pp",
726            "scan.json",
727            "--license",
728            "--package",
729            "--strip-root",
730            "--ignore",
731            "*.git*",
732            "--ignore",
733            "target/*",
734            "samples",
735        ])
736        .expect("cli parse should succeed");
737
738        let options = parsed.output_header_options();
739
740        assert_eq!(
741            options.get("input"),
742            Some(&JsonValue::Array(vec![JsonValue::String(
743                "samples".to_string()
744            )]))
745        );
746        assert_eq!(
747            options.get("--json-pp"),
748            Some(&JsonValue::String("scan.json".to_string()))
749        );
750        assert_eq!(options.get("--license"), Some(&JsonValue::Bool(true)));
751        assert_eq!(options.get("--package"), Some(&JsonValue::Bool(true)));
752        assert_eq!(options.get("--strip-root"), Some(&JsonValue::Bool(true)));
753        assert_eq!(
754            options.get("--ignore"),
755            Some(&JsonValue::Array(vec![
756                JsonValue::String("*.git*".to_string()),
757                JsonValue::String("target/*".to_string()),
758            ]))
759        );
760    }
761
762    #[test]
763    fn test_output_header_options_skip_defaults_and_include_non_defaults() {
764        let default_options =
765            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
766                .expect("default cli parse should succeed")
767                .output_header_options();
768        assert!(!default_options.contains_key("--timeout"));
769        assert!(!default_options.contains_key("--processes"));
770
771        let custom_options = Cli::try_parse_from([
772            "provenant",
773            "--json-pp",
774            "scan.json",
775            "--timeout",
776            "30",
777            "--processes",
778            "4",
779            "samples",
780        ])
781        .expect("custom cli parse should succeed")
782        .output_header_options();
783
784        assert_eq!(
785            custom_options.get("--timeout"),
786            Some(&JsonValue::Number(
787                JsonNumber::from_f64(30.0).expect("valid number")
788            ))
789        );
790        assert_eq!(
791            custom_options.get("--processes"),
792            Some(&JsonValue::Number(4.into()))
793        );
794    }
795
796    #[test]
797    fn test_allows_stdout_dash_as_output_target() {
798        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "-", "samples"])
799            .expect("cli parse should allow stdout dash output target");
800
801        assert_eq!(parsed.output_json_pp.as_deref(), Some("-"));
802    }
803
804    #[test]
805    fn test_debian_requires_license_copyright_and_license_text() {
806        let missing_license_text = Cli::try_parse_from([
807            "provenant",
808            "--debian",
809            "scan.copyright",
810            "--license",
811            "--copyright",
812            "samples",
813        ]);
814        assert!(missing_license_text.is_err());
815
816        let parsed = Cli::try_parse_from([
817            "provenant",
818            "--debian",
819            "scan.copyright",
820            "--license",
821            "--copyright",
822            "--license-text",
823            "samples",
824        ])
825        .expect("cli parse should accept debian output");
826
827        assert_eq!(parsed.output_targets().len(), 1);
828        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Debian);
829        assert_eq!(parsed.output_debian.as_deref(), Some("scan.copyright"));
830    }
831
832    #[test]
833    fn test_debian_help_mentions_required_companion_flags() {
834        let command = Cli::command();
835        let debian_arg = command
836            .get_arguments()
837            .find(|arg| arg.get_long() == Some("debian"))
838            .expect("debian arg should exist");
839
840        let help = debian_arg
841            .get_help()
842            .expect("debian arg should have help text")
843            .to_string();
844
845        assert!(help.contains("requires --license, --copyright, and --license-text"));
846    }
847
848    #[test]
849    fn test_help_mentions_pdf_oxide_rust_log_escape_hatch() {
850        let help = Cli::command().render_help().to_string();
851
852        assert!(help.contains("RUST_LOG=pdf_oxide=warn"));
853        assert!(help.contains("suppresses noisy pdf_oxide logs by default"));
854    }
855
856    #[test]
857    fn test_parses_license_policy_flag() {
858        let temp = tempfile::tempdir().expect("temp dir");
859        let policy_path = temp.path().join("policy.yml");
860        std::fs::write(&policy_path, "license_policies: []\n").expect("policy written");
861
862        let parsed = Cli::try_parse_from([
863            "provenant",
864            "--json-pp",
865            "scan.json",
866            "--license-policy",
867            policy_path.to_str().expect("utf8 path"),
868            "samples",
869        ])
870        .expect("cli parse should accept license-policy");
871
872        assert_eq!(
873            parsed.license_policy.as_deref(),
874            Some(policy_path.to_str().expect("utf8 path"))
875        );
876    }
877
878    #[test]
879    fn test_rejects_invalid_license_policy_flag_value() {
880        let temp = tempfile::tempdir().expect("temp dir");
881        let policy_path = temp.path().join("policy.yml");
882        std::fs::write(&policy_path, "not_license_policies: []\n").expect("policy written");
883
884        let parsed = Cli::try_parse_from([
885            "provenant",
886            "--json-pp",
887            "scan.json",
888            "--license-policy",
889            policy_path.to_str().expect("utf8 path"),
890            "samples",
891        ]);
892
893        assert!(parsed.is_err());
894    }
895
896    #[test]
897    fn test_custom_template_and_output_must_be_paired() {
898        let missing_template =
899            Cli::try_parse_from(["provenant", "--custom-output", "result.txt", "samples"]);
900        assert!(missing_template.is_err());
901
902        let missing_output =
903            Cli::try_parse_from(["provenant", "--custom-template", "tpl.tera", "samples"]);
904        assert!(missing_output.is_err());
905    }
906
907    #[test]
908    fn test_parses_processes_and_timeout_options() {
909        let parsed = Cli::try_parse_from([
910            "provenant",
911            "--json-pp",
912            "scan.json",
913            "-n",
914            "4",
915            "--timeout",
916            "30",
917            "samples",
918        ])
919        .expect("cli parse should succeed");
920
921        assert_eq!(parsed.processes, 4);
922        assert_eq!(parsed.timeout, 30.0);
923    }
924
925    #[test]
926    fn test_strip_root_conflicts_with_full_root() {
927        let parsed = Cli::try_parse_from([
928            "provenant",
929            "--json-pp",
930            "scan.json",
931            "--strip-root",
932            "--full-root",
933            "samples",
934        ]);
935        assert!(parsed.is_err());
936    }
937
938    #[test]
939    fn test_parses_include_and_only_findings_and_filter_clues() {
940        let parsed = Cli::try_parse_from([
941            "provenant",
942            "--json-pp",
943            "scan.json",
944            "--include",
945            "src/**,Cargo.toml",
946            "--only-findings",
947            "--filter-clues",
948            "samples",
949        ])
950        .expect("cli parse should succeed");
951
952        assert_eq!(parsed.include, vec!["src/**", "Cargo.toml"]);
953        assert!(parsed.only_findings);
954        assert!(parsed.filter_clues);
955    }
956
957    #[test]
958    fn test_parses_ignore_author_and_holder_filters() {
959        let parsed = Cli::try_parse_from([
960            "provenant",
961            "--json-pp",
962            "scan.json",
963            "--ignore-author",
964            "Jane.*",
965            "--ignore-author",
966            ".*Bot$",
967            "--ignore-copyright-holder",
968            "Example Corp",
969            "samples",
970        ])
971        .expect("cli parse should succeed");
972
973        assert_eq!(parsed.ignore_author, vec!["Jane.*", ".*Bot$"]);
974        assert_eq!(parsed.ignore_copyright_holder, vec!["Example Corp"]);
975    }
976
977    #[test]
978    fn test_parses_ignore_alias_for_exclude_patterns() {
979        let parsed = Cli::try_parse_from([
980            "provenant",
981            "--json-pp",
982            "scan.json",
983            "--ignore",
984            "*.git*,target/*",
985            "samples",
986        ])
987        .expect("cli parse should accept --ignore alias");
988
989        assert_eq!(parsed.exclude, vec!["*.git*", "target/*"]);
990    }
991
992    #[test]
993    fn test_quiet_conflicts_with_verbose() {
994        let parsed = Cli::try_parse_from([
995            "provenant",
996            "--json-pp",
997            "scan.json",
998            "--quiet",
999            "--verbose",
1000            "samples",
1001        ]);
1002        assert!(parsed.is_err());
1003    }
1004
1005    #[test]
1006    fn test_parses_from_json_and_mark_source() {
1007        let parsed = Cli::try_parse_from([
1008            "provenant",
1009            "--json-pp",
1010            "scan.json",
1011            "--from-json",
1012            "--info",
1013            "--mark-source",
1014            "sample-scan.json",
1015        ])
1016        .expect("cli parse should succeed");
1017
1018        assert!(parsed.from_json);
1019        assert!(parsed.info);
1020        assert_eq!(parsed.dir_path, vec!["sample-scan.json"]);
1021        assert!(parsed.mark_source);
1022    }
1023
1024    #[test]
1025    fn test_mark_source_requires_info() {
1026        let parsed = Cli::try_parse_from([
1027            "provenant",
1028            "--json-pp",
1029            "scan.json",
1030            "--mark-source",
1031            "samples",
1032        ]);
1033
1034        assert!(parsed.is_err());
1035    }
1036
1037    #[test]
1038    fn test_parses_classify_facet_and_tallies_by_facet() {
1039        let parsed = Cli::try_parse_from([
1040            "provenant",
1041            "--json-pp",
1042            "scan.json",
1043            "--classify",
1044            "--tallies",
1045            "--facet",
1046            "dev=*.c",
1047            "--facet",
1048            "tests=*/tests/*",
1049            "--tallies-by-facet",
1050            "samples",
1051        ])
1052        .expect("cli parse should succeed");
1053
1054        assert!(parsed.classify);
1055        assert!(parsed.tallies);
1056        assert_eq!(parsed.facet, vec!["dev=*.c", "tests=*/tests/*"]);
1057        assert!(parsed.tallies_by_facet);
1058    }
1059
1060    #[test]
1061    fn test_tallies_by_facet_requires_facet_definitions() {
1062        let parsed = Cli::try_parse_from([
1063            "provenant",
1064            "--json-pp",
1065            "scan.json",
1066            "--tallies-by-facet",
1067            "samples",
1068        ]);
1069
1070        assert!(parsed.is_err());
1071    }
1072
1073    #[test]
1074    fn test_summary_requires_classify() {
1075        let parsed = Cli::try_parse_from([
1076            "provenant",
1077            "--json-pp",
1078            "scan.json",
1079            "--summary",
1080            "samples",
1081        ]);
1082
1083        assert!(parsed.is_err());
1084    }
1085
1086    #[test]
1087    fn test_tallies_key_files_requires_tallies_and_classify() {
1088        let parsed = Cli::try_parse_from([
1089            "provenant",
1090            "--json-pp",
1091            "scan.json",
1092            "--tallies-key-files",
1093            "samples",
1094        ]);
1095
1096        assert!(parsed.is_err());
1097    }
1098
1099    #[test]
1100    fn test_parses_summary_tallies_and_generated_flags() {
1101        let parsed = Cli::try_parse_from([
1102            "provenant",
1103            "--json-pp",
1104            "scan.json",
1105            "--classify",
1106            "--summary",
1107            "--license-clarity-score",
1108            "--tallies",
1109            "--tallies-key-files",
1110            "--tallies-with-details",
1111            "--generated",
1112            "samples",
1113        ])
1114        .expect("cli parse should succeed");
1115
1116        assert!(parsed.classify);
1117        assert!(parsed.summary);
1118        assert!(parsed.license_clarity_score);
1119        assert!(parsed.tallies);
1120        assert!(parsed.tallies_key_files);
1121        assert!(parsed.tallies_with_details);
1122        assert!(parsed.generated);
1123    }
1124
1125    #[test]
1126    fn test_parses_copyright_flag() {
1127        let parsed = Cli::try_parse_from([
1128            "provenant",
1129            "--json-pp",
1130            "scan.json",
1131            "--copyright",
1132            "samples",
1133        ])
1134        .expect("cli parse should succeed");
1135
1136        assert!(parsed.copyright);
1137    }
1138
1139    #[test]
1140    fn test_package_flag_defaults_to_disabled() {
1141        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1142            .expect("cli parse should succeed");
1143
1144        assert!(!parsed.package);
1145    }
1146
1147    #[test]
1148    fn test_parses_system_package_flag() {
1149        let parsed = Cli::try_parse_from([
1150            "provenant",
1151            "--json-pp",
1152            "scan.json",
1153            "--system-package",
1154            "samples",
1155        ])
1156        .expect("cli parse should succeed");
1157
1158        assert!(parsed.system_package);
1159    }
1160
1161    #[test]
1162    fn test_parses_package_in_compiled_flag() {
1163        let parsed = Cli::try_parse_from([
1164            "provenant",
1165            "--json-pp",
1166            "scan.json",
1167            "--package-in-compiled",
1168            "samples",
1169        ])
1170        .expect("cli parse should succeed");
1171
1172        assert!(parsed.package_in_compiled);
1173    }
1174
1175    #[test]
1176    fn test_parses_package_only_flag() {
1177        let parsed = Cli::try_parse_from([
1178            "provenant",
1179            "--json-pp",
1180            "scan.json",
1181            "--package-only",
1182            "samples",
1183        ])
1184        .expect("cli parse should succeed");
1185
1186        assert!(parsed.package_only);
1187    }
1188
1189    #[test]
1190    fn test_package_only_conflicts_with_upstream_incompatible_flags() {
1191        let with_license = Cli::try_parse_from([
1192            "provenant",
1193            "--json-pp",
1194            "scan.json",
1195            "--package-only",
1196            "--license",
1197            "samples",
1198        ]);
1199        assert!(with_license.is_err());
1200
1201        let with_package = Cli::try_parse_from([
1202            "provenant",
1203            "--json-pp",
1204            "scan.json",
1205            "--package-only",
1206            "--package",
1207            "samples",
1208        ]);
1209        assert!(with_package.is_err());
1210    }
1211
1212    #[test]
1213    fn test_parses_package_flag() {
1214        let parsed = Cli::try_parse_from([
1215            "provenant",
1216            "--json-pp",
1217            "scan.json",
1218            "--package",
1219            "samples",
1220        ])
1221        .expect("cli parse should succeed");
1222
1223        assert!(parsed.package);
1224    }
1225
1226    #[test]
1227    fn test_package_short_flag() {
1228        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-p", "samples"])
1229            .expect("cli parse should succeed");
1230
1231        assert!(parsed.package);
1232    }
1233
1234    #[test]
1235    fn test_parses_license_flag() {
1236        let parsed = Cli::try_parse_from([
1237            "provenant",
1238            "--json-pp",
1239            "scan.json",
1240            "--license",
1241            "samples",
1242        ])
1243        .expect("cli parse should succeed");
1244
1245        assert!(parsed.license);
1246    }
1247
1248    #[test]
1249    fn test_license_short_flag() {
1250        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-l", "samples"])
1251            .expect("cli parse should succeed");
1252
1253        assert!(parsed.license);
1254    }
1255
1256    #[test]
1257    fn test_license_text_requires_license() {
1258        let result = Cli::try_parse_from([
1259            "provenant",
1260            "--json-pp",
1261            "scan.json",
1262            "--license-text",
1263            "samples",
1264        ]);
1265        assert!(result.is_err());
1266    }
1267
1268    #[test]
1269    fn test_include_text_is_rejected() {
1270        let result = Cli::try_parse_from([
1271            "provenant",
1272            "--json-pp",
1273            "scan.json",
1274            "--license",
1275            "--include-text",
1276            "samples",
1277        ]);
1278
1279        assert!(result.is_err());
1280    }
1281
1282    #[test]
1283    fn test_license_text_diagnostics_requires_license_text() {
1284        let result = Cli::try_parse_from([
1285            "provenant",
1286            "--json-pp",
1287            "scan.json",
1288            "--license",
1289            "--license-text-diagnostics",
1290            "samples",
1291        ]);
1292
1293        assert!(result.is_err());
1294    }
1295
1296    #[test]
1297    fn test_parses_license_text_and_diagnostics_flags() {
1298        let parsed = Cli::try_parse_from([
1299            "provenant",
1300            "--json-pp",
1301            "scan.json",
1302            "--license",
1303            "--license-text",
1304            "--license-text-diagnostics",
1305            "--license-diagnostics",
1306            "--unknown-licenses",
1307            "samples",
1308        ])
1309        .expect("cli parse should succeed");
1310
1311        assert!(parsed.license_text);
1312        assert!(parsed.license_text_diagnostics);
1313        assert!(parsed.license_diagnostics);
1314        assert!(parsed.unknown_licenses);
1315        assert_eq!(parsed.license_score, 0);
1316        assert_eq!(parsed.license_url_template, DEFAULT_LICENSEDB_URL_TEMPLATE);
1317    }
1318
1319    #[test]
1320    fn test_license_score_requires_license() {
1321        let result = Cli::try_parse_from([
1322            "provenant",
1323            "--json-pp",
1324            "scan.json",
1325            "--license-score",
1326            "70",
1327            "samples",
1328        ]);
1329
1330        assert!(result.is_err());
1331    }
1332
1333    #[test]
1334    fn test_license_url_template_requires_license() {
1335        let result = Cli::try_parse_from([
1336            "provenant",
1337            "--json-pp",
1338            "scan.json",
1339            "--license-url-template",
1340            "https://example.com/licenses/{}/",
1341            "samples",
1342        ]);
1343
1344        assert!(result.is_err());
1345    }
1346
1347    #[test]
1348    fn test_parses_license_score_and_url_template_flags() {
1349        let parsed = Cli::try_parse_from([
1350            "provenant",
1351            "--json-pp",
1352            "scan.json",
1353            "--license",
1354            "--license-score",
1355            "70",
1356            "--license-url-template",
1357            "https://example.com/licenses/{}/",
1358            "samples",
1359        ])
1360        .expect("cli parse should succeed");
1361
1362        assert_eq!(parsed.license_score, 70);
1363        assert_eq!(
1364            parsed.license_url_template,
1365            "https://example.com/licenses/{}/"
1366        );
1367    }
1368
1369    #[test]
1370    fn test_rejects_license_score_above_range() {
1371        let result = Cli::try_parse_from([
1372            "provenant",
1373            "--json-pp",
1374            "scan.json",
1375            "--license",
1376            "--license-score",
1377            "101",
1378            "samples",
1379        ]);
1380
1381        assert!(result.is_err());
1382    }
1383
1384    #[test]
1385    fn test_license_references_requires_license() {
1386        let result = Cli::try_parse_from([
1387            "provenant",
1388            "--json-pp",
1389            "scan.json",
1390            "--license-references",
1391            "samples",
1392        ]);
1393
1394        assert!(result.is_err());
1395    }
1396
1397    #[test]
1398    fn test_parses_license_references_flag() {
1399        let parsed = Cli::try_parse_from([
1400            "provenant",
1401            "--json-pp",
1402            "scan.json",
1403            "--license",
1404            "--license-references",
1405            "samples",
1406        ])
1407        .expect("cli parse should succeed");
1408
1409        assert!(parsed.license_references);
1410    }
1411
1412    #[test]
1413    fn test_include_text_alias_is_not_supported() {
1414        let result = Cli::try_parse_from([
1415            "provenant",
1416            "--json-pp",
1417            "scan.json",
1418            "--license",
1419            "--include-text",
1420            "samples",
1421        ]);
1422
1423        assert!(result.is_err());
1424    }
1425
1426    #[test]
1427    fn test_parses_short_scan_flags() {
1428        let parsed = Cli::try_parse_from([
1429            "provenant",
1430            "--json-pp",
1431            "scan.json",
1432            "-c",
1433            "-e",
1434            "-u",
1435            "samples",
1436        ])
1437        .expect("cli parse should support short scan flags");
1438
1439        assert!(parsed.copyright);
1440        assert!(parsed.email);
1441        assert!(parsed.url);
1442    }
1443
1444    #[test]
1445    fn test_parses_processes_compat_values_zero_and_minus_one() {
1446        let zero =
1447            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "0", "samples"])
1448                .expect("cli parse should accept processes=0");
1449        assert_eq!(zero.processes, 0);
1450
1451        let parsed =
1452            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "-1", "samples"])
1453                .expect("cli parse should accept processes=-1");
1454        assert_eq!(parsed.processes, -1);
1455    }
1456
1457    #[test]
1458    fn test_parses_cache_flags() {
1459        let parsed = Cli::try_parse_from([
1460            "provenant",
1461            "--json-pp",
1462            "scan.json",
1463            "--cache-dir",
1464            "/tmp/sc-cache",
1465            "--cache-clear",
1466            "--max-in-memory",
1467            "5000",
1468            "samples",
1469        ])
1470        .expect("cli parse should accept cache flags");
1471
1472        assert_eq!(parsed.cache_dir.as_deref(), Some("/tmp/sc-cache"));
1473        assert!(parsed.cache_clear);
1474        assert!(!parsed.incremental);
1475        assert_eq!(parsed.max_in_memory, 5000);
1476    }
1477
1478    #[test]
1479    fn test_parses_incremental_flag() {
1480        let parsed = Cli::try_parse_from([
1481            "provenant",
1482            "--json-pp",
1483            "scan.json",
1484            "--incremental",
1485            "samples",
1486        ])
1487        .expect("cli parse should accept incremental flag");
1488
1489        assert!(parsed.incremental);
1490    }
1491
1492    #[test]
1493    fn test_max_in_memory_defaults_and_special_values() {
1494        let default_parsed =
1495            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1496                .expect("default max-in-memory should parse");
1497        assert_eq!(default_parsed.max_in_memory, 10000);
1498
1499        let disk_only = Cli::try_parse_from([
1500            "provenant",
1501            "--json-pp",
1502            "scan.json",
1503            "--max-in-memory",
1504            "-1",
1505            "samples",
1506        ])
1507        .expect("-1 should parse");
1508        assert_eq!(disk_only.max_in_memory, -1);
1509
1510        let unlimited = Cli::try_parse_from([
1511            "provenant",
1512            "--json-pp",
1513            "scan.json",
1514            "--max-in-memory",
1515            "0",
1516            "samples",
1517        ])
1518        .expect("0 should parse");
1519        assert_eq!(unlimited.max_in_memory, 0);
1520    }
1521
1522    #[test]
1523    fn test_max_in_memory_rejects_values_below_negative_one() {
1524        let result = Cli::try_parse_from([
1525            "provenant",
1526            "--json-pp",
1527            "scan.json",
1528            "--max-in-memory",
1529            "-2",
1530            "samples",
1531        ]);
1532
1533        assert!(result.is_err());
1534    }
1535
1536    #[test]
1537    fn test_max_depth_default_matches_reference_behavior() {
1538        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1539            .expect("cli parse should succeed");
1540
1541        assert_eq!(parsed.max_depth, 0);
1542    }
1543}