Skip to main content

provenant/cli/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod run;
5
6pub use run::run;
7
8use clap::{ArgGroup, Args, Parser, Subcommand};
9use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue};
10use std::ffi::OsString;
11use std::fs;
12#[cfg(test)]
13use std::ops::Deref;
14use std::path::{Path, PathBuf};
15use yaml_serde::Value as YamlValue;
16
17use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
18use crate::output::OutputFormat;
19use crate::scanner::MemoryMode;
20
/// Scheduling mode selected by the integer given to `--processes`.
///
/// The integer encoding (see [`ProcessMode::to_i32`]) is: a positive value for
/// [`ProcessMode::Parallel`], `0` for [`ProcessMode::SequentialWithTimeouts`] and `-1` for
/// [`ProcessMode::SequentialWithoutTimeouts`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessMode {
    /// Parallel mode carrying the requested process count.
    Parallel(usize),
    /// Encoded as `0`.
    SequentialWithTimeouts,
    /// Encoded as `-1`.
    SequentialWithoutTimeouts,
}

impl Default for ProcessMode {
    /// Returns `Parallel` with one less than the reported available parallelism, never below one.
    ///
    /// When the parallelism cannot be queried it is treated as `1`.
    fn default() -> Self {
        let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
        ProcessMode::Parallel(cpus.saturating_sub(1).max(1))
    }
}

impl ProcessMode {
    /// Default used for the `--processes` argument; identical to [`Default::default`].
    ///
    /// Kept as a named function so it can be referenced where a plain path is required.
    fn default_value() -> Self {
        Self::default()
    }

    /// Converts the mode back to its integer encoding.
    ///
    /// Process counts larger than `i32::MAX` saturate at `i32::MAX` rather than wrapping, so a
    /// huge `Parallel` count can never be mistaken for one of the non-positive sentinels.
    pub fn to_i32(self) -> i32 {
        match self {
            ProcessMode::Parallel(n) => i32::try_from(n).unwrap_or(i32::MAX),
            ProcessMode::SequentialWithTimeouts => 0,
            ProcessMode::SequentialWithoutTimeouts => -1,
        }
    }
}

impl std::fmt::Display for ProcessMode {
    /// Writes the integer encoding produced by [`ProcessMode::to_i32`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_i32())
    }
}
63
64fn parse_processes(value: &str) -> Result<ProcessMode, String> {
65    let parsed: i32 = value
66        .parse()
67        .map_err(|e| format!("invalid integer for --processes: {e}"))?;
68    if parsed > 0 {
69        Ok(ProcessMode::Parallel(
70            u32::try_from(parsed).unwrap() as usize
71        ))
72    } else if parsed == 0 {
73        Ok(ProcessMode::SequentialWithTimeouts)
74    } else {
75        Ok(ProcessMode::SequentialWithoutTimeouts)
76    }
77}
78
// Help epilogue explaining how to surface the pdf_oxide parser logs via RUST_LOG.
const PDF_OXIDE_LOG_HELP: &str = concat!(
    "Troubleshooting PDF parser logs:\n",
    "  Provenant suppresses noisy pdf_oxide logs by default.\n",
    "  To inspect raw pdf_oxide logs for debugging, rerun with RUST_LOG=pdf_oxide=warn (or =error).",
);
80
81fn parse_license_policy_arg(value: &str) -> Result<String, String> {
82    let policy_path = Path::new(value);
83    let metadata = fs::metadata(policy_path).map_err(|err| {
84        format!(
85            "Failed to read license policy file {:?}: {err}",
86            policy_path
87        )
88    })?;
89    if !metadata.is_file() {
90        return Err(format!(
91            "License policy path {:?} is not a regular file",
92            policy_path
93        ));
94    }
95
96    let policy_text = fs::read_to_string(policy_path).map_err(|err| {
97        format!(
98            "Failed to read license policy file {:?}: {err}",
99            policy_path
100        )
101    })?;
102    if policy_text.trim().is_empty() {
103        return Err(format!("License policy file {:?} is empty", policy_path));
104    }
105
106    let policy_value: YamlValue = yaml_serde::from_str(&policy_text).map_err(|err| {
107        format!(
108            "Failed to parse license policy file {:?}: {err}",
109            policy_path
110        )
111    })?;
112    let has_license_policies = policy_value
113        .as_mapping()
114        .and_then(|mapping| mapping.get(YamlValue::String("license_policies".to_string())))
115        .is_some();
116    if !has_license_policies {
117        return Err(format!(
118            "License policy file {:?} is missing a 'license_policies' attribute",
119            policy_path
120        ));
121    }
122
123    Ok(value.to_string())
124}
125
126#[derive(Parser, Debug)]
127#[command(
128    author = "The Provenant contributors",
129    version = crate::version::BUILD_VERSION,
130    long_version = crate::version::build_long_version(),
131    after_help = PDF_OXIDE_LOG_HELP,
132    about,
133    long_about = None,
134    arg_required_else_help = true,
135    subcommand_required = true
136)]
137pub struct Cli {
138    #[command(subcommand)]
139    pub command: Command,
140}
141
142#[derive(Subcommand, Debug, Clone)]
143pub enum Command {
144    /// Scan files or existing ScanCode-style JSON inputs.
145    Scan(Box<ScanArgs>),
146    /// Compare ScanCode and Provenant JSON outputs to review migration-confidence deltas.
147    Compare(CompareArgs),
148    /// Show attribution notices for embedded license detection data.
149    ShowAttribution,
150    /// Export the effective built-in license dataset to DIR and exit.
151    ExportLicenseDataset(ExportLicenseDatasetArgs),
152}
153
154#[derive(Args, Debug, Clone)]
155pub struct CompareArgs {
156    /// Path to an existing ScanCode JSON output file.
157    #[arg(long = "scancode-json", value_name = "PATH")]
158    pub scancode_json: PathBuf,
159
160    /// Path to an existing Provenant JSON output file.
161    #[arg(long = "provenant-json", value_name = "PATH")]
162    pub provenant_json: PathBuf,
163
164    /// Directory where comparison artifacts should be written. Defaults to a timestamped directory in the current working directory.
165    #[arg(long = "artifact-dir", value_name = "DIR")]
166    pub artifact_dir: Option<PathBuf>,
167}
168
169#[derive(Args, Debug, Clone)]
170pub struct ExportLicenseDatasetArgs {
171    #[arg(value_name = "DIR")]
172    pub dir: String,
173}
174
175#[derive(Args, Debug, Clone)]
176#[command(
177    group(
178        ArgGroup::new("output")
179            .required(true)
180            .multiple(true)
181            .args([
182                "output_json",
183                "output_json_pp",
184                "output_json_lines",
185                "output_yaml",
186                "output_debian",
187                "output_html",
188                "output_spdx_tv",
189                "output_spdx_rdf",
190                "output_cyclonedx",
191                "output_cyclonedx_xml",
192                "custom_output"
193            ])
194    ),
195    after_help = PDF_OXIDE_LOG_HELP
196)]
197pub struct ScanArgs {
198    /// File or directory paths to scan
199    #[arg(required = false)]
200    pub dir_path: Vec<String>,
201
202    /// Write scan output as compact JSON to FILE
203    #[arg(long = "json", value_name = "FILE", allow_hyphen_values = true)]
204    pub output_json: Option<String>,
205
206    /// Write scan output as pretty-printed JSON to FILE
207    #[arg(long = "json-pp", value_name = "FILE", allow_hyphen_values = true)]
208    pub output_json_pp: Option<String>,
209
210    /// Write scan output as JSON Lines to FILE
211    #[arg(long = "json-lines", value_name = "FILE", allow_hyphen_values = true)]
212    pub output_json_lines: Option<String>,
213
214    /// Write scan output as YAML to FILE
215    #[arg(long = "yaml", value_name = "FILE", allow_hyphen_values = true)]
216    pub output_yaml: Option<String>,
217
218    /// Write scan output in machine-readable Debian copyright format to FILE (requires --license, --copyright, and --license-text)
219    #[arg(
220        long = "debian",
221        value_name = "FILE",
222        allow_hyphen_values = true,
223        requires_all = ["copyright", "license", "license_text"]
224    )]
225    pub output_debian: Option<String>,
226
227    /// Write scan output as HTML report to FILE
228    #[arg(long = "html", value_name = "FILE", allow_hyphen_values = true)]
229    pub output_html: Option<String>,
230
231    /// Write scan output as SPDX tag/value to FILE
232    #[arg(long = "spdx-tv", value_name = "FILE", allow_hyphen_values = true)]
233    pub output_spdx_tv: Option<String>,
234
235    /// Write scan output as SPDX RDF/XML to FILE
236    #[arg(long = "spdx-rdf", value_name = "FILE", allow_hyphen_values = true)]
237    pub output_spdx_rdf: Option<String>,
238
239    /// Write scan output as CycloneDX JSON to FILE
240    #[arg(long = "cyclonedx", value_name = "FILE", allow_hyphen_values = true)]
241    pub output_cyclonedx: Option<String>,
242
243    /// Write scan output as CycloneDX XML to FILE
244    #[arg(
245        long = "cyclonedx-xml",
246        value_name = "FILE",
247        allow_hyphen_values = true
248    )]
249    pub output_cyclonedx_xml: Option<String>,
250
251    /// Write scan output to FILE formatted with the custom template
252    #[arg(
253        long = "custom-output",
254        value_name = "FILE",
255        requires = "custom_template",
256        allow_hyphen_values = true
257    )]
258    pub custom_output: Option<String>,
259
260    /// Use this template FILE with --custom-output
261    #[arg(
262        long = "custom-template",
263        value_name = "FILE",
264        requires = "custom_output"
265    )]
266    pub custom_template: Option<String>,
267
268    /// Maximum recursion depth (0 means no depth limit)
269    #[arg(short, long, default_value = "0")]
270    pub max_depth: usize,
271
272    #[arg(short = 'n', long, default_value_t = ProcessMode::default_value(), value_parser = parse_processes, allow_hyphen_values = true)]
273    pub processes: ProcessMode,
274
275    #[arg(long, default_value_t = 120.0)]
276    pub timeout: f64,
277
278    #[arg(short, long, conflicts_with = "verbose")]
279    pub quiet: bool,
280
281    #[arg(short, long, conflicts_with = "quiet")]
282    pub verbose: bool,
283
284    #[arg(long, conflicts_with = "full_root")]
285    pub strip_root: bool,
286
287    #[arg(long, conflicts_with = "strip_root")]
288    pub full_root: bool,
289
290    /// Exclude patterns (ScanCode-compatible alias: --ignore)
291    #[arg(long = "exclude", visible_alias = "ignore", value_delimiter = ',')]
292    pub exclude: Vec<String>,
293
294    /// Include files matching PATTERN. Use `**` when you want recursion across directories.
295    #[arg(long, value_delimiter = ',')]
296    pub include: Vec<String>,
297
298    /// Read selected scan paths from FILE (or '-' for stdin), relative to the explicit scan root.
299    #[arg(long = "paths-file", value_name = "FILE", allow_hyphen_values = true)]
300    pub paths_file: Vec<String>,
301
302    #[arg(long = "cache-dir", value_name = "PATH")]
303    pub cache_dir: Option<String>,
304
305    #[arg(long = "cache-clear")]
306    pub cache_clear: bool,
307
308    #[arg(long = "incremental")]
309    pub incremental: bool,
310
311    /// Maximum number of file and directory scan details kept in memory.
312    /// Use 0 for unlimited memory or -1 for disk-only spill during the scan.
313    #[arg(
314        long = "max-in-memory",
315        value_name = "INT",
316        default_value_t = MemoryMode::Limit(10000),
317        value_parser = parse_max_in_memory,
318        allow_hyphen_values = true
319    )]
320    pub max_in_memory: MemoryMode,
321
322    /// Collect file information such as checksums, type hints, and source/script flags.
323    #[arg(short = 'i', long)]
324    pub info: bool,
325
326    /// Load one or more existing ScanCode-style JSON scans instead of rescanning inputs.
327    #[arg(long)]
328    pub from_json: bool,
329
330    /// Scan input for application package and dependency manifests, lockfiles and related data
331    #[arg(short = 'p', long)]
332    pub package: bool,
333
334    /// Scan input for installed system package databases (RPM, dpkg, apk, etc.)
335    #[arg(long = "system-package")]
336    pub system_package: bool,
337
338    /// Scan supported compiled Go and Rust binaries for embedded package metadata.
339    #[arg(long = "package-in-compiled")]
340    pub package_in_compiled: bool,
341
342    /// Scan for system and application package data and skip license/copyright detection and top-level package creation.
343    #[arg(
344        long = "package-only",
345        conflicts_with_all = ["license", "summary", "package", "system_package"]
346    )]
347    pub package_only: bool,
348
349    /// Disable package assembly (merging related manifest/lockfiles into packages)
350    #[arg(long)]
351    pub no_assemble: bool,
352
353    /// Path to a custom license dataset root containing manifest.json, rules/, and licenses/.
354    /// If not specified, uses the built-in embedded license index.
355    #[arg(
356        long = "license-dataset-path",
357        value_name = "PATH",
358        requires = "license"
359    )]
360    pub license_dataset_path: Option<String>,
361
362    /// Force rebuild of the license index cache, ignoring any existing cache.
363    #[arg(long)]
364    pub reindex: bool,
365
366    /// Build the license index in memory for this run without reading or writing persistent cache files.
367    #[arg(long = "no-license-index-cache")]
368    pub no_license_index_cache: bool,
369
370    /// Include matched text in license detection output
371    #[arg(long = "license-text", requires = "license")]
372    pub license_text: bool,
373
374    #[arg(long = "license-text-diagnostics", requires = "license_text")]
375    pub license_text_diagnostics: bool,
376
377    #[arg(long = "license-diagnostics", requires = "license")]
378    pub license_diagnostics: bool,
379
380    #[arg(long = "unknown-licenses", requires = "license")]
381    pub unknown_licenses: bool,
382
383    #[arg(
384        long = "license-score",
385        default_value_t = 0,
386        requires = "license",
387        value_parser = clap::value_parser!(u8).range(0..=100)
388    )]
389    pub license_score: u8,
390
391    #[arg(
392        long = "license-url-template",
393        default_value = DEFAULT_LICENSEDB_URL_TEMPLATE,
394        requires = "license"
395    )]
396    pub license_url_template: String,
397
398    #[arg(long)]
399    pub filter_clues: bool,
400
401    #[arg(
402        long = "ignore-author",
403        value_name = "PATTERN",
404        help = "Ignore a file and all its findings if an author matches the regex PATTERN"
405    )]
406    pub ignore_author: Vec<String>,
407
408    #[arg(
409        long = "ignore-copyright-holder",
410        value_name = "PATTERN",
411        help = "Ignore a file and all its findings if a copyright holder matches the regex PATTERN"
412    )]
413    pub ignore_copyright_holder: Vec<String>,
414
415    #[arg(long)]
416    pub only_findings: bool,
417
418    #[arg(long, requires = "info")]
419    pub mark_source: bool,
420
421    #[arg(long)]
422    pub classify: bool,
423
424    #[arg(long, requires = "classify")]
425    pub summary: bool,
426
427    #[arg(long = "license-clarity-score", requires = "classify")]
428    pub license_clarity_score: bool,
429
430    #[arg(long = "license-references", requires = "license")]
431    pub license_references: bool,
432
433    /// Evaluate file license detections against a YAML license policy file.
434    #[arg(
435        long = "license-policy",
436        value_name = "FILE",
437        value_parser = parse_license_policy_arg
438    )]
439    pub license_policy: Option<String>,
440
441    #[arg(long)]
442    pub tallies: bool,
443
444    #[arg(long = "tallies-key-files", requires_all = ["tallies", "classify"])]
445    pub tallies_key_files: bool,
446
447    #[arg(long = "tallies-with-details")]
448    pub tallies_with_details: bool,
449
450    #[arg(long = "facet", value_name = "<facet>=<pattern>")]
451    pub facet: Vec<String>,
452
453    #[arg(long = "tallies-by-facet", requires_all = ["facet", "tallies"])]
454    pub tallies_by_facet: bool,
455
456    #[arg(long)]
457    pub generated: bool,
458
459    /// Scan input for licenses
460    #[arg(short = 'l', long)]
461    pub license: bool,
462
463    #[arg(short = 'c', long)]
464    pub copyright: bool,
465
466    /// Scan input for email addresses
467    #[arg(short = 'e', long)]
468    pub email: bool,
469
470    /// Report only up to INT emails found in a file. Use 0 for no limit.
471    #[arg(long, default_value_t = 50, requires = "email")]
472    pub max_email: usize,
473
474    /// Scan input for URLs
475    #[arg(short = 'u', long)]
476    pub url: bool,
477
478    /// Report only up to INT URLs found in a file. Use 0 for no limit.
479    #[arg(long, default_value_t = 50, requires = "url")]
480    pub max_url: usize,
481}
482
483impl Cli {
484    pub fn parse() -> Self {
485        <Self as Parser>::parse_from(rewrite_args_for_default_scan(std::env::args_os()))
486    }
487
488    pub fn try_parse_from<I, T>(itr: I) -> Result<Self, clap::Error>
489    where
490        I: IntoIterator<Item = T>,
491        T: Into<OsString>,
492    {
493        <Self as Parser>::try_parse_from(rewrite_args_for_default_scan(itr))
494    }
495
496    pub(crate) fn scan_args(&self) -> Option<&ScanArgs> {
497        match &self.command {
498            Command::Scan(scan_args) => Some(scan_args.as_ref()),
499            Command::Compare(_) | Command::ShowAttribution | Command::ExportLicenseDataset(_) => {
500                None
501            }
502        }
503    }
504}
505
/// Test-only convenience: lets tests read scan fields directly off a parsed [`Cli`].
///
/// # Panics
///
/// Dereferencing panics when the parsed subcommand is not `scan`.
#[cfg(test)]
impl Deref for Cli {
    type Target = ScanArgs;

    fn deref(&self) -> &Self::Target {
        match self.scan_args() {
            Some(scan) => scan,
            None => panic!("scan arguments are only available for the scan command"),
        }
    }
}
515
/// Makes `scan` the implicit subcommand.
///
/// The argument list is returned unchanged when it has no argument after the program name, or
/// when that first argument is a known subcommand name or one of the top-level help/version
/// flags. Otherwise `scan` is inserted right after the program name if the first argument
/// starts with `-` or names an existing filesystem path. Anything else is left untouched.
///
/// The existence check is performed on the original `OsString`, not on its lossy UTF-8
/// rendering, so existing paths whose names are not valid UTF-8 are recognised as well.
fn rewrite_args_for_default_scan<I, T>(itr: I) -> Vec<OsString>
where
    I: IntoIterator<Item = T>,
    T: Into<OsString>,
{
    let mut args: Vec<OsString> = itr.into_iter().map(Into::into).collect();

    let insert_scan = match args.get(1) {
        None => false,
        Some(first) => {
            // Lossy text is only used for comparisons against ASCII literals.
            let text = first.to_string_lossy();
            let handled_by_clap = matches!(
                text.as_ref(),
                "scan"
                    | "compare"
                    | "show-attribution"
                    | "export-license-dataset"
                    | "help"
                    | "-h"
                    | "--help"
                    | "-V"
                    | "--version"
            );
            !handled_by_clap && (text.starts_with('-') || Path::new(first).exists())
        }
    };

    if insert_scan {
        args.insert(1, OsString::from("scan"));
    }

    args
}
548
549fn parse_max_in_memory(value: &str) -> Result<MemoryMode, String> {
550    let parsed = value
551        .parse::<i64>()
552        .map_err(|_| format!("invalid integer value: {value}"))?;
553    if parsed < -1 {
554        return Err("--max-in-memory must be -1, 0, or a positive integer".to_string());
555    }
556    match parsed {
557        -1 => Ok(MemoryMode::StreamUnlimited),
558        0 => Ok(MemoryMode::CollectFirst),
559        n if n > 0 => Ok(MemoryMode::Limit(usize::try_from(n).unwrap_or(usize::MAX))),
560        _ => Ok(MemoryMode::CollectFirst),
561    }
562}
563
564#[derive(Debug, Clone)]
565pub struct OutputTarget {
566    pub format: OutputFormat,
567    pub file: String,
568    pub custom_template: Option<String>,
569}
570
571impl ScanArgs {
572    pub fn output_targets(&self) -> Vec<OutputTarget> {
573        let mut targets = Vec::new();
574
575        if let Some(file) = &self.output_json {
576            targets.push(OutputTarget {
577                format: OutputFormat::Json,
578                file: file.clone(),
579                custom_template: None,
580            });
581        }
582
583        if let Some(file) = &self.output_json_pp {
584            targets.push(OutputTarget {
585                format: OutputFormat::JsonPretty,
586                file: file.clone(),
587                custom_template: None,
588            });
589        }
590
591        if let Some(file) = &self.output_json_lines {
592            targets.push(OutputTarget {
593                format: OutputFormat::JsonLines,
594                file: file.clone(),
595                custom_template: None,
596            });
597        }
598
599        if let Some(file) = &self.output_yaml {
600            targets.push(OutputTarget {
601                format: OutputFormat::Yaml,
602                file: file.clone(),
603                custom_template: None,
604            });
605        }
606
607        if let Some(file) = &self.output_debian {
608            targets.push(OutputTarget {
609                format: OutputFormat::Debian,
610                file: file.clone(),
611                custom_template: None,
612            });
613        }
614
615        if let Some(file) = &self.output_html {
616            targets.push(OutputTarget {
617                format: OutputFormat::Html,
618                file: file.clone(),
619                custom_template: None,
620            });
621        }
622
623        if let Some(file) = &self.output_spdx_tv {
624            targets.push(OutputTarget {
625                format: OutputFormat::SpdxTv,
626                file: file.clone(),
627                custom_template: None,
628            });
629        }
630
631        if let Some(file) = &self.output_spdx_rdf {
632            targets.push(OutputTarget {
633                format: OutputFormat::SpdxRdf,
634                file: file.clone(),
635                custom_template: None,
636            });
637        }
638
639        if let Some(file) = &self.output_cyclonedx {
640            targets.push(OutputTarget {
641                format: OutputFormat::CycloneDxJson,
642                file: file.clone(),
643                custom_template: None,
644            });
645        }
646
647        if let Some(file) = &self.output_cyclonedx_xml {
648            targets.push(OutputTarget {
649                format: OutputFormat::CycloneDxXml,
650                file: file.clone(),
651                custom_template: None,
652            });
653        }
654
655        if let Some(file) = &self.custom_output {
656            targets.push(OutputTarget {
657                format: OutputFormat::CustomTemplate,
658                file: file.clone(),
659                custom_template: self.custom_template.clone(),
660            });
661        }
662
663        targets
664    }
665
666    pub fn output_header_options(&self) -> JsonMap<String, JsonValue> {
667        let mut options = JsonMap::new();
668        if !self.dir_path.is_empty() {
669            options.insert(
670                "input".to_string(),
671                JsonValue::Array(
672                    self.dir_path
673                        .iter()
674                        .cloned()
675                        .map(JsonValue::String)
676                        .collect(),
677                ),
678            );
679        }
680
681        let mut flags = Vec::new();
682
683        push_string_option(&mut flags, "--cache-dir", self.cache_dir.as_ref());
684        push_bool_option(&mut flags, "--cache-clear", self.cache_clear);
685        push_bool_option(&mut flags, "--classify", self.classify);
686        push_string_option(&mut flags, "--custom-output", self.custom_output.as_ref());
687        push_string_option(
688            &mut flags,
689            "--custom-template",
690            self.custom_template.as_ref(),
691        );
692        push_bool_option(&mut flags, "--copyright", self.copyright);
693        push_string_option(&mut flags, "--cyclonedx", self.output_cyclonedx.as_ref());
694        push_string_option(
695            &mut flags,
696            "--cyclonedx-xml",
697            self.output_cyclonedx_xml.as_ref(),
698        );
699        push_string_option(&mut flags, "--debian", self.output_debian.as_ref());
700        push_bool_option(&mut flags, "--email", self.email);
701        push_array_option(&mut flags, "--facet", &self.facet);
702        push_bool_option(&mut flags, "--filter-clues", self.filter_clues);
703        push_bool_option(&mut flags, "--from-json", self.from_json);
704        push_bool_option(&mut flags, "--full-root", self.full_root);
705        push_bool_option(&mut flags, "--generated", self.generated);
706        push_string_option(&mut flags, "--html", self.output_html.as_ref());
707        push_array_option(&mut flags, "--ignore", &self.exclude);
708        push_array_option(&mut flags, "--ignore-author", &self.ignore_author);
709        push_array_option(
710            &mut flags,
711            "--ignore-copyright-holder",
712            &self.ignore_copyright_holder,
713        );
714        push_bool_option(&mut flags, "--incremental", self.incremental);
715        push_array_option(&mut flags, "--include", &self.include);
716        push_bool_option(&mut flags, "--info", self.info);
717        push_string_option(&mut flags, "--json", self.output_json.as_ref());
718        push_string_option(&mut flags, "--json-lines", self.output_json_lines.as_ref());
719        push_string_option(&mut flags, "--json-pp", self.output_json_pp.as_ref());
720        push_bool_option(&mut flags, "--license", self.license);
721        push_bool_option(
722            &mut flags,
723            "--license-clarity-score",
724            self.license_clarity_score,
725        );
726        push_bool_option(
727            &mut flags,
728            "--license-diagnostics",
729            self.license_diagnostics,
730        );
731        push_string_option(
732            &mut flags,
733            "--license-dataset-path",
734            self.license_dataset_path.as_ref(),
735        );
736        push_string_option(&mut flags, "--license-policy", self.license_policy.as_ref());
737        push_bool_option(
738            &mut flags,
739            "--no-license-index-cache",
740            self.no_license_index_cache,
741        );
742        push_bool_option(&mut flags, "--license-references", self.license_references);
743        push_bool_option(&mut flags, "--reindex", self.reindex);
744        push_non_default_u8_option(&mut flags, "--license-score", self.license_score, 0);
745        push_bool_option(&mut flags, "--license-text", self.license_text);
746        push_bool_option(
747            &mut flags,
748            "--license-text-diagnostics",
749            self.license_text_diagnostics,
750        );
751        push_non_default_string_option(
752            &mut flags,
753            "--license-url-template",
754            &self.license_url_template,
755            DEFAULT_LICENSEDB_URL_TEMPLATE,
756        );
757        push_non_default_usize_option(&mut flags, "--max-depth", self.max_depth, 0);
758        match self.max_in_memory {
759            MemoryMode::Limit(10000) => {}
760            MemoryMode::CollectFirst => {
761                flags.push(("--max-in-memory".to_string(), JsonValue::Number(0.into())));
762            }
763            MemoryMode::StreamUnlimited => {
764                flags.push((
765                    "--max-in-memory".to_string(),
766                    JsonValue::Number((-1i64).into()),
767                ));
768            }
769            MemoryMode::Limit(n) => {
770                flags.push(("--max-in-memory".to_string(), JsonValue::Number(n.into())));
771            }
772        }
773        if self.email {
774            push_non_default_usize_option(&mut flags, "--max-email", self.max_email, 50);
775        }
776        if self.url {
777            push_non_default_usize_option(&mut flags, "--max-url", self.max_url, 50);
778        }
779        push_bool_option(&mut flags, "--mark-source", self.mark_source);
780        push_bool_option(&mut flags, "--no-assemble", self.no_assemble);
781        push_bool_option(&mut flags, "--only-findings", self.only_findings);
782        push_bool_option(&mut flags, "--package", self.package);
783        push_bool_option(
784            &mut flags,
785            "--package-in-compiled",
786            self.package_in_compiled,
787        );
788        push_bool_option(&mut flags, "--package-only", self.package_only);
789        push_array_option(&mut flags, "--paths-file", &self.paths_file);
790        push_non_default_process_mode_option(
791            &mut flags,
792            "--processes",
793            self.processes,
794            ProcessMode::default_value(),
795        );
796        push_bool_option(&mut flags, "--quiet", self.quiet);
797        push_string_option(&mut flags, "--spdx-rdf", self.output_spdx_rdf.as_ref());
798        push_string_option(&mut flags, "--spdx-tv", self.output_spdx_tv.as_ref());
799        push_bool_option(&mut flags, "--strip-root", self.strip_root);
800        push_bool_option(&mut flags, "--summary", self.summary);
801        push_bool_option(&mut flags, "--system-package", self.system_package);
802        push_bool_option(&mut flags, "--tallies", self.tallies);
803        push_bool_option(&mut flags, "--tallies-by-facet", self.tallies_by_facet);
804        push_bool_option(&mut flags, "--tallies-key-files", self.tallies_key_files);
805        push_bool_option(
806            &mut flags,
807            "--tallies-with-details",
808            self.tallies_with_details,
809        );
810        push_non_default_f64_option(&mut flags, "--timeout", self.timeout, 120.0);
811        push_bool_option(&mut flags, "--unknown-licenses", self.unknown_licenses);
812        push_bool_option(&mut flags, "--url", self.url);
813        push_bool_option(&mut flags, "--verbose", self.verbose);
814        push_string_option(&mut flags, "--yaml", self.output_yaml.as_ref());
815
816        flags.sort_by(|left, right| left.0.cmp(&right.0));
817        for (key, value) in flags {
818            options.insert(key, value);
819        }
820
821        options
822    }
823}
824
825fn push_bool_option(options: &mut Vec<(String, JsonValue)>, key: &str, enabled: bool) {
826    if enabled {
827        options.push((key.to_string(), JsonValue::Bool(true)));
828    }
829}
830
831fn push_string_option(options: &mut Vec<(String, JsonValue)>, key: &str, value: Option<&String>) {
832    if let Some(value) = value {
833        options.push((key.to_string(), JsonValue::String(value.clone())));
834    }
835}
836
837fn push_non_default_string_option(
838    options: &mut Vec<(String, JsonValue)>,
839    key: &str,
840    value: &str,
841    default: &str,
842) {
843    if value != default {
844        options.push((key.to_string(), JsonValue::String(value.to_string())));
845    }
846}
847
848fn push_array_option(options: &mut Vec<(String, JsonValue)>, key: &str, values: &[String]) {
849    if !values.is_empty() {
850        options.push((
851            key.to_string(),
852            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect()),
853        ));
854    }
855}
856
857fn push_non_default_usize_option(
858    options: &mut Vec<(String, JsonValue)>,
859    key: &str,
860    value: usize,
861    default: usize,
862) {
863    if value != default {
864        options.push((key.to_string(), JsonValue::Number(value.into())));
865    }
866}
867
868fn push_non_default_u8_option(
869    options: &mut Vec<(String, JsonValue)>,
870    key: &str,
871    value: u8,
872    default: u8,
873) {
874    if value != default {
875        options.push((key.to_string(), JsonValue::Number(value.into())));
876    }
877}
878
879fn push_non_default_process_mode_option(
880    options: &mut Vec<(String, JsonValue)>,
881    key: &str,
882    value: ProcessMode,
883    default: ProcessMode,
884) {
885    if value != default {
886        options.push((key.to_string(), JsonValue::Number(value.to_i32().into())));
887    }
888}
889
890fn push_non_default_f64_option(
891    options: &mut Vec<(String, JsonValue)>,
892    key: &str,
893    value: f64,
894    default: f64,
895) {
896    if (value - default).abs() > f64::EPSILON
897        && let Some(number) = JsonNumber::from_f64(value)
898    {
899        options.push((key.to_string(), JsonValue::Number(number)));
900    }
901}
902
903#[cfg(test)]
904mod tests {
905    use super::*;
906    use clap::CommandFactory;
907
908    fn scan_command() -> clap::Command {
909        Cli::command()
910            .find_subcommand("scan")
911            .expect("scan subcommand should exist")
912            .clone()
913    }
914
915    #[test]
916    fn test_requires_at_least_one_output_option() {
917        let parsed = Cli::try_parse_from(["provenant", "samples"]);
918        assert!(parsed.is_err());
919    }
920
921    #[test]
922    fn test_parses_json_pretty_output_option() {
923        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
924            .expect("cli parse should succeed");
925
926        assert_eq!(parsed.output_json_pp.as_deref(), Some("scan.json"));
927        assert_eq!(parsed.output_targets().len(), 1);
928        assert_eq!(parsed.output_targets()[0].format, OutputFormat::JsonPretty);
929    }
930
931    #[test]
932    fn test_explicit_scan_subcommand_parses_scan_flags() {
933        let parsed = Cli::try_parse_from([
934            "provenant",
935            "scan",
936            "--json-pp",
937            "scan.json",
938            "--license",
939            "samples",
940        ])
941        .expect("explicit scan subcommand should parse");
942
943        assert!(matches!(parsed.command, Command::Scan(_)));
944        let scan = parsed.scan_args().expect("scan args should be present");
945        assert_eq!(scan.output_json_pp.as_deref(), Some("scan.json"));
946        assert!(scan.license);
947        assert_eq!(scan.dir_path, vec!["samples"]);
948    }
949
950    #[test]
951    fn test_parses_compare_subcommand() {
952        let parsed = Cli::try_parse_from([
953            "provenant",
954            "compare",
955            "--scancode-json",
956            "scan-a.json",
957            "--provenant-json",
958            "scan-b.json",
959            "--artifact-dir",
960            "compare-out",
961        ])
962        .expect("compare subcommand should parse");
963
964        match parsed.command {
965            Command::Compare(args) => {
966                assert_eq!(args.scancode_json, PathBuf::from("scan-a.json"));
967                assert_eq!(args.provenant_json, PathBuf::from("scan-b.json"));
968                assert_eq!(args.artifact_dir, Some(PathBuf::from("compare-out")));
969            }
970            other => panic!("expected compare subcommand, got {other:?}"),
971        }
972    }
973
974    #[test]
975    fn test_compare_subcommand_allows_default_artifact_dir() {
976        let parsed = Cli::try_parse_from([
977            "provenant",
978            "compare",
979            "--scancode-json",
980            "scan-a.json",
981            "--provenant-json",
982            "scan-b.json",
983        ])
984        .expect("compare subcommand should allow default artifact dir");
985
986        match parsed.command {
987            Command::Compare(args) => {
988                assert_eq!(args.scancode_json, PathBuf::from("scan-a.json"));
989                assert_eq!(args.provenant_json, PathBuf::from("scan-b.json"));
990                assert!(args.artifact_dir.is_none());
991            }
992            other => panic!("expected compare subcommand, got {other:?}"),
993        }
994    }
995
996    #[test]
997    fn test_unknown_command_like_token_is_not_rewritten_to_scan() {
998        let parsed = Cli::try_parse_from([
999            "provenant",
1000            "future-command",
1001            "--json-pp",
1002            "scan.json",
1003            "samples",
1004        ]);
1005
1006        let error = parsed.expect_err("unknown command-like token should fail");
1007        assert!(
1008            error
1009                .to_string()
1010                .contains("unrecognized subcommand 'future-command'")
1011        );
1012    }
1013
1014    #[test]
1015    fn test_allows_multiple_output_options_in_one_run() {
1016        let parsed = Cli::try_parse_from([
1017            "provenant",
1018            "--json",
1019            "scan.json",
1020            "--html",
1021            "report.html",
1022            "samples",
1023        ])
1024        .expect("cli parse should allow multiple outputs");
1025
1026        assert_eq!(parsed.output_targets().len(), 2);
1027        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Json);
1028        assert_eq!(parsed.output_targets()[1].format, OutputFormat::Html);
1029    }
1030
1031    #[test]
1032    fn test_parses_show_attribution_subcommand() {
1033        let parsed = Cli::try_parse_from(["provenant", "show-attribution"])
1034            .expect("show-attribution subcommand should parse");
1035
1036        assert!(matches!(parsed.command, Command::ShowAttribution));
1037    }
1038
1039    #[test]
1040    fn test_legacy_show_attribution_flag_is_rejected() {
1041        let parsed = Cli::try_parse_from(["provenant", "--show-attribution"]);
1042        assert!(parsed.is_err());
1043    }
1044
1045    #[test]
1046    fn test_export_license_dataset_allows_mode_without_output_file() {
1047        let parsed = Cli::try_parse_from(["provenant", "export-license-dataset", "dataset-out"])
1048            .expect("cli parse should allow export mode without output flags");
1049
1050        match parsed.command {
1051            Command::ExportLicenseDataset(args) => assert_eq!(args.dir, "dataset-out"),
1052            other => panic!("expected export subcommand, got {other:?}"),
1053        }
1054    }
1055
1056    #[test]
1057    fn test_legacy_export_license_dataset_flag_is_rejected() {
1058        let parsed = Cli::try_parse_from(["provenant", "--export-license-dataset", "dataset-out"]);
1059        assert!(parsed.is_err());
1060    }
1061
1062    #[test]
1063    fn test_license_dataset_path_parses_for_license_scans() {
1064        let parsed = Cli::try_parse_from([
1065            "provenant",
1066            "--json-pp",
1067            "scan.json",
1068            "--license",
1069            "--license-dataset-path",
1070            "dataset-root",
1071            "samples",
1072        ])
1073        .expect("cli parse should accept custom license dataset flag");
1074
1075        assert_eq!(parsed.license_dataset_path.as_deref(), Some("dataset-root"));
1076    }
1077
1078    #[test]
1079    fn test_output_header_options_use_scancode_style_keys() {
1080        let parsed = Cli::try_parse_from([
1081            "provenant",
1082            "--json-pp",
1083            "scan.json",
1084            "--license",
1085            "--package",
1086            "--strip-root",
1087            "--paths-file",
1088            "changed-files.txt",
1089            "--ignore",
1090            "*.git*",
1091            "--ignore",
1092            "target/*",
1093            "samples",
1094        ])
1095        .expect("cli parse should succeed");
1096
1097        let options = parsed.output_header_options();
1098
1099        assert_eq!(
1100            options.get("input"),
1101            Some(&JsonValue::Array(vec![JsonValue::String(
1102                "samples".to_string()
1103            )]))
1104        );
1105        assert_eq!(
1106            options.get("--json-pp"),
1107            Some(&JsonValue::String("scan.json".to_string()))
1108        );
1109        assert_eq!(options.get("--license"), Some(&JsonValue::Bool(true)));
1110        assert_eq!(options.get("--package"), Some(&JsonValue::Bool(true)));
1111        assert_eq!(
1112            options.get("--paths-file"),
1113            Some(&JsonValue::Array(vec![JsonValue::String(
1114                "changed-files.txt".to_string()
1115            )]))
1116        );
1117        assert_eq!(options.get("--strip-root"), Some(&JsonValue::Bool(true)));
1118        assert_eq!(
1119            options.get("--ignore"),
1120            Some(&JsonValue::Array(vec![
1121                JsonValue::String("*.git*".to_string()),
1122                JsonValue::String("target/*".to_string()),
1123            ]))
1124        );
1125    }
1126
1127    #[test]
1128    fn test_output_header_options_include_license_dataset_path_when_set() {
1129        let parsed = Cli::try_parse_from([
1130            "provenant",
1131            "--json-pp",
1132            "scan.json",
1133            "--license",
1134            "--license-dataset-path",
1135            "dataset-root",
1136            "samples",
1137        ])
1138        .expect("cli parse should accept custom license dataset flag");
1139
1140        let options = parsed.output_header_options();
1141        assert_eq!(
1142            options.get("--license-dataset-path"),
1143            Some(&JsonValue::String("dataset-root".to_string()))
1144        );
1145    }
1146
1147    #[test]
1148    fn test_output_header_options_skip_defaults_and_include_non_defaults() {
1149        let default_options =
1150            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1151                .expect("default cli parse should succeed")
1152                .output_header_options();
1153        assert!(!default_options.contains_key("--timeout"));
1154        assert!(!default_options.contains_key("--processes"));
1155
1156        let custom_options = Cli::try_parse_from([
1157            "provenant",
1158            "--json-pp",
1159            "scan.json",
1160            "--timeout",
1161            "30",
1162            "--processes",
1163            "4",
1164            "samples",
1165        ])
1166        .expect("custom cli parse should succeed")
1167        .output_header_options();
1168
1169        assert_eq!(
1170            custom_options.get("--timeout"),
1171            Some(&JsonValue::Number(
1172                JsonNumber::from_f64(30.0).expect("valid number")
1173            ))
1174        );
1175        assert_eq!(
1176            custom_options.get("--processes"),
1177            Some(&JsonValue::Number(4.into()))
1178        );
1179    }
1180
1181    #[test]
1182    fn test_allows_stdout_dash_as_output_target() {
1183        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "-", "samples"])
1184            .expect("cli parse should allow stdout dash output target");
1185
1186        assert_eq!(parsed.output_json_pp.as_deref(), Some("-"));
1187    }
1188
1189    #[test]
1190    fn test_debian_requires_license_copyright_and_license_text() {
1191        let missing_license_text = Cli::try_parse_from([
1192            "provenant",
1193            "--debian",
1194            "scan.copyright",
1195            "--license",
1196            "--copyright",
1197            "samples",
1198        ]);
1199        assert!(missing_license_text.is_err());
1200
1201        let parsed = Cli::try_parse_from([
1202            "provenant",
1203            "--debian",
1204            "scan.copyright",
1205            "--license",
1206            "--copyright",
1207            "--license-text",
1208            "samples",
1209        ])
1210        .expect("cli parse should accept debian output");
1211
1212        assert_eq!(parsed.output_targets().len(), 1);
1213        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Debian);
1214        assert_eq!(parsed.output_debian.as_deref(), Some("scan.copyright"));
1215    }
1216
1217    #[test]
1218    fn test_debian_help_mentions_required_companion_flags() {
1219        let command = scan_command();
1220        let debian_arg = command
1221            .get_arguments()
1222            .find(|arg| arg.get_long() == Some("debian"))
1223            .expect("debian arg should exist");
1224
1225        let help = debian_arg
1226            .get_help()
1227            .expect("debian arg should have help text")
1228            .to_string();
1229
1230        assert!(help.contains("requires --license, --copyright, and --license-text"));
1231    }
1232
1233    #[test]
1234    fn test_scan_help_mentions_pdf_oxide_rust_log_escape_hatch() {
1235        let help = scan_command().render_help().to_string();
1236
1237        assert!(help.contains("RUST_LOG=pdf_oxide=warn"));
1238        assert!(help.contains("suppresses noisy pdf_oxide logs by default"));
1239    }
1240
1241    #[test]
1242    fn test_root_help_mentions_subcommands() {
1243        let help = Cli::command().render_help().to_string();
1244
1245        assert!(help.contains("scan"));
1246        assert!(help.contains("compare"));
1247        assert!(help.contains("show-attribution"));
1248        assert!(help.contains("export-license-dataset"));
1249    }
1250
1251    #[test]
1252    fn test_parses_license_policy_flag() {
1253        let temp = tempfile::tempdir().expect("temp dir");
1254        let policy_path = temp.path().join("policy.yml");
1255        std::fs::write(&policy_path, "license_policies: []\n").expect("policy written");
1256
1257        let parsed = Cli::try_parse_from([
1258            "provenant",
1259            "--json-pp",
1260            "scan.json",
1261            "--license-policy",
1262            policy_path.to_str().expect("utf8 path"),
1263            "samples",
1264        ])
1265        .expect("cli parse should accept license-policy");
1266
1267        assert_eq!(
1268            parsed.license_policy.as_deref(),
1269            Some(policy_path.to_str().expect("utf8 path"))
1270        );
1271    }
1272
1273    #[test]
1274    fn test_rejects_invalid_license_policy_flag_value() {
1275        let temp = tempfile::tempdir().expect("temp dir");
1276        let policy_path = temp.path().join("policy.yml");
1277        std::fs::write(&policy_path, "not_license_policies: []\n").expect("policy written");
1278
1279        let parsed = Cli::try_parse_from([
1280            "provenant",
1281            "--json-pp",
1282            "scan.json",
1283            "--license-policy",
1284            policy_path.to_str().expect("utf8 path"),
1285            "samples",
1286        ]);
1287
1288        assert!(parsed.is_err());
1289    }
1290
1291    #[test]
1292    fn test_custom_template_and_output_must_be_paired() {
1293        let missing_template =
1294            Cli::try_parse_from(["provenant", "--custom-output", "result.txt", "samples"]);
1295        assert!(missing_template.is_err());
1296
1297        let missing_output =
1298            Cli::try_parse_from(["provenant", "--custom-template", "tpl.tera", "samples"]);
1299        assert!(missing_output.is_err());
1300    }
1301
1302    #[test]
1303    fn test_parses_processes_and_timeout_options() {
1304        let parsed = Cli::try_parse_from([
1305            "provenant",
1306            "--json-pp",
1307            "scan.json",
1308            "-n",
1309            "4",
1310            "--timeout",
1311            "30",
1312            "samples",
1313        ])
1314        .expect("cli parse should succeed");
1315
1316        assert_eq!(parsed.processes, ProcessMode::Parallel(4));
1317        assert_eq!(parsed.timeout, 30.0);
1318    }
1319
1320    #[test]
1321    fn test_strip_root_conflicts_with_full_root() {
1322        let parsed = Cli::try_parse_from([
1323            "provenant",
1324            "--json-pp",
1325            "scan.json",
1326            "--strip-root",
1327            "--full-root",
1328            "samples",
1329        ]);
1330        assert!(parsed.is_err());
1331    }
1332
1333    #[test]
1334    fn test_parses_include_and_only_findings_and_filter_clues() {
1335        let parsed = Cli::try_parse_from([
1336            "provenant",
1337            "--json-pp",
1338            "scan.json",
1339            "--include",
1340            "src/**,Cargo.toml",
1341            "--only-findings",
1342            "--filter-clues",
1343            "samples",
1344        ])
1345        .expect("cli parse should succeed");
1346
1347        assert_eq!(parsed.include, vec!["src/**", "Cargo.toml"]);
1348        assert!(parsed.only_findings);
1349        assert!(parsed.filter_clues);
1350    }
1351
1352    #[test]
1353    fn test_parses_repeated_paths_file_flags_including_stdin_dash() {
1354        let parsed = Cli::try_parse_from([
1355            "provenant",
1356            "--json-pp",
1357            "scan.json",
1358            "--paths-file",
1359            "changed-files.txt",
1360            "--paths-file",
1361            "-",
1362            "samples",
1363        ])
1364        .expect("cli parse should accept repeated --paths-file flags");
1365
1366        assert_eq!(parsed.paths_file, vec!["changed-files.txt", "-"]);
1367    }
1368
1369    #[test]
1370    fn test_parses_ignore_author_and_holder_filters() {
1371        let parsed = Cli::try_parse_from([
1372            "provenant",
1373            "--json-pp",
1374            "scan.json",
1375            "--ignore-author",
1376            "Jane.*",
1377            "--ignore-author",
1378            ".*Bot$",
1379            "--ignore-copyright-holder",
1380            "Example Corp",
1381            "samples",
1382        ])
1383        .expect("cli parse should succeed");
1384
1385        assert_eq!(parsed.ignore_author, vec!["Jane.*", ".*Bot$"]);
1386        assert_eq!(parsed.ignore_copyright_holder, vec!["Example Corp"]);
1387    }
1388
1389    #[test]
1390    fn test_parses_ignore_alias_for_exclude_patterns() {
1391        let parsed = Cli::try_parse_from([
1392            "provenant",
1393            "--json-pp",
1394            "scan.json",
1395            "--ignore",
1396            "*.git*,target/*",
1397            "samples",
1398        ])
1399        .expect("cli parse should accept --ignore alias");
1400
1401        assert_eq!(parsed.exclude, vec!["*.git*", "target/*"]);
1402    }
1403
1404    #[test]
1405    fn test_quiet_conflicts_with_verbose() {
1406        let parsed = Cli::try_parse_from([
1407            "provenant",
1408            "--json-pp",
1409            "scan.json",
1410            "--quiet",
1411            "--verbose",
1412            "samples",
1413        ]);
1414        assert!(parsed.is_err());
1415    }
1416
1417    #[test]
1418    fn test_parses_from_json_and_mark_source() {
1419        let parsed = Cli::try_parse_from([
1420            "provenant",
1421            "--json-pp",
1422            "scan.json",
1423            "--from-json",
1424            "--info",
1425            "--mark-source",
1426            "sample-scan.json",
1427        ])
1428        .expect("cli parse should succeed");
1429
1430        assert!(parsed.from_json);
1431        assert!(parsed.info);
1432        assert_eq!(parsed.dir_path, vec!["sample-scan.json"]);
1433        assert!(parsed.mark_source);
1434    }
1435
1436    #[test]
1437    fn test_mark_source_requires_info() {
1438        let parsed = Cli::try_parse_from([
1439            "provenant",
1440            "--json-pp",
1441            "scan.json",
1442            "--mark-source",
1443            "samples",
1444        ]);
1445
1446        assert!(parsed.is_err());
1447    }
1448
1449    #[test]
1450    fn test_parses_classify_facet_and_tallies_by_facet() {
1451        let parsed = Cli::try_parse_from([
1452            "provenant",
1453            "--json-pp",
1454            "scan.json",
1455            "--classify",
1456            "--tallies",
1457            "--facet",
1458            "dev=*.c",
1459            "--facet",
1460            "tests=*/tests/*",
1461            "--tallies-by-facet",
1462            "samples",
1463        ])
1464        .expect("cli parse should succeed");
1465
1466        assert!(parsed.classify);
1467        assert!(parsed.tallies);
1468        assert_eq!(parsed.facet, vec!["dev=*.c", "tests=*/tests/*"]);
1469        assert!(parsed.tallies_by_facet);
1470    }
1471
1472    #[test]
1473    fn test_tallies_by_facet_requires_facet_definitions() {
1474        let parsed = Cli::try_parse_from([
1475            "provenant",
1476            "--json-pp",
1477            "scan.json",
1478            "--tallies-by-facet",
1479            "samples",
1480        ]);
1481
1482        assert!(parsed.is_err());
1483    }
1484
1485    #[test]
1486    fn test_summary_requires_classify() {
1487        let parsed = Cli::try_parse_from([
1488            "provenant",
1489            "--json-pp",
1490            "scan.json",
1491            "--summary",
1492            "samples",
1493        ]);
1494
1495        assert!(parsed.is_err());
1496    }
1497
1498    #[test]
1499    fn test_tallies_key_files_requires_tallies_and_classify() {
1500        let parsed = Cli::try_parse_from([
1501            "provenant",
1502            "--json-pp",
1503            "scan.json",
1504            "--tallies-key-files",
1505            "samples",
1506        ]);
1507
1508        assert!(parsed.is_err());
1509    }
1510
1511    #[test]
1512    fn test_parses_summary_tallies_and_generated_flags() {
1513        let parsed = Cli::try_parse_from([
1514            "provenant",
1515            "--json-pp",
1516            "scan.json",
1517            "--classify",
1518            "--summary",
1519            "--license-clarity-score",
1520            "--tallies",
1521            "--tallies-key-files",
1522            "--tallies-with-details",
1523            "--generated",
1524            "samples",
1525        ])
1526        .expect("cli parse should succeed");
1527
1528        assert!(parsed.classify);
1529        assert!(parsed.summary);
1530        assert!(parsed.license_clarity_score);
1531        assert!(parsed.tallies);
1532        assert!(parsed.tallies_key_files);
1533        assert!(parsed.tallies_with_details);
1534        assert!(parsed.generated);
1535    }
1536
1537    #[test]
1538    fn test_parses_copyright_flag() {
1539        let parsed = Cli::try_parse_from([
1540            "provenant",
1541            "--json-pp",
1542            "scan.json",
1543            "--copyright",
1544            "samples",
1545        ])
1546        .expect("cli parse should succeed");
1547
1548        assert!(parsed.copyright);
1549    }
1550
1551    #[test]
1552    fn test_package_flag_defaults_to_disabled() {
1553        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1554            .expect("cli parse should succeed");
1555
1556        assert!(!parsed.package);
1557    }
1558
1559    #[test]
1560    fn test_parses_system_package_flag() {
1561        let parsed = Cli::try_parse_from([
1562            "provenant",
1563            "--json-pp",
1564            "scan.json",
1565            "--system-package",
1566            "samples",
1567        ])
1568        .expect("cli parse should succeed");
1569
1570        assert!(parsed.system_package);
1571    }
1572
1573    #[test]
1574    fn test_parses_package_in_compiled_flag() {
1575        let parsed = Cli::try_parse_from([
1576            "provenant",
1577            "--json-pp",
1578            "scan.json",
1579            "--package-in-compiled",
1580            "samples",
1581        ])
1582        .expect("cli parse should succeed");
1583
1584        assert!(parsed.package_in_compiled);
1585    }
1586
1587    #[test]
1588    fn test_parses_package_only_flag() {
1589        let parsed = Cli::try_parse_from([
1590            "provenant",
1591            "--json-pp",
1592            "scan.json",
1593            "--package-only",
1594            "samples",
1595        ])
1596        .expect("cli parse should succeed");
1597
1598        assert!(parsed.package_only);
1599    }
1600
1601    #[test]
1602    fn test_package_only_conflicts_with_upstream_incompatible_flags() {
1603        let with_license = Cli::try_parse_from([
1604            "provenant",
1605            "--json-pp",
1606            "scan.json",
1607            "--package-only",
1608            "--license",
1609            "samples",
1610        ]);
1611        assert!(with_license.is_err());
1612
1613        let with_package = Cli::try_parse_from([
1614            "provenant",
1615            "--json-pp",
1616            "scan.json",
1617            "--package-only",
1618            "--package",
1619            "samples",
1620        ]);
1621        assert!(with_package.is_err());
1622    }
1623
1624    #[test]
1625    fn test_parses_package_flag() {
1626        let parsed = Cli::try_parse_from([
1627            "provenant",
1628            "--json-pp",
1629            "scan.json",
1630            "--package",
1631            "samples",
1632        ])
1633        .expect("cli parse should succeed");
1634
1635        assert!(parsed.package);
1636    }
1637
1638    #[test]
1639    fn test_package_short_flag() {
1640        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-p", "samples"])
1641            .expect("cli parse should succeed");
1642
1643        assert!(parsed.package);
1644    }
1645
1646    #[test]
1647    fn test_parses_license_flag() {
1648        let parsed = Cli::try_parse_from([
1649            "provenant",
1650            "--json-pp",
1651            "scan.json",
1652            "--license",
1653            "samples",
1654        ])
1655        .expect("cli parse should succeed");
1656
1657        assert!(parsed.license);
1658    }
1659
1660    #[test]
1661    fn test_license_short_flag() {
1662        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-l", "samples"])
1663            .expect("cli parse should succeed");
1664
1665        assert!(parsed.license);
1666    }
1667
1668    #[test]
1669    fn test_license_text_requires_license() {
1670        let result = Cli::try_parse_from([
1671            "provenant",
1672            "--json-pp",
1673            "scan.json",
1674            "--license-text",
1675            "samples",
1676        ]);
1677        assert!(result.is_err());
1678    }
1679
1680    #[test]
1681    fn test_include_text_is_rejected() {
1682        let result = Cli::try_parse_from([
1683            "provenant",
1684            "--json-pp",
1685            "scan.json",
1686            "--license",
1687            "--include-text",
1688            "samples",
1689        ]);
1690
1691        assert!(result.is_err());
1692    }
1693
1694    #[test]
1695    fn test_license_text_diagnostics_requires_license_text() {
1696        let result = Cli::try_parse_from([
1697            "provenant",
1698            "--json-pp",
1699            "scan.json",
1700            "--license",
1701            "--license-text-diagnostics",
1702            "samples",
1703        ]);
1704
1705        assert!(result.is_err());
1706    }
1707
1708    #[test]
1709    fn test_parses_license_text_and_diagnostics_flags() {
1710        let parsed = Cli::try_parse_from([
1711            "provenant",
1712            "--json-pp",
1713            "scan.json",
1714            "--license",
1715            "--license-text",
1716            "--license-text-diagnostics",
1717            "--license-diagnostics",
1718            "--unknown-licenses",
1719            "samples",
1720        ])
1721        .expect("cli parse should succeed");
1722
1723        assert!(parsed.license_text);
1724        assert!(parsed.license_text_diagnostics);
1725        assert!(parsed.license_diagnostics);
1726        assert!(parsed.unknown_licenses);
1727        assert_eq!(parsed.license_score, 0);
1728        assert_eq!(parsed.license_url_template, DEFAULT_LICENSEDB_URL_TEMPLATE);
1729    }
1730
1731    #[test]
1732    fn test_license_score_requires_license() {
1733        let result = Cli::try_parse_from([
1734            "provenant",
1735            "--json-pp",
1736            "scan.json",
1737            "--license-score",
1738            "70",
1739            "samples",
1740        ]);
1741
1742        assert!(result.is_err());
1743    }
1744
1745    #[test]
1746    fn test_license_url_template_requires_license() {
1747        let result = Cli::try_parse_from([
1748            "provenant",
1749            "--json-pp",
1750            "scan.json",
1751            "--license-url-template",
1752            "https://example.com/licenses/{}/",
1753            "samples",
1754        ]);
1755
1756        assert!(result.is_err());
1757    }
1758
1759    #[test]
1760    fn test_parses_license_score_and_url_template_flags() {
1761        let parsed = Cli::try_parse_from([
1762            "provenant",
1763            "--json-pp",
1764            "scan.json",
1765            "--license",
1766            "--license-score",
1767            "70",
1768            "--license-url-template",
1769            "https://example.com/licenses/{}/",
1770            "samples",
1771        ])
1772        .expect("cli parse should succeed");
1773
1774        assert_eq!(parsed.license_score, 70);
1775        assert_eq!(
1776            parsed.license_url_template,
1777            "https://example.com/licenses/{}/"
1778        );
1779    }
1780
1781    #[test]
1782    fn test_rejects_license_score_above_range() {
1783        let result = Cli::try_parse_from([
1784            "provenant",
1785            "--json-pp",
1786            "scan.json",
1787            "--license",
1788            "--license-score",
1789            "101",
1790            "samples",
1791        ]);
1792
1793        assert!(result.is_err());
1794    }
1795
1796    #[test]
1797    fn test_license_references_requires_license() {
1798        let result = Cli::try_parse_from([
1799            "provenant",
1800            "--json-pp",
1801            "scan.json",
1802            "--license-references",
1803            "samples",
1804        ]);
1805
1806        assert!(result.is_err());
1807    }
1808
1809    #[test]
1810    fn test_parses_license_references_flag() {
1811        let parsed = Cli::try_parse_from([
1812            "provenant",
1813            "--json-pp",
1814            "scan.json",
1815            "--license",
1816            "--license-references",
1817            "samples",
1818        ])
1819        .expect("cli parse should succeed");
1820
1821        assert!(parsed.license_references);
1822    }
1823
1824    #[test]
1825    fn test_include_text_alias_is_not_supported() {
1826        let result = Cli::try_parse_from([
1827            "provenant",
1828            "--json-pp",
1829            "scan.json",
1830            "--license",
1831            "--include-text",
1832            "samples",
1833        ]);
1834
1835        assert!(result.is_err());
1836    }
1837
1838    #[test]
1839    fn test_parses_short_scan_flags() {
1840        let parsed = Cli::try_parse_from([
1841            "provenant",
1842            "--json-pp",
1843            "scan.json",
1844            "-c",
1845            "-e",
1846            "-u",
1847            "samples",
1848        ])
1849        .expect("cli parse should support short scan flags");
1850
1851        assert!(parsed.copyright);
1852        assert!(parsed.email);
1853        assert!(parsed.url);
1854    }
1855
1856    #[test]
1857    fn test_parses_processes_compat_values_zero_and_minus_one() {
1858        let zero =
1859            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "0", "samples"])
1860                .expect("cli parse should accept processes=0");
1861        assert_eq!(zero.processes, ProcessMode::SequentialWithTimeouts);
1862
1863        let parsed =
1864            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "-1", "samples"])
1865                .expect("cli parse should accept processes=-1");
1866        assert_eq!(parsed.processes, ProcessMode::SequentialWithoutTimeouts);
1867    }
1868
1869    #[test]
1870    fn test_parses_cache_flags() {
1871        let parsed = Cli::try_parse_from([
1872            "provenant",
1873            "--json-pp",
1874            "scan.json",
1875            "--cache-dir",
1876            "/tmp/sc-cache",
1877            "--cache-clear",
1878            "--max-in-memory",
1879            "5000",
1880            "samples",
1881        ])
1882        .expect("cli parse should accept cache flags");
1883
1884        assert_eq!(parsed.cache_dir.as_deref(), Some("/tmp/sc-cache"));
1885        assert!(parsed.cache_clear);
1886        assert!(!parsed.incremental);
1887        assert_eq!(parsed.max_in_memory, MemoryMode::Limit(5000));
1888    }
1889
1890    #[test]
1891    fn test_parses_incremental_flag() {
1892        let parsed = Cli::try_parse_from([
1893            "provenant",
1894            "--json-pp",
1895            "scan.json",
1896            "--incremental",
1897            "samples",
1898        ])
1899        .expect("cli parse should accept incremental flag");
1900
1901        assert!(parsed.incremental);
1902    }
1903
1904    #[test]
1905    fn test_parses_license_cache_control_flags() {
1906        let parsed = Cli::try_parse_from([
1907            "provenant",
1908            "--json-pp",
1909            "scan.json",
1910            "--license",
1911            "--reindex",
1912            "--no-license-index-cache",
1913            "samples",
1914        ])
1915        .expect("cli parse should accept license cache flags");
1916
1917        assert!(parsed.license);
1918        assert!(parsed.reindex);
1919        assert!(parsed.no_license_index_cache);
1920    }
1921
1922    #[test]
1923    fn test_max_in_memory_defaults_and_special_values() {
1924        let default_parsed =
1925            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1926                .expect("default max-in-memory should parse");
1927        assert_eq!(default_parsed.max_in_memory, MemoryMode::Limit(10000));
1928
1929        let disk_only = Cli::try_parse_from([
1930            "provenant",
1931            "--json-pp",
1932            "scan.json",
1933            "--max-in-memory",
1934            "-1",
1935            "samples",
1936        ])
1937        .expect("-1 should parse");
1938        assert_eq!(disk_only.max_in_memory, MemoryMode::StreamUnlimited);
1939
1940        let unlimited = Cli::try_parse_from([
1941            "provenant",
1942            "--json-pp",
1943            "scan.json",
1944            "--max-in-memory",
1945            "0",
1946            "samples",
1947        ])
1948        .expect("0 should parse");
1949        assert_eq!(unlimited.max_in_memory, MemoryMode::CollectFirst);
1950    }
1951
1952    #[test]
1953    fn test_max_in_memory_rejects_values_below_negative_one() {
1954        let result = Cli::try_parse_from([
1955            "provenant",
1956            "--json-pp",
1957            "scan.json",
1958            "--max-in-memory",
1959            "-2",
1960            "samples",
1961        ]);
1962
1963        assert!(result.is_err());
1964    }
1965
1966    #[test]
1967    fn test_max_depth_default_matches_reference_behavior() {
1968        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1969            .expect("cli parse should succeed");
1970
1971        assert_eq!(parsed.max_depth, 0);
1972    }
1973}