Skip to main content

provenant/cli/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod run;
5
6pub use run::run;
7
8use clap::{ArgGroup, Args, Parser, Subcommand};
9use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue};
10use std::ffi::OsString;
11use std::fs;
12#[cfg(test)]
13use std::ops::Deref;
14use std::path::{Path, PathBuf};
15use yaml_serde::Value as YamlValue;
16
17use crate::app::request::{InputMode, OutputTarget, ScanRequest};
18use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
19use crate::output::OutputFormat;
20use crate::scanner::MemoryMode;
21
/// Scheduling mode selected by the `--processes` option.
///
/// The command-line integer encoding is defined by `parse_processes`
/// (text to mode) and `ProcessMode::to_i32` (mode to integer).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProcessMode {
    /// Produced by a positive `--processes` value; carries that value.
    Parallel(usize),
    /// Produced by `--processes 0`.
    SequentialWithTimeouts,
    /// Produced by any negative `--processes` value; formatted back as `-1`.
    SequentialWithoutTimeouts,
}
28
29impl Default for ProcessMode {
30    fn default() -> Self {
31        let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
32        if cpus > 1 {
33            ProcessMode::Parallel(cpus - 1)
34        } else {
35            ProcessMode::Parallel(1)
36        }
37    }
38}
39
40impl ProcessMode {
41    fn default_value() -> Self {
42        let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
43        if cpus > 1 {
44            ProcessMode::Parallel(cpus - 1)
45        } else {
46            ProcessMode::Parallel(1)
47        }
48    }
49
50    pub fn to_i32(self) -> i32 {
51        match self {
52            ProcessMode::Parallel(n) => n as i32,
53            ProcessMode::SequentialWithTimeouts => 0,
54            ProcessMode::SequentialWithoutTimeouts => -1,
55        }
56    }
57}
58
59impl std::fmt::Display for ProcessMode {
60    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
61        write!(f, "{}", self.to_i32())
62    }
63}
64
65fn parse_processes(value: &str) -> Result<ProcessMode, String> {
66    let parsed: i32 = value
67        .parse()
68        .map_err(|e| format!("invalid integer for --processes: {e}"))?;
69    if parsed > 0 {
70        Ok(ProcessMode::Parallel(
71            u32::try_from(parsed).unwrap() as usize
72        ))
73    } else if parsed == 0 {
74        Ok(ProcessMode::SequentialWithTimeouts)
75    } else {
76        Ok(ProcessMode::SequentialWithoutTimeouts)
77    }
78}
79
/// Footer text passed to clap as `after_help` on both `Cli` and `ScanArgs`.
const PDF_OXIDE_LOG_HELP: &str = concat!(
    "Troubleshooting PDF parser logs:\n",
    "  Provenant suppresses noisy pdf_oxide logs by default.\n",
    "  To inspect raw pdf_oxide logs for debugging, rerun with RUST_LOG=pdf_oxide=warn (or =error).",
);
81
82fn parse_license_policy_arg(value: &str) -> Result<String, String> {
83    let policy_path = Path::new(value);
84    let metadata = fs::metadata(policy_path).map_err(|err| {
85        format!(
86            "Failed to read license policy file {:?}: {err}",
87            policy_path
88        )
89    })?;
90    if !metadata.is_file() {
91        return Err(format!(
92            "License policy path {:?} is not a regular file",
93            policy_path
94        ));
95    }
96
97    let policy_text = fs::read_to_string(policy_path).map_err(|err| {
98        format!(
99            "Failed to read license policy file {:?}: {err}",
100            policy_path
101        )
102    })?;
103    if policy_text.trim().is_empty() {
104        return Err(format!("License policy file {:?} is empty", policy_path));
105    }
106
107    let policy_value: YamlValue = yaml_serde::from_str(&policy_text).map_err(|err| {
108        format!(
109            "Failed to parse license policy file {:?}: {err}",
110            policy_path
111        )
112    })?;
113    let has_license_policies = policy_value
114        .as_mapping()
115        .and_then(|mapping| mapping.get(YamlValue::String("license_policies".to_string())))
116        .is_some();
117    if !has_license_policies {
118        return Err(format!(
119            "License policy file {:?} is missing a 'license_policies' attribute",
120            policy_path
121        ));
122    }
123
124    Ok(value.to_string())
125}
126
// Top-level clap parser; it holds nothing but the selected subcommand.
//
// `arg_required_else_help` and `subcommand_required` are both enabled, so a
// bare invocation prints help and every real invocation must name a
// subcommand. `Cli::parse` / `Cli::try_parse_from` run the raw argument list
// through `rewrite_args_for_default_scan` before clap sees it.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap derive
// items feed the generated help text.
127#[derive(Parser, Debug)]
128#[command(
129    author = "The Provenant contributors",
130    version = crate::version::BUILD_VERSION,
131    long_version = crate::version::build_long_version(),
132    after_help = PDF_OXIDE_LOG_HELP,
133    about,
134    long_about = None,
135    arg_required_else_help = true,
136    subcommand_required = true
137)]
138pub struct Cli {
139    #[command(subcommand)]
140    pub command: Command,
141}
142
// Subcommands of the CLI. The `///` lines below are the help text clap shows
// for each subcommand, so they are left untouched.
//
// `rewrite_args_for_default_scan` keeps its own list of subcommand names
// ("scan", "serve", "compare", "show-attribution", "export-license-dataset");
// a variant added here needs to be added there as well.
//
// NOTE(review): `Scan` boxes its payload, presumably because `ScanArgs` is
// much larger than the other variants — confirm.
143#[derive(Subcommand, Debug, Clone)]
144pub enum Command {
145    /// Scan files or existing ScanCode-style JSON inputs.
146    Scan(Box<ScanArgs>),
147    /// Run the long-lived HTTP service.
148    Serve(ServeArgs),
149    /// Compare ScanCode and Provenant JSON outputs to review migration-confidence deltas.
150    Compare(CompareArgs),
151    /// Show attribution notices for embedded license detection data.
152    ShowAttribution,
153    /// Export the effective built-in license dataset to DIR and exit.
154    ExportLicenseDataset(ExportLicenseDatasetArgs),
155}
156
// Arguments of the `compare` subcommand.
//
// The two input paths are non-optional fields, so clap requires both flags;
// `--artifact-dir` is optional (`Option<PathBuf>`).
157#[derive(Args, Debug, Clone)]
158pub struct CompareArgs {
159    /// Path to an existing ScanCode JSON output file.
160    #[arg(long = "scancode-json", value_name = "PATH")]
161    pub scancode_json: PathBuf,
162
163    /// Path to an existing Provenant JSON output file.
164    #[arg(long = "provenant-json", value_name = "PATH")]
165    pub provenant_json: PathBuf,
166
167    /// Directory where comparison artifacts should be written. Defaults to a timestamped directory in the current working directory.
168    #[arg(long = "artifact-dir", value_name = "DIR")]
169    pub artifact_dir: Option<PathBuf>,
170}
171
// Arguments of the `export-license-dataset` subcommand.
//
// `dir` has no `long`/`short`, so it is a required positional argument shown
// as DIR in help.
// NOTE(review): this path is kept as a `String`, whereas `CompareArgs` uses
// `PathBuf` for its paths.
172#[derive(Args, Debug, Clone)]
173pub struct ExportLicenseDatasetArgs {
174    #[arg(value_name = "DIR")]
175    pub dir: String,
176}
177
// Accepted values of `--compat-mode` (alias `--compat`).
//
// `Native` is the default twice over: via `#[default]` here and via
// `default_value_t` on the flag in `ScanArgs`. `CompatibilityMode::as_str`
// maps the variants to "native" and "scancode".
//
// Plain `//` comments are used because `///` on a `ValueEnum` variant is
// picked up by clap as help text for that value.
178#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Default)]
179pub enum CompatibilityMode {
180    #[default]
181    Native,
182    Scancode,
183}
184
185impl CompatibilityMode {
186    fn as_str(self) -> &'static str {
187        match self {
188            Self::Native => "native",
189            Self::Scancode => "scancode",
190        }
191    }
192}
193
// Arguments of the `serve` subcommand.
//
// `--bind` is kept as the raw string from the command line (default
// "127.0.0.1:8080"); nothing in this struct parses or validates it.
194#[derive(Args, Debug, Clone)]
195pub struct ServeArgs {
196    /// Bind the service shell to HOST:PORT.
197    #[arg(long = "bind", value_name = "ADDR", default_value = "127.0.0.1:8080")]
198    pub bind: String,
199}
200
// Arguments of the `scan` subcommand.
//
// The `output` group is `required` and `multiple`: at least one output option
// must be given, and several may be combined in a single run.
//
// Comments added here are plain `//` on purpose. On a clap derive struct,
// `///` doc comments become the option's help text, so the existing `///`
// lines are runtime strings and fields without one currently show no
// description in `--help`.
201#[derive(Args, Debug, Clone)]
202#[command(
203    group(
204        ArgGroup::new("output")
205            .required(true)
206            .multiple(true)
207            .args([
208                "output_json",
209                "output_json_pp",
210                "output_json_lines",
211                "output_yaml",
212                "output_debian",
213                "output_html",
214                "output_spdx_tv",
215                "output_spdx_rdf",
216                "output_cyclonedx",
217                "output_cyclonedx_xml",
218                "custom_output"
219            ])
220    ),
221    after_help = PDF_OXIDE_LOG_HELP
222)]
223pub struct ScanArgs {
224    /// File or directory paths to scan
225    #[arg(required = false)]
226    pub dir_path: Vec<String>,
227
    // ---- Output destinations (members of the required `output` group) ----
    // `allow_hyphen_values` lets a FILE value start with '-'.
228    /// Write scan output as compact JSON to FILE
229    #[arg(long = "json", value_name = "FILE", allow_hyphen_values = true)]
230    pub output_json: Option<String>,
231
232    /// Write scan output as pretty-printed JSON to FILE
233    #[arg(long = "json-pp", value_name = "FILE", allow_hyphen_values = true)]
234    pub output_json_pp: Option<String>,
235
236    /// Write scan output as JSON Lines to FILE
237    #[arg(long = "json-lines", value_name = "FILE", allow_hyphen_values = true)]
238    pub output_json_lines: Option<String>,
239
240    /// Write scan output as YAML to FILE
241    #[arg(long = "yaml", value_name = "FILE", allow_hyphen_values = true)]
242    pub output_yaml: Option<String>,
243
244    /// Write scan output in machine-readable Debian copyright format to FILE (requires --license, --copyright, and --license-text)
245    #[arg(
246        long = "debian",
247        value_name = "FILE",
248        allow_hyphen_values = true,
249        requires_all = ["copyright", "license", "license_text"]
250    )]
251    pub output_debian: Option<String>,
252
253    /// Write scan output as HTML report to FILE
254    #[arg(long = "html", value_name = "FILE", allow_hyphen_values = true)]
255    pub output_html: Option<String>,
256
257    /// Write scan output as SPDX tag/value to FILE
258    #[arg(long = "spdx-tv", value_name = "FILE", allow_hyphen_values = true)]
259    pub output_spdx_tv: Option<String>,
260
261    /// Write scan output as SPDX RDF/XML to FILE
262    #[arg(long = "spdx-rdf", value_name = "FILE", allow_hyphen_values = true)]
263    pub output_spdx_rdf: Option<String>,
264
265    /// Write scan output as CycloneDX JSON to FILE
266    #[arg(long = "cyclonedx", value_name = "FILE", allow_hyphen_values = true)]
267    pub output_cyclonedx: Option<String>,
268
269    /// Write scan output as CycloneDX XML to FILE
270    #[arg(
271        long = "cyclonedx-xml",
272        value_name = "FILE",
273        allow_hyphen_values = true
274    )]
275    pub output_cyclonedx_xml: Option<String>,
276
    // `--custom-output` and `--custom-template` require each other.
277    /// Write scan output to FILE formatted with the custom template
278    #[arg(
279        long = "custom-output",
280        value_name = "FILE",
281        requires = "custom_template",
282        allow_hyphen_values = true
283    )]
284    pub custom_output: Option<String>,
285
286    /// Use this template FILE with --custom-output
287    #[arg(
288        long = "custom-template",
289        value_name = "FILE",
290        requires = "custom_output"
291    )]
292    pub custom_template: Option<String>,
293
    // ---- Traversal and execution ----
294    /// Maximum recursion depth (0 means no depth limit)
295    #[arg(short, long, default_value = "0")]
296    pub max_depth: usize,
297
    // Parsed by `parse_processes`: a positive value is `Parallel(n)`, 0 is
    // `SequentialWithTimeouts`, any negative value is
    // `SequentialWithoutTimeouts`. `allow_hyphen_values` is what lets `-1`
    // through as a value. The default comes from `ProcessMode::default_value`.
298    #[arg(short = 'n', long, default_value_t = ProcessMode::default_value(), value_parser = parse_processes, allow_hyphen_values = true)]
299    pub processes: ProcessMode,
300
    // Forwarded to the scan request as `timeout_seconds`.
301    #[arg(long, default_value_t = 120.0)]
302    pub timeout: f64,
303
    // `--quiet` and `--verbose` are mutually exclusive; the `ScanRequest`
    // conversion maps them to the quiet / verbose / default progress mode.
304    #[arg(short, long, conflicts_with = "verbose")]
305    pub quiet: bool,
306
307    #[arg(short, long, conflicts_with = "quiet")]
308    pub verbose: bool,
309
    // `--strip-root` and `--full-root` are mutually exclusive.
310    #[arg(long, conflicts_with = "full_root")]
311    pub strip_root: bool,
312
313    #[arg(long, conflicts_with = "strip_root")]
314    pub full_root: bool,
315
    // Comma-separated values are split (`value_delimiter = ','`). The output
    // header records this option under the key "--ignore".
316    /// Exclude patterns (ScanCode-compatible alias: --ignore)
317    #[arg(long = "exclude", visible_alias = "ignore", value_delimiter = ',')]
318    pub exclude: Vec<String>,
319
320    /// Include files matching PATTERN. Use `**` when you want recursion across directories.
321    #[arg(long, value_delimiter = ',')]
322    pub include: Vec<String>,
323
324    /// Read selected scan paths from FILE (or '-' for stdin), relative to the explicit scan root.
325    #[arg(long = "paths-file", value_name = "FILE", allow_hyphen_values = true)]
326    pub paths_file: Vec<String>,
327
    // ---- Caching ----
    // NOTE(review): the three cache options below have no help text.
328    #[arg(long = "cache-dir", value_name = "PATH")]
329    pub cache_dir: Option<String>,
330
331    #[arg(long = "cache-clear")]
332    pub cache_clear: bool,
333
334    #[arg(long = "incremental")]
335    pub incremental: bool,
336
    // Parsed by `parse_max_in_memory`: -1 is `StreamUnlimited`, 0 is
    // `CollectFirst`, a positive value is `Limit(n)`; values below -1 are
    // rejected.
337    /// Maximum number of file and directory scan details kept in memory.
338    /// Use 0 for unlimited memory or -1 for disk-only spill during the scan.
339    #[arg(
340        long = "max-in-memory",
341        value_name = "INT",
342        default_value_t = MemoryMode::Limit(10000),
343        value_parser = parse_max_in_memory,
344        allow_hyphen_values = true
345    )]
346    pub max_in_memory: MemoryMode,
347
    // ---- What to collect ----
348    /// Collect file information such as checksums, type hints, and source/script flags.
349    #[arg(short = 'i', long)]
350    pub info: bool,
351
    // Selects `InputMode::FromJson` instead of `InputMode::Native` in the
    // `ScanRequest` conversion.
352    /// Load one or more existing ScanCode-style JSON scans instead of rescanning inputs.
353    #[arg(long)]
354    pub from_json: bool,
355
356    /// Scan input for application package and dependency manifests, lockfiles and related data
357    #[arg(short = 'p', long)]
358    pub package: bool,
359
360    /// Select a compatibility bundle for intentional Provenant-vs-ScanCode behavior differences.
361    #[arg(
362        long = "compat-mode",
363        visible_alias = "compat",
364        value_enum,
365        default_value_t = CompatibilityMode::Native
366    )]
367    pub compat_mode: CompatibilityMode,
368
369    /// Scan input for installed system package databases (RPM, dpkg, apk, etc.)
370    #[arg(long = "system-package")]
371    pub system_package: bool,
372
373    /// Scan supported compiled Go and Rust binaries for embedded package metadata.
374    #[arg(long = "package-in-compiled")]
375    pub package_in_compiled: bool,
376
    // Cannot be combined with --license, --summary, --package or
    // --system-package.
377    /// Scan for system and application package data and skip license/copyright detection and top-level package creation.
378    #[arg(
379        long = "package-only",
380        conflicts_with_all = ["license", "summary", "package", "system_package"]
381    )]
382    pub package_only: bool,
383
384    /// Disable package assembly (merging related manifest/lockfiles into packages)
385    #[arg(long)]
386    pub no_assemble: bool,
387
    // ---- License detection ----
    // Most options in this section declare `requires = "license"`, so they are
    // only accepted together with --license.
388    /// Path to a custom license dataset root containing manifest.json, rules/, and licenses/.
389    /// If not specified, uses the built-in embedded license index.
390    #[arg(
391        long = "license-dataset-path",
392        value_name = "PATH",
393        requires = "license"
394    )]
395    pub license_dataset_path: Option<String>,
396
397    /// Force rebuild of the license index cache, ignoring any existing cache.
398    #[arg(long)]
399    pub reindex: bool,
400
401    /// Build the license index in memory for this run without reading or writing persistent cache files.
402    #[arg(long = "no-license-index-cache")]
403    pub no_license_index_cache: bool,
404
405    /// Include matched text in license detection output
406    #[arg(long = "license-text", requires = "license")]
407    pub license_text: bool,
408
    // Requires --license-text (which itself requires --license).
409    #[arg(long = "license-text-diagnostics", requires = "license_text")]
410    pub license_text_diagnostics: bool,
411
412    #[arg(long = "license-diagnostics", requires = "license")]
413    pub license_diagnostics: bool,
414
415    #[arg(long = "unknown-licenses", requires = "license")]
416    pub unknown_licenses: bool,
417
    // clap restricts the value to the range 0..=100.
418    #[arg(
419        long = "license-score",
420        default_value_t = 0,
421        requires = "license",
422        value_parser = clap::value_parser!(u8).range(0..=100)
423    )]
424    pub license_score: u8,
425
426    #[arg(
427        long = "license-url-template",
428        default_value = DEFAULT_LICENSEDB_URL_TEMPLATE,
429        requires = "license"
430    )]
431    pub license_url_template: String,
432
    // ---- Filtering and post-processing ----
433    #[arg(long)]
434    pub filter_clues: bool,
435
    // These two options set their help text through `help = ...` rather than
    // a doc comment.
436    #[arg(
437        long = "ignore-author",
438        value_name = "PATTERN",
439        help = "Ignore a file and all its findings if an author matches the regex PATTERN"
440    )]
441    pub ignore_author: Vec<String>,
442
443    #[arg(
444        long = "ignore-copyright-holder",
445        value_name = "PATTERN",
446        help = "Ignore a file and all its findings if a copyright holder matches the regex PATTERN"
447    )]
448    pub ignore_copyright_holder: Vec<String>,
449
450    #[arg(long)]
451    pub only_findings: bool,
452
    // Requires --info.
453    #[arg(long, requires = "info")]
454    pub mark_source: bool,
455
456    #[arg(long)]
457    pub classify: bool,
458
    // --summary and --license-clarity-score both require --classify.
459    #[arg(long, requires = "classify")]
460    pub summary: bool,
461
462    #[arg(long = "license-clarity-score", requires = "classify")]
463    pub license_clarity_score: bool,
464
465    #[arg(long = "license-references", requires = "license")]
466    pub license_references: bool,
467
    // `parse_license_policy_arg` validates the file at parse time (it must
    // exist, be a regular non-empty file, parse as YAML and contain a
    // `license_policies` key) and stores the path string unchanged.
468    /// Evaluate file license detections against a YAML license policy file.
469    #[arg(
470        long = "license-policy",
471        value_name = "FILE",
472        value_parser = parse_license_policy_arg
473    )]
474    pub license_policy: Option<String>,
475
    // ---- Tallies and facets ----
476    #[arg(long)]
477    pub tallies: bool,
478
    // Requires both --tallies and --classify.
479    #[arg(long = "tallies-key-files", requires_all = ["tallies", "classify"])]
480    pub tallies_key_files: bool,
481
482    #[arg(long = "tallies-with-details")]
483    pub tallies_with_details: bool,
484
    // The value placeholder shown in help is `<facet>=<pattern>`; the string
    // is stored as given and is not parsed in this struct.
485    #[arg(long = "facet", value_name = "<facet>=<pattern>")]
486    pub facet: Vec<String>,
487
    // Requires both --facet and --tallies.
488    #[arg(long = "tallies-by-facet", requires_all = ["facet", "tallies"])]
489    pub tallies_by_facet: bool,
490
491    #[arg(long)]
492    pub generated: bool,
493
    // ---- Detection switches ----
494    /// Scan input for licenses
495    #[arg(short = 'l', long)]
496    pub license: bool,
497
    // NOTE(review): unlike its neighbours, --copyright has no help text.
498    #[arg(short = 'c', long)]
499    pub copyright: bool,
500
501    /// Scan input for email addresses
502    #[arg(short = 'e', long)]
503    pub email: bool,
504
505    /// Report only up to INT emails found in a file. Use 0 for no limit.
506    #[arg(long, default_value_t = 50, requires = "email")]
507    pub max_email: usize,
508
509    /// Scan input for URLs
510    #[arg(short = 'u', long)]
511    pub url: bool,
512
513    /// Report only up to INT URLs found in a file. Use 0 for no limit.
514    #[arg(long, default_value_t = 50, requires = "url")]
515    pub max_url: usize,
516}
517
518impl Cli {
519    pub fn parse() -> Self {
520        <Self as Parser>::parse_from(rewrite_args_for_default_scan(std::env::args_os()))
521    }
522
523    pub fn try_parse_from<I, T>(itr: I) -> Result<Self, clap::Error>
524    where
525        I: IntoIterator<Item = T>,
526        T: Into<OsString>,
527    {
528        <Self as Parser>::try_parse_from(rewrite_args_for_default_scan(itr))
529    }
530
531    pub(crate) fn scan_args(&self) -> Option<&ScanArgs> {
532        match &self.command {
533            Command::Scan(scan_args) => Some(scan_args.as_ref()),
534            Command::Serve(_)
535            | Command::Compare(_)
536            | Command::ShowAttribution
537            | Command::ExportLicenseDataset(_) => None,
538        }
539    }
540}
541
/// Test-only convenience: lets tests read `ScanArgs` fields directly off a
/// parsed `Cli`.
///
/// # Panics
///
/// Dereferencing panics when the parsed subcommand is not `scan`.
#[cfg(test)]
impl Deref for Cli {
    type Target = ScanArgs;

    fn deref(&self) -> &Self::Target {
        match self.scan_args() {
            Some(scan_args) => scan_args,
            None => panic!("scan arguments are only available for the scan command"),
        }
    }
}
551
/// Inserts an implicit `scan` subcommand into a raw argument list.
///
/// `itr` is the full argument list including the program name at index 0.
/// The list is returned unchanged when:
///
/// * there is no argument after the program name, or
/// * the first argument is a known subcommand name or one of the top-level
///   help/version flags (`help`, `-h`, `--help`, `-V`, `--version`).
///
/// Otherwise `scan` is inserted at index 1 when the first argument starts
/// with `-` or names an existing filesystem path. Anything else is left as
/// is, so clap can report it as an unknown subcommand.
fn rewrite_args_for_default_scan<I, T>(itr: I) -> Vec<OsString>
where
    I: IntoIterator<Item = T>,
    T: Into<OsString>,
{
    let mut args: Vec<OsString> = itr.into_iter().map(Into::into).collect();

    let insert_scan = match args.get(1) {
        None => false,
        Some(first) => {
            // The lossy rendering is only used for textual comparisons.
            let first_text = first.to_string_lossy();
            let is_reserved = matches!(
                first_text.as_ref(),
                "scan"
                    | "serve"
                    | "compare"
                    | "show-attribution"
                    | "export-license-dataset"
                    | "help"
                    | "-h"
                    | "--help"
                    | "-V"
                    | "--version"
            );
            // The existence check uses the original `OsString`: a lossy
            // conversion would alter a non-UTF-8 path and miss a file that
            // really exists.
            !is_reserved && (first_text.starts_with('-') || Path::new(first).exists())
        }
    };

    if insert_scan {
        args.insert(1, OsString::from("scan"));
    }

    args
}
585
586fn parse_max_in_memory(value: &str) -> Result<MemoryMode, String> {
587    let parsed = value
588        .parse::<i64>()
589        .map_err(|_| format!("invalid integer value: {value}"))?;
590    if parsed < -1 {
591        return Err("--max-in-memory must be -1, 0, or a positive integer".to_string());
592    }
593    match parsed {
594        -1 => Ok(MemoryMode::StreamUnlimited),
595        0 => Ok(MemoryMode::CollectFirst),
596        n if n > 0 => Ok(MemoryMode::Limit(usize::try_from(n).unwrap_or(usize::MAX))),
597        _ => Ok(MemoryMode::CollectFirst),
598    }
599}
600
601impl ScanArgs {
602    pub(crate) fn output_targets(&self) -> Vec<OutputTarget> {
603        let mut targets = Vec::new();
604
605        if let Some(file) = &self.output_json {
606            targets.push(OutputTarget {
607                format: OutputFormat::Json,
608                file: file.clone(),
609                custom_template: None,
610            });
611        }
612
613        if let Some(file) = &self.output_json_pp {
614            targets.push(OutputTarget {
615                format: OutputFormat::JsonPretty,
616                file: file.clone(),
617                custom_template: None,
618            });
619        }
620
621        if let Some(file) = &self.output_json_lines {
622            targets.push(OutputTarget {
623                format: OutputFormat::JsonLines,
624                file: file.clone(),
625                custom_template: None,
626            });
627        }
628
629        if let Some(file) = &self.output_yaml {
630            targets.push(OutputTarget {
631                format: OutputFormat::Yaml,
632                file: file.clone(),
633                custom_template: None,
634            });
635        }
636
637        if let Some(file) = &self.output_debian {
638            targets.push(OutputTarget {
639                format: OutputFormat::Debian,
640                file: file.clone(),
641                custom_template: None,
642            });
643        }
644
645        if let Some(file) = &self.output_html {
646            targets.push(OutputTarget {
647                format: OutputFormat::Html,
648                file: file.clone(),
649                custom_template: None,
650            });
651        }
652
653        if let Some(file) = &self.output_spdx_tv {
654            targets.push(OutputTarget {
655                format: OutputFormat::SpdxTv,
656                file: file.clone(),
657                custom_template: None,
658            });
659        }
660
661        if let Some(file) = &self.output_spdx_rdf {
662            targets.push(OutputTarget {
663                format: OutputFormat::SpdxRdf,
664                file: file.clone(),
665                custom_template: None,
666            });
667        }
668
669        if let Some(file) = &self.output_cyclonedx {
670            targets.push(OutputTarget {
671                format: OutputFormat::CycloneDxJson,
672                file: file.clone(),
673                custom_template: None,
674            });
675        }
676
677        if let Some(file) = &self.output_cyclonedx_xml {
678            targets.push(OutputTarget {
679                format: OutputFormat::CycloneDxXml,
680                file: file.clone(),
681                custom_template: None,
682            });
683        }
684
685        if let Some(file) = &self.custom_output {
686            targets.push(OutputTarget {
687                format: OutputFormat::CustomTemplate,
688                file: file.clone(),
689                custom_template: self.custom_template.clone(),
690            });
691        }
692
693        targets
694    }
695
696    pub(crate) fn output_header_options(&self) -> JsonMap<String, JsonValue> {
697        let mut options = JsonMap::new();
698        if !self.dir_path.is_empty() {
699            options.insert(
700                "input".to_string(),
701                JsonValue::Array(
702                    self.dir_path
703                        .iter()
704                        .cloned()
705                        .map(JsonValue::String)
706                        .collect(),
707                ),
708            );
709        }
710
711        let mut flags = Vec::new();
712
713        push_string_option(&mut flags, "--cache-dir", self.cache_dir.as_ref());
714        push_bool_option(&mut flags, "--cache-clear", self.cache_clear);
715        push_bool_option(&mut flags, "--classify", self.classify);
716        push_string_option(&mut flags, "--custom-output", self.custom_output.as_ref());
717        push_string_option(
718            &mut flags,
719            "--custom-template",
720            self.custom_template.as_ref(),
721        );
722        push_bool_option(&mut flags, "--copyright", self.copyright);
723        if self.compat_mode != CompatibilityMode::Native {
724            flags.push((
725                "--compat-mode".to_string(),
726                JsonValue::String(self.compat_mode.as_str().to_string()),
727            ));
728        }
729        push_string_option(&mut flags, "--cyclonedx", self.output_cyclonedx.as_ref());
730        push_string_option(
731            &mut flags,
732            "--cyclonedx-xml",
733            self.output_cyclonedx_xml.as_ref(),
734        );
735        push_string_option(&mut flags, "--debian", self.output_debian.as_ref());
736        push_bool_option(&mut flags, "--email", self.email);
737        push_array_option(&mut flags, "--facet", &self.facet);
738        push_bool_option(&mut flags, "--filter-clues", self.filter_clues);
739        push_bool_option(&mut flags, "--from-json", self.from_json);
740        push_bool_option(&mut flags, "--full-root", self.full_root);
741        push_bool_option(&mut flags, "--generated", self.generated);
742        push_string_option(&mut flags, "--html", self.output_html.as_ref());
743        push_array_option(&mut flags, "--ignore", &self.exclude);
744        push_array_option(&mut flags, "--ignore-author", &self.ignore_author);
745        push_array_option(
746            &mut flags,
747            "--ignore-copyright-holder",
748            &self.ignore_copyright_holder,
749        );
750        push_bool_option(&mut flags, "--incremental", self.incremental);
751        push_array_option(&mut flags, "--include", &self.include);
752        push_bool_option(&mut flags, "--info", self.info);
753        push_string_option(&mut flags, "--json", self.output_json.as_ref());
754        push_string_option(&mut flags, "--json-lines", self.output_json_lines.as_ref());
755        push_string_option(&mut flags, "--json-pp", self.output_json_pp.as_ref());
756        push_bool_option(&mut flags, "--license", self.license);
757        push_bool_option(
758            &mut flags,
759            "--license-clarity-score",
760            self.license_clarity_score,
761        );
762        push_bool_option(
763            &mut flags,
764            "--license-diagnostics",
765            self.license_diagnostics,
766        );
767        push_string_option(
768            &mut flags,
769            "--license-dataset-path",
770            self.license_dataset_path.as_ref(),
771        );
772        push_string_option(&mut flags, "--license-policy", self.license_policy.as_ref());
773        push_bool_option(
774            &mut flags,
775            "--no-license-index-cache",
776            self.no_license_index_cache,
777        );
778        push_bool_option(&mut flags, "--license-references", self.license_references);
779        push_bool_option(&mut flags, "--reindex", self.reindex);
780        push_non_default_u8_option(&mut flags, "--license-score", self.license_score, 0);
781        push_bool_option(&mut flags, "--license-text", self.license_text);
782        push_bool_option(
783            &mut flags,
784            "--license-text-diagnostics",
785            self.license_text_diagnostics,
786        );
787        push_non_default_string_option(
788            &mut flags,
789            "--license-url-template",
790            &self.license_url_template,
791            DEFAULT_LICENSEDB_URL_TEMPLATE,
792        );
793        push_non_default_usize_option(&mut flags, "--max-depth", self.max_depth, 0);
794        match self.max_in_memory {
795            MemoryMode::Limit(10000) => {}
796            MemoryMode::CollectFirst => {
797                flags.push(("--max-in-memory".to_string(), JsonValue::Number(0.into())));
798            }
799            MemoryMode::StreamUnlimited => {
800                flags.push((
801                    "--max-in-memory".to_string(),
802                    JsonValue::Number((-1i64).into()),
803                ));
804            }
805            MemoryMode::Limit(n) => {
806                flags.push(("--max-in-memory".to_string(), JsonValue::Number(n.into())));
807            }
808        }
809        if self.email {
810            push_non_default_usize_option(&mut flags, "--max-email", self.max_email, 50);
811        }
812        if self.url {
813            push_non_default_usize_option(&mut flags, "--max-url", self.max_url, 50);
814        }
815        push_bool_option(&mut flags, "--mark-source", self.mark_source);
816        push_bool_option(&mut flags, "--no-assemble", self.no_assemble);
817        push_bool_option(&mut flags, "--only-findings", self.only_findings);
818        push_bool_option(&mut flags, "--package", self.package);
819        push_bool_option(
820            &mut flags,
821            "--package-in-compiled",
822            self.package_in_compiled,
823        );
824        push_bool_option(&mut flags, "--package-only", self.package_only);
825        push_array_option(&mut flags, "--paths-file", &self.paths_file);
826        push_non_default_process_mode_option(
827            &mut flags,
828            "--processes",
829            self.processes,
830            ProcessMode::default_value(),
831        );
832        push_bool_option(&mut flags, "--quiet", self.quiet);
833        push_string_option(&mut flags, "--spdx-rdf", self.output_spdx_rdf.as_ref());
834        push_string_option(&mut flags, "--spdx-tv", self.output_spdx_tv.as_ref());
835        push_bool_option(&mut flags, "--strip-root", self.strip_root);
836        push_bool_option(&mut flags, "--summary", self.summary);
837        push_bool_option(&mut flags, "--system-package", self.system_package);
838        push_bool_option(&mut flags, "--tallies", self.tallies);
839        push_bool_option(&mut flags, "--tallies-by-facet", self.tallies_by_facet);
840        push_bool_option(&mut flags, "--tallies-key-files", self.tallies_key_files);
841        push_bool_option(
842            &mut flags,
843            "--tallies-with-details",
844            self.tallies_with_details,
845        );
846        push_non_default_f64_option(&mut flags, "--timeout", self.timeout, 120.0);
847        push_bool_option(&mut flags, "--unknown-licenses", self.unknown_licenses);
848        push_bool_option(&mut flags, "--url", self.url);
849        push_bool_option(&mut flags, "--verbose", self.verbose);
850        push_string_option(&mut flags, "--yaml", self.output_yaml.as_ref());
851
852        flags.sort_by(|left, right| left.0.cmp(&right.0));
853        for (key, value) in flags {
854            options.insert(key, value);
855        }
856
857        options
858    }
859}
860
861impl From<&ScanArgs> for ScanRequest {
862    fn from(cli: &ScanArgs) -> Self {
863        Self {
864            input_paths: cli.dir_path.clone(),
865            input_mode: if cli.from_json {
866                InputMode::FromJson
867            } else {
868                InputMode::Native
869            },
870            output_targets: cli.output_targets(),
871            output_header_options: cli.output_header_options(),
872            progress_mode: if cli.quiet {
873                crate::progress::ProgressMode::Quiet
874            } else if cli.verbose {
875                crate::progress::ProgressMode::Verbose
876            } else {
877                crate::progress::ProgressMode::Default
878            },
879            process_mode: cli.processes,
880            timeout_seconds: cli.timeout,
881            quiet: cli.quiet,
882            verbose: cli.verbose,
883            strip_root: cli.strip_root,
884            full_root: cli.full_root,
885            exclude: cli.exclude.clone(),
886            include: cli.include.clone(),
887            paths_files: cli.paths_file.clone(),
888            respect_process_cache_env: true,
889            cache_dir: cli.cache_dir.clone(),
890            cache_clear: cli.cache_clear,
891            incremental: cli.incremental,
892            max_depth: cli.max_depth,
893            max_in_memory: cli.max_in_memory,
894            info: cli.info,
895            package: cli.package,
896            system_package: cli.system_package,
897            package_in_compiled: cli.package_in_compiled,
898            package_only: cli.package_only,
899            no_assemble: cli.no_assemble,
900            license_dataset_path: cli.license_dataset_path.clone(),
901            reindex: cli.reindex,
902            no_license_index_cache: cli.no_license_index_cache,
903            license_text: cli.license_text,
904            license_text_diagnostics: cli.license_text_diagnostics,
905            license_diagnostics: cli.license_diagnostics,
906            unknown_licenses: cli.unknown_licenses,
907            license_score: cli.license_score,
908            license_url_template: cli.license_url_template.clone(),
909            filter_clues: cli.filter_clues,
910            ignore_author: cli.ignore_author.clone(),
911            ignore_copyright_holder: cli.ignore_copyright_holder.clone(),
912            only_findings: cli.only_findings,
913            mark_source: cli.mark_source,
914            classify: cli.classify,
915            summary: cli.summary,
916            license_clarity_score: cli.license_clarity_score,
917            license_references: cli.license_references,
918            license_policy: cli.license_policy.clone(),
919            tallies: cli.tallies,
920            tallies_key_files: cli.tallies_key_files,
921            tallies_with_details: cli.tallies_with_details,
922            facet: cli.facet.clone(),
923            tallies_by_facet: cli.tallies_by_facet,
924            generated: cli.generated,
925            license: cli.license,
926            copyright: cli.copyright,
927            email: cli.email,
928            max_email: cli.max_email,
929            url: cli.url,
930            max_url: cli.max_url,
931        }
932    }
933}
934
935fn push_bool_option(options: &mut Vec<(String, JsonValue)>, key: &str, enabled: bool) {
936    if enabled {
937        options.push((key.to_string(), JsonValue::Bool(true)));
938    }
939}
940
941fn push_string_option(options: &mut Vec<(String, JsonValue)>, key: &str, value: Option<&String>) {
942    if let Some(value) = value {
943        options.push((key.to_string(), JsonValue::String(value.clone())));
944    }
945}
946
947fn push_non_default_string_option(
948    options: &mut Vec<(String, JsonValue)>,
949    key: &str,
950    value: &str,
951    default: &str,
952) {
953    if value != default {
954        options.push((key.to_string(), JsonValue::String(value.to_string())));
955    }
956}
957
958fn push_array_option(options: &mut Vec<(String, JsonValue)>, key: &str, values: &[String]) {
959    if !values.is_empty() {
960        options.push((
961            key.to_string(),
962            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect()),
963        ));
964    }
965}
966
967fn push_non_default_usize_option(
968    options: &mut Vec<(String, JsonValue)>,
969    key: &str,
970    value: usize,
971    default: usize,
972) {
973    if value != default {
974        options.push((key.to_string(), JsonValue::Number(value.into())));
975    }
976}
977
978fn push_non_default_u8_option(
979    options: &mut Vec<(String, JsonValue)>,
980    key: &str,
981    value: u8,
982    default: u8,
983) {
984    if value != default {
985        options.push((key.to_string(), JsonValue::Number(value.into())));
986    }
987}
988
989fn push_non_default_process_mode_option(
990    options: &mut Vec<(String, JsonValue)>,
991    key: &str,
992    value: ProcessMode,
993    default: ProcessMode,
994) {
995    if value != default {
996        options.push((key.to_string(), JsonValue::Number(value.to_i32().into())));
997    }
998}
999
1000fn push_non_default_f64_option(
1001    options: &mut Vec<(String, JsonValue)>,
1002    key: &str,
1003    value: f64,
1004    default: f64,
1005) {
1006    if (value - default).abs() > f64::EPSILON
1007        && let Some(number) = JsonNumber::from_f64(value)
1008    {
1009        options.push((key.to_string(), JsonValue::Number(number)));
1010    }
1011}
1012
1013#[cfg(test)]
1014mod tests {
1015    use super::*;
1016    use clap::CommandFactory;
1017
1018    fn scan_command() -> clap::Command {
1019        Cli::command()
1020            .find_subcommand("scan")
1021            .expect("scan subcommand should exist")
1022            .clone()
1023    }
1024
/// Parsing fails when only an input path is supplied.
#[test]
fn test_requires_at_least_one_output_option() {
    let argv = ["provenant", "samples"];
    assert!(Cli::try_parse_from(argv).is_err());
}
1030
/// `--json-pp` is stored and yields a single pretty-JSON output target.
#[test]
fn test_parses_json_pretty_output_option() {
    let argv = ["provenant", "--json-pp", "scan.json", "samples"];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert_eq!(cli.output_json_pp.as_deref(), Some("scan.json"));

    let targets = cli.output_targets();
    assert_eq!(targets.len(), 1);
    assert_eq!(targets[0].format, OutputFormat::JsonPretty);
}
1040
/// An explicit `scan` subcommand accepts the scan flags and input path.
#[test]
fn test_explicit_scan_subcommand_parses_scan_flags() {
    let argv = [
        "provenant",
        "scan",
        "--json-pp",
        "scan.json",
        "--license",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("explicit scan subcommand should parse");

    assert!(matches!(cli.command, Command::Scan(_)));

    let scan_args = cli.scan_args().expect("scan args should be present");
    assert_eq!(scan_args.output_json_pp.as_deref(), Some("scan.json"));
    assert!(scan_args.license);
    assert_eq!(scan_args.dir_path, vec!["samples"]);
}
1059
/// `compare` parses both JSON inputs and an explicit artifact directory.
#[test]
fn test_parses_compare_subcommand() {
    let argv = [
        "provenant",
        "compare",
        "--scancode-json",
        "scan-a.json",
        "--provenant-json",
        "scan-b.json",
        "--artifact-dir",
        "compare-out",
    ];
    let cli = Cli::try_parse_from(argv).expect("compare subcommand should parse");

    match cli.command {
        Command::Compare(compare) => {
            assert_eq!(compare.scancode_json, PathBuf::from("scan-a.json"));
            assert_eq!(compare.provenant_json, PathBuf::from("scan-b.json"));
            assert_eq!(compare.artifact_dir, Some(PathBuf::from("compare-out")));
        }
        other => panic!("expected compare subcommand, got {other:?}"),
    }
}
1083
/// `serve --bind` parses and keeps the given bind address.
#[test]
fn test_parses_serve_subcommand() {
    let argv = ["provenant", "serve", "--bind", "127.0.0.1:9090"];
    let cli = Cli::try_parse_from(argv).expect("serve subcommand should parse");

    match cli.command {
        Command::Serve(serve) => assert_eq!(serve.bind, "127.0.0.1:9090"),
        other => panic!("expected serve subcommand, got {other:?}"),
    }
}
1094
/// `compare` parses without `--artifact-dir`, leaving that field unset.
#[test]
fn test_compare_subcommand_allows_default_artifact_dir() {
    let argv = [
        "provenant",
        "compare",
        "--scancode-json",
        "scan-a.json",
        "--provenant-json",
        "scan-b.json",
    ];
    let cli = Cli::try_parse_from(argv)
        .expect("compare subcommand should allow default artifact dir");

    match cli.command {
        Command::Compare(compare) => {
            assert_eq!(compare.scancode_json, PathBuf::from("scan-a.json"));
            assert_eq!(compare.provenant_json, PathBuf::from("scan-b.json"));
            assert!(compare.artifact_dir.is_none());
        }
        other => panic!("expected compare subcommand, got {other:?}"),
    }
}
1116
/// An unknown leading token is reported as an unrecognized subcommand.
#[test]
fn test_unknown_command_like_token_is_not_rewritten_to_scan() {
    let argv = [
        "provenant",
        "future-command",
        "--json-pp",
        "scan.json",
        "samples",
    ];

    let error =
        Cli::try_parse_from(argv).expect_err("unknown command-like token should fail");
    let message = error.to_string();

    assert!(message.contains("unrecognized subcommand 'future-command'"));
}
1134
/// `--json` and `--html` together yield two output targets, in that order.
#[test]
fn test_allows_multiple_output_options_in_one_run() {
    let argv = [
        "provenant",
        "--json",
        "scan.json",
        "--html",
        "report.html",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should allow multiple outputs");

    let targets = cli.output_targets();
    assert_eq!(targets.len(), 2);
    assert_eq!(targets[0].format, OutputFormat::Json);
    assert_eq!(targets[1].format, OutputFormat::Html);
}
1151
/// `show-attribution` parses to its own subcommand variant.
#[test]
fn test_parses_show_attribution_subcommand() {
    let argv = ["provenant", "show-attribution"];
    let cli = Cli::try_parse_from(argv).expect("show-attribution subcommand should parse");

    assert!(matches!(cli.command, Command::ShowAttribution));
}
1159
/// The flag spelling `--show-attribution` does not parse.
#[test]
fn test_legacy_show_attribution_flag_is_rejected() {
    let argv = ["provenant", "--show-attribution"];
    assert!(Cli::try_parse_from(argv).is_err());
}
1165
/// `export-license-dataset <dir>` parses without any output flags.
#[test]
fn test_export_license_dataset_allows_mode_without_output_file() {
    let argv = ["provenant", "export-license-dataset", "dataset-out"];
    let cli = Cli::try_parse_from(argv)
        .expect("cli parse should allow export mode without output flags");

    match cli.command {
        Command::ExportLicenseDataset(export) => assert_eq!(export.dir, "dataset-out"),
        other => panic!("expected export subcommand, got {other:?}"),
    }
}
1176
/// The flag spelling `--export-license-dataset` does not parse.
#[test]
fn test_legacy_export_license_dataset_flag_is_rejected() {
    let argv = ["provenant", "--export-license-dataset", "dataset-out"];
    assert!(Cli::try_parse_from(argv).is_err());
}
1182
/// `--license-dataset-path` is accepted alongside `--license` and stored.
#[test]
fn test_license_dataset_path_parses_for_license_scans() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-dataset-path",
        "dataset-root",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv)
        .expect("cli parse should accept custom license dataset flag");

    assert_eq!(cli.license_dataset_path.as_deref(), Some("dataset-root"));
}
1198
/// Header options are keyed by flag name (plus `input`), and `--compat-mode`
/// is absent when it was not passed.
#[test]
fn test_output_header_options_use_scancode_style_keys() {
    // Builds the JSON array-of-strings value expected for list-valued options.
    let string_array = |items: &[&str]| {
        JsonValue::Array(
            items
                .iter()
                .map(|item| JsonValue::String((*item).to_string()))
                .collect(),
        )
    };

    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--package",
        "--strip-root",
        "--paths-file",
        "changed-files.txt",
        "--ignore",
        "*.git*",
        "--ignore",
        "target/*",
        "samples",
    ];
    let header = Cli::try_parse_from(argv)
        .expect("cli parse should succeed")
        .output_header_options();

    assert_eq!(header.get("input"), Some(&string_array(&["samples"])));
    assert_eq!(
        header.get("--json-pp"),
        Some(&JsonValue::String("scan.json".to_string()))
    );
    assert_eq!(header.get("--license"), Some(&JsonValue::Bool(true)));
    assert_eq!(header.get("--package"), Some(&JsonValue::Bool(true)));
    assert_eq!(
        header.get("--paths-file"),
        Some(&string_array(&["changed-files.txt"]))
    );
    assert_eq!(header.get("--strip-root"), Some(&JsonValue::Bool(true)));
    assert_eq!(
        header.get("--ignore"),
        Some(&string_array(&["*.git*", "target/*"]))
    );
    assert!(!header.contains_key("--compat-mode"));
}
1248
/// `--compat-mode scancode` is parsed and appears in the header options.
#[test]
fn test_compat_mode_parses_and_is_recorded_when_non_default() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--copyright",
        "--compat-mode",
        "scancode",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert_eq!(cli.compat_mode, CompatibilityMode::Scancode);

    let header = cli.output_header_options();
    let expected = JsonValue::String("scancode".to_string());
    assert_eq!(header.get("--compat-mode"), Some(&expected));
}
1269
/// A supplied `--license-dataset-path` is recorded in the header options.
#[test]
fn test_output_header_options_include_license_dataset_path_when_set() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-dataset-path",
        "dataset-root",
        "samples",
    ];
    let header = Cli::try_parse_from(argv)
        .expect("cli parse should accept custom license dataset flag")
        .output_header_options();

    let expected = JsonValue::String("dataset-root".to_string());
    assert_eq!(header.get("--license-dataset-path"), Some(&expected));
}
1289
/// Default `--timeout` and `--processes` values are left out of the header
/// options, while explicitly chosen non-default values are recorded.
#[test]
fn test_output_header_options_skip_defaults_and_include_non_defaults() {
    let default_options =
        Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
            .expect("default cli parse should succeed")
            .output_header_options();
    assert!(!default_options.contains_key("--timeout"));
    assert!(!default_options.contains_key("--processes"));

    // The default process mode is derived from the host's CPU count
    // (`Parallel(cpus - 1)`), so a hard-coded "4" would match the default on a
    // 5-CPU machine and presumably be omitted from the header. Choose a count
    // that is guaranteed to differ from this machine's default instead.
    let processes: i32 = if ProcessMode::default() == ProcessMode::Parallel(4) {
        5
    } else {
        4
    };
    let processes_arg = processes.to_string();

    let custom_options = Cli::try_parse_from([
        "provenant",
        "--json-pp",
        "scan.json",
        "--timeout",
        "30",
        "--processes",
        processes_arg.as_str(),
        "samples",
    ])
    .expect("custom cli parse should succeed")
    .output_header_options();

    assert_eq!(
        custom_options.get("--timeout"),
        Some(&JsonValue::Number(
            JsonNumber::from_f64(30.0).expect("valid number")
        ))
    );
    assert_eq!(
        custom_options.get("--processes"),
        Some(&JsonValue::Number(processes.into()))
    );
}
1323
/// A lone `-` is accepted as the value of `--json-pp`.
#[test]
fn test_allows_stdout_dash_as_output_target() {
    let argv = ["provenant", "--json-pp", "-", "samples"];
    let cli = Cli::try_parse_from(argv)
        .expect("cli parse should allow stdout dash output target");

    assert_eq!(cli.output_json_pp.as_deref(), Some("-"));
}
1331
/// `--debian` is rejected without `--license-text` and accepted once
/// `--license`, `--copyright` and `--license-text` are all present.
#[test]
fn test_debian_requires_license_copyright_and_license_text() {
    let without_license_text = [
        "provenant",
        "--debian",
        "scan.copyright",
        "--license",
        "--copyright",
        "samples",
    ];
    assert!(Cli::try_parse_from(without_license_text).is_err());

    let complete = [
        "provenant",
        "--debian",
        "scan.copyright",
        "--license",
        "--copyright",
        "--license-text",
        "samples",
    ];
    let cli = Cli::try_parse_from(complete).expect("cli parse should accept debian output");

    let targets = cli.output_targets();
    assert_eq!(targets.len(), 1);
    assert_eq!(targets[0].format, OutputFormat::Debian);
    assert_eq!(cli.output_debian.as_deref(), Some("scan.copyright"));
}
1359
/// The help text of `--debian` names the flags it requires.
#[test]
fn test_debian_help_mentions_required_companion_flags() {
    let scan = scan_command();
    let help_text = scan
        .get_arguments()
        .find(|candidate| candidate.get_long() == Some("debian"))
        .expect("debian arg should exist")
        .get_help()
        .expect("debian arg should have help text")
        .to_string();

    assert!(help_text.contains("requires --license, --copyright, and --license-text"));
}
1375
/// The rendered `scan` help mentions the `RUST_LOG` override for pdf_oxide logs.
#[test]
fn test_scan_help_mentions_pdf_oxide_rust_log_escape_hatch() {
    let rendered = scan_command().render_help().to_string();

    for fragment in [
        "RUST_LOG=pdf_oxide=warn",
        "suppresses noisy pdf_oxide logs by default",
    ] {
        assert!(rendered.contains(fragment));
    }
}
1383
/// The rendered root help lists every subcommand name.
#[test]
fn test_root_help_mentions_subcommands() {
    let rendered = Cli::command().render_help().to_string();

    for subcommand in [
        "scan",
        "serve",
        "compare",
        "show-attribution",
        "export-license-dataset",
    ] {
        assert!(rendered.contains(subcommand));
    }
}
1394
/// `--license-policy` accepts a file containing a `license_policies` key and
/// stores the path as given.
#[test]
fn test_parses_license_policy_flag() {
    let temp = tempfile::tempdir().expect("temp dir");
    let policy_path = temp.path().join("policy.yml");
    std::fs::write(&policy_path, "license_policies: []\n").expect("policy written");
    let policy_arg = policy_path.to_str().expect("utf8 path");

    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-policy",
        policy_arg,
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should accept license-policy");

    assert_eq!(cli.license_policy.as_deref(), Some(policy_arg));
}
1416
/// `--license-policy` is rejected when the file lacks a `license_policies` key.
#[test]
fn test_rejects_invalid_license_policy_flag_value() {
    let temp = tempfile::tempdir().expect("temp dir");
    let policy_path = temp.path().join("policy.yml");
    std::fs::write(&policy_path, "not_license_policies: []\n").expect("policy written");
    let policy_arg = policy_path.to_str().expect("utf8 path");

    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-policy",
        policy_arg,
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1434
/// `--custom-output` and `--custom-template` each fail when given alone.
#[test]
fn test_custom_template_and_output_must_be_paired() {
    let output_only = ["provenant", "--custom-output", "result.txt", "samples"];
    assert!(Cli::try_parse_from(output_only).is_err());

    let template_only = ["provenant", "--custom-template", "tpl.tera", "samples"];
    assert!(Cli::try_parse_from(template_only).is_err());
}
1445
/// `-n 4` and `--timeout 30` parse into the process mode and timeout fields.
#[test]
fn test_parses_processes_and_timeout_options() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "-n",
        "4",
        "--timeout",
        "30",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert_eq!(cli.processes, ProcessMode::Parallel(4));
    assert_eq!(cli.timeout, 30.0);
}
1463
/// `--strip-root` and `--full-root` cannot be combined.
#[test]
fn test_strip_root_conflicts_with_full_root() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--strip-root",
        "--full-root",
        "samples",
    ];
    assert!(Cli::try_parse_from(argv).is_err());
}
1476
/// A comma-separated `--include` value is split into patterns, and
/// `--only-findings` / `--filter-clues` are set.
#[test]
fn test_parses_include_and_only_findings_and_filter_clues() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--include",
        "src/**,Cargo.toml",
        "--only-findings",
        "--filter-clues",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert_eq!(cli.include, vec!["src/**", "Cargo.toml"]);
    assert!(cli.only_findings);
    assert!(cli.filter_clues);
}
1495
/// Repeated `--paths-file` flags accumulate in order, including a `-` value.
#[test]
fn test_parses_repeated_paths_file_flags_including_stdin_dash() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--paths-file",
        "changed-files.txt",
        "--paths-file",
        "-",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv)
        .expect("cli parse should accept repeated --paths-file flags");

    assert_eq!(cli.paths_file, vec!["changed-files.txt", "-"]);
}
1512
/// Repeated `--ignore-author` and `--ignore-copyright-holder` values are collected.
#[test]
fn test_parses_ignore_author_and_holder_filters() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--ignore-author",
        "Jane.*",
        "--ignore-author",
        ".*Bot$",
        "--ignore-copyright-holder",
        "Example Corp",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert_eq!(cli.ignore_author, vec!["Jane.*", ".*Bot$"]);
    assert_eq!(cli.ignore_copyright_holder, vec!["Example Corp"]);
}
1532
/// `--ignore` fills the `exclude` patterns, splitting a comma-separated value.
#[test]
fn test_parses_ignore_alias_for_exclude_patterns() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--ignore",
        "*.git*,target/*",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should accept --ignore alias");

    assert_eq!(cli.exclude, vec!["*.git*", "target/*"]);
}
1547
/// `--quiet` and `--verbose` cannot be combined.
#[test]
fn test_quiet_conflicts_with_verbose() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--quiet",
        "--verbose",
        "samples",
    ];
    assert!(Cli::try_parse_from(argv).is_err());
}
1560
/// `--from-json`, `--info` and `--mark-source` parse together with a JSON input path.
#[test]
fn test_parses_from_json_and_mark_source() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--from-json",
        "--info",
        "--mark-source",
        "sample-scan.json",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.from_json);
    assert!(cli.info);
    assert_eq!(cli.dir_path, vec!["sample-scan.json"]);
    assert!(cli.mark_source);
}
1579
/// `--mark-source` without `--info` fails to parse.
#[test]
fn test_mark_source_requires_info() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--mark-source",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1592
/// `--classify`, `--tallies`, repeated `--facet` and `--tallies-by-facet` parse together.
#[test]
fn test_parses_classify_facet_and_tallies_by_facet() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--classify",
        "--tallies",
        "--facet",
        "dev=*.c",
        "--facet",
        "tests=*/tests/*",
        "--tallies-by-facet",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.classify);
    assert!(cli.tallies);
    assert_eq!(cli.facet, vec!["dev=*.c", "tests=*/tests/*"]);
    assert!(cli.tallies_by_facet);
}
1615
/// `--tallies-by-facet` given on its own fails to parse.
#[test]
fn test_tallies_by_facet_requires_facet_definitions() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--tallies-by-facet",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1628
/// `--summary` without `--classify` fails to parse.
#[test]
fn test_summary_requires_classify() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--summary",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1641
/// `--tallies-key-files` given on its own fails to parse.
#[test]
fn test_tallies_key_files_requires_tallies_and_classify() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--tallies-key-files",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1654
/// The classify, summary, clarity-score, tallies and generated flags parse together.
#[test]
fn test_parses_summary_tallies_and_generated_flags() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--classify",
        "--summary",
        "--license-clarity-score",
        "--tallies",
        "--tallies-key-files",
        "--tallies-with-details",
        "--generated",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.classify);
    assert!(cli.summary);
    assert!(cli.license_clarity_score);
    assert!(cli.tallies);
    assert!(cli.tallies_key_files);
    assert!(cli.tallies_with_details);
    assert!(cli.generated);
}
1680
/// `--copyright` sets the copyright flag.
#[test]
fn test_parses_copyright_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--copyright",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.copyright);
}
1694
/// Without `--package`, the package flag is off.
#[test]
fn test_package_flag_defaults_to_disabled() {
    let argv = ["provenant", "--json-pp", "scan.json", "samples"];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(!cli.package);
}
1702
/// `--system-package` sets the system-package flag.
#[test]
fn test_parses_system_package_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--system-package",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.system_package);
}
1716
/// `--package-in-compiled` sets the matching flag.
#[test]
fn test_parses_package_in_compiled_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-in-compiled",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.package_in_compiled);
}
1730
/// `--package-only` sets the package-only flag.
#[test]
fn test_parses_package_only_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-only",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.package_only);
}
1744
/// `--package-only` cannot be combined with `--license` or with `--package`.
#[test]
fn test_package_only_conflicts_with_upstream_incompatible_flags() {
    let combined_with_license = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-only",
        "--license",
        "samples",
    ];
    assert!(Cli::try_parse_from(combined_with_license).is_err());

    let combined_with_package = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package-only",
        "--package",
        "samples",
    ];
    assert!(Cli::try_parse_from(combined_with_package).is_err());
}
1767
/// `--package` sets the package flag.
#[test]
fn test_parses_package_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--package",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.package);
}
1781
/// The short flag `-p` sets the package flag.
#[test]
fn test_package_short_flag() {
    let argv = ["provenant", "--json-pp", "scan.json", "-p", "samples"];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.package);
}
1789
/// `--license` sets the license flag.
#[test]
fn test_parses_license_flag() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.license);
}
1803
/// The short flag `-l` sets the license flag.
#[test]
fn test_license_short_flag() {
    let argv = ["provenant", "--json-pp", "scan.json", "-l", "samples"];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.license);
}
1811
/// `--license-text` without `--license` fails to parse.
#[test]
fn test_license_text_requires_license() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license-text",
        "samples",
    ];
    assert!(Cli::try_parse_from(argv).is_err());
}
1823
/// `--include-text` is not an accepted flag, even alongside `--license`.
#[test]
fn test_include_text_is_rejected() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--include-text",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1837
/// `--license-text-diagnostics` without `--license-text` fails to parse.
#[test]
fn test_license_text_diagnostics_requires_license_text() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-text-diagnostics",
        "samples",
    ];

    assert!(Cli::try_parse_from(argv).is_err());
}
1851
/// The license text/diagnostics flags parse together, while the score and URL
/// template keep their defaults.
#[test]
fn test_parses_license_text_and_diagnostics_flags() {
    let argv = [
        "provenant",
        "--json-pp",
        "scan.json",
        "--license",
        "--license-text",
        "--license-text-diagnostics",
        "--license-diagnostics",
        "--unknown-licenses",
        "samples",
    ];
    let cli = Cli::try_parse_from(argv).expect("cli parse should succeed");

    assert!(cli.license_text);
    assert!(cli.license_text_diagnostics);
    assert!(cli.license_diagnostics);
    assert!(cli.unknown_licenses);
    assert_eq!(cli.license_score, 0);
    assert_eq!(cli.license_url_template, DEFAULT_LICENSEDB_URL_TEMPLATE);
}
1874
1875    #[test]
1876    fn test_license_score_requires_license() {
1877        let result = Cli::try_parse_from([
1878            "provenant",
1879            "--json-pp",
1880            "scan.json",
1881            "--license-score",
1882            "70",
1883            "samples",
1884        ]);
1885
1886        assert!(result.is_err());
1887    }
1888
1889    #[test]
1890    fn test_license_url_template_requires_license() {
1891        let result = Cli::try_parse_from([
1892            "provenant",
1893            "--json-pp",
1894            "scan.json",
1895            "--license-url-template",
1896            "https://example.com/licenses/{}/",
1897            "samples",
1898        ]);
1899
1900        assert!(result.is_err());
1901    }
1902
1903    #[test]
1904    fn test_parses_license_score_and_url_template_flags() {
1905        let parsed = Cli::try_parse_from([
1906            "provenant",
1907            "--json-pp",
1908            "scan.json",
1909            "--license",
1910            "--license-score",
1911            "70",
1912            "--license-url-template",
1913            "https://example.com/licenses/{}/",
1914            "samples",
1915        ])
1916        .expect("cli parse should succeed");
1917
1918        assert_eq!(parsed.license_score, 70);
1919        assert_eq!(
1920            parsed.license_url_template,
1921            "https://example.com/licenses/{}/"
1922        );
1923    }
1924
1925    #[test]
1926    fn test_rejects_license_score_above_range() {
1927        let result = Cli::try_parse_from([
1928            "provenant",
1929            "--json-pp",
1930            "scan.json",
1931            "--license",
1932            "--license-score",
1933            "101",
1934            "samples",
1935        ]);
1936
1937        assert!(result.is_err());
1938    }
1939
1940    #[test]
1941    fn test_license_references_requires_license() {
1942        let result = Cli::try_parse_from([
1943            "provenant",
1944            "--json-pp",
1945            "scan.json",
1946            "--license-references",
1947            "samples",
1948        ]);
1949
1950        assert!(result.is_err());
1951    }
1952
1953    #[test]
1954    fn test_parses_license_references_flag() {
1955        let parsed = Cli::try_parse_from([
1956            "provenant",
1957            "--json-pp",
1958            "scan.json",
1959            "--license",
1960            "--license-references",
1961            "samples",
1962        ])
1963        .expect("cli parse should succeed");
1964
1965        assert!(parsed.license_references);
1966    }
1967
1968    #[test]
1969    fn test_include_text_alias_is_not_supported() {
1970        let result = Cli::try_parse_from([
1971            "provenant",
1972            "--json-pp",
1973            "scan.json",
1974            "--license",
1975            "--include-text",
1976            "samples",
1977        ]);
1978
1979        assert!(result.is_err());
1980    }
1981
1982    #[test]
1983    fn test_parses_short_scan_flags() {
1984        let parsed = Cli::try_parse_from([
1985            "provenant",
1986            "--json-pp",
1987            "scan.json",
1988            "-c",
1989            "-e",
1990            "-u",
1991            "samples",
1992        ])
1993        .expect("cli parse should support short scan flags");
1994
1995        assert!(parsed.copyright);
1996        assert!(parsed.email);
1997        assert!(parsed.url);
1998    }
1999
2000    #[test]
2001    fn test_parses_processes_compat_values_zero_and_minus_one() {
2002        let zero =
2003            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "0", "samples"])
2004                .expect("cli parse should accept processes=0");
2005        assert_eq!(zero.processes, ProcessMode::SequentialWithTimeouts);
2006
2007        let parsed =
2008            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "-1", "samples"])
2009                .expect("cli parse should accept processes=-1");
2010        assert_eq!(parsed.processes, ProcessMode::SequentialWithoutTimeouts);
2011    }
2012
2013    #[test]
2014    fn test_parses_cache_flags() {
2015        let parsed = Cli::try_parse_from([
2016            "provenant",
2017            "--json-pp",
2018            "scan.json",
2019            "--cache-dir",
2020            "/tmp/sc-cache",
2021            "--cache-clear",
2022            "--max-in-memory",
2023            "5000",
2024            "samples",
2025        ])
2026        .expect("cli parse should accept cache flags");
2027
2028        assert_eq!(parsed.cache_dir.as_deref(), Some("/tmp/sc-cache"));
2029        assert!(parsed.cache_clear);
2030        assert!(!parsed.incremental);
2031        assert_eq!(parsed.max_in_memory, MemoryMode::Limit(5000));
2032    }
2033
2034    #[test]
2035    fn test_parses_incremental_flag() {
2036        let parsed = Cli::try_parse_from([
2037            "provenant",
2038            "--json-pp",
2039            "scan.json",
2040            "--incremental",
2041            "samples",
2042        ])
2043        .expect("cli parse should accept incremental flag");
2044
2045        assert!(parsed.incremental);
2046    }
2047
2048    #[test]
2049    fn test_parses_license_cache_control_flags() {
2050        let parsed = Cli::try_parse_from([
2051            "provenant",
2052            "--json-pp",
2053            "scan.json",
2054            "--license",
2055            "--reindex",
2056            "--no-license-index-cache",
2057            "samples",
2058        ])
2059        .expect("cli parse should accept license cache flags");
2060
2061        assert!(parsed.license);
2062        assert!(parsed.reindex);
2063        assert!(parsed.no_license_index_cache);
2064    }
2065
2066    #[test]
2067    fn test_max_in_memory_defaults_and_special_values() {
2068        let default_parsed =
2069            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
2070                .expect("default max-in-memory should parse");
2071        assert_eq!(default_parsed.max_in_memory, MemoryMode::Limit(10000));
2072
2073        let disk_only = Cli::try_parse_from([
2074            "provenant",
2075            "--json-pp",
2076            "scan.json",
2077            "--max-in-memory",
2078            "-1",
2079            "samples",
2080        ])
2081        .expect("-1 should parse");
2082        assert_eq!(disk_only.max_in_memory, MemoryMode::StreamUnlimited);
2083
2084        let unlimited = Cli::try_parse_from([
2085            "provenant",
2086            "--json-pp",
2087            "scan.json",
2088            "--max-in-memory",
2089            "0",
2090            "samples",
2091        ])
2092        .expect("0 should parse");
2093        assert_eq!(unlimited.max_in_memory, MemoryMode::CollectFirst);
2094    }
2095
2096    #[test]
2097    fn test_max_in_memory_rejects_values_below_negative_one() {
2098        let result = Cli::try_parse_from([
2099            "provenant",
2100            "--json-pp",
2101            "scan.json",
2102            "--max-in-memory",
2103            "-2",
2104            "samples",
2105        ]);
2106
2107        assert!(result.is_err());
2108    }
2109
2110    #[test]
2111    fn test_max_depth_default_matches_reference_behavior() {
2112        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
2113            .expect("cli parse should succeed");
2114
2115        assert_eq!(parsed.max_depth, 0);
2116    }
2117}