Skip to main content

provenant/cli/
mod.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4mod run;
5
6pub use run::run;
7
8use clap::{ArgGroup, Args, Parser, Subcommand};
9use serde_json::{Map as JsonMap, Number as JsonNumber, Value as JsonValue};
10use std::ffi::OsString;
11use std::fs;
12#[cfg(test)]
13use std::ops::Deref;
14use std::path::{Path, PathBuf};
15use yaml_serde::Value as YamlValue;
16
17use crate::app::request::{InputMode, OutputTarget, ScanRequest};
18use crate::license_detection::DEFAULT_LICENSEDB_URL_TEMPLATE;
19use crate::output::OutputFormat;
20use crate::scanner::MemoryMode;
21
/// Execution strategy selected by the `--processes` option.
///
/// The command-line integer form is produced by [`ProcessMode::to_i32`] and is
/// also what the [`std::fmt::Display`] implementation prints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessMode {
    /// Run with the given number of workers (integer form: the count itself).
    Parallel(usize),
    /// Integer form `0`.
    SequentialWithTimeouts,
    /// Integer form `-1`.
    SequentialWithoutTimeouts,
}

impl Default for ProcessMode {
    /// Returns the same value the CLI uses when `--processes` is not given.
    fn default() -> Self {
        // Single source of truth: the CLI default and `Default` must never drift apart.
        Self::default_value()
    }
}

impl ProcessMode {
    /// Default for `--processes`: one worker fewer than the available
    /// parallelism, but never fewer than one. Falls back to a single worker
    /// when the available parallelism cannot be determined.
    fn default_value() -> Self {
        let cpus = std::thread::available_parallelism().map_or(1, |n| n.get());
        ProcessMode::Parallel(cpus.saturating_sub(1).max(1))
    }

    /// Converts the mode to its command-line integer form.
    ///
    /// Worker counts that do not fit in an `i32` saturate at `i32::MAX`
    /// instead of wrapping, so a huge count can never be reported as one of
    /// the non-positive sequential codes.
    pub fn to_i32(self) -> i32 {
        match self {
            ProcessMode::Parallel(n) => i32::try_from(n).unwrap_or(i32::MAX),
            ProcessMode::SequentialWithTimeouts => 0,
            ProcessMode::SequentialWithoutTimeouts => -1,
        }
    }
}

impl std::fmt::Display for ProcessMode {
    /// Prints the integer form returned by [`ProcessMode::to_i32`].
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_i32())
    }
}
64
65fn parse_processes(value: &str) -> Result<ProcessMode, String> {
66    let parsed: i32 = value
67        .parse()
68        .map_err(|e| format!("invalid integer for --processes: {e}"))?;
69    if parsed > 0 {
70        Ok(ProcessMode::Parallel(
71            u32::try_from(parsed).unwrap() as usize
72        ))
73    } else if parsed == 0 {
74        Ok(ProcessMode::SequentialWithTimeouts)
75    } else {
76        Ok(ProcessMode::SequentialWithoutTimeouts)
77    }
78}
79
// Troubleshooting note wired in as `after_help` on both the top-level command and `scan`.
const PDF_OXIDE_LOG_HELP: &str = "Troubleshooting PDF parser logs:\n  Provenant suppresses noisy pdf_oxide logs by default.\n  To inspect raw pdf_oxide logs for debugging, rerun with RUST_LOG=pdf_oxide=warn (or =error).";
// One-line description used as `about` on the top-level command.
const CLI_ABOUT: &str = "Independent Rust scanner for ScanCode-compatible workflows. Not affiliated with, endorsed by, or sponsored by ScanCode Toolkit, AboutCode, or nexB Inc.";
// Same wording as `CLI_ABOUT`, split into two paragraphs; used as `long_about`.
const CLI_LONG_ABOUT: &str = "Independent Rust scanner for ScanCode-compatible workflows.\n\nNot affiliated with, endorsed by, or sponsored by ScanCode Toolkit, AboutCode, or nexB Inc.";
83
84fn parse_license_policy_arg(value: &str) -> Result<String, String> {
85    let policy_path = Path::new(value);
86    let metadata = fs::metadata(policy_path).map_err(|err| {
87        format!(
88            "Failed to read license policy file {:?}: {err}",
89            policy_path
90        )
91    })?;
92    if !metadata.is_file() {
93        return Err(format!(
94            "License policy path {:?} is not a regular file",
95            policy_path
96        ));
97    }
98
99    let policy_text = fs::read_to_string(policy_path).map_err(|err| {
100        format!(
101            "Failed to read license policy file {:?}: {err}",
102            policy_path
103        )
104    })?;
105    if policy_text.trim().is_empty() {
106        return Err(format!("License policy file {:?} is empty", policy_path));
107    }
108
109    let policy_value: YamlValue = yaml_serde::from_str(&policy_text).map_err(|err| {
110        format!(
111            "Failed to parse license policy file {:?}: {err}",
112            policy_path
113        )
114    })?;
115    let has_license_policies = policy_value
116        .as_mapping()
117        .and_then(|mapping| mapping.get(YamlValue::String("license_policies".to_string())))
118        .is_some();
119    if !has_license_policies {
120        return Err(format!(
121            "License policy file {:?} is missing a 'license_policies' attribute",
122            policy_path
123        ));
124    }
125
126    Ok(value.to_string())
127}
128
// Top-level command-line definition.
//
// Plain `//` comments are used here on purpose: `///` comments on clap-derived
// items feed the generated help text.
//
// `arg_required_else_help` and `subcommand_required` mean a bare invocation
// prints help and every real invocation must name a subcommand. The inherent
// `Cli::parse` / `Cli::try_parse_from` wrappers insert `scan` where needed
// before clap sees the arguments.
#[derive(Parser, Debug)]
#[command(
    author = "The Provenant contributors",
    version = crate::version::BUILD_VERSION,
    long_version = crate::version::build_long_version(),
    after_help = PDF_OXIDE_LOG_HELP,
    about = CLI_ABOUT,
    long_about = CLI_LONG_ABOUT,
    arg_required_else_help = true,
    subcommand_required = true
)]
pub struct Cli {
    // The selected subcommand together with its arguments.
    #[command(subcommand)]
    pub command: Command,
}
144
// Subcommands accepted by the CLI. The `///` comment on each variant is the
// help text clap shows for that subcommand, so it is left untouched.
// NOTE(review): `ScanArgs` is boxed, presumably to keep this enum small given
// how many fields `ScanArgs` has — confirm.
#[derive(Subcommand, Debug, Clone)]
pub enum Command {
    /// Scan files or existing ScanCode-style JSON inputs.
    Scan(Box<ScanArgs>),
    /// Run the long-lived HTTP service.
    Serve(ServeArgs),
    /// Compare ScanCode and Provenant JSON outputs to review migration-confidence deltas.
    Compare(CompareArgs),
    /// Show attribution notices for embedded license detection data.
    ShowAttribution,
    /// Export the effective built-in license dataset to DIR and exit.
    ExportLicenseDataset(ExportLicenseDatasetArgs),
}
158
// Arguments of the `compare` subcommand. Both JSON inputs are non-optional
// fields; only the artifact directory may be omitted.
#[derive(Args, Debug, Clone)]
pub struct CompareArgs {
    /// Path to an existing ScanCode JSON output file.
    #[arg(long = "scancode-json", value_name = "PATH")]
    pub scancode_json: PathBuf,

    /// Path to an existing Provenant JSON output file.
    #[arg(long = "provenant-json", value_name = "PATH")]
    pub provenant_json: PathBuf,

    /// Directory where comparison artifacts should be written. Defaults to a timestamped directory in the current working directory.
    #[arg(long = "artifact-dir", value_name = "DIR")]
    pub artifact_dir: Option<PathBuf>,
}
173
// Arguments of the `export-license-dataset` subcommand.
#[derive(Args, Debug, Clone)]
pub struct ExportLicenseDatasetArgs {
    // Positional DIR argument: the directory the dataset is exported to
    // (see the help text on `Command::ExportLicenseDataset`).
    #[arg(value_name = "DIR")]
    pub dir: String,
}
179
// Value of `--compat-mode`. `Native` is the default, both for `Default` and
// for the CLI option.
#[derive(clap::ValueEnum, Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum CompatibilityMode {
    #[default]
    Native,
    Scancode,
}

impl CompatibilityMode {
    // Lower-case name of the mode; this is the string recorded under
    // `--compat-mode` in the output header options.
    // NOTE(review): expected to match the value names clap derives for
    // `ValueEnum` — confirm if variants are renamed.
    fn as_str(self) -> &'static str {
        match self {
            Self::Native => "native",
            Self::Scancode => "scancode",
        }
    }
}
195
// Arguments of the `serve` subcommand.
#[derive(Args, Debug, Clone)]
pub struct ServeArgs {
    /// Bind the service shell to HOST:PORT.
    // Kept as an unparsed string here; defaults to "127.0.0.1:8080".
    #[arg(long = "bind", value_name = "ADDR", default_value = "127.0.0.1:8080")]
    pub bind: String,
}
202
// Arguments of the `scan` subcommand.
//
// Maintainer notes use plain `//` comments on purpose: `///` comments on these
// fields are the help text clap shows, so adding or editing them changes the
// program's output.
//
// The `output` group below is required and allows multiple members, so a scan
// must name at least one output option and may name several.
#[derive(Args, Debug, Clone)]
#[command(
    group(
        ArgGroup::new("output")
            .required(true)
            .multiple(true)
            .args([
                "output_json",
                "output_json_pp",
                "output_json_lines",
                "output_yaml",
                "output_debian",
                "output_html",
                "output_spdx_tv",
                "output_spdx_rdf",
                "output_cyclonedx",
                "output_cyclonedx_xml",
                "custom_output"
            ])
    ),
    after_help = PDF_OXIDE_LOG_HELP
)]
pub struct ScanArgs {
    /// File or directory paths to scan
    // Positional and optional at the clap level (`required = false`).
    #[arg(required = false)]
    pub dir_path: Vec<String>,

    // The output options below all set `allow_hyphen_values`, so a FILE value
    // that starts with `-` is accepted.

    /// Write scan output as compact JSON to FILE
    #[arg(long = "json", value_name = "FILE", allow_hyphen_values = true)]
    pub output_json: Option<String>,

    /// Write scan output as pretty-printed JSON to FILE
    #[arg(long = "json-pp", value_name = "FILE", allow_hyphen_values = true)]
    pub output_json_pp: Option<String>,

    /// Write scan output as JSON Lines to FILE
    #[arg(long = "json-lines", value_name = "FILE", allow_hyphen_values = true)]
    pub output_json_lines: Option<String>,

    /// Write scan output as YAML to FILE
    #[arg(long = "yaml", value_name = "FILE", allow_hyphen_values = true)]
    pub output_yaml: Option<String>,

    /// Write scan output in machine-readable Debian copyright format to FILE (requires --license, --copyright, and --license-text)
    #[arg(
        long = "debian",
        value_name = "FILE",
        allow_hyphen_values = true,
        requires_all = ["copyright", "license", "license_text"]
    )]
    pub output_debian: Option<String>,

    /// Write scan output as HTML report to FILE
    #[arg(long = "html", value_name = "FILE", allow_hyphen_values = true)]
    pub output_html: Option<String>,

    /// Write scan output as SPDX tag/value to FILE
    #[arg(long = "spdx-tv", value_name = "FILE", allow_hyphen_values = true)]
    pub output_spdx_tv: Option<String>,

    /// Write scan output as SPDX RDF/XML to FILE
    #[arg(long = "spdx-rdf", value_name = "FILE", allow_hyphen_values = true)]
    pub output_spdx_rdf: Option<String>,

    /// Write scan output as CycloneDX JSON to FILE
    #[arg(long = "cyclonedx", value_name = "FILE", allow_hyphen_values = true)]
    pub output_cyclonedx: Option<String>,

    /// Write scan output as CycloneDX XML to FILE
    #[arg(
        long = "cyclonedx-xml",
        value_name = "FILE",
        allow_hyphen_values = true
    )]
    pub output_cyclonedx_xml: Option<String>,

    // `--custom-output` and `--custom-template` require each other.

    /// Write scan output to FILE formatted with the custom template
    #[arg(
        long = "custom-output",
        value_name = "FILE",
        requires = "custom_template",
        allow_hyphen_values = true
    )]
    pub custom_output: Option<String>,

    /// Use this template FILE with --custom-output
    #[arg(
        long = "custom-template",
        value_name = "FILE",
        requires = "custom_output"
    )]
    pub custom_template: Option<String>,

    /// Maximum recursion depth (0 means no depth limit)
    #[arg(short, long, default_value = "0")]
    pub max_depth: usize,

    // Parsed by `parse_processes`: a positive value is a worker count, `0` and
    // negative values select the two sequential modes. The default comes from
    // `ProcessMode::default_value()`.
    #[arg(short = 'n', long, default_value_t = ProcessMode::default_value(), value_parser = parse_processes, allow_hyphen_values = true)]
    pub processes: ProcessMode,

    // Forwarded as `timeout_seconds` on `ScanRequest`; defaults to 120.0.
    #[arg(long, default_value_t = 120.0)]
    pub timeout: f64,

    // `--quiet` and `--verbose` are mutually exclusive.
    #[arg(short, long, conflicts_with = "verbose")]
    pub quiet: bool,

    #[arg(short, long, conflicts_with = "quiet")]
    pub verbose: bool,

    // `--strip-root` and `--full-root` are mutually exclusive.
    #[arg(long, conflicts_with = "full_root")]
    pub strip_root: bool,

    #[arg(long, conflicts_with = "strip_root")]
    pub full_root: bool,

    /// Exclude patterns (ScanCode-compatible alias: --ignore)
    // Comma-separated values are split into individual patterns.
    #[arg(long = "exclude", visible_alias = "ignore", value_delimiter = ',')]
    pub exclude: Vec<String>,

    /// Include files matching PATTERN. Use `**` when you want recursion across directories.
    #[arg(long, value_delimiter = ',')]
    pub include: Vec<String>,

    /// Read selected scan paths from FILE (or '-' for stdin), relative to the explicit scan root.
    #[arg(long = "paths-file", value_name = "FILE", allow_hyphen_values = true)]
    pub paths_file: Vec<String>,

    #[arg(long = "cache-dir", value_name = "PATH")]
    pub cache_dir: Option<String>,

    #[arg(long = "cache-clear")]
    pub cache_clear: bool,

    #[arg(long = "incremental")]
    pub incremental: bool,

    /// Maximum number of file and directory scan details kept in memory.
    /// Use 0 for unlimited memory or -1 for disk-only spill during the scan.
    // Parsed by `parse_max_in_memory`, which maps -1 to
    // `MemoryMode::StreamUnlimited`, 0 to `MemoryMode::CollectFirst`, and a
    // positive value to `MemoryMode::Limit`.
    #[arg(
        long = "max-in-memory",
        value_name = "INT",
        default_value_t = MemoryMode::Limit(10000),
        value_parser = parse_max_in_memory,
        allow_hyphen_values = true
    )]
    pub max_in_memory: MemoryMode,

    /// Collect file information such as checksums, type hints, and source/script flags.
    #[arg(short = 'i', long)]
    pub info: bool,

    /// Load one or more existing ScanCode-style JSON scans instead of rescanning inputs.
    #[arg(long)]
    pub from_json: bool,

    /// Scan input for application package and dependency manifests, lockfiles and related data
    #[arg(short = 'p', long)]
    pub package: bool,

    /// Select a compatibility bundle for intentional Provenant-vs-ScanCode behavior differences.
    #[arg(
        long = "compat-mode",
        visible_alias = "compat",
        value_enum,
        default_value_t = CompatibilityMode::Native
    )]
    pub compat_mode: CompatibilityMode,

    /// Scan input for installed system package databases (RPM, dpkg, apk, etc.)
    #[arg(long = "system-package")]
    pub system_package: bool,

    /// Scan supported compiled Go and Rust binaries for embedded package metadata.
    #[arg(long = "package-in-compiled")]
    pub package_in_compiled: bool,

    /// Scan for system and application package data and skip license/copyright detection and top-level package creation.
    #[arg(
        long = "package-only",
        conflicts_with_all = ["license", "summary", "package", "system_package"]
    )]
    pub package_only: bool,

    /// Disable package assembly (merging related manifest/lockfiles into packages)
    #[arg(long)]
    pub no_assemble: bool,

    /// Path to a custom license dataset root containing manifest.json, rules/, and licenses/.
    /// If not specified, uses the built-in embedded license index.
    #[arg(
        long = "license-dataset-path",
        value_name = "PATH",
        requires = "license"
    )]
    pub license_dataset_path: Option<String>,

    /// Force rebuild of the license index cache, ignoring any existing cache.
    #[arg(long)]
    pub reindex: bool,

    /// Build the license index in memory for this run without reading or writing persistent cache files.
    #[arg(long = "no-license-index-cache")]
    pub no_license_index_cache: bool,

    /// Include matched text in license detection output
    #[arg(long = "license-text", requires = "license")]
    pub license_text: bool,

    // Requires `--license-text`, which in turn requires `--license`.
    #[arg(long = "license-text-diagnostics", requires = "license_text")]
    pub license_text_diagnostics: bool,

    #[arg(long = "license-diagnostics", requires = "license")]
    pub license_diagnostics: bool,

    #[arg(long = "unknown-licenses", requires = "license")]
    pub unknown_licenses: bool,

    // Restricted to 0..=100 by the value parser; defaults to 0.
    #[arg(
        long = "license-score",
        default_value_t = 0,
        requires = "license",
        value_parser = clap::value_parser!(u8).range(0..=100)
    )]
    pub license_score: u8,

    #[arg(
        long = "license-url-template",
        default_value = DEFAULT_LICENSEDB_URL_TEMPLATE,
        requires = "license"
    )]
    pub license_url_template: String,

    #[arg(long)]
    pub filter_clues: bool,

    #[arg(
        long = "ignore-author",
        value_name = "PATTERN",
        help = "Ignore a file and all its findings if an author matches the regex PATTERN"
    )]
    pub ignore_author: Vec<String>,

    #[arg(
        long = "ignore-copyright-holder",
        value_name = "PATTERN",
        help = "Ignore a file and all its findings if a copyright holder matches the regex PATTERN"
    )]
    pub ignore_copyright_holder: Vec<String>,

    #[arg(long)]
    pub only_findings: bool,

    // Requires `--info`.
    #[arg(long, requires = "info")]
    pub mark_source: bool,

    #[arg(long)]
    pub classify: bool,

    // Requires `--classify`.
    #[arg(long, requires = "classify")]
    pub summary: bool,

    // Requires `--classify`.
    #[arg(long = "license-clarity-score", requires = "classify")]
    pub license_clarity_score: bool,

    #[arg(long = "license-references", requires = "license")]
    pub license_references: bool,

    /// Evaluate file license detections against a YAML license policy file.
    // Validated at parse time by `parse_license_policy_arg`; the stored value
    // is the path as given on the command line.
    #[arg(
        long = "license-policy",
        value_name = "FILE",
        value_parser = parse_license_policy_arg
    )]
    pub license_policy: Option<String>,

    #[arg(long)]
    pub tallies: bool,

    // Requires both `--tallies` and `--classify`.
    #[arg(long = "tallies-key-files", requires_all = ["tallies", "classify"])]
    pub tallies_key_files: bool,

    #[arg(long = "tallies-with-details")]
    pub tallies_with_details: bool,

    // Repeatable; each value is kept as the raw `<facet>=<pattern>` string.
    #[arg(long = "facet", value_name = "<facet>=<pattern>")]
    pub facet: Vec<String>,

    // Requires both `--facet` and `--tallies`.
    #[arg(long = "tallies-by-facet", requires_all = ["facet", "tallies"])]
    pub tallies_by_facet: bool,

    #[arg(long)]
    pub generated: bool,

    /// Scan input for licenses
    #[arg(short = 'l', long)]
    pub license: bool,

    #[arg(short = 'c', long)]
    pub copyright: bool,

    /// Scan input for email addresses
    #[arg(short = 'e', long)]
    pub email: bool,

    /// Report only up to INT emails found in a file. Use 0 for no limit.
    #[arg(long, default_value_t = 50, requires = "email")]
    pub max_email: usize,

    /// Scan input for URLs
    #[arg(short = 'u', long)]
    pub url: bool,

    /// Report only up to INT URLs found in a file. Use 0 for no limit.
    #[arg(long, default_value_t = 50, requires = "url")]
    pub max_url: usize,
}
519
impl Cli {
    /// Parses the process arguments (`std::env::args_os()`).
    ///
    /// The arguments first pass through `rewrite_args_for_default_scan`, so an
    /// invocation without an explicit subcommand can still be treated as
    /// `scan`. This inherent method is what `Cli::parse()` resolves to; the
    /// clap entry point is called explicitly as `Parser::parse_from`.
    pub fn parse() -> Self {
        <Self as Parser>::parse_from(rewrite_args_for_default_scan(std::env::args_os()))
    }

    /// Fallible variant of [`Cli::parse`] for an arbitrary argument list.
    ///
    /// `itr` must include the program name as its first item; it is rewritten
    /// the same way as in [`Cli::parse`] before clap sees it.
    ///
    /// # Errors
    /// Returns the `clap::Error` produced by `Parser::try_parse_from`.
    pub fn try_parse_from<I, T>(itr: I) -> Result<Self, clap::Error>
    where
        I: IntoIterator<Item = T>,
        T: Into<OsString>,
    {
        <Self as Parser>::try_parse_from(rewrite_args_for_default_scan(itr))
    }

    /// Returns the scan arguments when the selected subcommand is `scan`, and
    /// `None` for every other subcommand.
    pub(crate) fn scan_args(&self) -> Option<&ScanArgs> {
        // Variants are listed explicitly (no `_` arm) so that adding a new
        // subcommand forces a decision here at compile time.
        match &self.command {
            Command::Scan(scan_args) => Some(scan_args.as_ref()),
            Command::Serve(_)
            | Command::Compare(_)
            | Command::ShowAttribution
            | Command::ExportLicenseDataset(_) => None,
        }
    }
}
543
/// Test-only convenience: lets tests read `ScanArgs` fields directly through a
/// `Cli` value.
///
/// # Panics
/// Dereferencing panics when the parsed command is not `scan`.
#[cfg(test)]
impl Deref for Cli {
    type Target = ScanArgs;

    fn deref(&self) -> &Self::Target {
        self.scan_args()
            .expect("scan arguments are only available for the scan command")
    }
}
553
/// Makes `scan` the implicit subcommand.
///
/// `itr` is a full argument list including the program name at index 0. The
/// list is returned unchanged when it has no argument after the program name,
/// or when that argument is a known subcommand or a top-level help/version
/// flag. Otherwise `scan` is inserted at index 1 if the argument starts with
/// `-` (an option) or names an existing filesystem path. Anything else is left
/// untouched.
///
/// The existence check uses the raw argument rather than its lossy UTF-8
/// rendering, so existing paths whose names are not valid UTF-8 are recognised
/// as scan inputs too.
fn rewrite_args_for_default_scan<I, T>(itr: I) -> Vec<OsString>
where
    I: IntoIterator<Item = T>,
    T: Into<OsString>,
{
    let mut args: Vec<OsString> = itr.into_iter().map(Into::into).collect();
    if args.len() <= 1 {
        return args;
    }

    let first = &args[1];
    // The lossy text is only used for comparisons against ASCII literals.
    let first_text = first.to_string_lossy();
    let is_explicit_entry_point = matches!(
        first_text.as_ref(),
        "scan"
            | "serve"
            | "compare"
            | "show-attribution"
            | "export-license-dataset"
            | "help"
            | "-h"
            | "--help"
            | "-V"
            | "--version"
    );

    // `&&` / `||` keep the filesystem probe lazy: it only runs for arguments
    // that are neither an explicit entry point nor an option.
    let needs_scan = !is_explicit_entry_point
        && (first_text.starts_with('-') || Path::new(first).exists());

    if needs_scan {
        args.insert(1, OsString::from("scan"));
    }

    args
}
587
588fn parse_max_in_memory(value: &str) -> Result<MemoryMode, String> {
589    let parsed = value
590        .parse::<i64>()
591        .map_err(|_| format!("invalid integer value: {value}"))?;
592    if parsed < -1 {
593        return Err("--max-in-memory must be -1, 0, or a positive integer".to_string());
594    }
595    match parsed {
596        -1 => Ok(MemoryMode::StreamUnlimited),
597        0 => Ok(MemoryMode::CollectFirst),
598        n if n > 0 => Ok(MemoryMode::Limit(usize::try_from(n).unwrap_or(usize::MAX))),
599        _ => Ok(MemoryMode::CollectFirst),
600    }
601}
602
603impl ScanArgs {
604    pub(crate) fn output_targets(&self) -> Vec<OutputTarget> {
605        let mut targets = Vec::new();
606
607        if let Some(file) = &self.output_json {
608            targets.push(OutputTarget {
609                format: OutputFormat::Json,
610                file: file.clone(),
611                custom_template: None,
612            });
613        }
614
615        if let Some(file) = &self.output_json_pp {
616            targets.push(OutputTarget {
617                format: OutputFormat::JsonPretty,
618                file: file.clone(),
619                custom_template: None,
620            });
621        }
622
623        if let Some(file) = &self.output_json_lines {
624            targets.push(OutputTarget {
625                format: OutputFormat::JsonLines,
626                file: file.clone(),
627                custom_template: None,
628            });
629        }
630
631        if let Some(file) = &self.output_yaml {
632            targets.push(OutputTarget {
633                format: OutputFormat::Yaml,
634                file: file.clone(),
635                custom_template: None,
636            });
637        }
638
639        if let Some(file) = &self.output_debian {
640            targets.push(OutputTarget {
641                format: OutputFormat::Debian,
642                file: file.clone(),
643                custom_template: None,
644            });
645        }
646
647        if let Some(file) = &self.output_html {
648            targets.push(OutputTarget {
649                format: OutputFormat::Html,
650                file: file.clone(),
651                custom_template: None,
652            });
653        }
654
655        if let Some(file) = &self.output_spdx_tv {
656            targets.push(OutputTarget {
657                format: OutputFormat::SpdxTv,
658                file: file.clone(),
659                custom_template: None,
660            });
661        }
662
663        if let Some(file) = &self.output_spdx_rdf {
664            targets.push(OutputTarget {
665                format: OutputFormat::SpdxRdf,
666                file: file.clone(),
667                custom_template: None,
668            });
669        }
670
671        if let Some(file) = &self.output_cyclonedx {
672            targets.push(OutputTarget {
673                format: OutputFormat::CycloneDxJson,
674                file: file.clone(),
675                custom_template: None,
676            });
677        }
678
679        if let Some(file) = &self.output_cyclonedx_xml {
680            targets.push(OutputTarget {
681                format: OutputFormat::CycloneDxXml,
682                file: file.clone(),
683                custom_template: None,
684            });
685        }
686
687        if let Some(file) = &self.custom_output {
688            targets.push(OutputTarget {
689                format: OutputFormat::CustomTemplate,
690                file: file.clone(),
691                custom_template: self.custom_template.clone(),
692            });
693        }
694
695        targets
696    }
697
    /// Builds the options map recorded in the output header.
    ///
    /// The map gets an `"input"` array of the scan paths (only when at least
    /// one path was given), followed by one entry per recorded option, keyed
    /// by the option's long flag (for example `"--license"`). Option entries
    /// are sorted by key before insertion.
    ///
    /// NOTE(review): the `push_*` helpers are defined outside this excerpt.
    /// Their names suggest they skip unset or default values — confirm against
    /// their definitions.
    pub(crate) fn output_header_options(&self) -> JsonMap<String, JsonValue> {
        let mut options = JsonMap::new();
        if !self.dir_path.is_empty() {
            options.insert(
                "input".to_string(),
                JsonValue::Array(
                    self.dir_path
                        .iter()
                        .cloned()
                        .map(JsonValue::String)
                        .collect(),
                ),
            );
        }

        // Collected as (flag, value) pairs; the order of the calls below does
        // not matter because the list is sorted by flag before insertion.
        let mut flags = Vec::new();

        push_string_option(&mut flags, "--cache-dir", self.cache_dir.as_ref());
        push_bool_option(&mut flags, "--cache-clear", self.cache_clear);
        push_bool_option(&mut flags, "--classify", self.classify);
        push_string_option(&mut flags, "--custom-output", self.custom_output.as_ref());
        push_string_option(
            &mut flags,
            "--custom-template",
            self.custom_template.as_ref(),
        );
        push_bool_option(&mut flags, "--copyright", self.copyright);
        // The compatibility mode is recorded only when it differs from the default.
        if self.compat_mode != CompatibilityMode::Native {
            flags.push((
                "--compat-mode".to_string(),
                JsonValue::String(self.compat_mode.as_str().to_string()),
            ));
        }
        push_string_option(&mut flags, "--cyclonedx", self.output_cyclonedx.as_ref());
        push_string_option(
            &mut flags,
            "--cyclonedx-xml",
            self.output_cyclonedx_xml.as_ref(),
        );
        push_string_option(&mut flags, "--debian", self.output_debian.as_ref());
        push_bool_option(&mut flags, "--email", self.email);
        push_array_option(&mut flags, "--facet", &self.facet);
        push_bool_option(&mut flags, "--filter-clues", self.filter_clues);
        push_bool_option(&mut flags, "--from-json", self.from_json);
        push_bool_option(&mut flags, "--full-root", self.full_root);
        push_bool_option(&mut flags, "--generated", self.generated);
        push_string_option(&mut flags, "--html", self.output_html.as_ref());
        // The exclude patterns are recorded under the ScanCode-compatible name `--ignore`.
        push_array_option(&mut flags, "--ignore", &self.exclude);
        push_array_option(&mut flags, "--ignore-author", &self.ignore_author);
        push_array_option(
            &mut flags,
            "--ignore-copyright-holder",
            &self.ignore_copyright_holder,
        );
        push_bool_option(&mut flags, "--incremental", self.incremental);
        push_array_option(&mut flags, "--include", &self.include);
        push_bool_option(&mut flags, "--info", self.info);
        push_string_option(&mut flags, "--json", self.output_json.as_ref());
        push_string_option(&mut flags, "--json-lines", self.output_json_lines.as_ref());
        push_string_option(&mut flags, "--json-pp", self.output_json_pp.as_ref());
        push_bool_option(&mut flags, "--license", self.license);
        push_bool_option(
            &mut flags,
            "--license-clarity-score",
            self.license_clarity_score,
        );
        push_bool_option(
            &mut flags,
            "--license-diagnostics",
            self.license_diagnostics,
        );
        push_string_option(
            &mut flags,
            "--license-dataset-path",
            self.license_dataset_path.as_ref(),
        );
        push_string_option(&mut flags, "--license-policy", self.license_policy.as_ref());
        push_bool_option(
            &mut flags,
            "--no-license-index-cache",
            self.no_license_index_cache,
        );
        push_bool_option(&mut flags, "--license-references", self.license_references);
        push_bool_option(&mut flags, "--reindex", self.reindex);
        push_non_default_u8_option(&mut flags, "--license-score", self.license_score, 0);
        push_bool_option(&mut flags, "--license-text", self.license_text);
        push_bool_option(
            &mut flags,
            "--license-text-diagnostics",
            self.license_text_diagnostics,
        );
        push_non_default_string_option(
            &mut flags,
            "--license-url-template",
            &self.license_url_template,
            DEFAULT_LICENSEDB_URL_TEMPLATE,
        );
        push_non_default_usize_option(&mut flags, "--max-depth", self.max_depth, 0);
        // `--max-in-memory` is written back in its CLI integer form; the
        // default (`Limit(10000)`) is omitted. That arm must stay ahead of the
        // general `Limit(n)` arm.
        match self.max_in_memory {
            MemoryMode::Limit(10000) => {}
            MemoryMode::CollectFirst => {
                flags.push(("--max-in-memory".to_string(), JsonValue::Number(0.into())));
            }
            MemoryMode::StreamUnlimited => {
                flags.push((
                    "--max-in-memory".to_string(),
                    JsonValue::Number((-1i64).into()),
                ));
            }
            MemoryMode::Limit(n) => {
                flags.push(("--max-in-memory".to_string(), JsonValue::Number(n.into())));
            }
        }
        // The per-file caps are only considered when their scan is enabled.
        if self.email {
            push_non_default_usize_option(&mut flags, "--max-email", self.max_email, 50);
        }
        if self.url {
            push_non_default_usize_option(&mut flags, "--max-url", self.max_url, 50);
        }
        push_bool_option(&mut flags, "--mark-source", self.mark_source);
        push_bool_option(&mut flags, "--no-assemble", self.no_assemble);
        push_bool_option(&mut flags, "--only-findings", self.only_findings);
        push_bool_option(&mut flags, "--package", self.package);
        push_bool_option(
            &mut flags,
            "--package-in-compiled",
            self.package_in_compiled,
        );
        push_bool_option(&mut flags, "--package-only", self.package_only);
        push_array_option(&mut flags, "--paths-file", &self.paths_file);
        push_non_default_process_mode_option(
            &mut flags,
            "--processes",
            self.processes,
            ProcessMode::default_value(),
        );
        push_bool_option(&mut flags, "--quiet", self.quiet);
        push_string_option(&mut flags, "--spdx-rdf", self.output_spdx_rdf.as_ref());
        push_string_option(&mut flags, "--spdx-tv", self.output_spdx_tv.as_ref());
        push_bool_option(&mut flags, "--strip-root", self.strip_root);
        push_bool_option(&mut flags, "--summary", self.summary);
        push_bool_option(&mut flags, "--system-package", self.system_package);
        push_bool_option(&mut flags, "--tallies", self.tallies);
        push_bool_option(&mut flags, "--tallies-by-facet", self.tallies_by_facet);
        push_bool_option(&mut flags, "--tallies-key-files", self.tallies_key_files);
        push_bool_option(
            &mut flags,
            "--tallies-with-details",
            self.tallies_with_details,
        );
        push_non_default_f64_option(&mut flags, "--timeout", self.timeout, 120.0);
        push_bool_option(&mut flags, "--unknown-licenses", self.unknown_licenses);
        push_bool_option(&mut flags, "--url", self.url);
        push_bool_option(&mut flags, "--verbose", self.verbose);
        push_string_option(&mut flags, "--yaml", self.output_yaml.as_ref());

        // Sort by flag name so the option entries are inserted in a stable order.
        flags.sort_by(|left, right| left.0.cmp(&right.0));
        for (key, value) in flags {
            options.insert(key, value);
        }

        options
    }
861}
862
863impl From<&ScanArgs> for ScanRequest {
864    fn from(cli: &ScanArgs) -> Self {
865        Self {
866            input_paths: cli.dir_path.clone(),
867            input_mode: if cli.from_json {
868                InputMode::FromJson
869            } else {
870                InputMode::Native
871            },
872            output_targets: cli.output_targets(),
873            output_header_options: cli.output_header_options(),
874            progress_mode: if cli.quiet {
875                crate::progress::ProgressMode::Quiet
876            } else if cli.verbose {
877                crate::progress::ProgressMode::Verbose
878            } else {
879                crate::progress::ProgressMode::Default
880            },
881            process_mode: cli.processes,
882            timeout_seconds: cli.timeout,
883            quiet: cli.quiet,
884            verbose: cli.verbose,
885            strip_root: cli.strip_root,
886            full_root: cli.full_root,
887            exclude: cli.exclude.clone(),
888            include: cli.include.clone(),
889            paths_files: cli.paths_file.clone(),
890            respect_process_cache_env: true,
891            cache_dir: cli.cache_dir.clone(),
892            cache_clear: cli.cache_clear,
893            incremental: cli.incremental,
894            max_depth: cli.max_depth,
895            max_in_memory: cli.max_in_memory,
896            info: cli.info,
897            package: cli.package,
898            system_package: cli.system_package,
899            package_in_compiled: cli.package_in_compiled,
900            package_only: cli.package_only,
901            no_assemble: cli.no_assemble,
902            license_dataset_path: cli.license_dataset_path.clone(),
903            reindex: cli.reindex,
904            no_license_index_cache: cli.no_license_index_cache,
905            license_text: cli.license_text,
906            license_text_diagnostics: cli.license_text_diagnostics,
907            license_diagnostics: cli.license_diagnostics,
908            unknown_licenses: cli.unknown_licenses,
909            license_score: cli.license_score,
910            license_url_template: cli.license_url_template.clone(),
911            filter_clues: cli.filter_clues,
912            ignore_author: cli.ignore_author.clone(),
913            ignore_copyright_holder: cli.ignore_copyright_holder.clone(),
914            only_findings: cli.only_findings,
915            mark_source: cli.mark_source,
916            classify: cli.classify,
917            summary: cli.summary,
918            license_clarity_score: cli.license_clarity_score,
919            license_references: cli.license_references,
920            license_policy: cli.license_policy.clone(),
921            tallies: cli.tallies,
922            tallies_key_files: cli.tallies_key_files,
923            tallies_with_details: cli.tallies_with_details,
924            facet: cli.facet.clone(),
925            tallies_by_facet: cli.tallies_by_facet,
926            generated: cli.generated,
927            license: cli.license,
928            copyright: cli.copyright,
929            email: cli.email,
930            max_email: cli.max_email,
931            url: cli.url,
932            max_url: cli.max_url,
933        }
934    }
935}
936
937fn push_bool_option(options: &mut Vec<(String, JsonValue)>, key: &str, enabled: bool) {
938    if enabled {
939        options.push((key.to_string(), JsonValue::Bool(true)));
940    }
941}
942
943fn push_string_option(options: &mut Vec<(String, JsonValue)>, key: &str, value: Option<&String>) {
944    if let Some(value) = value {
945        options.push((key.to_string(), JsonValue::String(value.clone())));
946    }
947}
948
949fn push_non_default_string_option(
950    options: &mut Vec<(String, JsonValue)>,
951    key: &str,
952    value: &str,
953    default: &str,
954) {
955    if value != default {
956        options.push((key.to_string(), JsonValue::String(value.to_string())));
957    }
958}
959
960fn push_array_option(options: &mut Vec<(String, JsonValue)>, key: &str, values: &[String]) {
961    if !values.is_empty() {
962        options.push((
963            key.to_string(),
964            JsonValue::Array(values.iter().cloned().map(JsonValue::String).collect()),
965        ));
966    }
967}
968
969fn push_non_default_usize_option(
970    options: &mut Vec<(String, JsonValue)>,
971    key: &str,
972    value: usize,
973    default: usize,
974) {
975    if value != default {
976        options.push((key.to_string(), JsonValue::Number(value.into())));
977    }
978}
979
980fn push_non_default_u8_option(
981    options: &mut Vec<(String, JsonValue)>,
982    key: &str,
983    value: u8,
984    default: u8,
985) {
986    if value != default {
987        options.push((key.to_string(), JsonValue::Number(value.into())));
988    }
989}
990
991fn push_non_default_process_mode_option(
992    options: &mut Vec<(String, JsonValue)>,
993    key: &str,
994    value: ProcessMode,
995    default: ProcessMode,
996) {
997    if value != default {
998        options.push((key.to_string(), JsonValue::Number(value.to_i32().into())));
999    }
1000}
1001
1002fn push_non_default_f64_option(
1003    options: &mut Vec<(String, JsonValue)>,
1004    key: &str,
1005    value: f64,
1006    default: f64,
1007) {
1008    if (value - default).abs() > f64::EPSILON
1009        && let Some(number) = JsonNumber::from_f64(value)
1010    {
1011        options.push((key.to_string(), JsonValue::Number(number)));
1012    }
1013}
1014
1015#[cfg(test)]
1016mod tests {
1017    use super::*;
1018    use clap::CommandFactory;
1019
1020    fn scan_command() -> clap::Command {
1021        Cli::command()
1022            .find_subcommand("scan")
1023            .expect("scan subcommand should exist")
1024            .clone()
1025    }
1026
1027    #[test]
1028    fn test_requires_at_least_one_output_option() {
1029        let parsed = Cli::try_parse_from(["provenant", "samples"]);
1030        assert!(parsed.is_err());
1031    }
1032
1033    #[test]
1034    fn test_parses_json_pretty_output_option() {
1035        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1036            .expect("cli parse should succeed");
1037
1038        assert_eq!(parsed.output_json_pp.as_deref(), Some("scan.json"));
1039        assert_eq!(parsed.output_targets().len(), 1);
1040        assert_eq!(parsed.output_targets()[0].format, OutputFormat::JsonPretty);
1041    }
1042
1043    #[test]
1044    fn test_explicit_scan_subcommand_parses_scan_flags() {
1045        let parsed = Cli::try_parse_from([
1046            "provenant",
1047            "scan",
1048            "--json-pp",
1049            "scan.json",
1050            "--license",
1051            "samples",
1052        ])
1053        .expect("explicit scan subcommand should parse");
1054
1055        assert!(matches!(parsed.command, Command::Scan(_)));
1056        let scan = parsed.scan_args().expect("scan args should be present");
1057        assert_eq!(scan.output_json_pp.as_deref(), Some("scan.json"));
1058        assert!(scan.license);
1059        assert_eq!(scan.dir_path, vec!["samples"]);
1060    }
1061
1062    #[test]
1063    fn test_parses_compare_subcommand() {
1064        let parsed = Cli::try_parse_from([
1065            "provenant",
1066            "compare",
1067            "--scancode-json",
1068            "scan-a.json",
1069            "--provenant-json",
1070            "scan-b.json",
1071            "--artifact-dir",
1072            "compare-out",
1073        ])
1074        .expect("compare subcommand should parse");
1075
1076        match parsed.command {
1077            Command::Compare(args) => {
1078                assert_eq!(args.scancode_json, PathBuf::from("scan-a.json"));
1079                assert_eq!(args.provenant_json, PathBuf::from("scan-b.json"));
1080                assert_eq!(args.artifact_dir, Some(PathBuf::from("compare-out")));
1081            }
1082            other => panic!("expected compare subcommand, got {other:?}"),
1083        }
1084    }
1085
1086    #[test]
1087    fn test_parses_serve_subcommand() {
1088        let parsed = Cli::try_parse_from(["provenant", "serve", "--bind", "127.0.0.1:9090"])
1089            .expect("serve subcommand should parse");
1090
1091        match parsed.command {
1092            Command::Serve(args) => assert_eq!(args.bind, "127.0.0.1:9090"),
1093            other => panic!("expected serve subcommand, got {other:?}"),
1094        }
1095    }
1096
1097    #[test]
1098    fn test_compare_subcommand_allows_default_artifact_dir() {
1099        let parsed = Cli::try_parse_from([
1100            "provenant",
1101            "compare",
1102            "--scancode-json",
1103            "scan-a.json",
1104            "--provenant-json",
1105            "scan-b.json",
1106        ])
1107        .expect("compare subcommand should allow default artifact dir");
1108
1109        match parsed.command {
1110            Command::Compare(args) => {
1111                assert_eq!(args.scancode_json, PathBuf::from("scan-a.json"));
1112                assert_eq!(args.provenant_json, PathBuf::from("scan-b.json"));
1113                assert!(args.artifact_dir.is_none());
1114            }
1115            other => panic!("expected compare subcommand, got {other:?}"),
1116        }
1117    }
1118
1119    #[test]
1120    fn test_unknown_command_like_token_is_not_rewritten_to_scan() {
1121        let parsed = Cli::try_parse_from([
1122            "provenant",
1123            "future-command",
1124            "--json-pp",
1125            "scan.json",
1126            "samples",
1127        ]);
1128
1129        let error = parsed.expect_err("unknown command-like token should fail");
1130        assert!(
1131            error
1132                .to_string()
1133                .contains("unrecognized subcommand 'future-command'")
1134        );
1135    }
1136
1137    #[test]
1138    fn test_allows_multiple_output_options_in_one_run() {
1139        let parsed = Cli::try_parse_from([
1140            "provenant",
1141            "--json",
1142            "scan.json",
1143            "--html",
1144            "report.html",
1145            "samples",
1146        ])
1147        .expect("cli parse should allow multiple outputs");
1148
1149        assert_eq!(parsed.output_targets().len(), 2);
1150        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Json);
1151        assert_eq!(parsed.output_targets()[1].format, OutputFormat::Html);
1152    }
1153
1154    #[test]
1155    fn test_parses_show_attribution_subcommand() {
1156        let parsed = Cli::try_parse_from(["provenant", "show-attribution"])
1157            .expect("show-attribution subcommand should parse");
1158
1159        assert!(matches!(parsed.command, Command::ShowAttribution));
1160    }
1161
1162    #[test]
1163    fn test_legacy_show_attribution_flag_is_rejected() {
1164        let parsed = Cli::try_parse_from(["provenant", "--show-attribution"]);
1165        assert!(parsed.is_err());
1166    }
1167
1168    #[test]
1169    fn test_export_license_dataset_allows_mode_without_output_file() {
1170        let parsed = Cli::try_parse_from(["provenant", "export-license-dataset", "dataset-out"])
1171            .expect("cli parse should allow export mode without output flags");
1172
1173        match parsed.command {
1174            Command::ExportLicenseDataset(args) => assert_eq!(args.dir, "dataset-out"),
1175            other => panic!("expected export subcommand, got {other:?}"),
1176        }
1177    }
1178
1179    #[test]
1180    fn test_legacy_export_license_dataset_flag_is_rejected() {
1181        let parsed = Cli::try_parse_from(["provenant", "--export-license-dataset", "dataset-out"]);
1182        assert!(parsed.is_err());
1183    }
1184
1185    #[test]
1186    fn test_license_dataset_path_parses_for_license_scans() {
1187        let parsed = Cli::try_parse_from([
1188            "provenant",
1189            "--json-pp",
1190            "scan.json",
1191            "--license",
1192            "--license-dataset-path",
1193            "dataset-root",
1194            "samples",
1195        ])
1196        .expect("cli parse should accept custom license dataset flag");
1197
1198        assert_eq!(parsed.license_dataset_path.as_deref(), Some("dataset-root"));
1199    }
1200
1201    #[test]
1202    fn test_output_header_options_use_scancode_style_keys() {
1203        let parsed = Cli::try_parse_from([
1204            "provenant",
1205            "--json-pp",
1206            "scan.json",
1207            "--license",
1208            "--package",
1209            "--strip-root",
1210            "--paths-file",
1211            "changed-files.txt",
1212            "--ignore",
1213            "*.git*",
1214            "--ignore",
1215            "target/*",
1216            "samples",
1217        ])
1218        .expect("cli parse should succeed");
1219
1220        let options = parsed.output_header_options();
1221
1222        assert_eq!(
1223            options.get("input"),
1224            Some(&JsonValue::Array(vec![JsonValue::String(
1225                "samples".to_string()
1226            )]))
1227        );
1228        assert_eq!(
1229            options.get("--json-pp"),
1230            Some(&JsonValue::String("scan.json".to_string()))
1231        );
1232        assert_eq!(options.get("--license"), Some(&JsonValue::Bool(true)));
1233        assert_eq!(options.get("--package"), Some(&JsonValue::Bool(true)));
1234        assert_eq!(
1235            options.get("--paths-file"),
1236            Some(&JsonValue::Array(vec![JsonValue::String(
1237                "changed-files.txt".to_string()
1238            )]))
1239        );
1240        assert_eq!(options.get("--strip-root"), Some(&JsonValue::Bool(true)));
1241        assert_eq!(
1242            options.get("--ignore"),
1243            Some(&JsonValue::Array(vec![
1244                JsonValue::String("*.git*".to_string()),
1245                JsonValue::String("target/*".to_string()),
1246            ]))
1247        );
1248        assert!(!options.contains_key("--compat-mode"));
1249    }
1250
1251    #[test]
1252    fn test_compat_mode_parses_and_is_recorded_when_non_default() {
1253        let parsed = Cli::try_parse_from([
1254            "provenant",
1255            "--json-pp",
1256            "scan.json",
1257            "--copyright",
1258            "--compat-mode",
1259            "scancode",
1260            "samples",
1261        ])
1262        .expect("cli parse should succeed");
1263
1264        assert_eq!(parsed.compat_mode, CompatibilityMode::Scancode);
1265        let options = parsed.output_header_options();
1266        assert_eq!(
1267            options.get("--compat-mode"),
1268            Some(&JsonValue::String("scancode".to_string()))
1269        );
1270    }
1271
1272    #[test]
1273    fn test_output_header_options_include_license_dataset_path_when_set() {
1274        let parsed = Cli::try_parse_from([
1275            "provenant",
1276            "--json-pp",
1277            "scan.json",
1278            "--license",
1279            "--license-dataset-path",
1280            "dataset-root",
1281            "samples",
1282        ])
1283        .expect("cli parse should accept custom license dataset flag");
1284
1285        let options = parsed.output_header_options();
1286        assert_eq!(
1287            options.get("--license-dataset-path"),
1288            Some(&JsonValue::String("dataset-root".to_string()))
1289        );
1290    }
1291
1292    #[test]
1293    fn test_output_header_options_skip_defaults_and_include_non_defaults() {
1294        let default_options =
1295            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1296                .expect("default cli parse should succeed")
1297                .output_header_options();
1298        assert!(!default_options.contains_key("--timeout"));
1299        assert!(!default_options.contains_key("--processes"));
1300
1301        let custom_options = Cli::try_parse_from([
1302            "provenant",
1303            "--json-pp",
1304            "scan.json",
1305            "--timeout",
1306            "30",
1307            "--processes",
1308            "4",
1309            "samples",
1310        ])
1311        .expect("custom cli parse should succeed")
1312        .output_header_options();
1313
1314        assert_eq!(
1315            custom_options.get("--timeout"),
1316            Some(&JsonValue::Number(
1317                JsonNumber::from_f64(30.0).expect("valid number")
1318            ))
1319        );
1320        assert_eq!(
1321            custom_options.get("--processes"),
1322            Some(&JsonValue::Number(4.into()))
1323        );
1324    }
1325
1326    #[test]
1327    fn test_allows_stdout_dash_as_output_target() {
1328        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "-", "samples"])
1329            .expect("cli parse should allow stdout dash output target");
1330
1331        assert_eq!(parsed.output_json_pp.as_deref(), Some("-"));
1332    }
1333
1334    #[test]
1335    fn test_debian_requires_license_copyright_and_license_text() {
1336        let missing_license_text = Cli::try_parse_from([
1337            "provenant",
1338            "--debian",
1339            "scan.copyright",
1340            "--license",
1341            "--copyright",
1342            "samples",
1343        ]);
1344        assert!(missing_license_text.is_err());
1345
1346        let parsed = Cli::try_parse_from([
1347            "provenant",
1348            "--debian",
1349            "scan.copyright",
1350            "--license",
1351            "--copyright",
1352            "--license-text",
1353            "samples",
1354        ])
1355        .expect("cli parse should accept debian output");
1356
1357        assert_eq!(parsed.output_targets().len(), 1);
1358        assert_eq!(parsed.output_targets()[0].format, OutputFormat::Debian);
1359        assert_eq!(parsed.output_debian.as_deref(), Some("scan.copyright"));
1360    }
1361
1362    #[test]
1363    fn test_debian_help_mentions_required_companion_flags() {
1364        let command = scan_command();
1365        let debian_arg = command
1366            .get_arguments()
1367            .find(|arg| arg.get_long() == Some("debian"))
1368            .expect("debian arg should exist");
1369
1370        let help = debian_arg
1371            .get_help()
1372            .expect("debian arg should have help text")
1373            .to_string();
1374
1375        assert!(help.contains("requires --license, --copyright, and --license-text"));
1376    }
1377
1378    #[test]
1379    fn test_scan_help_mentions_pdf_oxide_rust_log_escape_hatch() {
1380        let help = scan_command().render_help().to_string();
1381
1382        assert!(help.contains("RUST_LOG=pdf_oxide=warn"));
1383        assert!(help.contains("suppresses noisy pdf_oxide logs by default"));
1384    }
1385
1386    #[test]
1387    fn test_root_help_mentions_subcommands() {
1388        let help = Cli::command().render_help().to_string();
1389
1390        assert!(help.contains("scan"));
1391        assert!(help.contains("serve"));
1392        assert!(help.contains("compare"));
1393        assert!(help.contains("show-attribution"));
1394        assert!(help.contains("export-license-dataset"));
1395    }
1396
1397    #[test]
1398    fn test_root_help_mentions_non_affiliation() {
1399        let help = Cli::command().render_help().to_string();
1400
1401        assert!(help.contains("Not affiliated with, endorsed by, or sponsored by"));
1402        assert!(help.contains("ScanCode Toolkit"));
1403    }
1404
1405    #[test]
1406    fn test_parses_license_policy_flag() {
1407        let temp = tempfile::tempdir().expect("temp dir");
1408        let policy_path = temp.path().join("policy.yml");
1409        std::fs::write(&policy_path, "license_policies: []\n").expect("policy written");
1410
1411        let parsed = Cli::try_parse_from([
1412            "provenant",
1413            "--json-pp",
1414            "scan.json",
1415            "--license-policy",
1416            policy_path.to_str().expect("utf8 path"),
1417            "samples",
1418        ])
1419        .expect("cli parse should accept license-policy");
1420
1421        assert_eq!(
1422            parsed.license_policy.as_deref(),
1423            Some(policy_path.to_str().expect("utf8 path"))
1424        );
1425    }
1426
1427    #[test]
1428    fn test_rejects_invalid_license_policy_flag_value() {
1429        let temp = tempfile::tempdir().expect("temp dir");
1430        let policy_path = temp.path().join("policy.yml");
1431        std::fs::write(&policy_path, "not_license_policies: []\n").expect("policy written");
1432
1433        let parsed = Cli::try_parse_from([
1434            "provenant",
1435            "--json-pp",
1436            "scan.json",
1437            "--license-policy",
1438            policy_path.to_str().expect("utf8 path"),
1439            "samples",
1440        ]);
1441
1442        assert!(parsed.is_err());
1443    }
1444
1445    #[test]
1446    fn test_custom_template_and_output_must_be_paired() {
1447        let missing_template =
1448            Cli::try_parse_from(["provenant", "--custom-output", "result.txt", "samples"]);
1449        assert!(missing_template.is_err());
1450
1451        let missing_output =
1452            Cli::try_parse_from(["provenant", "--custom-template", "tpl.tera", "samples"]);
1453        assert!(missing_output.is_err());
1454    }
1455
1456    #[test]
1457    fn test_parses_processes_and_timeout_options() {
1458        let parsed = Cli::try_parse_from([
1459            "provenant",
1460            "--json-pp",
1461            "scan.json",
1462            "-n",
1463            "4",
1464            "--timeout",
1465            "30",
1466            "samples",
1467        ])
1468        .expect("cli parse should succeed");
1469
1470        assert_eq!(parsed.processes, ProcessMode::Parallel(4));
1471        assert_eq!(parsed.timeout, 30.0);
1472    }
1473
1474    #[test]
1475    fn test_strip_root_conflicts_with_full_root() {
1476        let parsed = Cli::try_parse_from([
1477            "provenant",
1478            "--json-pp",
1479            "scan.json",
1480            "--strip-root",
1481            "--full-root",
1482            "samples",
1483        ]);
1484        assert!(parsed.is_err());
1485    }
1486
1487    #[test]
1488    fn test_parses_include_and_only_findings_and_filter_clues() {
1489        let parsed = Cli::try_parse_from([
1490            "provenant",
1491            "--json-pp",
1492            "scan.json",
1493            "--include",
1494            "src/**,Cargo.toml",
1495            "--only-findings",
1496            "--filter-clues",
1497            "samples",
1498        ])
1499        .expect("cli parse should succeed");
1500
1501        assert_eq!(parsed.include, vec!["src/**", "Cargo.toml"]);
1502        assert!(parsed.only_findings);
1503        assert!(parsed.filter_clues);
1504    }
1505
1506    #[test]
1507    fn test_parses_repeated_paths_file_flags_including_stdin_dash() {
1508        let parsed = Cli::try_parse_from([
1509            "provenant",
1510            "--json-pp",
1511            "scan.json",
1512            "--paths-file",
1513            "changed-files.txt",
1514            "--paths-file",
1515            "-",
1516            "samples",
1517        ])
1518        .expect("cli parse should accept repeated --paths-file flags");
1519
1520        assert_eq!(parsed.paths_file, vec!["changed-files.txt", "-"]);
1521    }
1522
1523    #[test]
1524    fn test_parses_ignore_author_and_holder_filters() {
1525        let parsed = Cli::try_parse_from([
1526            "provenant",
1527            "--json-pp",
1528            "scan.json",
1529            "--ignore-author",
1530            "Jane.*",
1531            "--ignore-author",
1532            ".*Bot$",
1533            "--ignore-copyright-holder",
1534            "Example Corp",
1535            "samples",
1536        ])
1537        .expect("cli parse should succeed");
1538
1539        assert_eq!(parsed.ignore_author, vec!["Jane.*", ".*Bot$"]);
1540        assert_eq!(parsed.ignore_copyright_holder, vec!["Example Corp"]);
1541    }
1542
1543    #[test]
1544    fn test_parses_ignore_alias_for_exclude_patterns() {
1545        let parsed = Cli::try_parse_from([
1546            "provenant",
1547            "--json-pp",
1548            "scan.json",
1549            "--ignore",
1550            "*.git*,target/*",
1551            "samples",
1552        ])
1553        .expect("cli parse should accept --ignore alias");
1554
1555        assert_eq!(parsed.exclude, vec!["*.git*", "target/*"]);
1556    }
1557
1558    #[test]
1559    fn test_quiet_conflicts_with_verbose() {
1560        let parsed = Cli::try_parse_from([
1561            "provenant",
1562            "--json-pp",
1563            "scan.json",
1564            "--quiet",
1565            "--verbose",
1566            "samples",
1567        ]);
1568        assert!(parsed.is_err());
1569    }
1570
1571    #[test]
1572    fn test_parses_from_json_and_mark_source() {
1573        let parsed = Cli::try_parse_from([
1574            "provenant",
1575            "--json-pp",
1576            "scan.json",
1577            "--from-json",
1578            "--info",
1579            "--mark-source",
1580            "sample-scan.json",
1581        ])
1582        .expect("cli parse should succeed");
1583
1584        assert!(parsed.from_json);
1585        assert!(parsed.info);
1586        assert_eq!(parsed.dir_path, vec!["sample-scan.json"]);
1587        assert!(parsed.mark_source);
1588    }
1589
1590    #[test]
1591    fn test_mark_source_requires_info() {
1592        let parsed = Cli::try_parse_from([
1593            "provenant",
1594            "--json-pp",
1595            "scan.json",
1596            "--mark-source",
1597            "samples",
1598        ]);
1599
1600        assert!(parsed.is_err());
1601    }
1602
1603    #[test]
1604    fn test_parses_classify_facet_and_tallies_by_facet() {
1605        let parsed = Cli::try_parse_from([
1606            "provenant",
1607            "--json-pp",
1608            "scan.json",
1609            "--classify",
1610            "--tallies",
1611            "--facet",
1612            "dev=*.c",
1613            "--facet",
1614            "tests=*/tests/*",
1615            "--tallies-by-facet",
1616            "samples",
1617        ])
1618        .expect("cli parse should succeed");
1619
1620        assert!(parsed.classify);
1621        assert!(parsed.tallies);
1622        assert_eq!(parsed.facet, vec!["dev=*.c", "tests=*/tests/*"]);
1623        assert!(parsed.tallies_by_facet);
1624    }
1625
1626    #[test]
1627    fn test_tallies_by_facet_requires_facet_definitions() {
1628        let parsed = Cli::try_parse_from([
1629            "provenant",
1630            "--json-pp",
1631            "scan.json",
1632            "--tallies-by-facet",
1633            "samples",
1634        ]);
1635
1636        assert!(parsed.is_err());
1637    }
1638
1639    #[test]
1640    fn test_summary_requires_classify() {
1641        let parsed = Cli::try_parse_from([
1642            "provenant",
1643            "--json-pp",
1644            "scan.json",
1645            "--summary",
1646            "samples",
1647        ]);
1648
1649        assert!(parsed.is_err());
1650    }
1651
1652    #[test]
1653    fn test_tallies_key_files_requires_tallies_and_classify() {
1654        let parsed = Cli::try_parse_from([
1655            "provenant",
1656            "--json-pp",
1657            "scan.json",
1658            "--tallies-key-files",
1659            "samples",
1660        ]);
1661
1662        assert!(parsed.is_err());
1663    }
1664
1665    #[test]
1666    fn test_parses_summary_tallies_and_generated_flags() {
1667        let parsed = Cli::try_parse_from([
1668            "provenant",
1669            "--json-pp",
1670            "scan.json",
1671            "--classify",
1672            "--summary",
1673            "--license-clarity-score",
1674            "--tallies",
1675            "--tallies-key-files",
1676            "--tallies-with-details",
1677            "--generated",
1678            "samples",
1679        ])
1680        .expect("cli parse should succeed");
1681
1682        assert!(parsed.classify);
1683        assert!(parsed.summary);
1684        assert!(parsed.license_clarity_score);
1685        assert!(parsed.tallies);
1686        assert!(parsed.tallies_key_files);
1687        assert!(parsed.tallies_with_details);
1688        assert!(parsed.generated);
1689    }
1690
1691    #[test]
1692    fn test_parses_copyright_flag() {
1693        let parsed = Cli::try_parse_from([
1694            "provenant",
1695            "--json-pp",
1696            "scan.json",
1697            "--copyright",
1698            "samples",
1699        ])
1700        .expect("cli parse should succeed");
1701
1702        assert!(parsed.copyright);
1703    }
1704
1705    #[test]
1706    fn test_package_flag_defaults_to_disabled() {
1707        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
1708            .expect("cli parse should succeed");
1709
1710        assert!(!parsed.package);
1711    }
1712
1713    #[test]
1714    fn test_parses_system_package_flag() {
1715        let parsed = Cli::try_parse_from([
1716            "provenant",
1717            "--json-pp",
1718            "scan.json",
1719            "--system-package",
1720            "samples",
1721        ])
1722        .expect("cli parse should succeed");
1723
1724        assert!(parsed.system_package);
1725    }
1726
1727    #[test]
1728    fn test_parses_package_in_compiled_flag() {
1729        let parsed = Cli::try_parse_from([
1730            "provenant",
1731            "--json-pp",
1732            "scan.json",
1733            "--package-in-compiled",
1734            "samples",
1735        ])
1736        .expect("cli parse should succeed");
1737
1738        assert!(parsed.package_in_compiled);
1739    }
1740
1741    #[test]
1742    fn test_parses_package_only_flag() {
1743        let parsed = Cli::try_parse_from([
1744            "provenant",
1745            "--json-pp",
1746            "scan.json",
1747            "--package-only",
1748            "samples",
1749        ])
1750        .expect("cli parse should succeed");
1751
1752        assert!(parsed.package_only);
1753    }
1754
1755    #[test]
1756    fn test_package_only_conflicts_with_upstream_incompatible_flags() {
1757        let with_license = Cli::try_parse_from([
1758            "provenant",
1759            "--json-pp",
1760            "scan.json",
1761            "--package-only",
1762            "--license",
1763            "samples",
1764        ]);
1765        assert!(with_license.is_err());
1766
1767        let with_package = Cli::try_parse_from([
1768            "provenant",
1769            "--json-pp",
1770            "scan.json",
1771            "--package-only",
1772            "--package",
1773            "samples",
1774        ]);
1775        assert!(with_package.is_err());
1776    }
1777
1778    #[test]
1779    fn test_parses_package_flag() {
1780        let parsed = Cli::try_parse_from([
1781            "provenant",
1782            "--json-pp",
1783            "scan.json",
1784            "--package",
1785            "samples",
1786        ])
1787        .expect("cli parse should succeed");
1788
1789        assert!(parsed.package);
1790    }
1791
1792    #[test]
1793    fn test_package_short_flag() {
1794        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-p", "samples"])
1795            .expect("cli parse should succeed");
1796
1797        assert!(parsed.package);
1798    }
1799
1800    #[test]
1801    fn test_parses_license_flag() {
1802        let parsed = Cli::try_parse_from([
1803            "provenant",
1804            "--json-pp",
1805            "scan.json",
1806            "--license",
1807            "samples",
1808        ])
1809        .expect("cli parse should succeed");
1810
1811        assert!(parsed.license);
1812    }
1813
1814    #[test]
1815    fn test_license_short_flag() {
1816        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-l", "samples"])
1817            .expect("cli parse should succeed");
1818
1819        assert!(parsed.license);
1820    }
1821
1822    #[test]
1823    fn test_license_text_requires_license() {
1824        let result = Cli::try_parse_from([
1825            "provenant",
1826            "--json-pp",
1827            "scan.json",
1828            "--license-text",
1829            "samples",
1830        ]);
1831        assert!(result.is_err());
1832    }
1833
1834    #[test]
1835    fn test_include_text_is_rejected() {
1836        let result = Cli::try_parse_from([
1837            "provenant",
1838            "--json-pp",
1839            "scan.json",
1840            "--license",
1841            "--include-text",
1842            "samples",
1843        ]);
1844
1845        assert!(result.is_err());
1846    }
1847
1848    #[test]
1849    fn test_license_text_diagnostics_requires_license_text() {
1850        let result = Cli::try_parse_from([
1851            "provenant",
1852            "--json-pp",
1853            "scan.json",
1854            "--license",
1855            "--license-text-diagnostics",
1856            "samples",
1857        ]);
1858
1859        assert!(result.is_err());
1860    }
1861
1862    #[test]
1863    fn test_parses_license_text_and_diagnostics_flags() {
1864        let parsed = Cli::try_parse_from([
1865            "provenant",
1866            "--json-pp",
1867            "scan.json",
1868            "--license",
1869            "--license-text",
1870            "--license-text-diagnostics",
1871            "--license-diagnostics",
1872            "--unknown-licenses",
1873            "samples",
1874        ])
1875        .expect("cli parse should succeed");
1876
1877        assert!(parsed.license_text);
1878        assert!(parsed.license_text_diagnostics);
1879        assert!(parsed.license_diagnostics);
1880        assert!(parsed.unknown_licenses);
1881        assert_eq!(parsed.license_score, 0);
1882        assert_eq!(parsed.license_url_template, DEFAULT_LICENSEDB_URL_TEMPLATE);
1883    }
1884
1885    #[test]
1886    fn test_license_score_requires_license() {
1887        let result = Cli::try_parse_from([
1888            "provenant",
1889            "--json-pp",
1890            "scan.json",
1891            "--license-score",
1892            "70",
1893            "samples",
1894        ]);
1895
1896        assert!(result.is_err());
1897    }
1898
1899    #[test]
1900    fn test_license_url_template_requires_license() {
1901        let result = Cli::try_parse_from([
1902            "provenant",
1903            "--json-pp",
1904            "scan.json",
1905            "--license-url-template",
1906            "https://example.com/licenses/{}/",
1907            "samples",
1908        ]);
1909
1910        assert!(result.is_err());
1911    }
1912
1913    #[test]
1914    fn test_parses_license_score_and_url_template_flags() {
1915        let parsed = Cli::try_parse_from([
1916            "provenant",
1917            "--json-pp",
1918            "scan.json",
1919            "--license",
1920            "--license-score",
1921            "70",
1922            "--license-url-template",
1923            "https://example.com/licenses/{}/",
1924            "samples",
1925        ])
1926        .expect("cli parse should succeed");
1927
1928        assert_eq!(parsed.license_score, 70);
1929        assert_eq!(
1930            parsed.license_url_template,
1931            "https://example.com/licenses/{}/"
1932        );
1933    }
1934
1935    #[test]
1936    fn test_rejects_license_score_above_range() {
1937        let result = Cli::try_parse_from([
1938            "provenant",
1939            "--json-pp",
1940            "scan.json",
1941            "--license",
1942            "--license-score",
1943            "101",
1944            "samples",
1945        ]);
1946
1947        assert!(result.is_err());
1948    }
1949
1950    #[test]
1951    fn test_license_references_requires_license() {
1952        let result = Cli::try_parse_from([
1953            "provenant",
1954            "--json-pp",
1955            "scan.json",
1956            "--license-references",
1957            "samples",
1958        ]);
1959
1960        assert!(result.is_err());
1961    }
1962
1963    #[test]
1964    fn test_parses_license_references_flag() {
1965        let parsed = Cli::try_parse_from([
1966            "provenant",
1967            "--json-pp",
1968            "scan.json",
1969            "--license",
1970            "--license-references",
1971            "samples",
1972        ])
1973        .expect("cli parse should succeed");
1974
1975        assert!(parsed.license_references);
1976    }
1977
1978    #[test]
1979    fn test_include_text_alias_is_not_supported() {
1980        let result = Cli::try_parse_from([
1981            "provenant",
1982            "--json-pp",
1983            "scan.json",
1984            "--license",
1985            "--include-text",
1986            "samples",
1987        ]);
1988
1989        assert!(result.is_err());
1990    }
1991
1992    #[test]
1993    fn test_parses_short_scan_flags() {
1994        let parsed = Cli::try_parse_from([
1995            "provenant",
1996            "--json-pp",
1997            "scan.json",
1998            "-c",
1999            "-e",
2000            "-u",
2001            "samples",
2002        ])
2003        .expect("cli parse should support short scan flags");
2004
2005        assert!(parsed.copyright);
2006        assert!(parsed.email);
2007        assert!(parsed.url);
2008    }
2009
2010    #[test]
2011    fn test_parses_processes_compat_values_zero_and_minus_one() {
2012        let zero =
2013            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "0", "samples"])
2014                .expect("cli parse should accept processes=0");
2015        assert_eq!(zero.processes, ProcessMode::SequentialWithTimeouts);
2016
2017        let parsed =
2018            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "-n", "-1", "samples"])
2019                .expect("cli parse should accept processes=-1");
2020        assert_eq!(parsed.processes, ProcessMode::SequentialWithoutTimeouts);
2021    }
2022
2023    #[test]
2024    fn test_parses_cache_flags() {
2025        let parsed = Cli::try_parse_from([
2026            "provenant",
2027            "--json-pp",
2028            "scan.json",
2029            "--cache-dir",
2030            "/tmp/sc-cache",
2031            "--cache-clear",
2032            "--max-in-memory",
2033            "5000",
2034            "samples",
2035        ])
2036        .expect("cli parse should accept cache flags");
2037
2038        assert_eq!(parsed.cache_dir.as_deref(), Some("/tmp/sc-cache"));
2039        assert!(parsed.cache_clear);
2040        assert!(!parsed.incremental);
2041        assert_eq!(parsed.max_in_memory, MemoryMode::Limit(5000));
2042    }
2043
2044    #[test]
2045    fn test_parses_incremental_flag() {
2046        let parsed = Cli::try_parse_from([
2047            "provenant",
2048            "--json-pp",
2049            "scan.json",
2050            "--incremental",
2051            "samples",
2052        ])
2053        .expect("cli parse should accept incremental flag");
2054
2055        assert!(parsed.incremental);
2056    }
2057
2058    #[test]
2059    fn test_parses_license_cache_control_flags() {
2060        let parsed = Cli::try_parse_from([
2061            "provenant",
2062            "--json-pp",
2063            "scan.json",
2064            "--license",
2065            "--reindex",
2066            "--no-license-index-cache",
2067            "samples",
2068        ])
2069        .expect("cli parse should accept license cache flags");
2070
2071        assert!(parsed.license);
2072        assert!(parsed.reindex);
2073        assert!(parsed.no_license_index_cache);
2074    }
2075
2076    #[test]
2077    fn test_max_in_memory_defaults_and_special_values() {
2078        let default_parsed =
2079            Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
2080                .expect("default max-in-memory should parse");
2081        assert_eq!(default_parsed.max_in_memory, MemoryMode::Limit(10000));
2082
2083        let disk_only = Cli::try_parse_from([
2084            "provenant",
2085            "--json-pp",
2086            "scan.json",
2087            "--max-in-memory",
2088            "-1",
2089            "samples",
2090        ])
2091        .expect("-1 should parse");
2092        assert_eq!(disk_only.max_in_memory, MemoryMode::StreamUnlimited);
2093
2094        let unlimited = Cli::try_parse_from([
2095            "provenant",
2096            "--json-pp",
2097            "scan.json",
2098            "--max-in-memory",
2099            "0",
2100            "samples",
2101        ])
2102        .expect("0 should parse");
2103        assert_eq!(unlimited.max_in_memory, MemoryMode::CollectFirst);
2104    }
2105
2106    #[test]
2107    fn test_max_in_memory_rejects_values_below_negative_one() {
2108        let result = Cli::try_parse_from([
2109            "provenant",
2110            "--json-pp",
2111            "scan.json",
2112            "--max-in-memory",
2113            "-2",
2114            "samples",
2115        ]);
2116
2117        assert!(result.is_err());
2118    }
2119
2120    #[test]
2121    fn test_max_depth_default_matches_reference_behavior() {
2122        let parsed = Cli::try_parse_from(["provenant", "--json-pp", "scan.json", "samples"])
2123            .expect("cli parse should succeed");
2124
2125        assert_eq!(parsed.max_depth, 0);
2126    }
2127}