use std::ffi::OsStr;
use std::io::Write;
use std::path::{Path, PathBuf};
use clap::Parser;
use ignore::overrides::OverrideBuilder;
use ignore::{Walk, WalkBuilder};
use crate::path_security::{
read_capped, write_in_dir, PathPolicy, PathSecurityError, DEFAULT_MAX_FILE_SIZE,
};
use crate::report::{
build_sarif, json_block, sarif_to_markdown, text_block, Finding, OutputFormat, ReportError,
};
use crate::validate::validate;
/// Command-line arguments accepted by `simdutf8-cli`.
// NOTE: the field doc comments below are picked up by clap's derive macro and
// shown as per-option help text in `--help`. The command-level description is
// still governed by the explicit `about` / `long_about = None` attributes.
#[derive(Debug, Parser)]
#[command(name = "simdutf8-cli", version, about, long_about = None)]
pub struct Args {
    /// Files or directories to check; `-` reads standard input, and standard input is also read when no path is given
    #[arg(value_name = "PATH")]
    pub files: Vec<PathBuf>,

    /// Glob of paths to skip while walking a directory (may be given more than once)
    #[arg(long, value_name = "GLOB")]
    pub exclude: Vec<String>,

    /// Do not apply .gitignore, .ignore, global/exclude git rules or parent-directory ignore files while walking a directory
    #[arg(long)]
    pub no_ignore: bool,

    /// Include hidden files and directories while walking a directory
    #[arg(long)]
    pub hidden: bool,

    /// Base directory that file inputs are checked against by the path policy
    #[arg(long, value_name = "DIR")]
    pub base_dir: Option<PathBuf>,

    /// Size cap in bytes applied to each file that is read and to standard input
    #[arg(long, value_name = "BYTES", default_value_t = DEFAULT_MAX_FILE_SIZE)]
    pub max_size: u64,

    /// Disallow symbolic links in the path policy used to read files
    #[arg(long)]
    pub no_follow_symlinks: bool,

    /// Format of the results printed to standard output
    #[arg(long, value_enum, default_value_t = OutputFormat::Text)]
    pub format: OutputFormat,

    /// Do not print results to standard output
    #[arg(short, long)]
    pub quiet: bool,

    /// Directory in which `report.sarif` and `report.md` are written
    #[arg(long, value_name = "DIR", default_value = ".")]
    pub output_dir: PathBuf,

    /// Do not write the `report.sarif` / `report.md` files
    #[arg(long)]
    pub no_report: bool,
}
/// Executes one invocation of the CLI.
///
/// Inputs named by `args` are gathered and checked first. The results are then
/// printed to `out` (unless `--quiet`) and written as report files (unless
/// `--no-report`). Diagnostics go to `err`.
///
/// # Returns
///
/// The process exit code: `2` if any error was recorded, otherwise `1` if any
/// input was invalid, otherwise `0`.
///
/// # Errors
///
/// Propagates I/O errors raised while writing to `out` or `err`.
pub fn run<O: Write, E: Write>(args: &Args, out: &mut O, err: &mut E) -> std::io::Result<u8> {
    let policy = build_policy(args);
    let mut state = RunState::default();
    state.collect_inputs(args, &policy, err)?;

    let print_results = !args.quiet;
    if print_results {
        state.emit_stdout(args, out, err)?;
    }

    let write_report_files = !args.no_report;
    if write_report_files {
        state.emit_reports(args, err)?;
    }

    Ok(state.exit_code())
}
/// Translates the CLI flags into the [`PathPolicy`] used for every file read.
///
/// The size cap and symlink permission are always set; a base directory is
/// only configured when `--base-dir` was supplied.
fn build_policy(args: &Args) -> PathPolicy {
    let unrestricted = PathPolicy::new()
        .max_file_size(args.max_size)
        .allow_symlinks(!args.no_follow_symlinks);

    match &args.base_dir {
        Some(root) => unrestricted.base_dir(root.clone()),
        None => unrestricted,
    }
}
/// Reports whether `path` currently resolves to a directory.
///
/// Any failure to query the filesystem (missing path, permission problem, ...)
/// is treated as "not a directory".
fn is_dir(path: &Path) -> bool {
    path.is_dir()
}
/// Builds the directory walker for `dir` according to the CLI flags.
///
/// Every `--exclude` glob is registered as an override prefixed with `!`
/// (the `ignore` crate's override syntax for "skip matches"). The ignore-file
/// switches are all driven by `--no-ignore`, hidden entries are skipped unless
/// `--hidden` is set, and symbolic links are never followed by the walker.
///
/// # Errors
///
/// Returns the `ignore` error produced when an exclude glob cannot be added or
/// the override set cannot be built.
fn build_walker(dir: &Path, args: &Args) -> Result<Walk, ignore::Error> {
    let mut exclusions = OverrideBuilder::new(dir);
    for glob in &args.exclude {
        let negated = format!("!{glob}");
        exclusions.add(&negated)?;
    }
    let exclusions = exclusions.build()?;

    let honor_ignore_files = !args.no_ignore;
    let skip_hidden = !args.hidden;

    let mut walk = WalkBuilder::new(dir);
    walk.overrides(exclusions);
    walk.hidden(skip_hidden);
    walk.git_ignore(honor_ignore_files);
    walk.git_global(honor_ignore_files);
    walk.git_exclude(honor_ignore_files);
    walk.ignore(honor_ignore_files);
    walk.parents(honor_ignore_files);
    walk.require_git(false);
    walk.follow_links(false);
    Ok(walk.build())
}
/// Reads standard input through `read_capped`, passing `limit` as the cap.
///
/// # Errors
///
/// Returns whatever [`PathSecurityError`] `read_capped` reports.
fn read_stdin(limit: u64) -> Result<Vec<u8>, PathSecurityError> {
    read_capped(std::io::stdin().lock(), limit)
}
/// Renders `findings` as the text that will be printed to standard output.
///
/// Text and JSON use their block renderers unchanged. SARIF output always gets
/// a trailing newline appended; Markdown output (derived from the SARIF
/// document) gets one only if it does not already end with a newline.
///
/// # Errors
///
/// Returns the [`ReportError`] raised while building the SARIF document or
/// converting it to Markdown.
fn render_stdout(
    format: OutputFormat,
    findings: &[Finding],
) -> std::result::Result<String, ReportError> {
    let rendered = match format {
        OutputFormat::Text => text_block(findings),
        OutputFormat::Json => json_block(findings),
        OutputFormat::Sarif => {
            let mut document = build_sarif(findings)?;
            document.push('\n');
            document
        }
        OutputFormat::Markdown => {
            let document = build_sarif(findings)?;
            let mut page = sarif_to_markdown(&document)?;
            let newline_terminated = page.ends_with('\n');
            if !newline_terminated {
                page.push('\n');
            }
            page
        }
    };
    Ok(rendered)
}
/// Writes `report.sarif` and `report.md` for `findings` into `output_dir`.
///
/// Both documents are generated before anything is written. The SARIF file is
/// written first, then the Markdown file.
///
/// # Errors
///
/// Generation errors are returned as-is. A failed write is reported as
/// `ReportError::Sarif` or `ReportError::Markdown` (matching the file that
/// failed), carrying the write error's message.
fn write_reports(output_dir: &Path, findings: &[Finding]) -> std::result::Result<(), ReportError> {
    let sarif_doc = build_sarif(findings)?;
    let markdown_doc = sarif_to_markdown(&sarif_doc)?;

    if let Err(failure) = write_in_dir(output_dir, "report.sarif", sarif_doc.as_bytes()) {
        return Err(ReportError::Sarif(failure.to_string()));
    }
    if let Err(failure) = write_in_dir(output_dir, "report.md", markdown_doc.as_bytes()) {
        return Err(ReportError::Markdown(failure.to_string()));
    }
    Ok(())
}
/// Mutable bookkeeping accumulated over one CLI run.
///
/// Starts out (via `Default`) with both flags `false` and no findings.
#[derive(Default)]
struct RunState {
/// Set once any checked input fails validation; yields exit code 1 when no error occurred.
any_invalid: bool,
/// Set once any input could not be read, a directory walk failed, or output rendering/writing
/// failed; yields exit code 2.
any_error: bool,
/// One entry per input that was successfully read and validated, in processing order.
findings: Vec<Finding>,
}
impl RunState {
    /// Reads and checks every input named on the command line.
    ///
    /// With no paths at all, standard input is the single input. Otherwise each
    /// path is handled in order: `-` means standard input, a directory is
    /// walked, and anything else is read as a file through `policy`.
    ///
    /// # Errors
    ///
    /// Only fails when a diagnostic cannot be written to `err`.
    fn collect_inputs<E: Write>(
        &mut self,
        args: &Args,
        policy: &PathPolicy,
        err: &mut E,
    ) -> std::io::Result<()> {
        if args.files.is_empty() {
            return self.record("<stdin>", read_stdin(args.max_size), err);
        }

        let stdin_marker = OsStr::new("-");
        for input in &args.files {
            if input.as_os_str() == stdin_marker {
                self.record("<stdin>", read_stdin(args.max_size), err)?;
                continue;
            }
            if is_dir(input) {
                self.walk_dir(input, args, policy, err)?;
                continue;
            }
            let label = input.display().to_string();
            self.record(&label, policy.read(input), err)?;
        }
        Ok(())
    }

    /// Walks `dir` and checks every regular file the walker yields.
    ///
    /// A walker that cannot be built, or an entry that cannot be visited, is
    /// reported on `err` and marks the run as errored; the walk itself is not
    /// aborted by a bad entry.
    ///
    /// # Errors
    ///
    /// Only fails when a diagnostic cannot be written to `err`.
    fn walk_dir<E: Write>(
        &mut self,
        dir: &Path,
        args: &Args,
        policy: &PathPolicy,
        err: &mut E,
    ) -> std::io::Result<()> {
        let walker = match build_walker(dir, args) {
            Err(error) => {
                self.any_error = true;
                writeln!(err, "error: {}: {error}", dir.display())?;
                return Ok(());
            }
            Ok(walker) => walker,
        };

        for step in walker {
            let entry = match step {
                Ok(entry) => entry,
                Err(error) => {
                    self.any_error = true;
                    writeln!(err, "error: walking {}: {error}", dir.display())?;
                    continue;
                }
            };

            // Directories, symlinks and entries without a known type are skipped.
            let is_regular_file = entry.file_type().is_some_and(|kind| kind.is_file());
            if !is_regular_file {
                continue;
            }

            let path = entry.path();
            let label = path.display().to_string();
            self.record(&label, policy.read(path), err)?;
        }
        Ok(())
    }

    /// Prints the findings to `out` in the format selected by `args.format`.
    ///
    /// If rendering fails, the failure is reported on `err` and the run is
    /// marked as errored instead of anything being written to `out`.
    ///
    /// # Errors
    ///
    /// Fails when writing to `out` (or the diagnostic to `err`) fails.
    fn emit_stdout<O: Write, E: Write>(
        &mut self,
        args: &Args,
        out: &mut O,
        err: &mut E,
    ) -> std::io::Result<()> {
        let rendered = match render_stdout(args.format, &self.findings) {
            Ok(rendered) => rendered,
            Err(report_error) => {
                self.any_error = true;
                return writeln!(err, "error: {report_error}");
            }
        };
        write!(out, "{rendered}")
    }

    /// Writes the report files into `args.output_dir`.
    ///
    /// Nothing is written when there are no findings. A failure to produce or
    /// write the reports is reported on `err` and marks the run as errored.
    ///
    /// # Errors
    ///
    /// Only fails when the diagnostic cannot be written to `err`.
    fn emit_reports<E: Write>(&mut self, args: &Args, err: &mut E) -> std::io::Result<()> {
        if self.findings.is_empty() {
            return Ok(());
        }

        let Err(report_error) = write_reports(&args.output_dir, &self.findings) else {
            return Ok(());
        };

        self.any_error = true;
        writeln!(
            err,
            "error: writing reports to {}: {report_error}",
            args.output_dir.display()
        )
    }

    /// Records the outcome of reading one input labelled `label`.
    ///
    /// Successfully read bytes are passed to `validate` and stored as a
    /// [`Finding`]; an invalid verdict also sets `any_invalid`. A read error is
    /// reported on `err` and sets `any_error`; no finding is stored for it.
    ///
    /// # Errors
    ///
    /// Only fails when the diagnostic cannot be written to `err`.
    fn record<E: Write>(
        &mut self,
        label: &str,
        bytes: Result<Vec<u8>, PathSecurityError>,
        err: &mut E,
    ) -> std::io::Result<()> {
        let data = match bytes {
            Ok(data) => data,
            Err(error) => {
                self.any_error = true;
                return writeln!(err, "error: {label}: {error}");
            }
        };

        let verdict = validate(&data);
        self.any_invalid |= !verdict.is_valid();
        self.findings.push(Finding {
            label: label.to_owned(),
            validity: verdict,
        });
        Ok(())
    }

    /// Maps the accumulated flags to the process exit code.
    ///
    /// Errors take precedence: `2` if any error occurred, else `1` if any
    /// input was invalid, else `0`.
    fn exit_code(&self) -> u8 {
        match (self.any_error, self.any_invalid) {
            (true, _) => 2,
            (false, true) => 1,
            (false, false) => 0,
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a fresh temporary directory containing a single file `name`
    /// filled with `bytes`. The directory guard is returned alongside the path
    /// so the caller controls how long the directory is kept.
    fn temp_file(name: &str, bytes: &[u8]) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(name);
        std::fs::write(&path, bytes).unwrap();
        (dir, path)
    }

    /// Parses `items` as CLI arguments, always adding `--no-report` so tests
    /// never write report files.
    fn args_from(items: &[&str]) -> Args {
        let fixed = ["simdutf8-cli", "--no-report"];
        let argv: Vec<&str> = fixed.iter().chain(items).copied().collect();
        Args::try_parse_from(argv).expect("args should parse")
    }

    /// Runs the CLI with in-memory sinks and returns
    /// `(exit code, captured stdout, captured stderr)`.
    fn run_captured(args: &Args) -> (u8, Vec<u8>, Vec<u8>) {
        let mut out = Vec::new();
        let mut err = Vec::new();
        let code = run(args, &mut out, &mut err).unwrap();
        (code, out, err)
    }

    #[test]
    fn reports_valid_file_with_exit_zero() {
        let (_dir, path) = temp_file("ok.txt", "héllo".as_bytes());
        let (code, out, _err) = run_captured(&args_from(&[path.to_str().unwrap()]));
        assert_eq!(code, 0);
        assert!(String::from_utf8_lossy(&out).contains("OK"));
    }

    #[test]
    fn reports_invalid_file_with_exit_one() {
        let (_dir, path) = temp_file("bad.bin", b"a\xFFb");
        let (code, out, _err) = run_captured(&args_from(&[path.to_str().unwrap()]));
        assert_eq!(code, 1);
        assert!(String::from_utf8_lossy(&out).contains("FAIL"));
    }

    #[test]
    fn json_format_emits_an_array() {
        let (_dir, path) = temp_file("ok.txt", b"hi");
        let args = args_from(&["--format", "json", path.to_str().unwrap()]);
        let (code, out, _err) = run_captured(&args);
        assert_eq!(code, 0);
        let s = String::from_utf8(out).unwrap();
        assert!(s.trim_start().starts_with('['), "got: {s}");
        assert!(s.contains(r#""valid":true"#), "got: {s}");
    }

    #[test]
    fn quiet_suppresses_stdout() {
        let (_dir, path) = temp_file("ok.txt", b"hi");
        let (code, out, _err) = run_captured(&args_from(&["-q", path.to_str().unwrap()]));
        assert_eq!(code, 0);
        assert!(out.is_empty(), "expected no output, got: {out:?}");
    }

    #[test]
    fn missing_file_yields_exit_two() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("does-not-exist");
        let (code, _out, err) = run_captured(&args_from(&[missing.to_str().unwrap()]));
        assert_eq!(code, 2);
        assert!(String::from_utf8_lossy(&err).contains("error"));
    }

    #[test]
    fn base_dir_blocks_files_outside_it() {
        let base = tempfile::tempdir().unwrap();
        let (_outside_dir, outside) = temp_file("secret.txt", b"data");
        let args = args_from(&[
            "--base-dir",
            base.path().to_str().unwrap(),
            outside.to_str().unwrap(),
        ]);
        let (code, _out, _err) = run_captured(&args);
        assert_eq!(code, 2);
    }
}