1use std::ffi::OsStr;
18use std::io::Write;
19use std::path::{Path, PathBuf};
20
21use clap::Parser;
22use ignore::overrides::OverrideBuilder;
23use ignore::{Walk, WalkBuilder};
24
25use crate::path_security::{
26 read_capped, write_in_dir, PathPolicy, PathSecurityError, DEFAULT_MAX_FILE_SIZE,
27};
28use crate::report::{
29 build_sarif, json_block, sarif_to_markdown, text_block, Finding, OutputFormat, ReportError,
30};
31use crate::validate::validate;
32
33#[derive(Debug, Parser)]
35#[command(name = "simdutf8-cli", version, about, long_about = None)]
36pub struct Args {
37 #[arg(value_name = "PATH")]
40 pub files: Vec<PathBuf>,
41
42 #[arg(long, value_name = "GLOB")]
45 pub exclude: Vec<String>,
46
47 #[arg(long)]
50 pub no_ignore: bool,
51
52 #[arg(long)]
55 pub hidden: bool,
56
57 #[arg(long, value_name = "DIR")]
59 pub base_dir: Option<PathBuf>,
60
61 #[arg(long, value_name = "BYTES", default_value_t = DEFAULT_MAX_FILE_SIZE)]
63 pub max_size: u64,
64
65 #[arg(long)]
67 pub no_follow_symlinks: bool,
68
69 #[arg(long, value_enum, default_value_t = OutputFormat::Text)]
71 pub format: OutputFormat,
72
73 #[arg(short, long)]
75 pub quiet: bool,
76
77 #[arg(long, value_name = "DIR", default_value = ".")]
79 pub output_dir: PathBuf,
80
81 #[arg(long)]
83 pub no_report: bool,
84}
85
86pub fn run<O: Write, E: Write>(args: &Args, out: &mut O, err: &mut E) -> std::io::Result<u8> {
94 let policy = build_policy(args);
95 let mut state = RunState::default();
96
97 state.collect_inputs(args, &policy, err)?; if !args.quiet {
99 state.emit_stdout(args, out, err)?; }
101 if !args.no_report {
102 state.emit_reports(args, err)?; }
104
105 Ok(state.exit_code())
106}
107
108fn build_policy(args: &Args) -> PathPolicy {
110 let mut policy = PathPolicy::new()
111 .max_file_size(args.max_size)
112 .allow_symlinks(!args.no_follow_symlinks);
113 if let Some(base) = &args.base_dir {
114 policy = policy.base_dir(base.clone());
115 }
116 policy
117}
118
/// Reports whether `path` names a directory.
///
/// Any failure to query the metadata (for example a missing path) counts as
/// "not a directory".
fn is_dir(path: &Path) -> bool {
    match std::fs::metadata(path) {
        Ok(info) => info.is_dir(),
        Err(_) => false,
    }
}
124
125fn build_walker(dir: &Path, args: &Args) -> Result<Walk, ignore::Error> {
129 let mut overrides = OverrideBuilder::new(dir);
130 for pattern in &args.exclude {
131 overrides.add(&format!("!{pattern}"))?;
133 }
134 let respect_ignores = !args.no_ignore;
135
136 let mut builder = WalkBuilder::new(dir);
137 builder
138 .overrides(overrides.build()?)
139 .hidden(!args.hidden)
140 .git_ignore(respect_ignores)
141 .git_global(respect_ignores)
142 .git_exclude(respect_ignores)
143 .ignore(respect_ignores)
144 .parents(respect_ignores)
145 .require_git(false)
146 .follow_links(false);
147 Ok(builder.build())
148}
149
150fn read_stdin(limit: u64) -> Result<Vec<u8>, PathSecurityError> {
152 let stdin = std::io::stdin();
153 read_capped(stdin.lock(), limit)
154}
155
156fn render_stdout(
159 format: OutputFormat,
160 findings: &[Finding],
161) -> std::result::Result<String, ReportError> {
162 match format {
163 OutputFormat::Text => Ok(text_block(findings)),
164 OutputFormat::Json => Ok(json_block(findings)),
165 OutputFormat::Sarif => {
166 let mut sarif = build_sarif(findings)?;
167 sarif.push('\n');
168 Ok(sarif)
169 },
170 OutputFormat::Markdown => {
171 let sarif = build_sarif(findings)?;
172 let mut markdown = sarif_to_markdown(&sarif)?;
173 if !markdown.ends_with('\n') {
174 markdown.push('\n');
175 }
176 Ok(markdown)
177 },
178 }
179}
180
181fn write_reports(output_dir: &Path, findings: &[Finding]) -> std::result::Result<(), ReportError> {
184 let sarif = build_sarif(findings)?;
185 let markdown = sarif_to_markdown(&sarif)?;
186 write_in_dir(output_dir, "report.sarif", sarif.as_bytes())
187 .map_err(|error| ReportError::Sarif(error.to_string()))?;
188 write_in_dir(output_dir, "report.md", markdown.as_bytes())
189 .map_err(|error| ReportError::Markdown(error.to_string()))?;
190 Ok(())
191}
192
/// Bookkeeping accumulated over a single `run` invocation.
///
/// Starts out (via `Default`) with both flags cleared and no findings.
#[derive(Default)]
struct RunState {
    /// Set once any successfully read input produced a verdict that is not valid.
    any_invalid: bool,
    /// Set once any input could not be read, a directory walk failed, or
    /// rendering/writing output failed. Takes precedence in the exit code.
    any_error: bool,
    /// One entry per input that was read successfully, in processing order.
    findings: Vec<Finding>,
}
200
201impl RunState {
202 fn collect_inputs<E: Write>(
204 &mut self,
205 args: &Args,
206 policy: &PathPolicy,
207 err: &mut E,
208 ) -> std::io::Result<()> {
209 if args.files.is_empty() {
210 return self.record("<stdin>", read_stdin(args.max_size), err);
211 }
212 for file in &args.files {
213 if file.as_os_str() == OsStr::new("-") {
214 self.record("<stdin>", read_stdin(args.max_size), err)?;
215 } else if is_dir(file) {
216 self.walk_dir(file, args, policy, err)?;
217 } else {
218 let label = file.display().to_string();
222 self.record(&label, policy.read(file), err)?;
223 }
224 }
225 Ok(())
226 }
227
228 fn walk_dir<E: Write>(
232 &mut self,
233 dir: &Path,
234 args: &Args,
235 policy: &PathPolicy,
236 err: &mut E,
237 ) -> std::io::Result<()> {
238 let walker = match build_walker(dir, args) {
239 Ok(walker) => walker,
240 Err(error) => {
241 self.any_error = true;
242 writeln!(err, "error: {}: {error}", dir.display())?;
243 return Ok(());
244 },
245 };
246 for entry in walker {
247 match entry {
248 Ok(entry) if entry.file_type().is_some_and(|ft| ft.is_file()) => {
249 let path = entry.path();
250 let label = path.display().to_string();
251 self.record(&label, policy.read(path), err)?;
252 },
253 Ok(_) => {}, Err(error) => {
255 self.any_error = true;
256 writeln!(err, "error: walking {}: {error}", dir.display())?;
257 },
258 }
259 }
260 Ok(())
261 }
262
263 fn emit_stdout<O: Write, E: Write>(
265 &mut self,
266 args: &Args,
267 out: &mut O,
268 err: &mut E,
269 ) -> std::io::Result<()> {
270 match render_stdout(args.format, &self.findings) {
271 Ok(rendered) => write!(out, "{rendered}"),
272 Err(report_error) => {
273 self.any_error = true;
274 writeln!(err, "error: {report_error}")
275 },
276 }
277 }
278
279 fn emit_reports<E: Write>(&mut self, args: &Args, err: &mut E) -> std::io::Result<()> {
282 if self.findings.is_empty() {
283 return Ok(());
284 }
285 if let Err(report_error) = write_reports(&args.output_dir, &self.findings) {
286 self.any_error = true;
287 writeln!(
288 err,
289 "error: writing reports to {}: {report_error}",
290 args.output_dir.display()
291 )?;
292 }
293 Ok(())
294 }
295
296 fn record<E: Write>(
299 &mut self,
300 label: &str,
301 bytes: Result<Vec<u8>, PathSecurityError>,
302 err: &mut E,
303 ) -> std::io::Result<()> {
304 match bytes {
305 Ok(bytes) => {
306 let verdict = validate(&bytes);
307 if !verdict.is_valid() {
308 self.any_invalid = true;
309 }
310 self.findings.push(Finding {
311 label: label.to_owned(),
312 validity: verdict,
313 });
314 },
315 Err(error) => {
316 self.any_error = true;
317 writeln!(err, "error: {label}: {error}")?;
318 },
319 }
320 Ok(())
321 }
322
323 fn exit_code(&self) -> u8 {
326 if self.any_error {
327 2
328 } else {
329 u8::from(self.any_invalid)
330 }
331 }
332}
333
#[cfg(test)]
mod tests {
    use std::io::Write as _;

    use super::*;

    /// Creates a fresh temporary directory holding one file `name` with the
    /// given contents. The returned `TempDir` must be kept alive for as long
    /// as the path is used.
    fn temp_file(name: &str, bytes: &[u8]) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join(name);
        std::fs::File::create(&path)
            .and_then(|mut file| file.write_all(bytes))
            .unwrap();
        (dir, path)
    }

    /// Parses `items` as CLI arguments, always prepending `--no-report` so
    /// that the tests never write report files.
    fn args_from(items: &[&str]) -> Args {
        let argv: Vec<&str> = ["simdutf8-cli", "--no-report"]
            .iter()
            .chain(items)
            .copied()
            .collect();
        Args::try_parse_from(argv).expect("args should parse")
    }

    /// Runs `run` against in-memory sinks and returns
    /// `(exit code, captured stdout, captured stderr)`.
    fn run_captured(args: &Args) -> (u8, Vec<u8>, Vec<u8>) {
        let mut out = Vec::new();
        let mut err = Vec::new();
        let code = run(args, &mut out, &mut err).unwrap();
        (code, out, err)
    }

    #[test]
    fn reports_valid_file_with_exit_zero() {
        let (_dir, path) = temp_file("ok.txt", "héllo".as_bytes());
        let (code, out, _err) = run_captured(&args_from(&[path.to_str().unwrap()]));
        assert_eq!(code, 0);
        assert!(String::from_utf8_lossy(&out).contains("OK"));
    }

    #[test]
    fn reports_invalid_file_with_exit_one() {
        let (_dir, path) = temp_file("bad.bin", b"a\xFFb");
        let (code, out, _err) = run_captured(&args_from(&[path.to_str().unwrap()]));
        assert_eq!(code, 1);
        assert!(String::from_utf8_lossy(&out).contains("FAIL"));
    }

    #[test]
    fn json_format_emits_an_array() {
        let (_dir, path) = temp_file("ok.txt", b"hi");
        let (code, out, _err) =
            run_captured(&args_from(&["--format", "json", path.to_str().unwrap()]));
        assert_eq!(code, 0);
        let s = String::from_utf8(out).unwrap();
        assert!(s.trim_start().starts_with('['), "got: {s}");
        assert!(s.contains(r#""valid":true"#), "got: {s}");
    }

    #[test]
    fn quiet_suppresses_stdout() {
        let (_dir, path) = temp_file("ok.txt", b"hi");
        let (code, out, _err) = run_captured(&args_from(&["-q", path.to_str().unwrap()]));
        assert_eq!(code, 0);
        assert!(out.is_empty(), "expected no output, got: {out:?}");
    }

    #[test]
    fn missing_file_yields_exit_two() {
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("does-not-exist");
        let (code, _out, err) = run_captured(&args_from(&[missing.to_str().unwrap()]));
        assert_eq!(code, 2);
        assert!(String::from_utf8_lossy(&err).contains("error"));
    }

    #[test]
    fn base_dir_blocks_files_outside_it() {
        let base = tempfile::tempdir().unwrap();
        let (_outside_dir, outside) = temp_file("secret.txt", b"data");
        let args = args_from(&[
            "--base-dir",
            base.path().to_str().unwrap(),
            outside.to_str().unwrap(),
        ]);
        let (code, _out, _err) = run_captured(&args);
        assert_eq!(code, 2);
    }
}