use crate::output::sarif::{self, SarifFinding};
use clap::{Args, Parser, Subcommand, ValueEnum};
use cloakrs_adapters::{
mask_log_reader, scan_csv_str, scan_json_str, scan_log_str, scan_sql_str, scan_text,
AdapterFinding, AdapterKind, AdapterReport, CsvScanOptions, JsonScanOptions, LogLineScanResult,
};
use cloakrs_core::{EntityType, Locale, MaskStrategy, Scanner};
use ignore::WalkBuilder;
use indicatif::{ProgressBar, ProgressStyle};
use rayon::prelude::*;
use serde::Serialize;
use std::collections::BTreeMap;
use std::fs;
use std::io::{self, BufRead, Write};
use std::path::{Path, PathBuf};
use std::process::ExitCode;
// Top-level command-line definition for the `cloakrs` binary.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Debug, Parser)]
#[command(name = "cloakrs")]
#[command(
version,
about = "Detect and mask PII in text, streams, and structured data"
)]
pub struct Cli {
// Options shared by all subcommands, flattened into the top-level parser.
#[command(flatten)]
pub global: GlobalOptions,
// The subcommand to execute.
#[command(subcommand)]
pub command: Command,
}
// Options accepted by every subcommand.
//
// Each argument is declared `global = true`, so it may appear before or after
// the subcommand name. Plain `//` comments are used because clap's derive
// turns `///` doc comments into help text.
#[derive(Debug, Clone, Args, PartialEq)]
pub struct GlobalOptions {
// Comma-separated list of locales; defaults to `universal`.
#[arg(
long,
global = true,
value_delimiter = ',',
default_value = "universal"
)]
pub locale: Vec<LocaleArg>,
// Masking strategy applied to detected values; defaults to `redact`.
#[arg(long, global = true, default_value = "redact")]
pub strategy: StrategyArg,
// Minimum confidence threshold; parsed and range-checked by `parse_confidence`.
#[arg(long, global = true, default_value = "0.5", value_parser = parse_confidence)]
pub min_confidence: f64,
// Format used when rendering reports and summaries; defaults to `text`.
#[arg(long, global = true, default_value = "text")]
pub output_format: OutputFormat,
// Quiet flag: disables progress bars and the stream summary, and makes
// `scan` emit the masked output instead of a findings report.
#[arg(long, global = true)]
pub quiet: bool,
}
// Subcommands of the CLI; each variant carries that subcommand's arguments.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Subcommand, PartialEq)]
pub enum Command {
// Scan a single file.
Scan(ScanArgs),
// Mask lines read from stdin and write them to stdout.
Stream(StreamArgs),
// Audit every file under a directory.
Audit(AuditArgs),
}
// Arguments of the `scan` subcommand.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Args, PartialEq)]
pub struct ScanArgs {
// File to scan (positional).
pub path: PathBuf,
// Input format; `auto` (the default) is resolved from the file extension.
#[arg(long, default_value = "auto")]
pub format: InputFormat,
// Optional file that receives the masked output.
#[arg(long)]
pub output: Option<PathBuf>,
// Comma-separated CSV column selectors; entries that parse as unsigned
// integers are treated as column indexes, all others as column names.
#[arg(long, value_delimiter = ',')]
pub columns: Vec<String>,
// Comma-separated paths forwarded to the JSON adapter's `include_paths`.
#[arg(long, value_delimiter = ',')]
pub include_paths: Vec<String>,
// Comma-separated paths forwarded to the JSON adapter's `exclude_paths`.
#[arg(long, value_delimiter = ',')]
pub exclude_paths: Vec<String>,
}
// Arguments of the `stream` subcommand; it currently has none of its own and
// is driven entirely by the global options.
#[derive(Debug, Clone, Args, PartialEq, Eq)]
pub struct StreamArgs {}
// Arguments of the `audit` subcommand.
//
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the `--help` output.
#[derive(Debug, Clone, Args, PartialEq)]
pub struct AuditArgs {
    // Directory to audit (positional).
    pub path: PathBuf,
    // Whether to descend into subdirectories; defaults to true.
    //
    // A `bool` field normally gets clap's `SetTrue` action, which combined with
    // a `true` default made it impossible to ever turn this off. `Set` with an
    // optional `=value` keeps the bare `--recursive` flag working while also
    // allowing `--recursive=false`. `require_equals` prevents the positional
    // path from being swallowed as the flag's value.
    #[arg(
        long,
        default_value_t = true,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        require_equals = true,
        default_missing_value = "true"
    )]
    pub recursive: bool,
    // Whether gitignore-style rules are honoured while walking; defaults to
    // true. Accepts `--respect-gitignore=false` for the same reason as above.
    #[arg(
        long,
        default_value_t = true,
        action = clap::ArgAction::Set,
        num_args = 0..=1,
        require_equals = true,
        default_missing_value = "true"
    )]
    pub respect_gitignore: bool,
    // Optional size of a dedicated worker thread pool.
    #[arg(long)]
    pub parallel: Option<usize>,
    // Minimum severity a finding must have to be reported; defaults to `low`.
    #[arg(long, default_value = "low")]
    pub severity: SeverityArg,
    // Optional file that receives the rendered report instead of stdout.
    #[arg(long)]
    pub output: Option<PathBuf>,
}
// Input formats understood by the scanner. Both the CLI value names and the
// serialized form use kebab-case.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, ValueEnum)]
#[value(rename_all = "kebab-case")]
#[serde(rename_all = "kebab-case")]
pub enum InputFormat {
// Resolve the format from the file extension (see `detect_format`).
Auto,
// Plain text, scanned line by line.
Text,
// JSON document.
Json,
// CSV data.
Csv,
// Log file.
Log,
// SQL statements.
Sql,
}
// Report formats selectable with `--output-format`.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
#[value(rename_all = "kebab-case")]
pub enum OutputFormat {
// Human-readable text.
Text,
// Pretty-printed JSON.
Json,
// SARIF log; the stream summary falls back to text for this format.
Sarif,
}
// Masking strategies selectable with `--strategy`; converted to the core
// `MaskStrategy` by `mask_strategy`.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
#[value(rename_all = "kebab-case")]
pub enum StrategyArg {
Redact,
// Exposed on the command line as `partial-mask`.
PartialMask,
Hash,
Replace,
// Accepted by the parser but rejected by `mask_strategy` with an error.
Encrypt,
}
// Locales selectable with `--locale`; each maps one-to-one onto a core
// `Locale` through the `From<LocaleArg>` impl.
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
#[value(rename_all = "kebab-case")]
pub enum LocaleArg {
// The default locale, used when nothing more specific is requested.
Universal,
Us,
Nl,
Uk,
De,
Fr,
In,
Br,
Eu,
}
// Severity levels used both as the `--severity` threshold and as the
// classification of findings in audit reports. Serialized in lowercase.
// Ordering is defined by `severity_rank` (Low < Medium < High).
// (`//` comments only: clap's derive would turn `///` into help text.)
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, ValueEnum)]
#[value(rename_all = "kebab-case")]
#[serde(rename_all = "lowercase")]
pub enum SeverityArg {
Low,
Medium,
High,
}
pub fn run(cli: Cli) -> ExitCode {
match cli.command {
Command::Scan(args) => run_scan(&cli.global, &args),
Command::Stream(args) => run_stream(&cli.global, &args),
Command::Audit(args) => run_audit(&cli.global, &args),
}
}
/// Runs the `scan` subcommand and maps its outcome to an exit code.
///
/// Exit codes: `0` when no PII was found, `1` when PII was found, `2` when the
/// scan failed (the error is printed to stderr).
fn run_scan(global: &GlobalOptions, args: &ScanArgs) -> ExitCode {
    match scan_file(global, args) {
        Ok(true) => ExitCode::from(1),
        Ok(false) => ExitCode::SUCCESS,
        Err(message) => {
            eprintln!("cloakrs scan: {message}");
            ExitCode::from(2)
        }
    }
}
/// Runs the `stream` subcommand: masks stdin line by line onto stdout.
///
/// Unless quiet mode is on, a summary is printed to stderr afterwards.
/// Exit codes: `0` when nothing was found, `1` when findings exist, `2` on
/// failure (the error is printed to stderr).
fn run_stream(global: &GlobalOptions, _args: &StreamArgs) -> ExitCode {
    let input = io::stdin();
    let output = io::stdout();
    let summary = match stream_reader(global, input.lock(), output.lock()) {
        Ok(summary) => summary,
        Err(message) => {
            eprintln!("cloakrs stream: {message}");
            return ExitCode::from(2);
        }
    };
    if !global.quiet {
        eprintln!("{}", render_stream_summary(&summary, global.output_format));
    }
    if summary.total_findings == 0 {
        ExitCode::SUCCESS
    } else {
        ExitCode::from(1)
    }
}
/// Runs the `audit` subcommand and maps its outcome to an exit code.
///
/// The rendered report goes to `args.output` when set, otherwise to stdout.
/// Exit codes: `0` when no findings were reported, `1` when findings exist,
/// `2` when auditing, rendering, or writing failed (error printed to stderr).
fn run_audit(global: &GlobalOptions, args: &AuditArgs) -> ExitCode {
    // Collapse every failure into one `Result` so the exit-code mapping lives
    // in a single place below.
    let outcome = audit_directory(global, args).and_then(|report| {
        let rendered = render_audit_report(&report, global.output_format)?;
        match &args.output {
            Some(target) => fs::write(target, rendered.as_bytes())
                .map_err(|error| format!("failed to write {}: {error}", target.display()))?,
            None => write_stdout(&rendered)?,
        }
        Ok(report.total_findings > 0)
    });
    match outcome {
        Ok(true) => ExitCode::from(1),
        Ok(false) => ExitCode::SUCCESS,
        Err(message) => {
            eprintln!("cloakrs audit: {message}");
            ExitCode::from(2)
        }
    }
}
/// Parses a `--min-confidence` value.
///
/// # Errors
///
/// Returns a message when `value` is not a floating-point number, or when the
/// number is not finite or lies outside `0.0..=1.0`.
fn parse_confidence(value: &str) -> Result<f64, String> {
    let confidence = match value.parse::<f64>() {
        Ok(number) => number,
        Err(error) => return Err(format!("invalid confidence value: {error}")),
    };
    let out_of_range = !confidence.is_finite() || !(0.0..=1.0).contains(&confidence);
    if out_of_range {
        return Err(String::from("confidence must be between 0.0 and 1.0"));
    }
    Ok(confidence)
}
/// Scans the file named by `args.path` and emits the result.
///
/// The masked output is written to `args.output` when given. In quiet mode the
/// masked output goes to stdout only if no output file was requested;
/// otherwise a findings report in the selected output format is printed.
///
/// Returns `Ok(true)` when at least one finding location was reported.
///
/// # Errors
///
/// Returns a message when the file cannot be inspected or read, the scanner
/// cannot be built, scanning or rendering fails, or output cannot be written.
fn scan_file(global: &GlobalOptions, args: &ScanArgs) -> Result<bool, String> {
    let source = &args.path;
    let bar = progress_bar(source, global)?;
    let contents = fs::read_to_string(source)
        .map_err(|error| format!("failed to read {}: {error}", source.display()))?;
    if let Some(bar) = bar.as_ref() {
        // The whole file is read in one go, so the bar jumps straight to the end.
        bar.set_position(contents.len() as u64);
    }

    let format = detect_format(source, args.format);
    let scanner = build_scanner(global)?;
    let report = scan_input(&contents, format, &scanner, args)?;
    if let Some(bar) = bar {
        bar.finish_and_clear();
    }

    let found_pii = !report.findings.is_empty();
    if let Some(destination) = args.output.as_ref() {
        fs::write(destination, report.masked_output.as_bytes())
            .map_err(|error| format!("failed to write {}: {error}", destination.display()))?;
    }

    if !global.quiet {
        let rendered = render_scan_report(source, format, &report, global.output_format)?;
        write_stdout(&rendered)?;
    } else if args.output.is_none() {
        write_stdout(&report.masked_output)?;
    }
    Ok(found_pii)
}
/// Masks every line from `reader` into `writer` and summarizes the findings.
///
/// # Errors
///
/// Returns a message when the scanner cannot be built or when the log adapter
/// reports an error.
fn stream_reader<R, W>(
    global: &GlobalOptions,
    reader: R,
    writer: W,
) -> Result<StreamSummary, String>
where
    R: BufRead,
    W: Write,
{
    let scanner = build_scanner(global)?;
    mask_log_reader(reader, writer, &scanner)
        .map(StreamSummary::from_lines)
        .map_err(|error| error.to_string())
}
fn audit_directory(global: &GlobalOptions, args: &AuditArgs) -> Result<AuditReport, String> {
if !args.path.is_dir() {
return Err(format!("{} is not a directory", args.path.display()));
}
let paths = collect_audit_paths(args)?;
let progress = audit_progress(paths.len(), global);
let outcomes = scan_audit_paths(&paths, global, args, progress.as_ref())?;
if let Some(progress) = progress {
progress.finish_and_clear();
}
Ok(AuditReport::from_outcomes(
args.path.display().to_string(),
args.severity,
outcomes,
))
}
/// Walks `args.path` and returns the regular files found, sorted by path.
///
/// The gitignore, global gitignore, and git exclude filters are all toggled by
/// `args.respect_gitignore`; when `args.recursive` is false the walk is
/// limited to a depth of one.
///
/// # Errors
///
/// Returns the message of the first error reported by the directory walker.
fn collect_audit_paths(args: &AuditArgs) -> Result<Vec<PathBuf>, String> {
    let honor_ignores = args.respect_gitignore;
    let mut walker = WalkBuilder::new(&args.path);
    walker
        .git_ignore(honor_ignores)
        .git_global(honor_ignores)
        .git_exclude(honor_ignores);
    if !args.recursive {
        walker.max_depth(Some(1));
    }

    // Collecting into a `Result` stops at the first walker error.
    let mut files = walker
        .build()
        .filter_map(|entry| match entry {
            Ok(entry) => {
                let is_file = entry.file_type().is_some_and(|kind| kind.is_file());
                is_file.then(|| Ok(entry.into_path()))
            }
            Err(error) => Some(Err(error.to_string())),
        })
        .collect::<Result<Vec<_>, String>>()?;
    files.sort();
    Ok(files)
}
/// Scans all `paths` in parallel and returns one outcome per path.
///
/// When `args.parallel` is set, the work runs inside a dedicated rayon pool
/// built with that thread count; otherwise the parallel iterator runs
/// wherever rayon schedules it by default.
///
/// # Errors
///
/// Returns a message when the dedicated thread pool cannot be built.
fn scan_audit_paths(
    paths: &[PathBuf],
    global: &GlobalOptions,
    args: &AuditArgs,
    progress: Option<&ProgressBar>,
) -> Result<Vec<AuditScanOutcome>, String> {
    let scan_all = || -> Vec<AuditScanOutcome> {
        paths
            .par_iter()
            .map(|path| scan_audit_path(path, global, args.severity, progress))
            .collect()
    };
    match args.parallel {
        Some(threads) => {
            let pool = rayon::ThreadPoolBuilder::new()
                .num_threads(threads)
                .build()
                .map_err(|error| error.to_string())?;
            Ok(pool.install(scan_all))
        }
        None => Ok(scan_all()),
    }
}
/// Scans one file for the audit and reports whether it was scanned or skipped.
///
/// Any failure (unreadable, binary or non-UTF-8 content, scanner construction,
/// or an adapter error) yields `AuditScanOutcome::Skipped`. The progress bar,
/// if any, advances by one in either case.
fn scan_audit_path(
    path: &Path,
    global: &GlobalOptions,
    min_severity: SeverityArg,
    progress: Option<&ProgressBar>,
) -> AuditScanOutcome {
    let attempt = read_text_file(path).and_then(|contents| {
        let format = detect_format(path, InputFormat::Auto);
        let scanner = build_scanner(global)?;
        let report = scan_input(&contents, format, &scanner, &audit_scan_args(path, format))?;
        Ok(AuditFileReport::from_adapter_report(
            path,
            format,
            report,
            min_severity,
        ))
    });
    if let Some(bar) = progress {
        bar.inc(1);
    }
    match attempt {
        Ok(file) => AuditScanOutcome::Scanned(file),
        Err(_) => AuditScanOutcome::Skipped,
    }
}
/// Builds the `ScanArgs` used when the audit scans a single file: the given
/// path and format, no output file, and no column or path filters.
fn audit_scan_args(path: &Path, format: InputFormat) -> ScanArgs {
    let no_filters = Vec::<String>::new;
    ScanArgs {
        path: path.to_owned(),
        format,
        output: None,
        columns: no_filters(),
        include_paths: no_filters(),
        exclude_paths: no_filters(),
    }
}
/// Reads `path` as UTF-8 text.
///
/// # Errors
///
/// Returns a message when the file cannot be read, contains a NUL byte
/// (`"binary file"`), or is not valid UTF-8 (`"non-utf8 file"`).
fn read_text_file(path: &Path) -> Result<String, String> {
    let raw = match fs::read(path) {
        Ok(raw) => raw,
        Err(error) => return Err(format!("failed to read {}: {error}", path.display())),
    };
    // A NUL byte is used as the marker for binary content.
    if raw.iter().any(|&byte| byte == 0) {
        return Err(String::from("binary file"));
    }
    match String::from_utf8(raw) {
        Ok(text) => Ok(text),
        Err(_) => Err(String::from("non-utf8 file")),
    }
}
/// Builds a `Scanner` from the default locale registry and the global options
/// (selected locale, mask strategy, minimum confidence).
///
/// # Errors
///
/// Returns a message when the strategy is unsupported, the confidence is
/// rejected by the builder, or the builder fails to produce a scanner.
fn build_scanner(global: &GlobalOptions) -> Result<Scanner, String> {
    let base = cloakrs_locales::default_registry().into_scanner_builder();
    let localized = base.locale(selected_locale(&global.locale));
    let strategy = mask_strategy(global.strategy)?;
    let thresholded = localized
        .strategy(strategy)
        .min_confidence(global.min_confidence)
        .map_err(|error| error.to_string())?;
    thresholded.build().map_err(|error| error.to_string())
}
fn scan_input(
input: &str,
format: InputFormat,
scanner: &Scanner,
args: &ScanArgs,
) -> Result<AdapterReport, String> {
match format {
InputFormat::Auto => scan_input(input, InputFormat::Text, scanner, args),
InputFormat::Text => {
let lines = scan_text(input, scanner).map_err(|error| error.to_string())?;
let findings = lines
.iter()
.filter(|line| !line.findings.is_empty())
.map(|line| AdapterFinding {
location: format!("line:{}", line.line_number),
findings: line.findings.clone(),
masked_value: line.masked_line.clone(),
})
.collect();
let masked_output = masked_plaintext_output(input, &lines);
Ok(AdapterReport {
kind: AdapterKind::Plaintext,
findings,
masked_output,
})
}
InputFormat::Json => {
let options = JsonScanOptions {
include_paths: args.include_paths.clone(),
exclude_paths: args.exclude_paths.clone(),
};
let result =
scan_json_str(input, scanner, &options).map_err(|error| error.to_string())?;
Ok(AdapterReport {
kind: AdapterKind::Json,
findings: result
.strings
.into_iter()
.map(|string| AdapterFinding {
location: string.path,
findings: string.findings,
masked_value: string.masked_value,
})
.collect(),
masked_output: serde_json::to_string_pretty(&result.masked_json)
.map_err(|error| error.to_string())?,
})
}
InputFormat::Csv => {
let (columns, column_indexes) = split_csv_columns(&args.columns);
let options = CsvScanOptions {
has_headers: true,
columns,
column_indexes,
delimiter: b',',
};
let result =
scan_csv_str(input, scanner, &options).map_err(|error| error.to_string())?;
Ok(AdapterReport {
kind: AdapterKind::Csv,
findings: result
.cells
.into_iter()
.map(|cell| AdapterFinding {
location: format!("row:{},column:{}", cell.row_number, cell.column_index),
findings: cell.findings,
masked_value: cell.masked_value,
})
.collect(),
masked_output: result.masked_csv,
})
}
InputFormat::Log => {
let result = scan_log_str(input, scanner).map_err(|error| error.to_string())?;
Ok(AdapterReport {
kind: AdapterKind::LogStream,
findings: result
.lines
.into_iter()
.filter(|line| !line.findings.is_empty())
.map(|line| AdapterFinding {
location: format!("line:{}", line.line_number),
findings: line.findings,
masked_value: line.masked_line,
})
.collect(),
masked_output: result.masked_log,
})
}
InputFormat::Sql => {
let result = scan_sql_str(input, scanner).map_err(|error| error.to_string())?;
Ok(AdapterReport {
kind: AdapterKind::Sql,
findings: result
.values
.into_iter()
.map(|value| AdapterFinding {
location: format!(
"statement:{},value:{}",
value.statement_number, value.value_index
),
findings: value.findings,
masked_value: value.masked_value,
})
.collect(),
masked_output: result.masked_sql,
})
}
}
}
fn render_scan_report(
path: &Path,
format: InputFormat,
report: &AdapterReport,
output_format: OutputFormat,
) -> Result<String, String> {
match output_format {
OutputFormat::Text => Ok(render_text_report(path, format, report)),
OutputFormat::Json => {
serde_json::to_string_pretty(&ScanReportJson::from_report(path, format, report))
.map_err(|error| error.to_string())
}
OutputFormat::Sarif => render_sarif(&sarif_findings_from_report(path, report)),
}
}
/// Renders a human-readable scan report: a three-line header (file, format,
/// finding count) followed by one section per location listing each finding's
/// entity type, text, confidence, and recognizer id.
fn render_text_report(path: &Path, format: InputFormat, report: &AdapterReport) -> String {
    let mut rendered = format!(
        "file: {}\nformat: {format:?}\nfindings: {}\n",
        path.display(),
        total_findings(report)
    );
    for location in &report.findings {
        rendered += &format!("\n{}\n", location.location);
        for finding in &location.findings {
            rendered += &format!(
                " {:?} {} confidence={} recognizer={}\n",
                finding.entity_type, finding.text, finding.confidence, finding.recognizer_id
            );
        }
    }
    rendered
}
/// Renders the stream summary.
///
/// JSON produces pretty-printed JSON (or a fallback message if serialization
/// fails); text and SARIF both produce the plain-text summary without a
/// trailing newline.
fn render_stream_summary(summary: &StreamSummary, output_format: OutputFormat) -> String {
    match output_format {
        OutputFormat::Text | OutputFormat::Sarif => {
            let mut rows = vec![
                String::from("stream summary"),
                format!("lines scanned: {}", summary.lines_scanned),
                format!("lines with findings: {}", summary.lines_with_findings),
                format!("findings: {}", summary.total_findings),
            ];
            rows.extend(
                summary
                    .findings_by_type
                    .iter()
                    .map(|(entity_type, count)| format!("{entity_type}: {count}")),
            );
            rows.join("\n")
        }
        OutputFormat::Json => match serde_json::to_string_pretty(summary) {
            Ok(json) => json,
            Err(error) => format!(
                "stream summary serialization failed: {error}; findings={}",
                summary.total_findings
            ),
        },
    }
}
/// JSON shape of a single-file scan report; borrows the findings from the
/// underlying `AdapterReport`.
#[derive(Debug, Serialize)]
struct ScanReportJson<'a> {
/// Display form of the scanned path.
file: String,
/// Input format the file was scanned as.
format: InputFormat,
/// Adapter kind recorded in the report.
adapter: AdapterKind,
/// Total number of findings across all locations.
total_findings: usize,
/// Finding counts keyed by the `Debug` name of the entity type.
findings_by_type: BTreeMap<String, usize>,
/// Findings grouped by location.
findings: &'a [AdapterFinding],
}
impl<'a> ScanReportJson<'a> {
    /// Builds the JSON view of `report` for the file at `path`, computing the
    /// total and per-type finding counts.
    fn from_report(path: &Path, format: InputFormat, report: &'a AdapterReport) -> Self {
        let file = path.display().to_string();
        let total_findings = total_findings(report);
        let findings_by_type = findings_by_type(report);
        Self {
            file,
            format,
            adapter: report.kind,
            total_findings,
            findings_by_type,
            findings: &report.findings,
        }
    }
}
/// Aggregate statistics for one `stream` run.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
struct StreamSummary {
/// Number of line results returned by the log adapter.
lines_scanned: usize,
/// Number of lines that had at least one finding.
lines_with_findings: usize,
/// Total number of findings over all lines.
total_findings: usize,
/// Finding counts keyed by the `Debug` name of the entity type.
findings_by_type: BTreeMap<String, usize>,
}
impl StreamSummary {
    /// Aggregates per-line scan results into a summary: line count, lines with
    /// findings, total findings, and findings per entity type.
    fn from_lines(lines: Vec<LogLineScanResult>) -> Self {
        let mut summary = Self {
            lines_scanned: lines.len(),
            lines_with_findings: 0,
            total_findings: 0,
            findings_by_type: BTreeMap::new(),
        };
        for line in lines {
            let hits = line.findings.len();
            if hits > 0 {
                summary.lines_with_findings += 1;
            }
            summary.total_findings += hits;
            for finding in line.findings {
                let key = format!("{:?}", finding.entity_type);
                *summary.findings_by_type.entry(key).or_default() += 1;
            }
        }
        summary
    }
}
/// Result of attempting to scan one file during an audit.
#[derive(Debug, Clone)]
enum AuditScanOutcome {
/// The file was read and scanned; carries its (severity-filtered) report.
Scanned(AuditFileReport),
/// The file could not be read or scanned and was left out.
Skipped,
}
/// Aggregated result of auditing a directory.
#[derive(Debug, Clone, Serialize)]
struct AuditReport {
/// Display form of the audited directory.
root: String,
/// Severity threshold that was requested for the audit.
minimum_severity: SeverityArg,
/// Number of files that were scanned.
files_scanned: usize,
/// Number of files that were skipped.
files_skipped: usize,
/// Total findings across all listed files.
total_findings: usize,
/// Finding counts keyed by entity type name, summed over all listed files.
findings_by_type: BTreeMap<String, usize>,
/// Highest severity among the listed files (`Low` when there are none).
severity: SeverityArg,
/// Per-file reports for files with at least one finding, sorted by path.
files: Vec<AuditFileReport>,
}
impl AuditReport {
    /// Aggregates per-file outcomes into a directory-level report.
    ///
    /// Scanned and skipped files are counted; only files with at least one
    /// finding are kept in `files` (sorted by path) and contribute to the
    /// totals, the per-type counts, and the overall severity.
    fn from_outcomes(
        root: String,
        minimum_severity: SeverityArg,
        outcomes: Vec<AuditScanOutcome>,
    ) -> Self {
        let outcome_count = outcomes.len();
        let mut files: Vec<_> = outcomes
            .into_iter()
            .filter_map(|outcome| match outcome {
                AuditScanOutcome::Scanned(file) => Some(file),
                AuditScanOutcome::Skipped => None,
            })
            .collect();
        let files_scanned = files.len();
        let files_skipped = outcome_count - files_scanned;

        // Files without findings count as scanned but are not listed.
        files.retain(|file| file.total_findings > 0);
        files.sort_by(|left, right| left.path.cmp(&right.path));

        let mut total_findings = 0;
        let mut severity = SeverityArg::Low;
        let mut findings_by_type = BTreeMap::new();
        for file in &files {
            total_findings += file.total_findings;
            severity = max_severity(severity, file.severity);
            for (entity_type, count) in &file.findings_by_type {
                *findings_by_type.entry(entity_type.clone()).or_insert(0) += count;
            }
        }

        Self {
            root,
            minimum_severity,
            files_scanned,
            files_skipped,
            total_findings,
            findings_by_type,
            severity,
            files,
        }
    }
}
/// Audit result for a single file, after severity filtering.
#[derive(Debug, Clone, Serialize)]
struct AuditFileReport {
/// Display form of the file path.
path: String,
/// Input format the file was scanned as.
format: InputFormat,
/// Adapter kind recorded in the underlying report.
adapter: AdapterKind,
/// Number of findings that passed the severity filter.
total_findings: usize,
/// Counts of the retained findings keyed by entity type name.
findings_by_type: BTreeMap<String, usize>,
/// Highest severity among the retained findings (`Low` when there are none).
severity: SeverityArg,
/// Retained findings grouped by location.
findings: Vec<AdapterFinding>,
}
impl AuditFileReport {
fn from_adapter_report(
path: &Path,
format: InputFormat,
report: AdapterReport,
min_severity: SeverityArg,
) -> Self {
let findings = filter_findings_by_severity(report.findings, min_severity);
let total_findings = findings
.iter()
.map(|location| location.findings.len())
.sum();
let findings_by_type = findings_by_type_for_locations(&findings);
let severity = max_location_severity(&findings);
Self {
path: path.display().to_string(),
format,
adapter: report.kind,
total_findings,
findings_by_type,
severity,
findings,
}
}
}
/// Drops every finding whose severity ranks below `min_severity`, then drops
/// locations that are left without any finding. Order is preserved.
fn filter_findings_by_severity(
    findings: Vec<AdapterFinding>,
    min_severity: SeverityArg,
) -> Vec<AdapterFinding> {
    let mut kept = Vec::new();
    for mut location in findings {
        location.findings.retain(|finding| {
            severity_rank(finding_severity(&finding.entity_type)) >= severity_rank(min_severity)
        });
        if !location.findings.is_empty() {
            kept.push(location);
        }
    }
    kept
}
/// Counts findings per entity type across all locations, keyed by the `Debug`
/// name of the entity type.
fn findings_by_type_for_locations(findings: &[AdapterFinding]) -> BTreeMap<String, usize> {
    findings
        .iter()
        .flat_map(|location| location.findings.iter())
        .fold(BTreeMap::new(), |mut counts, finding| {
            *counts
                .entry(format!("{:?}", finding.entity_type))
                .or_insert(0) += 1;
            counts
        })
}
fn max_location_severity(findings: &[AdapterFinding]) -> SeverityArg {
findings
.iter()
.flat_map(|location| &location.findings)
.map(|finding| finding_severity(&finding.entity_type))
.fold(SeverityArg::Low, max_severity)
}
/// Returns the higher-ranked of two severities; `left` wins on a tie.
fn max_severity(left: SeverityArg, right: SeverityArg) -> SeverityArg {
    let left_rank = severity_rank(left);
    let right_rank = severity_rank(right);
    if left_rank >= right_rank {
        left
    } else {
        right
    }
}
fn severity_rank(severity: SeverityArg) -> u8 {
match severity {
SeverityArg::Low => 0,
SeverityArg::Medium => 1,
SeverityArg::High => 2,
}
}
/// Classifies an entity type by severity: credit cards and SSNs are high;
/// emails, phone numbers, and IBANs are medium; everything else is low.
fn finding_severity(entity_type: &EntityType) -> SeverityArg {
    if matches!(entity_type, EntityType::CreditCard | EntityType::Ssn) {
        return SeverityArg::High;
    }
    if matches!(
        entity_type,
        EntityType::Email | EntityType::PhoneNumber | EntityType::Iban
    ) {
        return SeverityArg::Medium;
    }
    SeverityArg::Low
}
/// Renders an audit report in the requested output format.
///
/// # Errors
///
/// Returns a message when JSON serialization or SARIF rendering fails.
fn render_audit_report(
    report: &AuditReport,
    output_format: OutputFormat,
) -> Result<String, String> {
    match output_format {
        OutputFormat::Sarif => {
            let findings = sarif_findings_from_audit(report);
            render_sarif(&findings)
        }
        OutputFormat::Json => match serde_json::to_string_pretty(report) {
            Ok(json) => Ok(json),
            Err(error) => Err(error.to_string()),
        },
        OutputFormat::Text => Ok(render_audit_text_report(report)),
    }
}
/// Renders a human-readable audit report: a header with the totals, then the
/// per-type counts (if any), then one entry per file with findings.
fn render_audit_text_report(report: &AuditReport) -> String {
    let mut text = format!(
        "audit root: {}\nfiles scanned: {}\nfiles skipped: {}\nfindings: {}\nseverity: {:?}\n",
        report.root,
        report.files_scanned,
        report.files_skipped,
        report.total_findings,
        report.severity
    );

    if !report.findings_by_type.is_empty() {
        text += "\nfindings by type\n";
        for (entity_type, count) in &report.findings_by_type {
            text += &format!(" {entity_type}: {count}\n");
        }
    }

    if !report.files.is_empty() {
        text += "\nfiles\n";
        for file in &report.files {
            text += &format!(
                " {}: {} findings ({:?})\n",
                file.path, file.total_findings, file.severity
            );
            for (entity_type, count) in &file.findings_by_type {
                text += &format!(" {entity_type}: {count}\n");
            }
        }
    }
    text
}
/// Builds a SARIF log from `findings`, validates its shape, and serializes it
/// as pretty-printed JSON.
///
/// # Errors
///
/// Returns the validation error or the serialization error message.
fn render_sarif(findings: &[SarifFinding]) -> Result<String, String> {
    let document = sarif::sarif_log(findings);
    sarif::validate_sarif_shape(&document)?;
    match serde_json::to_string_pretty(&document) {
        Ok(json) => Ok(json),
        Err(error) => Err(error.to_string()),
    }
}
/// Flattens a scan report into SARIF findings, one per detected entity, all
/// attributed to the display form of `path`.
fn sarif_findings_from_report(path: &Path, report: &AdapterReport) -> Vec<SarifFinding> {
    let uri = path.display().to_string();
    let mut results = Vec::new();
    for location in &report.findings {
        for finding in location.findings.iter() {
            results.push(SarifFinding::from_pii(&uri, &location.location, finding));
        }
    }
    results
}
/// Flattens an audit report into SARIF findings, one per detected entity,
/// each attributed to the path of the file it was found in.
fn sarif_findings_from_audit(report: &AuditReport) -> Vec<SarifFinding> {
    let mut results = Vec::new();
    for file in &report.files {
        for location in file.findings.iter() {
            for finding in location.findings.iter() {
                results.push(SarifFinding::from_pii(
                    &file.path,
                    &location.location,
                    finding,
                ));
            }
        }
    }
    results
}
/// Creates a progress bar for the audit, or `None` in quiet mode or when fewer
/// than 100 files are to be scanned.
fn audit_progress(total: usize, global: &GlobalOptions) -> Option<ProgressBar> {
    let wanted = !global.quiet && total >= 100;
    wanted.then(|| {
        let bar = ProgressBar::new(total as u64);
        // A template that fails to parse simply leaves the default style.
        if let Ok(style) =
            ProgressStyle::with_template("{spinner:.green} auditing {pos}/{len} files")
        {
            bar.set_style(style);
        }
        bar
    })
}
/// Resolves the input format for `path`.
///
/// An explicit `requested` format is returned unchanged. For `Auto`, the
/// extension is matched case-insensitively against `json`, `csv`, `log`, and
/// `sql`; anything else (including no extension) is treated as text.
fn detect_format(path: &Path, requested: InputFormat) -> InputFormat {
    match requested {
        InputFormat::Auto => {}
        explicit => return explicit,
    }
    let extension = path
        .extension()
        .and_then(|extension| extension.to_str())
        .map(|extension| extension.to_ascii_lowercase())
        .unwrap_or_default();
    match extension.as_str() {
        "json" => InputFormat::Json,
        "csv" => InputFormat::Csv,
        "log" => InputFormat::Log,
        "sql" => InputFormat::Sql,
        _ => InputFormat::Text,
    }
}
/// Picks the locale to scan with: the first entry that is not `Universal`, or
/// `Universal` when no such entry exists.
///
/// Only one locale is selected; any further entries are ignored.
fn selected_locale(locales: &[LocaleArg]) -> Locale {
    for candidate in locales {
        if *candidate != LocaleArg::Universal {
            return (*candidate).into();
        }
    }
    LocaleArg::Universal.into()
}
/// Converts the CLI strategy into the core `MaskStrategy`.
///
/// Partial masking reveals one leading and four trailing characters and masks
/// the rest with `*`; hashing uses no salt.
///
/// # Errors
///
/// Returns a message for `Encrypt`, which the CLI does not support yet.
fn mask_strategy(strategy: StrategyArg) -> Result<MaskStrategy, String> {
    let mapped = match strategy {
        StrategyArg::Encrypt => {
            return Err(String::from(
                "encrypt strategy requires key management and is not wired into the CLI yet",
            ));
        }
        StrategyArg::Redact => MaskStrategy::Redact,
        StrategyArg::Replace => MaskStrategy::Replace,
        StrategyArg::Hash => MaskStrategy::Hash { salt: None },
        StrategyArg::PartialMask => MaskStrategy::PartialMask {
            reveal_prefix: 1,
            reveal_suffix: 4,
            mask_char: '*',
        },
    };
    Ok(mapped)
}
/// Splits CSV column selectors into `(names, indexes)`.
///
/// Entries that parse as `usize` become indexes; every other entry is kept
/// verbatim as a column name. Relative order within each group is preserved.
fn split_csv_columns(columns: &[String]) -> (Vec<String>, Vec<usize>) {
    columns.iter().fold(
        (Vec::new(), Vec::new()),
        |(mut names, mut indexes), column| {
            if let Ok(index) = column.parse::<usize>() {
                indexes.push(index);
            } else {
                names.push(column.clone());
            }
            (names, indexes)
        },
    )
}
/// Returns the total number of findings across all locations of `report`.
fn total_findings(report: &AdapterReport) -> usize {
    let mut total = 0;
    for location in &report.findings {
        total += location.findings.len();
    }
    total
}
/// Counts the findings of `report` per entity type, keyed by the `Debug` name
/// of the entity type.
fn findings_by_type(report: &AdapterReport) -> BTreeMap<String, usize> {
    report
        .findings
        .iter()
        .flat_map(|location| location.findings.iter())
        .fold(BTreeMap::new(), |mut counts, finding| {
            *counts
                .entry(format!("{:?}", finding.entity_type))
                .or_insert(0) += 1;
            counts
        })
}
/// Rebuilds the masked version of `input` from per-line scan results.
///
/// Each line of `input` is replaced by the `masked_line` of the result at the
/// same index when one is present; otherwise the original line is kept. The
/// original line terminators (`\n` or `\r\n`) are preserved.
///
/// Input without any `\n` is treated as a single line: the first result's
/// masked line is returned if present, otherwise `input` unchanged.
fn masked_plaintext_output(input: &str, lines: &[cloakrs_adapters::LineScanResult]) -> String {
    // Handle the single-line case first so no work is done building a buffer
    // that would be thrown away.
    if !input.contains('\n') {
        return lines
            .first()
            .and_then(|result| result.masked_line.clone())
            .unwrap_or_else(|| input.to_string());
    }

    let mut output = String::with_capacity(input.len());
    for (index, segment) in input.split_inclusive('\n').enumerate() {
        // Split the segment into its content and its terminator. A `\r` is
        // only part of the terminator when followed by `\n`; a bare trailing
        // `\r` on an unterminated final line is content and must survive when
        // the line is passed through unmasked.
        let (line, terminator) = if let Some(content) = segment.strip_suffix("\r\n") {
            (content, "\r\n")
        } else if let Some(content) = segment.strip_suffix('\n') {
            (content, "\n")
        } else {
            (segment, "")
        };
        let masked = lines
            .get(index)
            .and_then(|result| result.masked_line.as_deref())
            .unwrap_or(line);
        output.push_str(masked);
        output.push_str(terminator);
    }
    output
}
/// Writes `output` to stdout, appending a newline when it does not already end
/// with one.
///
/// # Errors
///
/// Returns a message when either write fails.
fn write_stdout(output: &str) -> Result<(), String> {
    let describe = |error: io::Error| format!("failed to write stdout: {error}");
    let mut handle = io::stdout().lock();
    handle.write_all(output.as_bytes()).map_err(describe)?;
    if !output.ends_with('\n') {
        handle.write_all(b"\n").map_err(describe)?;
    }
    Ok(())
}
/// Creates a byte-based progress bar for scanning `path`.
///
/// Returns `Ok(None)` in quiet mode or when the file is smaller than
/// 1,000,000 bytes.
///
/// # Errors
///
/// Returns a message when the file's metadata cannot be read.
fn progress_bar(path: &Path, global: &GlobalOptions) -> Result<Option<ProgressBar>, String> {
    if global.quiet {
        return Ok(None);
    }
    let metadata = match fs::metadata(path) {
        Ok(metadata) => metadata,
        Err(error) => return Err(format!("failed to inspect {}: {error}", path.display())),
    };
    let size = metadata.len();
    if size < 1_000_000 {
        return Ok(None);
    }

    let bar = ProgressBar::new(size);
    // A template that fails to parse simply leaves the default style.
    if let Ok(style) =
        ProgressStyle::with_template("{spinner:.green} scanning {bytes}/{total_bytes} {msg}")
    {
        bar.set_style(style);
    }
    bar.set_message(path.display().to_string());
    Ok(Some(bar))
}
impl From<LocaleArg> for Locale {
fn from(value: LocaleArg) -> Self {
match value {
LocaleArg::Universal => Self::Universal,
LocaleArg::Us => Self::US,
LocaleArg::Nl => Self::NL,
LocaleArg::Uk => Self::UK,
LocaleArg::De => Self::DE,
LocaleArg::Fr => Self::FR,
LocaleArg::In => Self::IN,
LocaleArg::Br => Self::BR,
LocaleArg::Eu => Self::EU,
}
}
}
// Unit tests for CLI parsing, format detection, streaming, and auditing.
#[cfg(test)]
mod tests {
use super::*;
use clap::CommandFactory;
// The clap command definition must pass clap's own consistency checks.
#[test]
fn test_cli_help_builds() {
Cli::command().debug_assert();
}
// Global options given before the subcommand and scan-specific options
// given after it are all parsed into the expected fields.
#[test]
fn test_cli_scan_parses_global_and_scan_options() {
let cli = Cli::parse_from([
"cloakrs",
"--locale",
"eu,nl",
"--strategy",
"partial-mask",
"--min-confidence",
"0.8",
"--output-format",
"json",
"scan",
"data.csv",
"--format",
"csv",
"--columns",
"email,phone",
]);
assert_eq!(cli.global.locale, vec![LocaleArg::Eu, LocaleArg::Nl]);
assert_eq!(cli.global.strategy, StrategyArg::PartialMask);
assert_eq!(cli.global.min_confidence, 0.8);
assert_eq!(cli.global.output_format, OutputFormat::Json);
let Command::Scan(args) = cli.command else {
panic!("expected scan command");
};
assert_eq!(args.format, InputFormat::Csv);
assert_eq!(args.columns, ["email", "phone"]);
}
// Global flags are also accepted after the subcommand name.
#[test]
fn test_cli_stream_accepts_quiet_global_after_subcommand() {
let cli = Cli::parse_from(["cloakrs", "stream", "--quiet"]);
assert!(cli.global.quiet);
assert!(matches!(cli.command, Command::Stream(_)));
}
// A confidence above 1.0 is rejected by the value parser.
#[test]
fn test_cli_rejects_invalid_min_confidence() {
let error = Cli::try_parse_from(["cloakrs", "--min-confidence", "2", "stream"])
.expect_err("confidence above one should fail");
assert_eq!(error.kind(), clap::error::ErrorKind::ValueValidation);
}
// With `Auto`, the format comes from the extension; unknown extensions are text.
#[test]
fn test_detect_format_uses_extension_for_auto() {
assert_eq!(
detect_format(Path::new("sample.json"), InputFormat::Auto),
InputFormat::Json
);
assert_eq!(
detect_format(Path::new("sample.txt"), InputFormat::Auto),
InputFormat::Text
);
}
// Numeric selectors become indexes, everything else stays a name.
#[test]
fn test_split_csv_columns_separates_names_and_indexes() {
let (names, indexes) = split_csv_columns(&["email".to_string(), "2".to_string()]);
assert_eq!(names, ["email"]);
assert_eq!(indexes, [2]);
}
// SARIF rule ids are upper snake case with a `_DETECTED` suffix.
#[test]
fn test_sarif_rule_id_uses_upper_snake_case() {
assert_eq!(
sarif::rule_id(&EntityType::CreditCard),
"CREDIT_CARD_DETECTED"
);
assert_eq!(sarif::rule_id(&EntityType::PhoneNumber), "PHONE_DETECTED");
}
// Streaming masks the email on the first line and counts one finding
// over two scanned lines.
#[test]
fn test_stream_reader_masks_lines_and_counts_findings() {
let global = GlobalOptions {
locale: vec![LocaleArg::Us],
strategy: StrategyArg::Redact,
min_confidence: 0.5,
output_format: OutputFormat::Text,
quiet: true,
};
let input = "email jane@example.com\nplain\n";
let mut output = Vec::new();
let summary = stream_reader(&global, io::Cursor::new(input), &mut output).unwrap();
let output = String::from_utf8(output).unwrap();
assert!(output.contains("[EMAIL]"));
assert_eq!(summary.lines_scanned, 2);
assert_eq!(summary.lines_with_findings, 1);
assert_eq!(summary.total_findings, 1);
}
// The JSON summary exposes the struct's field names.
#[test]
fn test_render_stream_summary_json_uses_machine_readable_shape() {
let summary = StreamSummary {
lines_scanned: 2,
lines_with_findings: 1,
total_findings: 1,
findings_by_type: BTreeMap::from([("Email".to_string(), 1)]),
};
let rendered = render_stream_summary(&summary, OutputFormat::Json);
assert!(rendered.contains("\"total_findings\": 1"));
}
// A text file is scanned while a file of NUL bytes is counted as skipped.
#[test]
fn test_audit_directory_scans_text_and_skips_binary() {
let root = unique_temp_dir("audit_scans_text");
fs::create_dir_all(&root).unwrap();
fs::write(root.join("sample.txt"), "contact jane@example.com\n").unwrap();
fs::write(root.join("binary.bin"), b"\0\0\0").unwrap();
let global = GlobalOptions {
locale: vec![LocaleArg::Us],
strategy: StrategyArg::Redact,
min_confidence: 0.5,
output_format: OutputFormat::Text,
quiet: true,
};
let args = AuditArgs {
path: root.clone(),
recursive: true,
respect_gitignore: true,
parallel: Some(2),
severity: SeverityArg::Low,
output: None,
};
let report = audit_directory(&global, &args).unwrap();
assert_eq!(report.files_scanned, 1);
assert_eq!(report.files_skipped, 1);
assert_eq!(report.total_findings, 1);
assert_eq!(report.findings_by_type.get("Email"), Some(&1));
fs::remove_dir_all(root).unwrap();
}
// With a `High` threshold the medium-severity email finding is filtered
// out: the file still counts as scanned but is not listed.
#[test]
fn test_audit_directory_severity_filter_excludes_medium_findings() {
let root = unique_temp_dir("audit_severity_filter");
fs::create_dir_all(&root).unwrap();
fs::write(root.join("sample.txt"), "contact jane@example.com\n").unwrap();
let global = GlobalOptions {
locale: vec![LocaleArg::Us],
strategy: StrategyArg::Redact,
min_confidence: 0.5,
output_format: OutputFormat::Text,
quiet: true,
};
let args = AuditArgs {
path: root.clone(),
recursive: true,
respect_gitignore: true,
parallel: None,
severity: SeverityArg::High,
output: None,
};
let report = audit_directory(&global, &args).unwrap();
assert_eq!(report.files_scanned, 1);
assert_eq!(report.total_findings, 0);
assert!(report.files.is_empty());
fs::remove_dir_all(root).unwrap();
}
// Builds a temp-dir path that is unique per test name and process id; the
// directory itself is created and removed by the calling test.
fn unique_temp_dir(name: &str) -> PathBuf {
std::env::temp_dir().join(format!("cloakrs_{name}_{}", std::process::id()))
}
}