mod config;
mod error;
mod format;
mod normalize;
pub mod output;
mod report;
mod strategy;
mod validator;
pub use config::{DiscoveryMode, FsSourceConfig, ValidationConfig, VendorPolicy};
pub use error::{ScanError, ScanErrorKind, ValidationError};
pub use report::ValidationReport;
use strategy::ContentFormat;
use strategy::fs::{ScanResult, content_format_for, find_files, read_file_bounded};
/// Scans the files discovered from `fs_config` and collects every finding into a
/// single [`ValidationReport`].
///
/// Files are processed in the order returned by `find_files`. Each file is read
/// with a per-file size bound (`max_file_size`), charged against the cumulative
/// byte budget (`max_total_bytes`), and then dispatched to the Markdown, JSON or
/// YAML scanner according to `content_format_for`. Scanning stops early, with a
/// `LimitExceeded` scan error recorded against the current file, as soon as
/// either `max_files` or `max_total_bytes` would be exceeded.
///
/// The report's `ok` flag is `true` only when there are neither validation
/// errors nor scan errors.
///
/// # Errors
///
/// Returns an error when `fs_config.paths` is empty or when any of the listed
/// paths does not exist. Problems with individual files are not returned as
/// errors; they are reported through `scan_errors` in the report.
pub fn validate_fs(
    fs_config: &FsSourceConfig,
    validation_config: &ValidationConfig,
) -> anyhow::Result<ValidationReport> {
    if fs_config.paths.is_empty() {
        anyhow::bail!("No paths provided for validation");
    }
    // Report the first configured path that is missing, in configuration order.
    if let Some(missing) = fs_config.paths.iter().find(|p| !p.exists()) {
        anyhow::bail!("Path does not exist: {}", missing.display());
    }

    // An empty discovery result needs no special case: the loop below is simply
    // skipped and an all-zero report with `ok == true` falls out.
    let (files, mut scan_errors) = find_files(fs_config);

    let heuristic = validation_config.discovery_mode == DiscoveryMode::Heuristic;
    let scan_vendor = effective_vendor_for_scanning(&validation_config.vendor_policy);
    // The borrowed vendor is the same for every file, so take it once.
    let vendor = scan_vendor.as_deref();

    // Shared shape of the two "limit reached" scan errors.
    let limit_error = |file, message| ScanError {
        file,
        kind: ScanErrorKind::LimitExceeded,
        message,
    };

    let mut validation_errors = Vec::new();
    let mut scanned_files: usize = 0;
    // Errors raised during discovery already count as failures, and therefore
    // also count towards `max_files`.
    let mut failed_files: usize = scan_errors.len();
    let mut total_bytes: u64 = 0;

    for file_path in &files {
        if scanned_files + failed_files >= fs_config.max_files {
            let message = format!(
                "Scan aborted: max_files limit ({}) reached; remaining files not scanned",
                fs_config.max_files
            );
            scan_errors.push(limit_error(file_path.clone(), message));
            failed_files += 1;
            break;
        }

        let content = match read_file_bounded(file_path, fs_config.max_file_size) {
            ScanResult::Ok(bytes) => bytes,
            ScanResult::Err(read_err) => {
                scan_errors.push(read_err);
                failed_files += 1;
                continue;
            }
        };

        // Only commit the new running total once we know it fits the budget.
        let file_bytes = content.len() as u64;
        let projected_total = total_bytes.saturating_add(file_bytes);
        if projected_total > fs_config.max_total_bytes {
            let message = format!(
                "Scan aborted: max_total_bytes limit ({}) reached; remaining files not scanned",
                fs_config.max_total_bytes
            );
            scan_errors.push(limit_error(file_path.clone(), message));
            failed_files += 1;
            break;
        }
        total_bytes = projected_total;

        // Files with no recognised format are skipped without touching either
        // counter.
        // NOTE(review): they have already been read and charged to the byte
        // budget at this point - confirm that is intended.
        let Some(content_format) = content_format_for(file_path) else {
            continue;
        };

        let file_errors = match content_format {
            ContentFormat::Markdown => format::markdown::scan_markdown_content(
                &content,
                file_path,
                vendor,
                heuristic,
                &validation_config.skip_tokens,
            ),
            ContentFormat::Json => {
                let outcome = format::json::scan_json_content(
                    &content,
                    file_path,
                    vendor,
                    validation_config.scan_keys,
                );
                match outcome {
                    Ok(found) => found,
                    Err(scan_err) => {
                        // A JSON scan failure counts the file as failed only,
                        // not as scanned.
                        scan_errors.push(scan_err);
                        failed_files += 1;
                        continue;
                    }
                }
            }
            ContentFormat::Yaml => {
                let (found, file_scan_errors) = format::yaml::scan_yaml_content(
                    &content,
                    file_path,
                    vendor,
                    validation_config.scan_keys,
                );
                // NOTE(review): unlike JSON, a YAML file that produced scan
                // errors is counted once as failed here and again as scanned
                // below - confirm this double count is deliberate.
                if !file_scan_errors.is_empty() {
                    failed_files += 1;
                    scan_errors.extend(file_scan_errors);
                }
                found
            }
        };

        scanned_files += 1;
        validation_errors.extend(apply_allow_list_filter(
            file_errors,
            &validation_config.vendor_policy,
        ));
    }

    Ok(ValidationReport {
        // Evaluated first, before the vectors are moved into the report.
        ok: validation_errors.is_empty() && scan_errors.is_empty(),
        scanned_files,
        failed_files,
        validation_errors,
        scan_errors,
    })
}
/// Picks the vendor string handed to the per-format scanners for a given policy.
///
/// * `Any` - no vendor (`None`).
/// * `MustMatch(v)` - a copy of `v`.
/// * `AllowList(_)` - a single NUL character used as a placeholder vendor;
///   presumably no real vendor equals it, so the scanners flag every ID as a
///   mismatch and `apply_allow_list_filter` can then drop the allowed ones
///   (TODO confirm against the scanner implementations).
fn effective_vendor_for_scanning(policy: &VendorPolicy) -> Option<String> {
    let vendor = match policy {
        VendorPolicy::Any => return None,
        VendorPolicy::AllowList(_) => String::from("\x00"),
        VendorPolicy::MustMatch(required) => required.clone(),
    };
    Some(vendor)
}
/// Removes vendor-mismatch errors whose vendor appears on the policy's allow list.
///
/// For any policy other than `AllowList` the errors are returned untouched.
/// Under `AllowList`, an error is treated as a vendor mismatch when its `error`
/// text contains `"Vendor mismatch"`; its vendor is taken to be the second
/// dot-separated segment of `normalized_id` (the empty string when there is no
/// such segment). Mismatch errors whose vendor is on the list are dropped;
/// every other error is kept, in its original order.
fn apply_allow_list_filter(
    mut errors: Vec<ValidationError>,
    policy: &VendorPolicy,
) -> Vec<ValidationError> {
    if let VendorPolicy::AllowList(allowed) = policy {
        errors.retain(|err| {
            // Errors unrelated to vendor matching are never filtered.
            if !err.error.contains("Vendor mismatch") {
                return true;
            }
            let id_vendor = err.normalized_id.split('.').nth(1).unwrap_or("");
            let is_allowed = allowed.iter().any(|a| a == id_vendor);
            !is_allowed
        });
    }
    errors
}