mod config;
mod error;
mod format;
mod normalize;
pub mod output;
mod report;
mod strategy;
mod validator;
pub use config::{DiscoveryMode, FsSourceConfig, ValidationConfig, VendorPolicy};
pub use error::{ScanError, ScanErrorKind, ValidationError};
pub use report::ValidationReport;
use strategy::ContentFormat;
use strategy::fs::{ScanResult, content_format_for, find_files, read_file_bounded};
/// Builds the "Vendor mismatch" message used when `found` is not in the allow-list.
///
/// The entries of `allowed` are joined with `", "` and the joined list is
/// wrapped in a single pair of quotes, followed by the quoted `found` vendor.
fn format_allow_list_mismatch(allowed: &[String], found: &str) -> String {
    let expected = allowed.join(", ");
    format!(
        "Vendor mismatch: expected one of '{}', found '{}'",
        expected, found
    )
}
/// Scans the files discovered under `fs_config.paths` and collects the
/// findings into a [`ValidationReport`].
///
/// Files are read with a per-file size bound (`fs_config.max_file_size`) and
/// dispatched to the Markdown, JSON or YAML scanner according to
/// `content_format_for`. Files for which no format is reported are skipped and
/// counted neither as scanned nor as failed. Scanning stops early, recording a
/// `LimitExceeded` scan error, once `fs_config.max_files` or
/// `fs_config.max_total_bytes` would be exceeded.
///
/// The report's `ok` flag is `true` only when there are no validation errors
/// and no scan errors.
///
/// # Errors
///
/// Returns an error when `fs_config.paths` is empty or when any of the listed
/// paths does not exist.
pub fn validate_fs(
    fs_config: &FsSourceConfig,
    validation_config: &ValidationConfig,
) -> anyhow::Result<ValidationReport> {
    // Reject unusable input before any file discovery happens.
    if fs_config.paths.is_empty() {
        anyhow::bail!("No paths provided for validation");
    }
    for root in &fs_config.paths {
        if !root.exists() {
            anyhow::bail!("Path does not exist: {}", root.display());
        }
    }

    let (files, mut scan_errors) = find_files(fs_config);

    // Nothing discovered and nothing went wrong: report a clean, empty run.
    if files.is_empty() && scan_errors.is_empty() {
        return Ok(ValidationReport {
            scanned_files: 0,
            failed_files: 0,
            ok: true,
            validation_errors: Vec::new(),
            scan_errors: Vec::new(),
        });
    }

    let effective_vendor = effective_vendor_for_scanning(&validation_config.vendor_policy);
    let vendor = effective_vendor.as_deref();
    let heuristic = validation_config.discovery_mode == DiscoveryMode::Heuristic;

    let mut validation_errors = Vec::new();
    let mut total_bytes: u64 = 0;
    let mut scanned_files: usize = 0;
    // Errors reported by discovery already count as failures.
    let mut failed_files: usize = scan_errors.len();

    'scan: for file_path in &files {
        // File-count budget: scanned and failed files both consume it.
        let handled = scanned_files + failed_files;
        if handled >= fs_config.max_files {
            scan_errors.push(ScanError {
                file: file_path.clone(),
                kind: ScanErrorKind::LimitExceeded,
                message: format!(
                    "Scan aborted: max_files limit ({}) reached; remaining files not scanned",
                    fs_config.max_files
                ),
            });
            failed_files += 1;
            break 'scan;
        }

        let content = match read_file_bounded(file_path, fs_config.max_file_size) {
            ScanResult::Ok(text) => text,
            ScanResult::Err(read_failure) => {
                scan_errors.push(read_failure);
                failed_files += 1;
                continue 'scan;
            }
        };

        // Byte budget: abort before accounting for a file that would exceed it.
        let projected_bytes = total_bytes.saturating_add(content.len() as u64);
        if projected_bytes > fs_config.max_total_bytes {
            scan_errors.push(ScanError {
                file: file_path.clone(),
                kind: ScanErrorKind::LimitExceeded,
                message: format!(
                    "Scan aborted: max_total_bytes limit ({}) reached; remaining files not scanned",
                    fs_config.max_total_bytes
                ),
            });
            failed_files += 1;
            break 'scan;
        }
        total_bytes = projected_bytes;

        let file_errors = match content_format_for(file_path) {
            None => continue 'scan,
            Some(ContentFormat::Markdown) => format::markdown::scan_markdown_content(
                &content,
                file_path,
                vendor,
                heuristic,
                &validation_config.skip_tokens,
            ),
            Some(ContentFormat::Json) => {
                let outcome = format::json::scan_json_content(
                    &content,
                    file_path,
                    vendor,
                    validation_config.scan_keys,
                );
                match outcome {
                    Ok(found) => found,
                    Err(scan_failure) => {
                        scan_errors.push(scan_failure);
                        failed_files += 1;
                        continue 'scan;
                    }
                }
            }
            Some(ContentFormat::Yaml) => {
                let (found, yaml_failures) = format::yaml::scan_yaml_content(
                    &content,
                    file_path,
                    vendor,
                    validation_config.scan_keys,
                );
                // The file is counted as failed once, however many scan errors
                // it produced; its validation errors are still kept and the
                // file is also counted as scanned below.
                if !yaml_failures.is_empty() {
                    failed_files += 1;
                    scan_errors.extend(yaml_failures);
                }
                found
            }
        };

        scanned_files += 1;
        validation_errors.extend(apply_allow_list_filter(
            file_errors,
            &validation_config.vendor_policy,
        ));
    }

    let all_clean = validation_errors.is_empty() && scan_errors.is_empty();
    Ok(ValidationReport {
        scanned_files,
        failed_files,
        ok: all_clean,
        validation_errors,
        scan_errors,
    })
}
/// Chooses the vendor string handed to the per-format scanners for `policy`.
///
/// * `Any` yields `None`.
/// * `MustMatch(v)` yields a copy of `v`.
/// * `AllowList(_)` yields a one-character NUL (`"\x00"`) placeholder.
///   NOTE(review): presumably no real vendor equals this placeholder, so every
///   vendor is reported as a mismatch and `apply_allow_list_filter` decides
///   afterwards which ones to keep — confirm against the scanners.
fn effective_vendor_for_scanning(policy: &VendorPolicy) -> Option<String> {
    let vendor = match policy {
        VendorPolicy::Any => return None,
        VendorPolicy::MustMatch(required) => required.clone(),
        VendorPolicy::AllowList(_) => String::from("\x00"),
    };
    Some(vendor)
}
fn apply_allow_list_filter(
errors: Vec<ValidationError>,
policy: &VendorPolicy,
) -> Vec<ValidationError> {
let VendorPolicy::AllowList(allowed) = policy else {
return errors;
};
errors
.into_iter()
.filter_map(|mut e| {
if !e.error.contains("Vendor mismatch") {
return Some(e); }
let id_vendor = e.normalized_id.split('.').nth(1).unwrap_or("");
if allowed.iter().any(|a| a == id_vendor) {
return None;
}
e.error = format_allow_list_mismatch(allowed, id_vendor);
Some(e)
})
.collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// A mismatch for a vendor outside the allow-list is kept, and its message
    /// is rewritten to list the allowed vendors.
    #[test]
    fn test_apply_allow_list_filter_rewrites_disallowed_vendor_message() {
        // Arrange: one scanner error for vendor `w`, which is not allowed.
        let gts_id = "gts.w.core.org.department.v1~";
        let mismatch = ValidationError {
            file: PathBuf::from("docs/test.md"),
            line: 1,
            column: 1,
            json_path: String::new(),
            raw_value: gts_id.to_owned(),
            normalized_id: gts_id.to_owned(),
            error: "Vendor mismatch: expected '', found 'w'".to_owned(),
            context: gts_id.to_owned(),
        };
        let policy = VendorPolicy::AllowList(vec!["x".to_owned(), "cf".to_owned()]);

        // Act
        let remaining = apply_allow_list_filter(vec![mismatch], &policy);

        // Assert
        assert_eq!(remaining.len(), 1);
        assert_eq!(
            remaining[0].error,
            "Vendor mismatch: expected one of 'x, cf', found 'w'"
        );
    }
}