1mod config;
29mod error;
30mod format;
31mod normalize;
32pub mod output;
33mod report;
34mod strategy;
35mod validator;
36
37pub use config::{DiscoveryMode, FsSourceConfig, ValidationConfig, VendorPolicy};
38pub use error::{ScanError, ScanErrorKind, ValidationError};
39pub use report::ValidationReport;
40
41use strategy::ContentFormat;
42use strategy::fs::{ScanResult, content_format_for, find_files, read_file_bounded};
43
/// Builds the "Vendor mismatch" message used when an allow-list policy rejects a vendor.
///
/// `allowed` is rendered as a single comma-separated list inside one pair of quotes
/// (e.g. `'x, cf'`); `found` is the vendor segment that was actually encountered.
fn format_allow_list_mismatch(allowed: &[String], found: &str) -> String {
    let expected = allowed.join(", ");
    format!(
        "Vendor mismatch: expected one of '{}', found '{}'",
        expected, found
    )
}
51
52pub fn validate_fs(
68 fs_config: &FsSourceConfig,
69 validation_config: &ValidationConfig,
70) -> anyhow::Result<ValidationReport> {
71 if fs_config.paths.is_empty() {
72 anyhow::bail!("No paths provided for validation");
73 }
74
75 for path in &fs_config.paths {
76 if !path.exists() {
77 anyhow::bail!("Path does not exist: {}", path.display());
78 }
79 }
80
81 let (files, mut scan_errors) = find_files(fs_config);
82
83 if files.is_empty() && scan_errors.is_empty() {
84 return Ok(ValidationReport {
85 scanned_files: 0,
86 failed_files: 0,
87 ok: true,
88 validation_errors: vec![],
89 scan_errors: vec![],
90 });
91 }
92
93 let heuristic = validation_config.discovery_mode == DiscoveryMode::Heuristic;
94 let effective_vendor = effective_vendor_for_scanning(&validation_config.vendor_policy);
99
100 let mut validation_errors = Vec::new();
101 let mut scanned_files: usize = 0;
102 let mut failed_files: usize = scan_errors.len();
105 let mut total_bytes: u64 = 0;
106
107 'files: for file_path in &files {
108 if scanned_files + failed_files >= fs_config.max_files {
109 scan_errors.push(ScanError {
110 file: file_path.clone(),
111 kind: ScanErrorKind::LimitExceeded,
112 message: format!(
113 "Scan aborted: max_files limit ({}) reached; remaining files not scanned",
114 fs_config.max_files
115 ),
116 });
117 failed_files += 1;
118 break;
119 }
120
121 let content = match read_file_bounded(file_path, fs_config.max_file_size) {
122 ScanResult::Ok(c) => c,
123 ScanResult::Err(e) => {
124 scan_errors.push(e);
125 failed_files += 1;
126 continue;
127 }
128 };
129
130 let file_bytes = content.len() as u64;
131 if total_bytes.saturating_add(file_bytes) > fs_config.max_total_bytes {
132 scan_errors.push(ScanError {
133 file: file_path.clone(),
134 kind: ScanErrorKind::LimitExceeded,
135 message: format!(
136 "Scan aborted: max_total_bytes limit ({}) reached; remaining files not scanned",
137 fs_config.max_total_bytes
138 ),
139 });
140 failed_files += 1;
141 break;
142 }
143 total_bytes = total_bytes.saturating_add(file_bytes);
144
145 let vendor = effective_vendor.as_deref();
146 let file_errors = match content_format_for(file_path) {
147 Some(ContentFormat::Markdown) => format::markdown::scan_markdown_content(
148 &content,
149 file_path,
150 vendor,
151 heuristic,
152 &validation_config.skip_tokens,
153 ),
154 Some(ContentFormat::Json) => {
155 match format::json::scan_json_content(
156 &content,
157 file_path,
158 vendor,
159 validation_config.scan_keys,
160 ) {
161 Ok(errs) => errs,
162 Err(scan_err) => {
163 scan_errors.push(scan_err);
164 failed_files += 1;
165 continue 'files;
166 }
167 }
168 }
169 Some(ContentFormat::Yaml) => {
170 let (val_errs, yaml_scan_errs) = format::yaml::scan_yaml_content(
171 &content,
172 file_path,
173 vendor,
174 validation_config.scan_keys,
175 );
176 if !yaml_scan_errs.is_empty() {
177 failed_files += 1;
178 scan_errors.extend(yaml_scan_errs);
179 }
180 val_errs
181 }
182 None => continue,
183 };
184
185 scanned_files += 1;
186
187 let file_errors = apply_allow_list_filter(file_errors, &validation_config.vendor_policy);
190 validation_errors.extend(file_errors);
191 }
192
193 let ok = validation_errors.is_empty() && scan_errors.is_empty();
194 Ok(ValidationReport {
195 scanned_files,
196 failed_files,
197 ok,
198 validation_errors,
199 scan_errors,
200 })
201}
202
203fn effective_vendor_for_scanning(policy: &VendorPolicy) -> Option<String> {
213 match policy {
214 VendorPolicy::Any => None,
215 VendorPolicy::MustMatch(v) => Some(v.clone()),
216 VendorPolicy::AllowList(_) => Some("\x00".to_owned()),
217 }
218}
219
220fn apply_allow_list_filter(
226 errors: Vec<ValidationError>,
227 policy: &VendorPolicy,
228) -> Vec<ValidationError> {
229 let VendorPolicy::AllowList(allowed) = policy else {
230 return errors;
231 };
232
233 errors
234 .into_iter()
235 .filter_map(|mut e| {
236 if !e.error.contains("Vendor mismatch") {
240 return Some(e); }
242 let id_vendor = e.normalized_id.split('.').nth(1).unwrap_or("");
245 if allowed.iter().any(|a| a == id_vendor) {
246 return None;
247 }
248 e.error = format_allow_list_mismatch(allowed, id_vendor);
249 Some(e)
250 })
251 .collect()
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// A vendor-mismatch error whose vendor (`w`) is not on the allow list must survive the
    /// filter with its message rewritten to list the allowed vendors.
    #[test]
    fn test_apply_allow_list_filter_rewrites_disallowed_vendor_message() {
        let id = "gts.w.core.org.department.v1~";
        let input = vec![ValidationError {
            file: PathBuf::from("docs/test.md"),
            line: 1,
            column: 1,
            json_path: String::new(),
            raw_value: id.to_owned(),
            normalized_id: id.to_owned(),
            error: "Vendor mismatch: expected '', found 'w'".to_owned(),
            context: id.to_owned(),
        }];
        let policy = VendorPolicy::AllowList(vec!["x".to_owned(), "cf".to_owned()]);

        let remaining = apply_allow_list_filter(input, &policy);

        // One comparison covers both the surviving count and the rewritten text.
        let messages: Vec<&str> = remaining.iter().map(|e| e.error.as_str()).collect();
        assert_eq!(
            messages,
            ["Vendor mismatch: expected one of 'x, cf', found 'w'"]
        );
    }
}