Skip to main content

gts_validator/
lib.rs

1//! # gts-validator
2//!
3//! GTS identifier validator for documentation and configuration files.
4//!
5//! This crate provides a clean separation between the **core validation engine**
6//! (input-agnostic) and **input strategies** (starting with filesystem scanning).
7//!
8//! ## Quick Start
9//!
10//! ```rust,no_run
11//! use std::path::PathBuf;
12//! use gts_validator::{validate_fs, FsSourceConfig, ValidationConfig, VendorPolicy};
13//!
14//! let mut fs_config = FsSourceConfig::default();
15//! fs_config.paths = vec![PathBuf::from("docs"), PathBuf::from("modules")];
16//! fs_config.exclude = vec!["target/*".to_owned()];
17//!
18//! let mut validation_config = ValidationConfig::default();
19//! validation_config.vendor_policy = VendorPolicy::MustMatch("x".to_owned());
20//!
21//! let report = validate_fs(&fs_config, &validation_config).unwrap();
22//! println!("Files scanned: {}", report.scanned_files);
23//! println!("Validation errors: {}", report.errors_count());
24//! println!("Scan errors: {}", report.scan_errors.len());
25//! println!("OK: {}", report.ok);
26//! ```
27
28mod config;
29mod error;
30mod format;
31mod normalize;
32pub mod output;
33mod report;
34mod strategy;
35mod validator;
36
37pub use config::{DiscoveryMode, FsSourceConfig, ValidationConfig, VendorPolicy};
38pub use error::{ScanError, ScanErrorKind, ValidationError};
39pub use report::ValidationReport;
40
41use strategy::ContentFormat;
42use strategy::fs::{ScanResult, content_format_for, find_files, read_file_bounded};
43
/// Build the "Vendor mismatch" message for a vendor that is not in the allow list.
///
/// `allowed` is rendered as one comma-separated list inside a single pair of quotes,
/// followed by the quoted `found` vendor, e.g.
/// `Vendor mismatch: expected one of 'x, cf', found 'w'`.
fn format_allow_list_mismatch(allowed: &[String], found: &str) -> String {
    let expected = allowed.join(", ");
    format!("Vendor mismatch: expected one of '{}', found '{}'", expected, found)
}
51
52/// Validate GTS identifiers in files on disk.
53///
54/// This is the primary public API.
55///
56/// # Arguments
57///
58/// * `fs_config` - Filesystem-specific source options (paths, exclude, max file size, limits)
59/// * `validation_config` - Core validation config (vendor policy, `scan_keys`, discovery mode)
60///
61/// # Errors
62///
63/// Returns an error if `fs_config.paths` is empty or if any provided path does not exist.
64/// Returns `Ok` with `scanned_files: 0` if paths exist but contain no scannable files.
65/// Scan failures (unreadable files, parse errors, etc.) are reported in `report.scan_errors`
66/// and never silently discarded.
67pub fn validate_fs(
68    fs_config: &FsSourceConfig,
69    validation_config: &ValidationConfig,
70) -> anyhow::Result<ValidationReport> {
71    if fs_config.paths.is_empty() {
72        anyhow::bail!("No paths provided for validation");
73    }
74
75    for path in &fs_config.paths {
76        if !path.exists() {
77            anyhow::bail!("Path does not exist: {}", path.display());
78        }
79    }
80
81    let (files, mut scan_errors) = find_files(fs_config);
82
83    if files.is_empty() && scan_errors.is_empty() {
84        return Ok(ValidationReport {
85            scanned_files: 0,
86            failed_files: 0,
87            ok: true,
88            validation_errors: vec![],
89            scan_errors: vec![],
90        });
91    }
92
93    let heuristic = validation_config.discovery_mode == DiscoveryMode::Heuristic;
94    // For AllowList, pass a sentinel vendor that no real GTS ID can match.
95    // This causes validate_candidate to emit "Vendor mismatch" for every non-example
96    // vendor, and apply_allow_list_filter then removes the allowed ones — leaving only
97    // genuinely disallowed vendors as errors.
98    let effective_vendor = effective_vendor_for_scanning(&validation_config.vendor_policy);
99
100    let mut validation_errors = Vec::new();
101    let mut scanned_files: usize = 0;
102    // Discovery-stage failures (walk errors, boundary violations, canonicalization errors)
103    // are already in scan_errors from find_files. Count them as failed files upfront.
104    let mut failed_files: usize = scan_errors.len();
105    let mut total_bytes: u64 = 0;
106
107    'files: for file_path in &files {
108        if scanned_files + failed_files >= fs_config.max_files {
109            scan_errors.push(ScanError {
110                file: file_path.clone(),
111                kind: ScanErrorKind::LimitExceeded,
112                message: format!(
113                    "Scan aborted: max_files limit ({}) reached; remaining files not scanned",
114                    fs_config.max_files
115                ),
116            });
117            failed_files += 1;
118            break;
119        }
120
121        let content = match read_file_bounded(file_path, fs_config.max_file_size) {
122            ScanResult::Ok(c) => c,
123            ScanResult::Err(e) => {
124                scan_errors.push(e);
125                failed_files += 1;
126                continue;
127            }
128        };
129
130        let file_bytes = content.len() as u64;
131        if total_bytes.saturating_add(file_bytes) > fs_config.max_total_bytes {
132            scan_errors.push(ScanError {
133                file: file_path.clone(),
134                kind: ScanErrorKind::LimitExceeded,
135                message: format!(
136                    "Scan aborted: max_total_bytes limit ({}) reached; remaining files not scanned",
137                    fs_config.max_total_bytes
138                ),
139            });
140            failed_files += 1;
141            break;
142        }
143        total_bytes = total_bytes.saturating_add(file_bytes);
144
145        let vendor = effective_vendor.as_deref();
146        let file_errors = match content_format_for(file_path) {
147            Some(ContentFormat::Markdown) => format::markdown::scan_markdown_content(
148                &content,
149                file_path,
150                vendor,
151                heuristic,
152                &validation_config.skip_tokens,
153            ),
154            Some(ContentFormat::Json) => {
155                match format::json::scan_json_content(
156                    &content,
157                    file_path,
158                    vendor,
159                    validation_config.scan_keys,
160                ) {
161                    Ok(errs) => errs,
162                    Err(scan_err) => {
163                        scan_errors.push(scan_err);
164                        failed_files += 1;
165                        continue 'files;
166                    }
167                }
168            }
169            Some(ContentFormat::Yaml) => {
170                let (val_errs, yaml_scan_errs) = format::yaml::scan_yaml_content(
171                    &content,
172                    file_path,
173                    vendor,
174                    validation_config.scan_keys,
175                );
176                if !yaml_scan_errs.is_empty() {
177                    failed_files += 1;
178                    scan_errors.extend(yaml_scan_errs);
179                }
180                val_errs
181            }
182            None => continue,
183        };
184
185        scanned_files += 1;
186
187        // For AllowList: filter out errors where the vendor IS in the allow list.
188        // The sentinel vendor caused mismatches for all vendors; remove the allowed ones.
189        let file_errors = apply_allow_list_filter(file_errors, &validation_config.vendor_policy);
190        validation_errors.extend(file_errors);
191    }
192
193    let ok = validation_errors.is_empty() && scan_errors.is_empty();
194    Ok(ValidationReport {
195        scanned_files,
196        failed_files,
197        ok,
198        validation_errors,
199        scan_errors,
200    })
201}
202
203/// Determine the effective vendor string to pass to scanners for a given policy.
204///
205/// - `Any` → `None` (no vendor enforcement).
206/// - `MustMatch(v)` → `Some(v)` (scanner enforces exact match directly).
207/// - `AllowList(_)` → `Some("\x00")` (sentinel that no real GTS vendor can match).
208///   GTS vendors must be lowercase alphanumeric, so `\x00` is guaranteed to never
209///   equal any real vendor. This causes `validate_candidate` to emit "Vendor mismatch"
210///   for every non-example vendor, and `apply_allow_list_filter` then removes the
211///   vendors that are in the allow list — leaving only genuinely disallowed vendors.
212fn effective_vendor_for_scanning(policy: &VendorPolicy) -> Option<String> {
213    match policy {
214        VendorPolicy::Any => None,
215        VendorPolicy::MustMatch(v) => Some(v.clone()),
216        VendorPolicy::AllowList(_) => Some("\x00".to_owned()),
217    }
218}
219
220/// For `VendorPolicy::AllowList`, remove validation errors whose vendor IS in the list.
221///
222/// Scanners run with a sentinel vendor (`\x00`) that generates "Vendor mismatch" for
223/// every non-example vendor. This function retains only errors where the vendor is NOT
224/// in the allow list — i.e., genuinely disallowed vendors produce errors.
225fn apply_allow_list_filter(
226    errors: Vec<ValidationError>,
227    policy: &VendorPolicy,
228) -> Vec<ValidationError> {
229    let VendorPolicy::AllowList(allowed) = policy else {
230        return errors;
231    };
232
233    errors
234        .into_iter()
235        .filter_map(|mut e| {
236            // Keep the error only if it is NOT a vendor-mismatch for an allowed vendor.
237            // Vendor-mismatch errors contain "Vendor mismatch" in the message.
238            // Extract the actual vendor from normalized_id (first segment before '.').
239            if !e.error.contains("Vendor mismatch") {
240                return Some(e); // non-vendor errors always kept
241            }
242            // normalized_id format: "gts.<vendor>.<rest>..."
243            // The vendor is the second dot-separated segment (index 1).
244            let id_vendor = e.normalized_id.split('.').nth(1).unwrap_or("");
245            if allowed.iter().any(|a| a == id_vendor) {
246                return None;
247            }
248            e.error = format_allow_list_mismatch(allowed, id_vendor);
249            Some(e)
250        })
251        .collect()
252}
253
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Build a `ValidationError` located in `docs/test.md` for `normalized_id`, carrying
    /// the given `error` message.
    fn error_for(normalized_id: &str, error: &str) -> ValidationError {
        ValidationError {
            file: PathBuf::from("docs/test.md"),
            line: 1,
            column: 1,
            json_path: String::new(),
            raw_value: normalized_id.to_owned(),
            normalized_id: normalized_id.to_owned(),
            error: error.to_owned(),
            context: normalized_id.to_owned(),
        }
    }

    /// Allow-list policy shared by the tests: vendors `x` and `cf` are permitted.
    fn allow_x_and_cf() -> VendorPolicy {
        VendorPolicy::AllowList(vec!["x".to_owned(), "cf".to_owned()])
    }

    #[test]
    fn test_apply_allow_list_filter_rewrites_disallowed_vendor_message() {
        let errors = vec![error_for(
            "gts.w.core.org.department.v1~",
            "Vendor mismatch: expected '', found 'w'",
        )];

        let filtered = apply_allow_list_filter(errors, &allow_x_and_cf());

        assert_eq!(filtered.len(), 1);
        assert_eq!(
            filtered[0].error,
            "Vendor mismatch: expected one of 'x, cf', found 'w'"
        );
    }

    #[test]
    fn test_apply_allow_list_filter_drops_allowed_vendor_mismatch() {
        let errors = vec![
            error_for(
                "gts.x.core.org.department.v1~",
                "Vendor mismatch: expected '', found 'x'",
            ),
            error_for(
                "gts.cf.core.org.department.v1~",
                "Vendor mismatch: expected '', found 'cf'",
            ),
        ];

        let filtered = apply_allow_list_filter(errors, &allow_x_and_cf());

        assert!(filtered.is_empty());
    }

    #[test]
    fn test_apply_allow_list_filter_keeps_non_vendor_errors() {
        // Errors without "Vendor mismatch" in the message are kept untouched, even when
        // the identifier's vendor is in the allow list.
        let errors = vec![error_for("gts.x.core.org.department.v1~", "Invalid segment")];

        let filtered = apply_allow_list_filter(errors, &allow_x_and_cf());

        assert_eq!(filtered.len(), 1);
        assert_eq!(filtered[0].error, "Invalid segment");
    }

    #[test]
    fn test_apply_allow_list_filter_is_noop_for_other_policies() {
        for policy in [VendorPolicy::Any, VendorPolicy::MustMatch("x".to_owned())] {
            let errors = vec![error_for(
                "gts.w.core.org.department.v1~",
                "Vendor mismatch: expected 'x', found 'w'",
            )];

            let filtered = apply_allow_list_filter(errors, &policy);

            assert_eq!(filtered.len(), 1);
            assert_eq!(filtered[0].error, "Vendor mismatch: expected 'x', found 'w'");
        }
    }

    #[test]
    fn test_effective_vendor_for_scanning_per_policy() {
        assert_eq!(effective_vendor_for_scanning(&VendorPolicy::Any), None);
        assert_eq!(
            effective_vendor_for_scanning(&VendorPolicy::MustMatch("x".to_owned())).as_deref(),
            Some("x")
        );
        assert_eq!(
            effective_vendor_for_scanning(&allow_x_and_cf()).as_deref(),
            Some("\x00")
        );
    }
}
283}