Skip to main content

gts_validator/
lib.rs

1//! # gts-validator
2//!
3//! GTS identifier validator for documentation and configuration files.
4//!
5//! This crate provides a clean separation between the **core validation engine**
6//! (input-agnostic) and **input strategies** (starting with filesystem scanning).
7//!
8//! ## Quick Start
9//!
10//! ```rust,no_run
11//! use std::path::PathBuf;
12//! use gts_validator::{validate_fs, FsSourceConfig, ValidationConfig, VendorPolicy};
13//!
14//! let mut fs_config = FsSourceConfig::default();
15//! fs_config.paths = vec![PathBuf::from("docs"), PathBuf::from("modules")];
16//! fs_config.exclude = vec!["target/*".to_owned()];
17//!
18//! let mut validation_config = ValidationConfig::default();
19//! validation_config.vendor_policy = VendorPolicy::MustMatch("x".to_owned());
20//!
21//! let report = validate_fs(&fs_config, &validation_config).unwrap();
22//! println!("Files scanned: {}", report.scanned_files);
23//! println!("Validation errors: {}", report.errors_count());
24//! println!("Scan errors: {}", report.scan_errors.len());
25//! println!("OK: {}", report.ok);
26//! ```
27
28mod config;
29mod error;
30mod format;
31mod normalize;
32pub mod output;
33mod report;
34mod strategy;
35mod validator;
36
37pub use config::{DiscoveryMode, FsSourceConfig, ValidationConfig, VendorPolicy};
38pub use error::{ScanError, ScanErrorKind, ValidationError};
39pub use report::ValidationReport;
40
41use strategy::ContentFormat;
42use strategy::fs::{ScanResult, content_format_for, find_files, read_file_bounded};
43
44/// Validate GTS identifiers in files on disk.
45///
46/// This is the primary public API.
47///
48/// # Arguments
49///
50/// * `fs_config` - Filesystem-specific source options (paths, exclude, max file size, limits)
51/// * `validation_config` - Core validation config (vendor policy, `scan_keys`, discovery mode)
52///
53/// # Errors
54///
55/// Returns an error if `fs_config.paths` is empty or if any provided path does not exist.
56/// Returns `Ok` with `scanned_files: 0` if paths exist but contain no scannable files.
57/// Scan failures (unreadable files, parse errors, etc.) are reported in `report.scan_errors`
58/// and never silently discarded.
59pub fn validate_fs(
60    fs_config: &FsSourceConfig,
61    validation_config: &ValidationConfig,
62) -> anyhow::Result<ValidationReport> {
63    if fs_config.paths.is_empty() {
64        anyhow::bail!("No paths provided for validation");
65    }
66
67    for path in &fs_config.paths {
68        if !path.exists() {
69            anyhow::bail!("Path does not exist: {}", path.display());
70        }
71    }
72
73    let (files, mut scan_errors) = find_files(fs_config);
74
75    if files.is_empty() && scan_errors.is_empty() {
76        return Ok(ValidationReport {
77            scanned_files: 0,
78            failed_files: 0,
79            ok: true,
80            validation_errors: vec![],
81            scan_errors: vec![],
82        });
83    }
84
85    let heuristic = validation_config.discovery_mode == DiscoveryMode::Heuristic;
86    // For AllowList, pass a sentinel vendor that no real GTS ID can match.
87    // This causes validate_candidate to emit "Vendor mismatch" for every non-example
88    // vendor, and apply_allow_list_filter then removes the allowed ones — leaving only
89    // genuinely disallowed vendors as errors.
90    let effective_vendor = effective_vendor_for_scanning(&validation_config.vendor_policy);
91
92    let mut validation_errors = Vec::new();
93    let mut scanned_files: usize = 0;
94    // Discovery-stage failures (walk errors, boundary violations, canonicalization errors)
95    // are already in scan_errors from find_files. Count them as failed files upfront.
96    let mut failed_files: usize = scan_errors.len();
97    let mut total_bytes: u64 = 0;
98
99    'files: for file_path in &files {
100        if scanned_files + failed_files >= fs_config.max_files {
101            scan_errors.push(ScanError {
102                file: file_path.clone(),
103                kind: ScanErrorKind::LimitExceeded,
104                message: format!(
105                    "Scan aborted: max_files limit ({}) reached; remaining files not scanned",
106                    fs_config.max_files
107                ),
108            });
109            failed_files += 1;
110            break;
111        }
112
113        let content = match read_file_bounded(file_path, fs_config.max_file_size) {
114            ScanResult::Ok(c) => c,
115            ScanResult::Err(e) => {
116                scan_errors.push(e);
117                failed_files += 1;
118                continue;
119            }
120        };
121
122        let file_bytes = content.len() as u64;
123        if total_bytes.saturating_add(file_bytes) > fs_config.max_total_bytes {
124            scan_errors.push(ScanError {
125                file: file_path.clone(),
126                kind: ScanErrorKind::LimitExceeded,
127                message: format!(
128                    "Scan aborted: max_total_bytes limit ({}) reached; remaining files not scanned",
129                    fs_config.max_total_bytes
130                ),
131            });
132            failed_files += 1;
133            break;
134        }
135        total_bytes = total_bytes.saturating_add(file_bytes);
136
137        let vendor = effective_vendor.as_deref();
138        let file_errors = match content_format_for(file_path) {
139            Some(ContentFormat::Markdown) => format::markdown::scan_markdown_content(
140                &content,
141                file_path,
142                vendor,
143                heuristic,
144                &validation_config.skip_tokens,
145            ),
146            Some(ContentFormat::Json) => {
147                match format::json::scan_json_content(
148                    &content,
149                    file_path,
150                    vendor,
151                    validation_config.scan_keys,
152                ) {
153                    Ok(errs) => errs,
154                    Err(scan_err) => {
155                        scan_errors.push(scan_err);
156                        failed_files += 1;
157                        continue 'files;
158                    }
159                }
160            }
161            Some(ContentFormat::Yaml) => {
162                let (val_errs, yaml_scan_errs) = format::yaml::scan_yaml_content(
163                    &content,
164                    file_path,
165                    vendor,
166                    validation_config.scan_keys,
167                );
168                if !yaml_scan_errs.is_empty() {
169                    failed_files += 1;
170                    scan_errors.extend(yaml_scan_errs);
171                }
172                val_errs
173            }
174            None => continue,
175        };
176
177        scanned_files += 1;
178
179        // For AllowList: filter out errors where the vendor IS in the allow list.
180        // The sentinel vendor caused mismatches for all vendors; remove the allowed ones.
181        let file_errors = apply_allow_list_filter(file_errors, &validation_config.vendor_policy);
182        validation_errors.extend(file_errors);
183    }
184
185    let ok = validation_errors.is_empty() && scan_errors.is_empty();
186    Ok(ValidationReport {
187        scanned_files,
188        failed_files,
189        ok,
190        validation_errors,
191        scan_errors,
192    })
193}
194
195/// Determine the effective vendor string to pass to scanners for a given policy.
196///
197/// - `Any` → `None` (no vendor enforcement).
198/// - `MustMatch(v)` → `Some(v)` (scanner enforces exact match directly).
199/// - `AllowList(_)` → `Some("\x00")` (sentinel that no real GTS vendor can match).
200///   GTS vendors must be lowercase alphanumeric, so `\x00` is guaranteed to never
201///   equal any real vendor. This causes `validate_candidate` to emit "Vendor mismatch"
202///   for every non-example vendor, and `apply_allow_list_filter` then removes the
203///   vendors that are in the allow list — leaving only genuinely disallowed vendors.
204fn effective_vendor_for_scanning(policy: &VendorPolicy) -> Option<String> {
205    match policy {
206        VendorPolicy::Any => None,
207        VendorPolicy::MustMatch(v) => Some(v.clone()),
208        VendorPolicy::AllowList(_) => Some("\x00".to_owned()),
209    }
210}
211
212/// For `VendorPolicy::AllowList`, remove validation errors whose vendor IS in the list.
213///
214/// Scanners run with a sentinel vendor (`\x00`) that generates "Vendor mismatch" for
215/// every non-example vendor. This function retains only errors where the vendor is NOT
216/// in the allow list — i.e., genuinely disallowed vendors produce errors.
217fn apply_allow_list_filter(
218    errors: Vec<ValidationError>,
219    policy: &VendorPolicy,
220) -> Vec<ValidationError> {
221    let VendorPolicy::AllowList(allowed) = policy else {
222        return errors;
223    };
224
225    errors
226        .into_iter()
227        .filter(|e| {
228            // Keep the error only if it is NOT a vendor-mismatch for an allowed vendor.
229            // Vendor-mismatch errors contain "Vendor mismatch" in the message.
230            // Extract the actual vendor from normalized_id (first segment before '.').
231            if !e.error.contains("Vendor mismatch") {
232                return true; // non-vendor errors always kept
233            }
234            // normalized_id format: "gts.<vendor>.<rest>..."
235            // The vendor is the second dot-separated segment (index 1).
236            let id_vendor = e.normalized_id.split('.').nth(1).unwrap_or("");
237            !allowed.iter().any(|a| a == id_vendor)
238        })
239        .collect()
240}