Skip to main content

lintel_check/
validate.rs

1use std::collections::{BTreeMap, HashMap};
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use glob::glob;
7use serde_json::Value;
8
9use crate::catalog::{self, CompiledCatalog};
10use crate::config;
11use crate::diagnostics::{
12    FileDiagnostic, ParseDiagnostic, ValidationDiagnostic, find_instance_path_offset,
13};
14use crate::discover;
15use crate::parsers::{self, FileFormat, JsoncParser, Parser};
16use crate::registry;
17use crate::retriever::{CacheStatus, HttpClient, SchemaCache, ensure_cache_dir};
18use crate::validation_cache::{self, ValidationCacheStatus};
19
20pub struct ValidateArgs {
21    /// Glob patterns to find files (empty = auto-discover)
22    pub globs: Vec<String>,
23
24    /// Exclude files matching these globs (repeatable)
25    pub exclude: Vec<String>,
26
27    /// Cache directory for remote schemas
28    pub cache_dir: Option<String>,
29
30    /// Bypass schema cache reads (still writes fetched schemas to cache)
31    pub force_schema_fetch: bool,
32
33    /// Bypass validation cache reads (still writes results to cache)
34    pub force_validation: bool,
35
36    /// Disable `SchemaStore` catalog matching
37    pub no_catalog: bool,
38
39    /// Force file format for all inputs
40    pub format: Option<parsers::FileFormat>,
41
42    /// Directory to search for `lintel.toml` (defaults to cwd)
43    pub config_dir: Option<PathBuf>,
44
45    /// TTL for cached schemas. `None` means no expiry.
46    pub schema_cache_ttl: Option<std::time::Duration>,
47}
48
49/// A single lint error produced during validation.
50pub enum LintError {
51    Parse(ParseDiagnostic),
52    Validation(ValidationDiagnostic),
53    File(FileDiagnostic),
54}
55
56impl LintError {
57    /// File path associated with this error.
58    pub fn path(&self) -> &str {
59        match self {
60            LintError::Parse(d) => d.src.name(),
61            LintError::Validation(d) => &d.path,
62            LintError::File(d) => &d.path,
63        }
64    }
65
66    /// Human-readable error message.
67    pub fn message(&self) -> &str {
68        match self {
69            LintError::Parse(d) => &d.message,
70            LintError::Validation(d) => &d.message,
71            LintError::File(d) => &d.message,
72        }
73    }
74
75    /// Byte offset in the source file (for sorting).
76    fn offset(&self) -> usize {
77        match self {
78            LintError::Parse(d) => d.span.offset(),
79            LintError::Validation(d) => d.span.offset(),
80            LintError::File(_) => 0,
81        }
82    }
83
84    /// Convert into a boxed miette Diagnostic for rich rendering.
85    pub fn into_diagnostic(self) -> Box<dyn miette::Diagnostic + Send + Sync> {
86        match self {
87            LintError::Parse(d) => Box::new(d),
88            LintError::Validation(d) => Box::new(d),
89            LintError::File(d) => Box::new(d),
90        }
91    }
92}
93
94/// A file that was checked and the schema it resolved to.
95pub struct CheckedFile {
96    pub path: String,
97    pub schema: String,
98    /// `None` for local schemas and builtins; `Some` for remote schemas.
99    pub cache_status: Option<CacheStatus>,
100    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
101    pub validation_cache_status: Option<ValidationCacheStatus>,
102}
103
104/// Result of a validation run.
105pub struct ValidateResult {
106    pub errors: Vec<LintError>,
107    pub checked: Vec<CheckedFile>,
108}
109
110impl ValidateResult {
111    pub fn has_errors(&self) -> bool {
112        !self.errors.is_empty()
113    }
114
115    pub fn files_checked(&self) -> usize {
116        self.checked.len()
117    }
118}
119
120// ---------------------------------------------------------------------------
121// Internal types
122// ---------------------------------------------------------------------------
123
124/// A file that has been parsed and matched to a schema URI.
125struct ParsedFile {
126    path: String,
127    content: String,
128    instance: Value,
129    /// Original schema URI before rewrites (for override matching).
130    original_schema_uri: String,
131}
132
133// ---------------------------------------------------------------------------
134// Config loading
135// ---------------------------------------------------------------------------
136
137/// Locate `lintel.toml`, load the full config, and return the config directory.
138/// Returns `(config, config_dir, config_path)`.  When no config is found or
139/// cwd is unavailable the config is default and `config_path` is `None`.
140#[tracing::instrument(skip_all)]
141fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
142    let start_dir = match search_dir {
143        Some(d) => d.to_path_buf(),
144        None => match std::env::current_dir() {
145            Ok(d) => d,
146            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
147        },
148    };
149
150    let Some(config_path) = config::find_config_path(&start_dir) else {
151        return (config::Config::default(), start_dir, None);
152    };
153
154    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
155    let cfg = config::find_and_load(&start_dir)
156        .ok()
157        .flatten()
158        .unwrap_or_default();
159    (cfg, dir, Some(config_path))
160}
161
162// ---------------------------------------------------------------------------
163// File collection
164// ---------------------------------------------------------------------------
165
166/// Collect input files from globs/directories, applying exclude filters.
167#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
168fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
169    if globs.is_empty() {
170        return discover::discover_files(".", exclude);
171    }
172
173    let mut result = Vec::new();
174    for pattern in globs {
175        let path = Path::new(pattern);
176        if path.is_dir() {
177            result.extend(discover::discover_files(pattern, exclude)?);
178        } else {
179            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
180                let path = entry?;
181                if path.is_file() && !is_excluded(&path, exclude) {
182                    result.push(path);
183                }
184            }
185        }
186    }
187    Ok(result)
188}
189
190fn is_excluded(path: &Path, excludes: &[String]) -> bool {
191    let path_str = match path.to_str() {
192        Some(s) => s.strip_prefix("./").unwrap_or(s),
193        None => return false,
194    };
195    excludes
196        .iter()
197        .any(|pattern| glob_match::glob_match(pattern, path_str))
198}
199
200// ---------------------------------------------------------------------------
201// lintel.toml self-validation
202// ---------------------------------------------------------------------------
203
204/// Validate `lintel.toml` against its built-in schema.
205fn validate_config(
206    config_path: &Path,
207    errors: &mut Vec<LintError>,
208    checked: &mut Vec<CheckedFile>,
209    on_check: &mut impl FnMut(&CheckedFile),
210) -> Result<()> {
211    let content = fs::read_to_string(config_path)?;
212    let config_value: Value = toml::from_str(&content)
213        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
214    let schema_value: Value = serde_json::from_str(include_str!(concat!(
215        env!("OUT_DIR"),
216        "/lintel-config.schema.json"
217    )))
218    .context("failed to parse embedded lintel config schema")?;
219    if let Ok(validator) = jsonschema::options().build(&schema_value) {
220        let path_str = config_path.display().to_string();
221        for error in validator.iter_errors(&config_value) {
222            let ip = error.instance_path().to_string();
223            let offset = find_instance_path_offset(&content, &ip);
224            errors.push(LintError::Validation(ValidationDiagnostic {
225                src: miette::NamedSource::new(&path_str, content.clone()),
226                span: offset.into(),
227                path: path_str.clone(),
228                instance_path: ip,
229                message: error.to_string(),
230            }));
231        }
232        let cf = CheckedFile {
233            path: path_str,
234            schema: "(builtin)".to_string(),
235            cache_status: None,
236            validation_cache_status: None,
237        };
238        on_check(&cf);
239        checked.push(cf);
240    }
241    Ok(())
242}
243
244// ---------------------------------------------------------------------------
245// Phase 1: Parse files and resolve schema URIs
246// ---------------------------------------------------------------------------
247
248/// Try parsing content with each known format, returning the first success.
249///
250/// JSONC is tried first (superset of JSON, handles comments), then YAML and
251/// TOML which cover the most common config formats, followed by the rest.
252fn try_parse_all(content: &str, file_name: &str) -> Option<(parsers::FileFormat, Value)> {
253    use parsers::FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
254    const FORMATS: [parsers::FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
255
256    for fmt in FORMATS {
257        let parser = parsers::parser_for(fmt);
258        if let Ok(val) = parser.parse(content, file_name) {
259            return Some((fmt, val));
260        }
261    }
262    None
263}
264
265/// Result of processing a single file: either a parsed file with its schema URI,
266/// a lint error, or nothing (file was skipped).
267enum FileResult {
268    Parsed {
269        schema_uri: String,
270        parsed: ParsedFile,
271    },
272    Error(LintError),
273    Skip,
274}
275
276/// Process a single file: read, parse, resolve schema URI.
277fn process_one_file(
278    path: &Path,
279    format_override: Option<FileFormat>,
280    config: &config::Config,
281    config_dir: &Path,
282    compiled_catalogs: &[CompiledCatalog],
283) -> FileResult {
284    let content = match fs::read_to_string(path) {
285        Ok(c) => c,
286        Err(e) => {
287            return FileResult::Error(LintError::File(FileDiagnostic {
288                path: path.display().to_string(),
289                message: format!("failed to read: {e}"),
290            }));
291        }
292    };
293
294    let path_str = path.display().to_string();
295    let file_name = path
296        .file_name()
297        .and_then(|n| n.to_str())
298        .unwrap_or(&path_str);
299
300    let detected_format = format_override.or_else(|| parsers::detect_format(path));
301
302    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
303    if detected_format.is_none() {
304        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
305            || compiled_catalogs
306                .iter()
307                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
308        if !has_match {
309            return FileResult::Skip;
310        }
311    }
312
313    // Parse the file content.
314    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
315        let parser = parsers::parser_for(fmt);
316        match parser.parse(&content, &path_str) {
317            Ok(val) => (parser, val),
318            Err(parse_err) => {
319                // JSONC fallback for .json files that match a catalog entry.
320                if fmt == FileFormat::Json
321                    && compiled_catalogs
322                        .iter()
323                        .any(|cat| cat.find_schema(&path_str, file_name).is_some())
324                {
325                    match JsoncParser.parse(&content, &path_str) {
326                        Ok(val) => (parsers::parser_for(FileFormat::Jsonc), val),
327                        Err(jsonc_err) => return FileResult::Error(LintError::Parse(jsonc_err)),
328                    }
329                } else {
330                    return FileResult::Error(LintError::Parse(parse_err));
331                }
332            }
333        }
334    } else {
335        match try_parse_all(&content, &path_str) {
336            Some((fmt, val)) => (parsers::parser_for(fmt), val),
337            None => return FileResult::Skip,
338        }
339    };
340
341    // Skip markdown files with no frontmatter
342    if instance.is_null() {
343        return FileResult::Skip;
344    }
345
346    // Schema resolution priority:
347    // 1. Inline $schema / YAML modeline (always wins)
348    // 2. Custom schema mappings from lintel.toml [schemas]
349    // 3. Catalog matching (SchemaStore + additional registries)
350    let schema_uri = parser
351        .extract_schema_uri(&content, &instance)
352        .or_else(|| {
353            config
354                .find_schema_mapping(&path_str, file_name)
355                .map(str::to_string)
356        })
357        .or_else(|| {
358            compiled_catalogs
359                .iter()
360                .find_map(|cat| cat.find_schema(&path_str, file_name))
361                .map(str::to_string)
362        });
363
364    let Some(schema_uri) = schema_uri else {
365        return FileResult::Skip;
366    };
367
368    // Keep original URI for override matching (before rewrites)
369    let original_schema_uri = schema_uri.clone();
370
371    // Apply rewrite rules, then resolve // paths relative to lintel.toml
372    let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
373    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
374
375    // Resolve relative local paths against the file's parent directory.
376    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
377    let schema_uri = if is_remote {
378        schema_uri
379    } else {
380        path.parent()
381            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
382            .unwrap_or(schema_uri)
383    };
384
385    FileResult::Parsed {
386        schema_uri,
387        parsed: ParsedFile {
388            path: path_str,
389            content,
390            instance,
391            original_schema_uri,
392        },
393    }
394}
395
396/// Parse each file in parallel, extract its schema URI, apply rewrites, and
397/// group by resolved schema URI.
398#[tracing::instrument(skip_all, fields(file_count = files.len()))]
399fn parse_and_group_files(
400    files: &[PathBuf],
401    args: &ValidateArgs,
402    config: &config::Config,
403    config_dir: &Path,
404    compiled_catalogs: &[CompiledCatalog],
405    errors: &mut Vec<LintError>,
406) -> BTreeMap<String, Vec<ParsedFile>> {
407    use rayon::prelude::*;
408
409    let results: Vec<FileResult> = files
410        .par_iter()
411        .map(|path| process_one_file(path, args.format, config, config_dir, compiled_catalogs))
412        .collect();
413
414    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
415    for result in results {
416        match result {
417            FileResult::Parsed { schema_uri, parsed } => {
418                schema_groups.entry(schema_uri).or_default().push(parsed);
419            }
420            FileResult::Error(e) => errors.push(e),
421            FileResult::Skip => {}
422        }
423    }
424
425    schema_groups
426}
427
428// ---------------------------------------------------------------------------
429// Phase 2: Schema fetching, compilation, and instance validation
430// ---------------------------------------------------------------------------
431
432/// Fetch a schema by URI, returning its parsed JSON and cache status.
433///
434/// For remote URIs, checks the prefetched map first; for local URIs, reads
435/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
436fn fetch_schema_from_prefetched(
437    schema_uri: &str,
438    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
439    local_cache: &mut HashMap<String, Value>,
440    group: &[ParsedFile],
441    errors: &mut Vec<LintError>,
442    checked: &mut Vec<CheckedFile>,
443    on_check: &mut impl FnMut(&CheckedFile),
444) -> Option<(Value, Option<CacheStatus>)> {
445    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
446
447    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
448        match prefetched.get(schema_uri) {
449            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
450            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
451            None => Err(format!("schema not prefetched: {schema_uri}")),
452        }
453    } else if let Some(cached) = local_cache.get(schema_uri) {
454        Ok((cached.clone(), None))
455    } else {
456        fs::read_to_string(schema_uri)
457            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
458            .and_then(|content| {
459                serde_json::from_str::<Value>(&content)
460                    .map(|v| {
461                        local_cache.insert(schema_uri.to_string(), v.clone());
462                        (v, None)
463                    })
464                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
465            })
466    };
467
468    match result {
469        Ok(value) => Some(value),
470        Err(message) => {
471            report_group_error(&message, schema_uri, None, group, errors, checked, on_check);
472            None
473        }
474    }
475}
476
477/// Report the same error for every file in a schema group.
478fn report_group_error<P: std::borrow::Borrow<ParsedFile>>(
479    message: &str,
480    schema_uri: &str,
481    cache_status: Option<CacheStatus>,
482    group: &[P],
483    errors: &mut Vec<LintError>,
484    checked: &mut Vec<CheckedFile>,
485    on_check: &mut impl FnMut(&CheckedFile),
486) {
487    for item in group {
488        let pf = item.borrow();
489        let cf = CheckedFile {
490            path: pf.path.clone(),
491            schema: schema_uri.to_string(),
492            cache_status,
493            validation_cache_status: None,
494        };
495        on_check(&cf);
496        checked.push(cf);
497        errors.push(LintError::File(FileDiagnostic {
498            path: pf.path.clone(),
499            message: message.to_string(),
500        }));
501    }
502}
503
504/// Mark every file in a group as checked (no errors).
505fn mark_group_checked<P: std::borrow::Borrow<ParsedFile>>(
506    schema_uri: &str,
507    cache_status: Option<CacheStatus>,
508    validation_cache_status: Option<ValidationCacheStatus>,
509    group: &[P],
510    checked: &mut Vec<CheckedFile>,
511    on_check: &mut impl FnMut(&CheckedFile),
512) {
513    for item in group {
514        let pf = item.borrow();
515        let cf = CheckedFile {
516            path: pf.path.clone(),
517            schema: schema_uri.to_string(),
518            cache_status,
519            validation_cache_status,
520        };
521        on_check(&cf);
522        checked.push(cf);
523    }
524}
525
526/// Convert `(instance_path, message)` pairs into `LintError::Validation` diagnostics.
527fn push_error_pairs(
528    pf: &ParsedFile,
529    error_pairs: &[(String, String)],
530    errors: &mut Vec<LintError>,
531) {
532    for (ip, msg) in error_pairs {
533        let offset = find_instance_path_offset(&pf.content, ip);
534        errors.push(LintError::Validation(ValidationDiagnostic {
535            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
536            span: offset.into(),
537            path: pf.path.clone(),
538            instance_path: ip.clone(),
539            message: msg.clone(),
540        }));
541    }
542}
543
544/// Validate all files in a group against an already-compiled validator and store
545/// results in the validation cache.
546#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
547#[allow(clippy::too_many_arguments)]
548async fn validate_group<P: std::borrow::Borrow<ParsedFile>>(
549    validator: &jsonschema::Validator,
550    schema_uri: &str,
551    schema_hash: &str,
552    validate_formats: bool,
553    cache_status: Option<CacheStatus>,
554    group: &[P],
555    vcache: &validation_cache::ValidationCache,
556    errors: &mut Vec<LintError>,
557    checked: &mut Vec<CheckedFile>,
558    on_check: &mut impl FnMut(&CheckedFile),
559) {
560    for item in group {
561        let pf = item.borrow();
562        let file_errors: Vec<(String, String)> = validator
563            .iter_errors(&pf.instance)
564            .map(|error| (error.instance_path().to_string(), error.to_string()))
565            .collect();
566
567        vcache
568            .store(&pf.content, schema_hash, validate_formats, &file_errors)
569            .await;
570        push_error_pairs(pf, &file_errors, errors);
571
572        let cf = CheckedFile {
573            path: pf.path.clone(),
574            schema: schema_uri.to_string(),
575            cache_status,
576            validation_cache_status: Some(ValidationCacheStatus::Miss),
577        };
578        on_check(&cf);
579        checked.push(cf);
580    }
581}
582
583// ---------------------------------------------------------------------------
584// Public API
585// ---------------------------------------------------------------------------
586
587/// # Errors
588///
589/// Returns an error if file collection or schema validation encounters an I/O error.
590pub async fn run<C: HttpClient>(args: &ValidateArgs, client: C) -> Result<ValidateResult> {
591    run_with(args, client, |_| {}).await
592}
593
594/// Like [`run`], but calls `on_check` each time a file is checked, allowing
595/// callers to stream progress (e.g. verbose output) as files are processed.
596///
597/// # Errors
598///
599/// Returns an error if file collection or schema validation encounters an I/O error.
600#[tracing::instrument(skip_all, name = "validate")]
601#[allow(clippy::too_many_lines)]
602pub async fn run_with<C: HttpClient>(
603    args: &ValidateArgs,
604    client: C,
605    mut on_check: impl FnMut(&CheckedFile),
606) -> Result<ValidateResult> {
607    let cache_dir = match &args.cache_dir {
608        Some(dir) => {
609            let path = PathBuf::from(dir);
610            let _ = fs::create_dir_all(&path);
611            path
612        }
613        None => ensure_cache_dir(),
614    };
615    let retriever = SchemaCache::new(
616        Some(cache_dir),
617        client.clone(),
618        args.force_schema_fetch,
619        args.schema_cache_ttl,
620    );
621
622    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
623    let files = collect_files(&args.globs, &args.exclude)?;
624    tracing::info!(file_count = files.len(), "collected files");
625
626    let mut compiled_catalogs = Vec::new();
627
628    if !args.no_catalog {
629        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
630
631        // Fetch all catalogs in parallel using JoinSet.
632        // Each task returns (label, result) so error messages stay specific.
633        #[allow(clippy::items_after_statements)]
634        type CatalogResult = (
635            String,
636            Result<CompiledCatalog, Box<dyn std::error::Error + Send + Sync>>,
637        );
638        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
639
640        // Lintel catalog
641        let r = retriever.clone();
642        let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
643        catalog_tasks.spawn(async move {
644            let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
645                .await
646                .map(|cat| CompiledCatalog::compile(&cat));
647            (label, result)
648        });
649
650        // SchemaStore catalog
651        let r = retriever.clone();
652        catalog_tasks.spawn(async move {
653            let result = catalog::fetch_catalog(&r)
654                .await
655                .map(|cat| CompiledCatalog::compile(&cat));
656            ("SchemaStore catalog".to_string(), result)
657        });
658
659        // Additional registries from lintel.toml
660        for registry_url in &config.registries {
661            let r = retriever.clone();
662            let url = registry_url.clone();
663            let label = format!("registry {url}");
664            catalog_tasks.spawn(async move {
665                let result = registry::fetch(&r, &url)
666                    .await
667                    .map(|cat| CompiledCatalog::compile(&cat));
668                (label, result)
669            });
670        }
671
672        while let Some(result) = catalog_tasks.join_next().await {
673            match result {
674                Ok((_, Ok(compiled))) => compiled_catalogs.push(compiled),
675                Ok((label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
676                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
677            }
678        }
679
680        drop(catalog_span);
681    }
682
683    let mut errors: Vec<LintError> = Vec::new();
684    let mut checked: Vec<CheckedFile> = Vec::new();
685
686    // Validate lintel.toml against its own schema
687    if let Some(config_path) = config_path {
688        validate_config(&config_path, &mut errors, &mut checked, &mut on_check)?;
689    }
690
691    // Phase 1: Parse files and resolve schema URIs
692    let schema_groups = parse_and_group_files(
693        &files,
694        args,
695        &config,
696        &config_dir,
697        &compiled_catalogs,
698        &mut errors,
699    );
700    tracing::info!(
701        schema_count = schema_groups.len(),
702        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
703        "grouped files by schema"
704    );
705
706    // Create validation cache
707    let vcache = validation_cache::ValidationCache::new(
708        validation_cache::ensure_cache_dir(),
709        args.force_validation,
710    );
711
712    // Prefetch all remote schemas in parallel
713    let remote_uris: Vec<&String> = schema_groups
714        .keys()
715        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
716        .collect();
717
718    let prefetched = {
719        let _prefetch_span =
720            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
721
722        let mut schema_tasks = tokio::task::JoinSet::new();
723        for uri in remote_uris {
724            let r = retriever.clone();
725            let u = uri.clone();
726            schema_tasks.spawn(async move {
727                let result = r.fetch(&u).await;
728                (u, result)
729            });
730        }
731
732        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
733        while let Some(result) = schema_tasks.join_next().await {
734            match result {
735                Ok((uri, fetch_result)) => {
736                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
737                }
738                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
739            }
740        }
741
742        prefetched
743    };
744
745    // Phase 2: Compile each schema once and validate all matching files
746    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
747    let mut fetch_time = std::time::Duration::ZERO;
748    let mut hash_time = std::time::Duration::ZERO;
749    let mut vcache_time = std::time::Duration::ZERO;
750    let mut compile_time = std::time::Duration::ZERO;
751    let mut validate_time = std::time::Duration::ZERO;
752
753    for (schema_uri, group) in &schema_groups {
754        let _group_span = tracing::debug_span!(
755            "schema_group",
756            schema = schema_uri.as_str(),
757            files = group.len(),
758        )
759        .entered();
760
761        // If ANY file in the group matches a `validate_formats = false` override,
762        // disable format validation for the whole group (they share one compiled validator).
763        let validate_formats = group.iter().all(|pf| {
764            config
765                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
766        });
767
768        // Remote schemas were prefetched in parallel above; local schemas are
769        // read from disk here (with in-memory caching).
770        let t = std::time::Instant::now();
771        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
772            schema_uri,
773            &prefetched,
774            &mut local_schema_cache,
775            group,
776            &mut errors,
777            &mut checked,
778            &mut on_check,
779        ) else {
780            fetch_time += t.elapsed();
781            continue;
782        };
783        fetch_time += t.elapsed();
784
785        // Pre-compute schema hash once for the entire group.
786        let t = std::time::Instant::now();
787        let schema_hash = validation_cache::schema_hash(&schema_value);
788        hash_time += t.elapsed();
789
790        // Split the group into validation cache hits and misses.
791        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
792
793        let t = std::time::Instant::now();
794        for pf in group {
795            let (cached, vcache_status) = vcache
796                .lookup(&pf.content, &schema_hash, validate_formats)
797                .await;
798
799            if let Some(cached_errors) = cached {
800                push_error_pairs(pf, &cached_errors, &mut errors);
801                let cf = CheckedFile {
802                    path: pf.path.clone(),
803                    schema: schema_uri.clone(),
804                    cache_status,
805                    validation_cache_status: Some(vcache_status),
806                };
807                on_check(&cf);
808                checked.push(cf);
809            } else {
810                cache_misses.push(pf);
811            }
812        }
813        vcache_time += t.elapsed();
814
815        tracing::debug!(
816            cache_hits = group.len() - cache_misses.len(),
817            cache_misses = cache_misses.len(),
818            "validation cache"
819        );
820
821        // If all files hit the validation cache, skip schema compilation entirely.
822        if cache_misses.is_empty() {
823            continue;
824        }
825
826        // Compile the schema for cache misses.
827        let t = std::time::Instant::now();
828        let validator = {
829            match jsonschema::async_options()
830                .with_retriever(retriever.clone())
831                .should_validate_formats(validate_formats)
832                .build(&schema_value)
833                .await
834            {
835                Ok(v) => v,
836                Err(e) => {
837                    compile_time += t.elapsed();
838                    // When format validation is disabled and the compilation error
839                    // is a uri-reference issue (e.g. Rust-style $ref paths in
840                    // vector.json), skip validation silently.
841                    if !validate_formats && e.to_string().contains("uri-reference") {
842                        mark_group_checked(
843                            schema_uri,
844                            cache_status,
845                            Some(ValidationCacheStatus::Miss),
846                            &cache_misses,
847                            &mut checked,
848                            &mut on_check,
849                        );
850                        continue;
851                    }
852                    report_group_error(
853                        &format!("failed to compile schema: {e}"),
854                        schema_uri,
855                        cache_status,
856                        &cache_misses,
857                        &mut errors,
858                        &mut checked,
859                        &mut on_check,
860                    );
861                    continue;
862                }
863            }
864        };
865        compile_time += t.elapsed();
866
867        let t = std::time::Instant::now();
868        validate_group(
869            &validator,
870            schema_uri,
871            &schema_hash,
872            validate_formats,
873            cache_status,
874            &cache_misses,
875            &vcache,
876            &mut errors,
877            &mut checked,
878            &mut on_check,
879        )
880        .await;
881        validate_time += t.elapsed();
882    }
883
884    #[allow(clippy::cast_possible_truncation)]
885    {
886        tracing::info!(
887            fetch_ms = fetch_time.as_millis() as u64,
888            hash_ms = hash_time.as_millis() as u64,
889            vcache_ms = vcache_time.as_millis() as u64,
890            compile_ms = compile_time.as_millis() as u64,
891            validate_ms = validate_time.as_millis() as u64,
892            "phase2 breakdown"
893        );
894    }
895
896    // Sort errors for deterministic output (by path, then by span offset)
897    errors.sort_by(|a, b| {
898        a.path()
899            .cmp(b.path())
900            .then_with(|| a.offset().cmp(&b.offset()))
901    });
902
903    Ok(ValidateResult { errors, checked })
904}
905
906#[cfg(test)]
907mod tests {
908    use super::*;
909    use crate::retriever::HttpClient;
910    use std::collections::HashMap;
911    use std::error::Error;
912    use std::path::Path;
913
    /// Test double for [`HttpClient`]: wraps a map that the `get`
    /// implementation looks requested URIs up in.
    #[derive(Clone)]
    struct MockClient(HashMap<String, String>);
916
917    #[async_trait::async_trait]
918    impl HttpClient for MockClient {
919        async fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
920            self.0
921                .get(uri)
922                .cloned()
923                .ok_or_else(|| format!("mock: no response for {uri}").into())
924        }
925    }
926
927    fn mock(entries: &[(&str, &str)]) -> MockClient {
928        MockClient(
929            entries
930                .iter()
931                .map(|(k, v)| (k.to_string(), v.to_string()))
932                .collect(),
933        )
934    }
935
936    fn testdata() -> PathBuf {
937        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
938    }
939
940    /// Build glob patterns that scan one or more testdata directories for all supported file types.
941    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
942        dirs.iter()
943            .flat_map(|dir| {
944                let base = testdata().join(dir);
945                vec![
946                    base.join("*.json").to_string_lossy().to_string(),
947                    base.join("*.yaml").to_string_lossy().to_string(),
948                    base.join("*.yml").to_string_lossy().to_string(),
949                    base.join("*.json5").to_string_lossy().to_string(),
950                    base.join("*.jsonc").to_string_lossy().to_string(),
951                    base.join("*.toml").to_string_lossy().to_string(),
952                ]
953            })
954            .collect()
955    }
956
957    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
958        ValidateArgs {
959            globs: scenario_globs(dirs),
960            exclude: vec![],
961            cache_dir: None,
962            force_schema_fetch: true,
963            force_validation: true,
964            no_catalog: true,
965            format: None,
966            config_dir: None,
967            schema_cache_ttl: None,
968        }
969    }
970
    /// Minimal JSON Schema used by most tests: an object with a string
    /// `name` property, which is required.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
973
974    fn schema_mock() -> MockClient {
975        mock(&[("https://example.com/schema.json", SCHEMA)])
976    }
977
978    // --- Directory scanning tests ---
979
980    #[tokio::test]
981    async fn no_matching_files() -> anyhow::Result<()> {
982        let tmp = tempfile::tempdir()?;
983        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
984        let c = ValidateArgs {
985            globs: vec![pattern],
986            exclude: vec![],
987            cache_dir: None,
988            force_schema_fetch: true,
989            force_validation: true,
990            no_catalog: true,
991            format: None,
992            config_dir: None,
993            schema_cache_ttl: None,
994        };
995        let result = run(&c, mock(&[])).await?;
996        assert!(!result.has_errors());
997        Ok(())
998    }
999
1000    #[tokio::test]
1001    async fn dir_all_valid() -> anyhow::Result<()> {
1002        let c = args_for_dirs(&["positive_tests"]);
1003        let result = run(&c, schema_mock()).await?;
1004        assert!(!result.has_errors());
1005        Ok(())
1006    }
1007
1008    #[tokio::test]
1009    async fn dir_all_invalid() -> anyhow::Result<()> {
1010        let c = args_for_dirs(&["negative_tests"]);
1011        let result = run(&c, schema_mock()).await?;
1012        assert!(result.has_errors());
1013        Ok(())
1014    }
1015
1016    #[tokio::test]
1017    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1018        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1019        let result = run(&c, schema_mock()).await?;
1020        assert!(result.has_errors());
1021        Ok(())
1022    }
1023
1024    #[tokio::test]
1025    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1026        let c = args_for_dirs(&["no_schema"]);
1027        let result = run(&c, mock(&[])).await?;
1028        assert!(!result.has_errors());
1029        Ok(())
1030    }
1031
1032    #[tokio::test]
1033    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1034        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1035        let result = run(&c, schema_mock()).await?;
1036        assert!(!result.has_errors());
1037        Ok(())
1038    }
1039
1040    // --- Directory as positional arg ---
1041
1042    #[tokio::test]
1043    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1044        let dir = testdata().join("positive_tests");
1045        let c = ValidateArgs {
1046            globs: vec![dir.to_string_lossy().to_string()],
1047            exclude: vec![],
1048            cache_dir: None,
1049            force_schema_fetch: true,
1050            force_validation: true,
1051            no_catalog: true,
1052            format: None,
1053            config_dir: None,
1054            schema_cache_ttl: None,
1055        };
1056        let result = run(&c, schema_mock()).await?;
1057        assert!(!result.has_errors());
1058        assert!(result.files_checked() > 0);
1059        Ok(())
1060    }
1061
1062    #[tokio::test]
1063    async fn multiple_directory_args() -> anyhow::Result<()> {
1064        let pos_dir = testdata().join("positive_tests");
1065        let no_schema_dir = testdata().join("no_schema");
1066        let c = ValidateArgs {
1067            globs: vec![
1068                pos_dir.to_string_lossy().to_string(),
1069                no_schema_dir.to_string_lossy().to_string(),
1070            ],
1071            exclude: vec![],
1072            cache_dir: None,
1073            force_schema_fetch: true,
1074            force_validation: true,
1075            no_catalog: true,
1076            format: None,
1077            config_dir: None,
1078            schema_cache_ttl: None,
1079        };
1080        let result = run(&c, schema_mock()).await?;
1081        assert!(!result.has_errors());
1082        Ok(())
1083    }
1084
1085    #[tokio::test]
1086    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1087        let dir = testdata().join("positive_tests");
1088        let glob_pattern = testdata()
1089            .join("no_schema")
1090            .join("*.json")
1091            .to_string_lossy()
1092            .to_string();
1093        let c = ValidateArgs {
1094            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1095            exclude: vec![],
1096            cache_dir: None,
1097            force_schema_fetch: true,
1098            force_validation: true,
1099            no_catalog: true,
1100            format: None,
1101            config_dir: None,
1102            schema_cache_ttl: None,
1103        };
1104        let result = run(&c, schema_mock()).await?;
1105        assert!(!result.has_errors());
1106        Ok(())
1107    }
1108
1109    #[tokio::test]
1110    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1111        let base = testdata().join("malformed");
1112        let c = ValidateArgs {
1113            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1114            exclude: vec![],
1115            cache_dir: None,
1116            force_schema_fetch: true,
1117            force_validation: true,
1118            no_catalog: true,
1119            format: None,
1120            config_dir: None,
1121            schema_cache_ttl: None,
1122        };
1123        let result = run(&c, mock(&[])).await?;
1124        assert!(result.has_errors());
1125        Ok(())
1126    }
1127
1128    #[tokio::test]
1129    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1130        let base = testdata().join("malformed");
1131        let c = ValidateArgs {
1132            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1133            exclude: vec![],
1134            cache_dir: None,
1135            force_schema_fetch: true,
1136            force_validation: true,
1137            no_catalog: true,
1138            format: None,
1139            config_dir: None,
1140            schema_cache_ttl: None,
1141        };
1142        let result = run(&c, mock(&[])).await?;
1143        assert!(result.has_errors());
1144        Ok(())
1145    }
1146
1147    // --- Exclude filter ---
1148
1149    #[tokio::test]
1150    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1151        let base = testdata().join("negative_tests");
1152        let c = ValidateArgs {
1153            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1154            exclude: vec![
1155                base.join("missing_name.json").to_string_lossy().to_string(),
1156                base.join("missing_name.toml").to_string_lossy().to_string(),
1157                base.join("missing_name.yaml").to_string_lossy().to_string(),
1158            ],
1159            cache_dir: None,
1160            force_schema_fetch: true,
1161            force_validation: true,
1162            no_catalog: true,
1163            format: None,
1164            config_dir: None,
1165            schema_cache_ttl: None,
1166        };
1167        let result = run(&c, schema_mock()).await?;
1168        assert!(!result.has_errors());
1169        Ok(())
1170    }
1171
1172    // --- Cache options ---
1173
1174    #[tokio::test]
1175    async fn custom_cache_dir() -> anyhow::Result<()> {
1176        let cache_tmp = tempfile::tempdir()?;
1177        let c = ValidateArgs {
1178            globs: scenario_globs(&["positive_tests"]),
1179            exclude: vec![],
1180            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1181            force_schema_fetch: true,
1182            force_validation: true,
1183            no_catalog: true,
1184            format: None,
1185            config_dir: None,
1186            schema_cache_ttl: None,
1187        };
1188        let result = run(&c, schema_mock()).await?;
1189        assert!(!result.has_errors());
1190
1191        // Schema was fetched once and cached
1192        let entries: Vec<_> = fs::read_dir(cache_tmp.path())?.collect();
1193        assert_eq!(entries.len(), 1);
1194        Ok(())
1195    }
1196
1197    // --- Local schema ---
1198
1199    #[tokio::test]
1200    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1201        let tmp = tempfile::tempdir()?;
1202        let schema_path = tmp.path().join("schema.json");
1203        fs::write(&schema_path, SCHEMA)?;
1204
1205        let f = tmp.path().join("valid.json");
1206        fs::write(
1207            &f,
1208            format!(
1209                r#"{{"$schema":"{}","name":"hello"}}"#,
1210                schema_path.to_string_lossy()
1211            ),
1212        )?;
1213
1214        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1215        let c = ValidateArgs {
1216            globs: vec![pattern],
1217            exclude: vec![],
1218            cache_dir: None,
1219            force_schema_fetch: true,
1220            force_validation: true,
1221            no_catalog: true,
1222            format: None,
1223            config_dir: None,
1224            schema_cache_ttl: None,
1225        };
1226        let result = run(&c, mock(&[])).await?;
1227        assert!(!result.has_errors());
1228        Ok(())
1229    }
1230
1231    #[tokio::test]
1232    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1233        let tmp = tempfile::tempdir()?;
1234        let schema_path = tmp.path().join("schema.json");
1235        fs::write(&schema_path, SCHEMA)?;
1236
1237        let f = tmp.path().join("valid.yaml");
1238        fs::write(
1239            &f,
1240            format!(
1241                "# yaml-language-server: $schema={}\nname: hello\n",
1242                schema_path.to_string_lossy()
1243            ),
1244        )?;
1245
1246        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1247        let c = ValidateArgs {
1248            globs: vec![pattern],
1249            exclude: vec![],
1250            cache_dir: None,
1251            force_schema_fetch: true,
1252            force_validation: true,
1253            no_catalog: true,
1254            format: None,
1255            config_dir: None,
1256            schema_cache_ttl: None,
1257        };
1258        let result = run(&c, mock(&[])).await?;
1259        assert!(!result.has_errors());
1260        Ok(())
1261    }
1262
1263    #[tokio::test]
1264    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1265        let tmp = tempfile::tempdir()?;
1266        let f = tmp.path().join("ref.json");
1267        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1268
1269        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1270        let c = ValidateArgs {
1271            globs: vec![pattern],
1272            exclude: vec![],
1273            cache_dir: None,
1274            force_schema_fetch: true,
1275            force_validation: true,
1276            no_catalog: true,
1277            format: None,
1278            config_dir: None,
1279            schema_cache_ttl: None,
1280        };
1281        let result = run(&c, mock(&[])).await?;
1282        assert!(result.has_errors());
1283        Ok(())
1284    }
1285
1286    // --- JSON5 / JSONC tests ---
1287
1288    #[tokio::test]
1289    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1290        let tmp = tempfile::tempdir()?;
1291        let schema_path = tmp.path().join("schema.json");
1292        fs::write(&schema_path, SCHEMA)?;
1293
1294        let f = tmp.path().join("config.json5");
1295        fs::write(
1296            &f,
1297            format!(
1298                r#"{{
1299  // JSON5 comment
1300  "$schema": "{}",
1301  name: "hello",
1302}}"#,
1303                schema_path.to_string_lossy()
1304            ),
1305        )?;
1306
1307        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1308        let c = ValidateArgs {
1309            globs: vec![pattern],
1310            exclude: vec![],
1311            cache_dir: None,
1312            force_schema_fetch: true,
1313            force_validation: true,
1314            no_catalog: true,
1315            format: None,
1316            config_dir: None,
1317            schema_cache_ttl: None,
1318        };
1319        let result = run(&c, mock(&[])).await?;
1320        assert!(!result.has_errors());
1321        Ok(())
1322    }
1323
1324    #[tokio::test]
1325    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1326        let tmp = tempfile::tempdir()?;
1327        let schema_path = tmp.path().join("schema.json");
1328        fs::write(&schema_path, SCHEMA)?;
1329
1330        let f = tmp.path().join("config.jsonc");
1331        fs::write(
1332            &f,
1333            format!(
1334                r#"{{
1335  /* JSONC comment */
1336  "$schema": "{}",
1337  "name": "hello"
1338}}"#,
1339                schema_path.to_string_lossy()
1340            ),
1341        )?;
1342
1343        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1344        let c = ValidateArgs {
1345            globs: vec![pattern],
1346            exclude: vec![],
1347            cache_dir: None,
1348            force_schema_fetch: true,
1349            force_validation: true,
1350            no_catalog: true,
1351            format: None,
1352            config_dir: None,
1353            schema_cache_ttl: None,
1354        };
1355        let result = run(&c, mock(&[])).await?;
1356        assert!(!result.has_errors());
1357        Ok(())
1358    }
1359
1360    // --- Catalog-based schema matching ---
1361
    /// Simplified workflow schema for the catalog tests: an object that
    /// requires `on` and `jobs`, where `jobs` must be an object and the
    /// optional `name` must be a string.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1371
1372    fn gh_catalog_json() -> String {
1373        r#"{"schemas":[{
1374            "name": "GitHub Workflow",
1375            "url": "https://www.schemastore.org/github-workflow.json",
1376            "fileMatch": [
1377                "**/.github/workflows/*.yml",
1378                "**/.github/workflows/*.yaml"
1379            ]
1380        }]}"#
1381            .to_string()
1382    }
1383
1384    #[tokio::test]
1385    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1386        let tmp = tempfile::tempdir()?;
1387        let wf_dir = tmp.path().join(".github/workflows");
1388        fs::create_dir_all(&wf_dir)?;
1389        fs::write(
1390            wf_dir.join("ci.yml"),
1391            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1392        )?;
1393
1394        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1395        let client = mock(&[
1396            (
1397                "https://www.schemastore.org/api/json/catalog.json",
1398                &gh_catalog_json(),
1399            ),
1400            (
1401                "https://www.schemastore.org/github-workflow.json",
1402                GH_WORKFLOW_SCHEMA,
1403            ),
1404        ]);
1405        let c = ValidateArgs {
1406            globs: vec![pattern],
1407            exclude: vec![],
1408            cache_dir: None,
1409            force_schema_fetch: true,
1410            force_validation: true,
1411            no_catalog: false,
1412            format: None,
1413            config_dir: None,
1414            schema_cache_ttl: None,
1415        };
1416        let result = run(&c, client).await?;
1417        assert!(!result.has_errors());
1418        Ok(())
1419    }
1420
1421    #[tokio::test]
1422    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1423        let tmp = tempfile::tempdir()?;
1424        let wf_dir = tmp.path().join(".github/workflows");
1425        fs::create_dir_all(&wf_dir)?;
1426        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1427
1428        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1429        let client = mock(&[
1430            (
1431                "https://www.schemastore.org/api/json/catalog.json",
1432                &gh_catalog_json(),
1433            ),
1434            (
1435                "https://www.schemastore.org/github-workflow.json",
1436                GH_WORKFLOW_SCHEMA,
1437            ),
1438        ]);
1439        let c = ValidateArgs {
1440            globs: vec![pattern],
1441            exclude: vec![],
1442            cache_dir: None,
1443            force_schema_fetch: true,
1444            force_validation: true,
1445            no_catalog: false,
1446            format: None,
1447            config_dir: None,
1448            schema_cache_ttl: None,
1449        };
1450        let result = run(&c, client).await?;
1451        assert!(result.has_errors());
1452        Ok(())
1453    }
1454
1455    #[tokio::test]
1456    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1457        let tmp = tempfile::tempdir()?;
1458        let wf_dir = tmp.path().join(".github/workflows");
1459        fs::create_dir_all(&wf_dir)?;
1460        fs::write(
1461            wf_dir.join("ci.yml"),
1462            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1463        )?;
1464
1465        let client = mock(&[
1466            (
1467                "https://www.schemastore.org/api/json/catalog.json",
1468                &gh_catalog_json(),
1469            ),
1470            (
1471                "https://www.schemastore.org/github-workflow.json",
1472                GH_WORKFLOW_SCHEMA,
1473            ),
1474        ]);
1475        let c = ValidateArgs {
1476            globs: vec![],
1477            exclude: vec![],
1478            cache_dir: None,
1479            force_schema_fetch: true,
1480            force_validation: true,
1481            no_catalog: false,
1482            format: None,
1483            config_dir: None,
1484            schema_cache_ttl: None,
1485        };
1486
1487        let orig_dir = std::env::current_dir()?;
1488        std::env::set_current_dir(tmp.path())?;
1489        let result = run(&c, client).await?;
1490        std::env::set_current_dir(orig_dir)?;
1491
1492        assert!(!result.has_errors());
1493        Ok(())
1494    }
1495
1496    // --- TOML tests ---
1497
1498    #[tokio::test]
1499    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1500        let tmp = tempfile::tempdir()?;
1501        let schema_path = tmp.path().join("schema.json");
1502        fs::write(&schema_path, SCHEMA)?;
1503
1504        let f = tmp.path().join("config.toml");
1505        fs::write(
1506            &f,
1507            format!(
1508                "# :schema {}\nname = \"hello\"\n",
1509                schema_path.to_string_lossy()
1510            ),
1511        )?;
1512
1513        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1514        let c = ValidateArgs {
1515            globs: vec![pattern],
1516            exclude: vec![],
1517            cache_dir: None,
1518            force_schema_fetch: true,
1519            force_validation: true,
1520            no_catalog: true,
1521            format: None,
1522            config_dir: None,
1523            schema_cache_ttl: None,
1524        };
1525        let result = run(&c, mock(&[])).await?;
1526        assert!(!result.has_errors());
1527        Ok(())
1528    }
1529
1530    // --- Rewrite rules + // resolution ---
1531
1532    #[tokio::test]
1533    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1534        let tmp = tempfile::tempdir()?;
1535
1536        let schemas_dir = tmp.path().join("schemas");
1537        fs::create_dir_all(&schemas_dir)?;
1538        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1539
1540        fs::write(
1541            tmp.path().join("lintel.toml"),
1542            r#"
1543[rewrite]
1544"http://localhost:9000/" = "//schemas/"
1545"#,
1546        )?;
1547
1548        let f = tmp.path().join("config.json");
1549        fs::write(
1550            &f,
1551            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1552        )?;
1553
1554        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1555        let c = ValidateArgs {
1556            globs: vec![pattern],
1557            exclude: vec![],
1558            cache_dir: None,
1559            force_schema_fetch: true,
1560            force_validation: true,
1561            no_catalog: true,
1562            format: None,
1563            config_dir: Some(tmp.path().to_path_buf()),
1564            schema_cache_ttl: None,
1565        };
1566
1567        let result = run(&c, mock(&[])).await?;
1568        assert!(!result.has_errors());
1569        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1570        Ok(())
1571    }
1572
1573    #[tokio::test]
1574    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1575        let tmp = tempfile::tempdir()?;
1576
1577        let schemas_dir = tmp.path().join("schemas");
1578        fs::create_dir_all(&schemas_dir)?;
1579        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1580
1581        fs::write(tmp.path().join("lintel.toml"), "")?;
1582
1583        let sub = tmp.path().join("deeply/nested");
1584        fs::create_dir_all(&sub)?;
1585        let f = sub.join("config.json");
1586        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1587
1588        let pattern = sub.join("*.json").to_string_lossy().to_string();
1589        let c = ValidateArgs {
1590            globs: vec![pattern],
1591            exclude: vec![],
1592            cache_dir: None,
1593            force_schema_fetch: true,
1594            force_validation: true,
1595            no_catalog: true,
1596            format: None,
1597            config_dir: Some(tmp.path().to_path_buf()),
1598            schema_cache_ttl: None,
1599        };
1600
1601        let result = run(&c, mock(&[])).await?;
1602        assert!(!result.has_errors());
1603        Ok(())
1604    }
1605
1606    // --- Format validation override ---
1607
    /// Schema for the format-override tests: an object whose optional `link`
    /// property is a string with `format: uri-reference`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1614
1615    #[tokio::test]
1616    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1617        let tmp = tempfile::tempdir()?;
1618        let schema_path = tmp.path().join("schema.json");
1619        fs::write(&schema_path, FORMAT_SCHEMA)?;
1620
1621        let f = tmp.path().join("data.json");
1622        fs::write(
1623            &f,
1624            format!(
1625                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1626                schema_path.to_string_lossy()
1627            ),
1628        )?;
1629
1630        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1631        let c = ValidateArgs {
1632            globs: vec![pattern],
1633            exclude: vec![],
1634            cache_dir: None,
1635            force_schema_fetch: true,
1636            force_validation: true,
1637            no_catalog: true,
1638            format: None,
1639            config_dir: Some(tmp.path().to_path_buf()),
1640            schema_cache_ttl: None,
1641        };
1642        let result = run(&c, mock(&[])).await?;
1643        assert!(
1644            result.has_errors(),
1645            "expected format error without override"
1646        );
1647        Ok(())
1648    }
1649
1650    #[tokio::test]
1651    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1652        let tmp = tempfile::tempdir()?;
1653        let schema_path = tmp.path().join("schema.json");
1654        fs::write(&schema_path, FORMAT_SCHEMA)?;
1655
1656        let f = tmp.path().join("data.json");
1657        fs::write(
1658            &f,
1659            format!(
1660                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1661                schema_path.to_string_lossy()
1662            ),
1663        )?;
1664
1665        // Use **/data.json to match the absolute path from the tempdir.
1666        fs::write(
1667            tmp.path().join("lintel.toml"),
1668            r#"
1669[[override]]
1670files = ["**/data.json"]
1671validate_formats = false
1672"#,
1673        )?;
1674
1675        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1676        let c = ValidateArgs {
1677            globs: vec![pattern],
1678            exclude: vec![],
1679            cache_dir: None,
1680            force_schema_fetch: true,
1681            force_validation: true,
1682            no_catalog: true,
1683            format: None,
1684            config_dir: Some(tmp.path().to_path_buf()),
1685            schema_cache_ttl: None,
1686        };
1687        let result = run(&c, mock(&[])).await?;
1688        assert!(
1689            !result.has_errors(),
1690            "expected no errors with validate_formats = false override"
1691        );
1692        Ok(())
1693    }
1694
1695    // --- Unrecognized extension handling ---
1696
1697    #[tokio::test]
1698    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1699        let tmp = tempfile::tempdir()?;
1700        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1701
1702        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1703        let c = ValidateArgs {
1704            globs: vec![pattern],
1705            exclude: vec![],
1706            cache_dir: None,
1707            force_schema_fetch: true,
1708            force_validation: true,
1709            no_catalog: true,
1710            format: None,
1711            config_dir: Some(tmp.path().to_path_buf()),
1712            schema_cache_ttl: None,
1713        };
1714        let result = run(&c, mock(&[])).await?;
1715        assert!(!result.has_errors());
1716        assert_eq!(result.files_checked(), 0);
1717        Ok(())
1718    }
1719
1720    #[tokio::test]
1721    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1722        let tmp = tempfile::tempdir()?;
1723        // File has .cfg extension (unrecognized) but content is valid JSON
1724        fs::write(
1725            tmp.path().join("myapp.cfg"),
1726            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1727        )?;
1728
1729        let catalog_json = r#"{"schemas":[{
1730            "name": "MyApp Config",
1731            "url": "https://example.com/myapp.schema.json",
1732            "fileMatch": ["*.cfg"]
1733        }]}"#;
1734        let schema =
1735            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1736
1737        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1738        let client = mock(&[
1739            (
1740                "https://www.schemastore.org/api/json/catalog.json",
1741                catalog_json,
1742            ),
1743            ("https://example.com/myapp.schema.json", schema),
1744        ]);
1745        let c = ValidateArgs {
1746            globs: vec![pattern],
1747            exclude: vec![],
1748            cache_dir: None,
1749            force_schema_fetch: true,
1750            force_validation: true,
1751            no_catalog: false,
1752            format: None,
1753            config_dir: Some(tmp.path().to_path_buf()),
1754            schema_cache_ttl: None,
1755        };
1756        let result = run(&c, client).await?;
1757        assert!(!result.has_errors());
1758        assert_eq!(result.files_checked(), 1);
1759        Ok(())
1760    }
1761
1762    #[tokio::test]
1763    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1764        let tmp = tempfile::tempdir()?;
1765        // File matches catalog but content isn't parseable by any format
1766        fs::write(
1767            tmp.path().join("myapp.cfg"),
1768            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1769        )?;
1770
1771        let catalog_json = r#"{"schemas":[{
1772            "name": "MyApp Config",
1773            "url": "https://example.com/myapp.schema.json",
1774            "fileMatch": ["*.cfg"]
1775        }]}"#;
1776
1777        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1778        let client = mock(&[(
1779            "https://www.schemastore.org/api/json/catalog.json",
1780            catalog_json,
1781        )]);
1782        let c = ValidateArgs {
1783            globs: vec![pattern],
1784            exclude: vec![],
1785            cache_dir: None,
1786            force_schema_fetch: true,
1787            force_validation: true,
1788            no_catalog: false,
1789            format: None,
1790            config_dir: Some(tmp.path().to_path_buf()),
1791            schema_cache_ttl: None,
1792        };
1793        let result = run(&c, client).await?;
1794        assert!(!result.has_errors());
1795        assert_eq!(result.files_checked(), 0);
1796        Ok(())
1797    }
1798
1799    #[tokio::test]
1800    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1801        let tmp = tempfile::tempdir()?;
1802        // File has .cfg extension, content is valid JSON but fails schema validation
1803        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1804
1805        let catalog_json = r#"{"schemas":[{
1806            "name": "MyApp Config",
1807            "url": "https://example.com/myapp.schema.json",
1808            "fileMatch": ["*.cfg"]
1809        }]}"#;
1810        let schema =
1811            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1812
1813        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1814        let client = mock(&[
1815            (
1816                "https://www.schemastore.org/api/json/catalog.json",
1817                catalog_json,
1818            ),
1819            ("https://example.com/myapp.schema.json", schema),
1820        ]);
1821        let c = ValidateArgs {
1822            globs: vec![pattern],
1823            exclude: vec![],
1824            cache_dir: None,
1825            force_schema_fetch: true,
1826            force_validation: true,
1827            no_catalog: false,
1828            format: None,
1829            config_dir: Some(tmp.path().to_path_buf()),
1830            schema_cache_ttl: None,
1831        };
1832        let result = run(&c, client).await?;
1833        assert!(result.has_errors());
1834        assert_eq!(result.files_checked(), 1);
1835        Ok(())
1836    }
1837
1838    // --- Validation cache ---
1839
1840    #[tokio::test]
1841    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1842        let tmp = tempfile::tempdir()?;
1843        let schema_path = tmp.path().join("schema.json");
1844        fs::write(&schema_path, SCHEMA)?;
1845
1846        let f = tmp.path().join("valid.json");
1847        fs::write(
1848            &f,
1849            format!(
1850                r#"{{"$schema":"{}","name":"hello"}}"#,
1851                schema_path.to_string_lossy()
1852            ),
1853        )?;
1854
1855        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1856
1857        // First run: force_validation = false so results get cached
1858        let c = ValidateArgs {
1859            globs: vec![pattern.clone()],
1860            exclude: vec![],
1861            cache_dir: None,
1862            force_schema_fetch: true,
1863            force_validation: false,
1864            no_catalog: true,
1865            format: None,
1866            config_dir: None,
1867            schema_cache_ttl: None,
1868        };
1869        let mut first_statuses = Vec::new();
1870        let result = run_with(&c, mock(&[]), |cf| {
1871            first_statuses.push(cf.validation_cache_status);
1872        })
1873        .await?;
1874        assert!(!result.has_errors());
1875        assert!(result.files_checked() > 0);
1876
1877        // Verify the first run recorded a validation cache miss
1878        assert!(
1879            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
1880            "expected at least one validation cache miss on first run"
1881        );
1882
1883        // Second run: same file, same schema — should hit validation cache
1884        let mut second_statuses = Vec::new();
1885        let result = run_with(&c, mock(&[]), |cf| {
1886            second_statuses.push(cf.validation_cache_status);
1887        })
1888        .await?;
1889        assert!(!result.has_errors());
1890
1891        // Verify the second run got a validation cache hit
1892        assert!(
1893            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
1894            "expected at least one validation cache hit on second run"
1895        );
1896        Ok(())
1897    }
1898}