Skip to main content

lintel_check/
validate.rs

1use std::collections::{BTreeMap, HashMap};
2use std::fs;
3use std::path::{Path, PathBuf};
4
5use anyhow::{Context, Result};
6use glob::glob;
7use serde_json::Value;
8
9use crate::catalog::{self, CompiledCatalog};
10use crate::config;
11use crate::diagnostics::{
12    FileDiagnostic, ParseDiagnostic, ValidationDiagnostic, find_instance_path_offset,
13};
14use crate::discover;
15use crate::parsers::{self, FileFormat, JsoncParser, Parser};
16use crate::registry;
17use crate::retriever::{CacheStatus, HttpClient, SchemaCache, ensure_cache_dir};
18use crate::validation_cache::{self, ValidationCacheStatus};
19
/// Options controlling a validation run (consumed by `run` / `run_with`).
pub struct ValidateArgs {
    /// Glob patterns to find files (empty = auto-discover).
    ///
    /// An entry that names an existing directory is handed to file discovery
    /// instead of being expanded as a glob.
    pub globs: Vec<String>,

    /// Exclude files matching these globs (repeatable)
    pub exclude: Vec<String>,

    /// Cache directory for remote schemas.
    ///
    /// When `None`, the default schema cache directory is used.
    pub cache_dir: Option<String>,

    /// Bypass schema cache reads (still writes fetched schemas to cache)
    pub force_schema_fetch: bool,

    /// Bypass validation cache reads (still writes results to cache)
    pub force_validation: bool,

    /// Disable catalog matching.
    ///
    /// When set, no catalog is fetched at all: neither the default registry,
    /// nor the `SchemaStore` catalog, nor registries listed in `lintel.toml`.
    pub no_catalog: bool,

    /// Directory to search for `lintel.toml` (defaults to cwd)
    pub config_dir: Option<PathBuf>,

    /// TTL for cached schemas. `None` means no expiry.
    pub schema_cache_ttl: Option<std::time::Duration>,
}
45
/// A single lint error produced during validation.
pub enum LintError {
    /// The file's content could not be parsed.
    Parse(ParseDiagnostic),
    /// The parsed document violated its schema.
    Validation(ValidationDiagnostic),
    /// A file-level failure without a source span, e.g. the file could not be
    /// read, or its schema could not be fetched or compiled.
    File(FileDiagnostic),
}
52
53impl LintError {
54    /// File path associated with this error.
55    pub fn path(&self) -> &str {
56        match self {
57            LintError::Parse(d) => d.src.name(),
58            LintError::Validation(d) => &d.path,
59            LintError::File(d) => &d.path,
60        }
61    }
62
63    /// Human-readable error message.
64    pub fn message(&self) -> &str {
65        match self {
66            LintError::Parse(d) => &d.message,
67            LintError::Validation(d) => &d.message,
68            LintError::File(d) => &d.message,
69        }
70    }
71
72    /// Byte offset in the source file (for sorting).
73    pub fn offset(&self) -> usize {
74        match self {
75            LintError::Parse(d) => d.span.offset(),
76            LintError::Validation(d) => d.span.offset(),
77            LintError::File(_) => 0,
78        }
79    }
80
81    /// Convert into a boxed miette Diagnostic for rich rendering.
82    pub fn into_diagnostic(self) -> Box<dyn miette::Diagnostic + Send + Sync> {
83        match self {
84            LintError::Parse(d) => Box::new(d),
85            LintError::Validation(d) => Box::new(d),
86            LintError::File(d) => Box::new(d),
87        }
88    }
89}
90
/// A file that was checked and the schema it resolved to.
pub struct CheckedFile {
    /// Display form of the checked file's path.
    pub path: String,
    /// Resolved schema URI, or the literal `"(builtin)"` for `lintel.toml`
    /// self-validation.
    pub schema: String,
    /// `None` for local schemas and builtins; `Some` for remote schemas.
    pub cache_status: Option<CacheStatus>,
    /// `None` when validation caching is not applicable; `Some` for validation cache hits/misses.
    pub validation_cache_status: Option<ValidationCacheStatus>,
}
100
/// Result of a validation run.
pub struct ValidateResult {
    /// All lint errors found, sorted by file path and then by byte offset.
    pub errors: Vec<LintError>,
    /// One entry per file that was checked, in the order checks completed.
    pub checked: Vec<CheckedFile>,
}
106
107impl ValidateResult {
108    pub fn has_errors(&self) -> bool {
109        !self.errors.is_empty()
110    }
111
112    pub fn files_checked(&self) -> usize {
113        self.checked.len()
114    }
115}
116
117// ---------------------------------------------------------------------------
118// Internal types
119// ---------------------------------------------------------------------------
120
/// A file that has been parsed and matched to a schema URI.
struct ParsedFile {
    /// Display form of the file path, as used in diagnostics.
    path: String,
    /// Raw file content as read from disk (diagnostic source text and
    /// validation-cache input).
    content: String,
    /// Parsed document; this is the value validated against the schema.
    instance: Value,
    /// Original schema URI before rewrites (for override matching).
    original_schema_uri: String,
}
129
130// ---------------------------------------------------------------------------
131// Config loading
132// ---------------------------------------------------------------------------
133
134/// Locate `lintel.toml`, load the full config, and return the config directory.
135/// Returns `(config, config_dir, config_path)`.  When no config is found or
136/// cwd is unavailable the config is default and `config_path` is `None`.
137#[tracing::instrument(skip_all)]
138fn load_config(search_dir: Option<&Path>) -> (config::Config, PathBuf, Option<PathBuf>) {
139    let start_dir = match search_dir {
140        Some(d) => d.to_path_buf(),
141        None => match std::env::current_dir() {
142            Ok(d) => d,
143            Err(_) => return (config::Config::default(), PathBuf::from("."), None),
144        },
145    };
146
147    let Some(config_path) = config::find_config_path(&start_dir) else {
148        return (config::Config::default(), start_dir, None);
149    };
150
151    let dir = config_path.parent().unwrap_or(&start_dir).to_path_buf();
152    let cfg = config::find_and_load(&start_dir)
153        .ok()
154        .flatten()
155        .unwrap_or_default();
156    (cfg, dir, Some(config_path))
157}
158
159// ---------------------------------------------------------------------------
160// File collection
161// ---------------------------------------------------------------------------
162
163/// Collect input files from globs/directories, applying exclude filters.
164#[tracing::instrument(skip_all, fields(glob_count = globs.len(), exclude_count = exclude.len()))]
165fn collect_files(globs: &[String], exclude: &[String]) -> Result<Vec<PathBuf>> {
166    if globs.is_empty() {
167        return discover::discover_files(".", exclude);
168    }
169
170    let mut result = Vec::new();
171    for pattern in globs {
172        let path = Path::new(pattern);
173        if path.is_dir() {
174            result.extend(discover::discover_files(pattern, exclude)?);
175        } else {
176            for entry in glob(pattern).with_context(|| format!("invalid glob: {pattern}"))? {
177                let path = entry?;
178                if path.is_file() && !is_excluded(&path, exclude) {
179                    result.push(path);
180                }
181            }
182        }
183    }
184    Ok(result)
185}
186
187fn is_excluded(path: &Path, excludes: &[String]) -> bool {
188    let path_str = match path.to_str() {
189        Some(s) => s.strip_prefix("./").unwrap_or(s),
190        None => return false,
191    };
192    excludes
193        .iter()
194        .any(|pattern| glob_match::glob_match(pattern, path_str))
195}
196
197// ---------------------------------------------------------------------------
198// lintel.toml self-validation
199// ---------------------------------------------------------------------------
200
201/// Validate `lintel.toml` against its built-in schema.
202fn validate_config(
203    config_path: &Path,
204    errors: &mut Vec<LintError>,
205    checked: &mut Vec<CheckedFile>,
206    on_check: &mut impl FnMut(&CheckedFile),
207) -> Result<()> {
208    let content = fs::read_to_string(config_path)?;
209    let config_value: Value = toml::from_str(&content)
210        .map_err(|e| anyhow::anyhow!("failed to parse {}: {e}", config_path.display()))?;
211    let schema_value: Value = serde_json::from_str(include_str!(concat!(
212        env!("OUT_DIR"),
213        "/lintel-config.schema.json"
214    )))
215    .context("failed to parse embedded lintel config schema")?;
216    if let Ok(validator) = jsonschema::options().build(&schema_value) {
217        let path_str = config_path.display().to_string();
218        for error in validator.iter_errors(&config_value) {
219            let ip = error.instance_path().to_string();
220            let offset = find_instance_path_offset(&content, &ip);
221            errors.push(LintError::Validation(ValidationDiagnostic {
222                src: miette::NamedSource::new(&path_str, content.clone()),
223                span: offset.into(),
224                path: path_str.clone(),
225                instance_path: ip,
226                message: error.to_string(),
227            }));
228        }
229        let cf = CheckedFile {
230            path: path_str,
231            schema: "(builtin)".to_string(),
232            cache_status: None,
233            validation_cache_status: None,
234        };
235        on_check(&cf);
236        checked.push(cf);
237    }
238    Ok(())
239}
240
241// ---------------------------------------------------------------------------
242// Phase 1: Parse files and resolve schema URIs
243// ---------------------------------------------------------------------------
244
245/// Try parsing content with each known format, returning the first success.
246///
247/// JSONC is tried first (superset of JSON, handles comments), then YAML and
248/// TOML which cover the most common config formats, followed by the rest.
249fn try_parse_all(content: &str, file_name: &str) -> Option<(parsers::FileFormat, Value)> {
250    use parsers::FileFormat::{Json, Json5, Jsonc, Markdown, Toml, Yaml};
251    const FORMATS: [parsers::FileFormat; 6] = [Jsonc, Yaml, Toml, Json, Json5, Markdown];
252
253    for fmt in FORMATS {
254        let parser = parsers::parser_for(fmt);
255        if let Ok(val) = parser.parse(content, file_name) {
256            return Some((fmt, val));
257        }
258    }
259    None
260}
261
/// Result of processing a single file: either a parsed file with its schema URI,
/// a lint error, or nothing (file was skipped).
enum FileResult {
    /// The file parsed and a schema was resolved for it.
    Parsed {
        /// Resolved schema URI (after rewrites and local-path resolution);
        /// used as the grouping key.
        schema_uri: String,
        /// The parsed file and its content.
        parsed: ParsedFile,
    },
    /// The file could not be read or parsed.
    Error(LintError),
    /// The file is not subject to validation (e.g. no schema could be
    /// resolved, or the parsed document was null).
    Skip,
}
272
273/// Process a single file: read, parse, resolve schema URI.
274fn process_one_file(
275    path: &Path,
276    config: &config::Config,
277    config_dir: &Path,
278    compiled_catalogs: &[CompiledCatalog],
279) -> FileResult {
280    let content = match fs::read_to_string(path) {
281        Ok(c) => c,
282        Err(e) => {
283            return FileResult::Error(LintError::File(FileDiagnostic {
284                path: path.display().to_string(),
285                message: format!("failed to read: {e}"),
286            }));
287        }
288    };
289
290    let path_str = path.display().to_string();
291    let file_name = path
292        .file_name()
293        .and_then(|n| n.to_str())
294        .unwrap_or(&path_str);
295
296    let detected_format = parsers::detect_format(path);
297
298    // For unrecognized extensions, only proceed if a catalog or config mapping matches.
299    if detected_format.is_none() {
300        let has_match = config.find_schema_mapping(&path_str, file_name).is_some()
301            || compiled_catalogs
302                .iter()
303                .any(|cat| cat.find_schema(&path_str, file_name).is_some());
304        if !has_match {
305            return FileResult::Skip;
306        }
307    }
308
309    // Parse the file content.
310    let (parser, instance): (Box<dyn Parser>, Value) = if let Some(fmt) = detected_format {
311        let parser = parsers::parser_for(fmt);
312        match parser.parse(&content, &path_str) {
313            Ok(val) => (parser, val),
314            Err(parse_err) => {
315                // JSONC fallback for .json files that match a catalog entry.
316                if fmt == FileFormat::Json
317                    && compiled_catalogs
318                        .iter()
319                        .any(|cat| cat.find_schema(&path_str, file_name).is_some())
320                {
321                    match JsoncParser.parse(&content, &path_str) {
322                        Ok(val) => (parsers::parser_for(FileFormat::Jsonc), val),
323                        Err(jsonc_err) => return FileResult::Error(LintError::Parse(jsonc_err)),
324                    }
325                } else {
326                    return FileResult::Error(LintError::Parse(parse_err));
327                }
328            }
329        }
330    } else {
331        match try_parse_all(&content, &path_str) {
332            Some((fmt, val)) => (parsers::parser_for(fmt), val),
333            None => return FileResult::Skip,
334        }
335    };
336
337    // Skip markdown files with no frontmatter
338    if instance.is_null() {
339        return FileResult::Skip;
340    }
341
342    // Schema resolution priority:
343    // 1. Inline $schema / YAML modeline (always wins)
344    // 2. Custom schema mappings from lintel.toml [schemas]
345    // 3. Catalog matching (SchemaStore + additional registries)
346    let schema_uri = parser
347        .extract_schema_uri(&content, &instance)
348        .or_else(|| {
349            config
350                .find_schema_mapping(&path_str, file_name)
351                .map(str::to_string)
352        })
353        .or_else(|| {
354            compiled_catalogs
355                .iter()
356                .find_map(|cat| cat.find_schema(&path_str, file_name))
357                .map(str::to_string)
358        });
359
360    let Some(schema_uri) = schema_uri else {
361        return FileResult::Skip;
362    };
363
364    // Keep original URI for override matching (before rewrites)
365    let original_schema_uri = schema_uri.clone();
366
367    // Apply rewrite rules, then resolve // paths relative to lintel.toml
368    let schema_uri = config::apply_rewrites(&schema_uri, &config.rewrite);
369    let schema_uri = config::resolve_double_slash(&schema_uri, config_dir);
370
371    // Resolve relative local paths against the file's parent directory.
372    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
373    let schema_uri = if is_remote {
374        schema_uri
375    } else {
376        path.parent()
377            .map(|parent| parent.join(&schema_uri).to_string_lossy().to_string())
378            .unwrap_or(schema_uri)
379    };
380
381    FileResult::Parsed {
382        schema_uri,
383        parsed: ParsedFile {
384            path: path_str,
385            content,
386            instance,
387            original_schema_uri,
388        },
389    }
390}
391
392/// Parse each file in parallel, extract its schema URI, apply rewrites, and
393/// group by resolved schema URI.
394#[tracing::instrument(skip_all, fields(file_count = files.len()))]
395fn parse_and_group_files(
396    files: &[PathBuf],
397    config: &config::Config,
398    config_dir: &Path,
399    compiled_catalogs: &[CompiledCatalog],
400    errors: &mut Vec<LintError>,
401) -> BTreeMap<String, Vec<ParsedFile>> {
402    use rayon::prelude::*;
403
404    let results: Vec<FileResult> = files
405        .par_iter()
406        .map(|path| process_one_file(path, config, config_dir, compiled_catalogs))
407        .collect();
408
409    let mut schema_groups: BTreeMap<String, Vec<ParsedFile>> = BTreeMap::new();
410    for result in results {
411        match result {
412            FileResult::Parsed { schema_uri, parsed } => {
413                schema_groups.entry(schema_uri).or_default().push(parsed);
414            }
415            FileResult::Error(e) => errors.push(e),
416            FileResult::Skip => {}
417        }
418    }
419
420    schema_groups
421}
422
423// ---------------------------------------------------------------------------
424// Phase 2: Schema fetching, compilation, and instance validation
425// ---------------------------------------------------------------------------
426
427/// Fetch a schema by URI, returning its parsed JSON and cache status.
428///
429/// For remote URIs, checks the prefetched map first; for local URIs, reads
430/// from disk (with in-memory caching to avoid redundant I/O for shared schemas).
431fn fetch_schema_from_prefetched(
432    schema_uri: &str,
433    prefetched: &HashMap<String, Result<(Value, CacheStatus), String>>,
434    local_cache: &mut HashMap<String, Value>,
435    group: &[ParsedFile],
436    errors: &mut Vec<LintError>,
437    checked: &mut Vec<CheckedFile>,
438    on_check: &mut impl FnMut(&CheckedFile),
439) -> Option<(Value, Option<CacheStatus>)> {
440    let is_remote = schema_uri.starts_with("http://") || schema_uri.starts_with("https://");
441
442    let result: Result<(Value, Option<CacheStatus>), String> = if is_remote {
443        match prefetched.get(schema_uri) {
444            Some(Ok((v, status))) => Ok((v.clone(), Some(*status))),
445            Some(Err(e)) => Err(format!("failed to fetch schema: {schema_uri}: {e}")),
446            None => Err(format!("schema not prefetched: {schema_uri}")),
447        }
448    } else if let Some(cached) = local_cache.get(schema_uri) {
449        Ok((cached.clone(), None))
450    } else {
451        fs::read_to_string(schema_uri)
452            .map_err(|e| format!("failed to read local schema {schema_uri}: {e}"))
453            .and_then(|content| {
454                serde_json::from_str::<Value>(&content)
455                    .map(|v| {
456                        local_cache.insert(schema_uri.to_string(), v.clone());
457                        (v, None)
458                    })
459                    .map_err(|e| format!("failed to parse local schema {schema_uri}: {e}"))
460            })
461    };
462
463    match result {
464        Ok(value) => Some(value),
465        Err(message) => {
466            report_group_error(&message, schema_uri, None, group, errors, checked, on_check);
467            None
468        }
469    }
470}
471
472/// Report the same error for every file in a schema group.
473fn report_group_error<P: std::borrow::Borrow<ParsedFile>>(
474    message: &str,
475    schema_uri: &str,
476    cache_status: Option<CacheStatus>,
477    group: &[P],
478    errors: &mut Vec<LintError>,
479    checked: &mut Vec<CheckedFile>,
480    on_check: &mut impl FnMut(&CheckedFile),
481) {
482    for item in group {
483        let pf = item.borrow();
484        let cf = CheckedFile {
485            path: pf.path.clone(),
486            schema: schema_uri.to_string(),
487            cache_status,
488            validation_cache_status: None,
489        };
490        on_check(&cf);
491        checked.push(cf);
492        errors.push(LintError::File(FileDiagnostic {
493            path: pf.path.clone(),
494            message: message.to_string(),
495        }));
496    }
497}
498
499/// Mark every file in a group as checked (no errors).
500fn mark_group_checked<P: std::borrow::Borrow<ParsedFile>>(
501    schema_uri: &str,
502    cache_status: Option<CacheStatus>,
503    validation_cache_status: Option<ValidationCacheStatus>,
504    group: &[P],
505    checked: &mut Vec<CheckedFile>,
506    on_check: &mut impl FnMut(&CheckedFile),
507) {
508    for item in group {
509        let pf = item.borrow();
510        let cf = CheckedFile {
511            path: pf.path.clone(),
512            schema: schema_uri.to_string(),
513            cache_status,
514            validation_cache_status,
515        };
516        on_check(&cf);
517        checked.push(cf);
518    }
519}
520
521/// Convert `(instance_path, message)` pairs into `LintError::Validation` diagnostics.
522fn push_error_pairs(
523    pf: &ParsedFile,
524    error_pairs: &[(String, String)],
525    errors: &mut Vec<LintError>,
526) {
527    for (ip, msg) in error_pairs {
528        let offset = find_instance_path_offset(&pf.content, ip);
529        errors.push(LintError::Validation(ValidationDiagnostic {
530            src: miette::NamedSource::new(&pf.path, pf.content.clone()),
531            span: offset.into(),
532            path: pf.path.clone(),
533            instance_path: ip.clone(),
534            message: msg.clone(),
535        }));
536    }
537}
538
539/// Validate all files in a group against an already-compiled validator and store
540/// results in the validation cache.
541#[tracing::instrument(skip_all, fields(schema_uri, file_count = group.len()))]
542#[allow(clippy::too_many_arguments)]
543async fn validate_group<P: std::borrow::Borrow<ParsedFile>>(
544    validator: &jsonschema::Validator,
545    schema_uri: &str,
546    schema_hash: &str,
547    validate_formats: bool,
548    cache_status: Option<CacheStatus>,
549    group: &[P],
550    vcache: &validation_cache::ValidationCache,
551    errors: &mut Vec<LintError>,
552    checked: &mut Vec<CheckedFile>,
553    on_check: &mut impl FnMut(&CheckedFile),
554) {
555    for item in group {
556        let pf = item.borrow();
557        let file_errors: Vec<(String, String)> = validator
558            .iter_errors(&pf.instance)
559            .map(|error| (error.instance_path().to_string(), error.to_string()))
560            .collect();
561
562        vcache
563            .store(&pf.content, schema_hash, validate_formats, &file_errors)
564            .await;
565        push_error_pairs(pf, &file_errors, errors);
566
567        let cf = CheckedFile {
568            path: pf.path.clone(),
569            schema: schema_uri.to_string(),
570            cache_status,
571            validation_cache_status: Some(ValidationCacheStatus::Miss),
572        };
573        on_check(&cf);
574        checked.push(cf);
575    }
576}
577
578// ---------------------------------------------------------------------------
579// Public API
580// ---------------------------------------------------------------------------
581
582/// # Errors
583///
584/// Returns an error if file collection or schema validation encounters an I/O error.
585pub async fn run<C: HttpClient>(args: &ValidateArgs, client: C) -> Result<ValidateResult> {
586    run_with(args, client, |_| {}).await
587}
588
589/// Like [`run`], but calls `on_check` each time a file is checked, allowing
590/// callers to stream progress (e.g. verbose output) as files are processed.
591///
592/// # Errors
593///
594/// Returns an error if file collection or schema validation encounters an I/O error.
595#[tracing::instrument(skip_all, name = "validate")]
596#[allow(clippy::too_many_lines)]
597pub async fn run_with<C: HttpClient>(
598    args: &ValidateArgs,
599    client: C,
600    mut on_check: impl FnMut(&CheckedFile),
601) -> Result<ValidateResult> {
602    let cache_dir = match &args.cache_dir {
603        Some(dir) => {
604            let path = PathBuf::from(dir);
605            let _ = fs::create_dir_all(&path);
606            path
607        }
608        None => ensure_cache_dir(),
609    };
610    let retriever = SchemaCache::new(
611        Some(cache_dir),
612        client.clone(),
613        args.force_schema_fetch,
614        args.schema_cache_ttl,
615    );
616
617    let (config, config_dir, config_path) = load_config(args.config_dir.as_deref());
618    let files = collect_files(&args.globs, &args.exclude)?;
619    tracing::info!(file_count = files.len(), "collected files");
620
621    let mut compiled_catalogs = Vec::new();
622
623    if !args.no_catalog {
624        let catalog_span = tracing::info_span!("fetch_catalogs").entered();
625
626        // Fetch all catalogs in parallel using JoinSet.
627        // Each task returns (label, result) so error messages stay specific.
628        #[allow(clippy::items_after_statements)]
629        type CatalogResult = (
630            String,
631            Result<CompiledCatalog, Box<dyn std::error::Error + Send + Sync>>,
632        );
633        let mut catalog_tasks: tokio::task::JoinSet<CatalogResult> = tokio::task::JoinSet::new();
634
635        // Lintel catalog
636        if !config.no_default_catalog {
637            let r = retriever.clone();
638            let label = format!("default catalog {}", registry::DEFAULT_REGISTRY);
639            catalog_tasks.spawn(async move {
640                let result = registry::fetch(&r, registry::DEFAULT_REGISTRY)
641                    .await
642                    .map(|cat| CompiledCatalog::compile(&cat));
643                (label, result)
644            });
645        }
646
647        // SchemaStore catalog
648        let r = retriever.clone();
649        catalog_tasks.spawn(async move {
650            let result = catalog::fetch_catalog(&r)
651                .await
652                .map(|cat| CompiledCatalog::compile(&cat));
653            ("SchemaStore catalog".to_string(), result)
654        });
655
656        // Additional registries from lintel.toml
657        for registry_url in &config.registries {
658            let r = retriever.clone();
659            let url = registry_url.clone();
660            let label = format!("registry {url}");
661            catalog_tasks.spawn(async move {
662                let result = registry::fetch(&r, &url)
663                    .await
664                    .map(|cat| CompiledCatalog::compile(&cat));
665                (label, result)
666            });
667        }
668
669        while let Some(result) = catalog_tasks.join_next().await {
670            match result {
671                Ok((_, Ok(compiled))) => compiled_catalogs.push(compiled),
672                Ok((label, Err(e))) => eprintln!("warning: failed to fetch {label}: {e}"),
673                Err(e) => eprintln!("warning: catalog fetch task failed: {e}"),
674            }
675        }
676
677        drop(catalog_span);
678    }
679
680    let mut errors: Vec<LintError> = Vec::new();
681    let mut checked: Vec<CheckedFile> = Vec::new();
682
683    // Validate lintel.toml against its own schema
684    if let Some(config_path) = config_path {
685        validate_config(&config_path, &mut errors, &mut checked, &mut on_check)?;
686    }
687
688    // Phase 1: Parse files and resolve schema URIs
689    let schema_groups = parse_and_group_files(
690        &files,
691        &config,
692        &config_dir,
693        &compiled_catalogs,
694        &mut errors,
695    );
696    tracing::info!(
697        schema_count = schema_groups.len(),
698        total_files = schema_groups.values().map(Vec::len).sum::<usize>(),
699        "grouped files by schema"
700    );
701
702    // Create validation cache
703    let vcache = validation_cache::ValidationCache::new(
704        validation_cache::ensure_cache_dir(),
705        args.force_validation,
706    );
707
708    // Prefetch all remote schemas in parallel
709    let remote_uris: Vec<&String> = schema_groups
710        .keys()
711        .filter(|uri| uri.starts_with("http://") || uri.starts_with("https://"))
712        .collect();
713
714    let prefetched = {
715        let _prefetch_span =
716            tracing::info_span!("prefetch_schemas", count = remote_uris.len()).entered();
717
718        let mut schema_tasks = tokio::task::JoinSet::new();
719        for uri in remote_uris {
720            let r = retriever.clone();
721            let u = uri.clone();
722            schema_tasks.spawn(async move {
723                let result = r.fetch(&u).await;
724                (u, result)
725            });
726        }
727
728        let mut prefetched: HashMap<String, Result<(Value, CacheStatus), String>> = HashMap::new();
729        while let Some(result) = schema_tasks.join_next().await {
730            match result {
731                Ok((uri, fetch_result)) => {
732                    prefetched.insert(uri, fetch_result.map_err(|e| e.to_string()));
733                }
734                Err(e) => eprintln!("warning: schema prefetch task failed: {e}"),
735            }
736        }
737
738        prefetched
739    };
740
741    // Phase 2: Compile each schema once and validate all matching files
742    let mut local_schema_cache: HashMap<String, Value> = HashMap::new();
743    let mut fetch_time = std::time::Duration::ZERO;
744    let mut hash_time = std::time::Duration::ZERO;
745    let mut vcache_time = std::time::Duration::ZERO;
746    let mut compile_time = std::time::Duration::ZERO;
747    let mut validate_time = std::time::Duration::ZERO;
748
749    for (schema_uri, group) in &schema_groups {
750        let _group_span = tracing::debug_span!(
751            "schema_group",
752            schema = schema_uri.as_str(),
753            files = group.len(),
754        )
755        .entered();
756
757        // If ANY file in the group matches a `validate_formats = false` override,
758        // disable format validation for the whole group (they share one compiled validator).
759        let validate_formats = group.iter().all(|pf| {
760            config
761                .should_validate_formats(&pf.path, &[&pf.original_schema_uri, schema_uri.as_str()])
762        });
763
764        // Remote schemas were prefetched in parallel above; local schemas are
765        // read from disk here (with in-memory caching).
766        let t = std::time::Instant::now();
767        let Some((schema_value, cache_status)) = fetch_schema_from_prefetched(
768            schema_uri,
769            &prefetched,
770            &mut local_schema_cache,
771            group,
772            &mut errors,
773            &mut checked,
774            &mut on_check,
775        ) else {
776            fetch_time += t.elapsed();
777            continue;
778        };
779        fetch_time += t.elapsed();
780
781        // Pre-compute schema hash once for the entire group.
782        let t = std::time::Instant::now();
783        let schema_hash = validation_cache::schema_hash(&schema_value);
784        hash_time += t.elapsed();
785
786        // Split the group into validation cache hits and misses.
787        let mut cache_misses: Vec<&ParsedFile> = Vec::new();
788
789        let t = std::time::Instant::now();
790        for pf in group {
791            let (cached, vcache_status) = vcache
792                .lookup(&pf.content, &schema_hash, validate_formats)
793                .await;
794
795            if let Some(cached_errors) = cached {
796                push_error_pairs(pf, &cached_errors, &mut errors);
797                let cf = CheckedFile {
798                    path: pf.path.clone(),
799                    schema: schema_uri.clone(),
800                    cache_status,
801                    validation_cache_status: Some(vcache_status),
802                };
803                on_check(&cf);
804                checked.push(cf);
805            } else {
806                cache_misses.push(pf);
807            }
808        }
809        vcache_time += t.elapsed();
810
811        tracing::debug!(
812            cache_hits = group.len() - cache_misses.len(),
813            cache_misses = cache_misses.len(),
814            "validation cache"
815        );
816
817        // If all files hit the validation cache, skip schema compilation entirely.
818        if cache_misses.is_empty() {
819            continue;
820        }
821
822        // Compile the schema for cache misses.
823        let t = std::time::Instant::now();
824        let validator = {
825            match jsonschema::async_options()
826                .with_retriever(retriever.clone())
827                .should_validate_formats(validate_formats)
828                .build(&schema_value)
829                .await
830            {
831                Ok(v) => v,
832                Err(e) => {
833                    compile_time += t.elapsed();
834                    // When format validation is disabled and the compilation error
835                    // is a uri-reference issue (e.g. Rust-style $ref paths in
836                    // vector.json), skip validation silently.
837                    if !validate_formats && e.to_string().contains("uri-reference") {
838                        mark_group_checked(
839                            schema_uri,
840                            cache_status,
841                            Some(ValidationCacheStatus::Miss),
842                            &cache_misses,
843                            &mut checked,
844                            &mut on_check,
845                        );
846                        continue;
847                    }
848                    report_group_error(
849                        &format!("failed to compile schema: {e}"),
850                        schema_uri,
851                        cache_status,
852                        &cache_misses,
853                        &mut errors,
854                        &mut checked,
855                        &mut on_check,
856                    );
857                    continue;
858                }
859            }
860        };
861        compile_time += t.elapsed();
862
863        let t = std::time::Instant::now();
864        validate_group(
865            &validator,
866            schema_uri,
867            &schema_hash,
868            validate_formats,
869            cache_status,
870            &cache_misses,
871            &vcache,
872            &mut errors,
873            &mut checked,
874            &mut on_check,
875        )
876        .await;
877        validate_time += t.elapsed();
878    }
879
880    #[allow(clippy::cast_possible_truncation)]
881    {
882        tracing::info!(
883            fetch_ms = fetch_time.as_millis() as u64,
884            hash_ms = hash_time.as_millis() as u64,
885            vcache_ms = vcache_time.as_millis() as u64,
886            compile_ms = compile_time.as_millis() as u64,
887            validate_ms = validate_time.as_millis() as u64,
888            "phase2 breakdown"
889        );
890    }
891
892    // Sort errors for deterministic output (by path, then by span offset)
893    errors.sort_by(|a, b| {
894        a.path()
895            .cmp(b.path())
896            .then_with(|| a.offset().cmp(&b.offset()))
897    });
898
899    Ok(ValidateResult { errors, checked })
900}
901
902#[cfg(test)]
903mod tests {
904    use super::*;
905    use crate::retriever::HttpClient;
906    use std::collections::HashMap;
907    use std::error::Error;
908    use std::path::Path;
909
910    #[derive(Clone)]
911    struct MockClient(HashMap<String, String>);
912
913    #[async_trait::async_trait]
914    impl HttpClient for MockClient {
915        async fn get(&self, uri: &str) -> Result<String, Box<dyn Error + Send + Sync>> {
916            self.0
917                .get(uri)
918                .cloned()
919                .ok_or_else(|| format!("mock: no response for {uri}").into())
920        }
921    }
922
923    fn mock(entries: &[(&str, &str)]) -> MockClient {
924        MockClient(
925            entries
926                .iter()
927                .map(|(k, v)| (k.to_string(), v.to_string()))
928                .collect(),
929        )
930    }
931
932    fn testdata() -> PathBuf {
933        Path::new(env!("CARGO_MANIFEST_DIR")).join("testdata")
934    }
935
936    /// Build glob patterns that scan one or more testdata directories for all supported file types.
937    fn scenario_globs(dirs: &[&str]) -> Vec<String> {
938        dirs.iter()
939            .flat_map(|dir| {
940                let base = testdata().join(dir);
941                vec![
942                    base.join("*.json").to_string_lossy().to_string(),
943                    base.join("*.yaml").to_string_lossy().to_string(),
944                    base.join("*.yml").to_string_lossy().to_string(),
945                    base.join("*.json5").to_string_lossy().to_string(),
946                    base.join("*.jsonc").to_string_lossy().to_string(),
947                    base.join("*.toml").to_string_lossy().to_string(),
948                ]
949            })
950            .collect()
951    }
952
953    fn args_for_dirs(dirs: &[&str]) -> ValidateArgs {
954        ValidateArgs {
955            globs: scenario_globs(dirs),
956            exclude: vec![],
957            cache_dir: None,
958            force_schema_fetch: true,
959            force_validation: true,
960            no_catalog: true,
961            config_dir: None,
962            schema_cache_ttl: None,
963        }
964    }
965
    /// Minimal JSON Schema shared by the tests: an object that requires a `name`
    /// property, and whose `name` must be a string.
    const SCHEMA: &str =
        r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
968
969    fn schema_mock() -> MockClient {
970        mock(&[("https://example.com/schema.json", SCHEMA)])
971    }
972
973    // --- Directory scanning tests ---
974
975    #[tokio::test]
976    async fn no_matching_files() -> anyhow::Result<()> {
977        let tmp = tempfile::tempdir()?;
978        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
979        let c = ValidateArgs {
980            globs: vec![pattern],
981            exclude: vec![],
982            cache_dir: None,
983            force_schema_fetch: true,
984            force_validation: true,
985            no_catalog: true,
986            config_dir: None,
987            schema_cache_ttl: None,
988        };
989        let result = run(&c, mock(&[])).await?;
990        assert!(!result.has_errors());
991        Ok(())
992    }
993
994    #[tokio::test]
995    async fn dir_all_valid() -> anyhow::Result<()> {
996        let c = args_for_dirs(&["positive_tests"]);
997        let result = run(&c, schema_mock()).await?;
998        assert!(!result.has_errors());
999        Ok(())
1000    }
1001
1002    #[tokio::test]
1003    async fn dir_all_invalid() -> anyhow::Result<()> {
1004        let c = args_for_dirs(&["negative_tests"]);
1005        let result = run(&c, schema_mock()).await?;
1006        assert!(result.has_errors());
1007        Ok(())
1008    }
1009
1010    #[tokio::test]
1011    async fn dir_mixed_valid_and_invalid() -> anyhow::Result<()> {
1012        let c = args_for_dirs(&["positive_tests", "negative_tests"]);
1013        let result = run(&c, schema_mock()).await?;
1014        assert!(result.has_errors());
1015        Ok(())
1016    }
1017
1018    #[tokio::test]
1019    async fn dir_no_schemas_skipped() -> anyhow::Result<()> {
1020        let c = args_for_dirs(&["no_schema"]);
1021        let result = run(&c, mock(&[])).await?;
1022        assert!(!result.has_errors());
1023        Ok(())
1024    }
1025
1026    #[tokio::test]
1027    async fn dir_valid_with_no_schema_files() -> anyhow::Result<()> {
1028        let c = args_for_dirs(&["positive_tests", "no_schema"]);
1029        let result = run(&c, schema_mock()).await?;
1030        assert!(!result.has_errors());
1031        Ok(())
1032    }
1033
1034    // --- Directory as positional arg ---
1035
1036    #[tokio::test]
1037    async fn directory_arg_discovers_files() -> anyhow::Result<()> {
1038        let dir = testdata().join("positive_tests");
1039        let c = ValidateArgs {
1040            globs: vec![dir.to_string_lossy().to_string()],
1041            exclude: vec![],
1042            cache_dir: None,
1043            force_schema_fetch: true,
1044            force_validation: true,
1045            no_catalog: true,
1046            config_dir: None,
1047            schema_cache_ttl: None,
1048        };
1049        let result = run(&c, schema_mock()).await?;
1050        assert!(!result.has_errors());
1051        assert!(result.files_checked() > 0);
1052        Ok(())
1053    }
1054
1055    #[tokio::test]
1056    async fn multiple_directory_args() -> anyhow::Result<()> {
1057        let pos_dir = testdata().join("positive_tests");
1058        let no_schema_dir = testdata().join("no_schema");
1059        let c = ValidateArgs {
1060            globs: vec![
1061                pos_dir.to_string_lossy().to_string(),
1062                no_schema_dir.to_string_lossy().to_string(),
1063            ],
1064            exclude: vec![],
1065            cache_dir: None,
1066            force_schema_fetch: true,
1067            force_validation: true,
1068            no_catalog: true,
1069            config_dir: None,
1070            schema_cache_ttl: None,
1071        };
1072        let result = run(&c, schema_mock()).await?;
1073        assert!(!result.has_errors());
1074        Ok(())
1075    }
1076
1077    #[tokio::test]
1078    async fn mix_directory_and_glob_args() -> anyhow::Result<()> {
1079        let dir = testdata().join("positive_tests");
1080        let glob_pattern = testdata()
1081            .join("no_schema")
1082            .join("*.json")
1083            .to_string_lossy()
1084            .to_string();
1085        let c = ValidateArgs {
1086            globs: vec![dir.to_string_lossy().to_string(), glob_pattern],
1087            exclude: vec![],
1088            cache_dir: None,
1089            force_schema_fetch: true,
1090            force_validation: true,
1091            no_catalog: true,
1092            config_dir: None,
1093            schema_cache_ttl: None,
1094        };
1095        let result = run(&c, schema_mock()).await?;
1096        assert!(!result.has_errors());
1097        Ok(())
1098    }
1099
1100    #[tokio::test]
1101    async fn malformed_json_parse_error() -> anyhow::Result<()> {
1102        let base = testdata().join("malformed");
1103        let c = ValidateArgs {
1104            globs: vec![base.join("*.json").to_string_lossy().to_string()],
1105            exclude: vec![],
1106            cache_dir: None,
1107            force_schema_fetch: true,
1108            force_validation: true,
1109            no_catalog: true,
1110            config_dir: None,
1111            schema_cache_ttl: None,
1112        };
1113        let result = run(&c, mock(&[])).await?;
1114        assert!(result.has_errors());
1115        Ok(())
1116    }
1117
1118    #[tokio::test]
1119    async fn malformed_yaml_parse_error() -> anyhow::Result<()> {
1120        let base = testdata().join("malformed");
1121        let c = ValidateArgs {
1122            globs: vec![base.join("*.yaml").to_string_lossy().to_string()],
1123            exclude: vec![],
1124            cache_dir: None,
1125            force_schema_fetch: true,
1126            force_validation: true,
1127            no_catalog: true,
1128            config_dir: None,
1129            schema_cache_ttl: None,
1130        };
1131        let result = run(&c, mock(&[])).await?;
1132        assert!(result.has_errors());
1133        Ok(())
1134    }
1135
1136    // --- Exclude filter ---
1137
1138    #[tokio::test]
1139    async fn exclude_filters_files_in_dir() -> anyhow::Result<()> {
1140        let base = testdata().join("negative_tests");
1141        let c = ValidateArgs {
1142            globs: scenario_globs(&["positive_tests", "negative_tests"]),
1143            exclude: vec![
1144                base.join("missing_name.json").to_string_lossy().to_string(),
1145                base.join("missing_name.toml").to_string_lossy().to_string(),
1146                base.join("missing_name.yaml").to_string_lossy().to_string(),
1147            ],
1148            cache_dir: None,
1149            force_schema_fetch: true,
1150            force_validation: true,
1151            no_catalog: true,
1152            config_dir: None,
1153            schema_cache_ttl: None,
1154        };
1155        let result = run(&c, schema_mock()).await?;
1156        assert!(!result.has_errors());
1157        Ok(())
1158    }
1159
1160    // --- Cache options ---
1161
1162    #[tokio::test]
1163    async fn custom_cache_dir() -> anyhow::Result<()> {
1164        let cache_tmp = tempfile::tempdir()?;
1165        let c = ValidateArgs {
1166            globs: scenario_globs(&["positive_tests"]),
1167            exclude: vec![],
1168            cache_dir: Some(cache_tmp.path().to_string_lossy().to_string()),
1169            force_schema_fetch: true,
1170            force_validation: true,
1171            no_catalog: true,
1172            config_dir: None,
1173            schema_cache_ttl: None,
1174        };
1175        let result = run(&c, schema_mock()).await?;
1176        assert!(!result.has_errors());
1177
1178        // Schema was fetched once and cached
1179        let entries: Vec<_> = fs::read_dir(cache_tmp.path())?.collect();
1180        assert_eq!(entries.len(), 1);
1181        Ok(())
1182    }
1183
1184    // --- Local schema ---
1185
1186    #[tokio::test]
1187    async fn json_valid_with_local_schema() -> anyhow::Result<()> {
1188        let tmp = tempfile::tempdir()?;
1189        let schema_path = tmp.path().join("schema.json");
1190        fs::write(&schema_path, SCHEMA)?;
1191
1192        let f = tmp.path().join("valid.json");
1193        fs::write(
1194            &f,
1195            format!(
1196                r#"{{"$schema":"{}","name":"hello"}}"#,
1197                schema_path.to_string_lossy()
1198            ),
1199        )?;
1200
1201        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1202        let c = ValidateArgs {
1203            globs: vec![pattern],
1204            exclude: vec![],
1205            cache_dir: None,
1206            force_schema_fetch: true,
1207            force_validation: true,
1208            no_catalog: true,
1209            config_dir: None,
1210            schema_cache_ttl: None,
1211        };
1212        let result = run(&c, mock(&[])).await?;
1213        assert!(!result.has_errors());
1214        Ok(())
1215    }
1216
1217    #[tokio::test]
1218    async fn yaml_valid_with_local_schema() -> anyhow::Result<()> {
1219        let tmp = tempfile::tempdir()?;
1220        let schema_path = tmp.path().join("schema.json");
1221        fs::write(&schema_path, SCHEMA)?;
1222
1223        let f = tmp.path().join("valid.yaml");
1224        fs::write(
1225            &f,
1226            format!(
1227                "# yaml-language-server: $schema={}\nname: hello\n",
1228                schema_path.to_string_lossy()
1229            ),
1230        )?;
1231
1232        let pattern = tmp.path().join("*.yaml").to_string_lossy().to_string();
1233        let c = ValidateArgs {
1234            globs: vec![pattern],
1235            exclude: vec![],
1236            cache_dir: None,
1237            force_schema_fetch: true,
1238            force_validation: true,
1239            no_catalog: true,
1240            config_dir: None,
1241            schema_cache_ttl: None,
1242        };
1243        let result = run(&c, mock(&[])).await?;
1244        assert!(!result.has_errors());
1245        Ok(())
1246    }
1247
1248    #[tokio::test]
1249    async fn missing_local_schema_errors() -> anyhow::Result<()> {
1250        let tmp = tempfile::tempdir()?;
1251        let f = tmp.path().join("ref.json");
1252        fs::write(&f, r#"{"$schema":"/nonexistent/schema.json"}"#)?;
1253
1254        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1255        let c = ValidateArgs {
1256            globs: vec![pattern],
1257            exclude: vec![],
1258            cache_dir: None,
1259            force_schema_fetch: true,
1260            force_validation: true,
1261            no_catalog: true,
1262            config_dir: None,
1263            schema_cache_ttl: None,
1264        };
1265        let result = run(&c, mock(&[])).await?;
1266        assert!(result.has_errors());
1267        Ok(())
1268    }
1269
1270    // --- JSON5 / JSONC tests ---
1271
1272    #[tokio::test]
1273    async fn json5_valid_with_schema() -> anyhow::Result<()> {
1274        let tmp = tempfile::tempdir()?;
1275        let schema_path = tmp.path().join("schema.json");
1276        fs::write(&schema_path, SCHEMA)?;
1277
1278        let f = tmp.path().join("config.json5");
1279        fs::write(
1280            &f,
1281            format!(
1282                r#"{{
1283  // JSON5 comment
1284  "$schema": "{}",
1285  name: "hello",
1286}}"#,
1287                schema_path.to_string_lossy()
1288            ),
1289        )?;
1290
1291        let pattern = tmp.path().join("*.json5").to_string_lossy().to_string();
1292        let c = ValidateArgs {
1293            globs: vec![pattern],
1294            exclude: vec![],
1295            cache_dir: None,
1296            force_schema_fetch: true,
1297            force_validation: true,
1298            no_catalog: true,
1299            config_dir: None,
1300            schema_cache_ttl: None,
1301        };
1302        let result = run(&c, mock(&[])).await?;
1303        assert!(!result.has_errors());
1304        Ok(())
1305    }
1306
1307    #[tokio::test]
1308    async fn jsonc_valid_with_schema() -> anyhow::Result<()> {
1309        let tmp = tempfile::tempdir()?;
1310        let schema_path = tmp.path().join("schema.json");
1311        fs::write(&schema_path, SCHEMA)?;
1312
1313        let f = tmp.path().join("config.jsonc");
1314        fs::write(
1315            &f,
1316            format!(
1317                r#"{{
1318  /* JSONC comment */
1319  "$schema": "{}",
1320  "name": "hello"
1321}}"#,
1322                schema_path.to_string_lossy()
1323            ),
1324        )?;
1325
1326        let pattern = tmp.path().join("*.jsonc").to_string_lossy().to_string();
1327        let c = ValidateArgs {
1328            globs: vec![pattern],
1329            exclude: vec![],
1330            cache_dir: None,
1331            force_schema_fetch: true,
1332            force_validation: true,
1333            no_catalog: true,
1334            config_dir: None,
1335            schema_cache_ttl: None,
1336        };
1337        let result = run(&c, mock(&[])).await?;
1338        assert!(!result.has_errors());
1339        Ok(())
1340    }
1341
1342    // --- Catalog-based schema matching ---
1343
    /// Reduced stand-in for the GitHub workflow schema served by the mock client:
    /// an object that requires `on` and `jobs`, where `name` must be a string,
    /// `jobs` must be an object, and `on` is unconstrained.
    const GH_WORKFLOW_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "name": { "type": "string" },
            "on": {},
            "jobs": { "type": "object" }
        },
        "required": ["on", "jobs"]
    }"#;
1353
1354    fn gh_catalog_json() -> String {
1355        r#"{"schemas":[{
1356            "name": "GitHub Workflow",
1357            "url": "https://www.schemastore.org/github-workflow.json",
1358            "fileMatch": [
1359                "**/.github/workflows/*.yml",
1360                "**/.github/workflows/*.yaml"
1361            ]
1362        }]}"#
1363            .to_string()
1364    }
1365
1366    #[tokio::test]
1367    async fn catalog_matches_github_workflow_valid() -> anyhow::Result<()> {
1368        let tmp = tempfile::tempdir()?;
1369        let wf_dir = tmp.path().join(".github/workflows");
1370        fs::create_dir_all(&wf_dir)?;
1371        fs::write(
1372            wf_dir.join("ci.yml"),
1373            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1374        )?;
1375
1376        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1377        let client = mock(&[
1378            (
1379                "https://www.schemastore.org/api/json/catalog.json",
1380                &gh_catalog_json(),
1381            ),
1382            (
1383                "https://www.schemastore.org/github-workflow.json",
1384                GH_WORKFLOW_SCHEMA,
1385            ),
1386        ]);
1387        let c = ValidateArgs {
1388            globs: vec![pattern],
1389            exclude: vec![],
1390            cache_dir: None,
1391            force_schema_fetch: true,
1392            force_validation: true,
1393            no_catalog: false,
1394            config_dir: None,
1395            schema_cache_ttl: None,
1396        };
1397        let result = run(&c, client).await?;
1398        assert!(!result.has_errors());
1399        Ok(())
1400    }
1401
1402    #[tokio::test]
1403    async fn catalog_matches_github_workflow_invalid() -> anyhow::Result<()> {
1404        let tmp = tempfile::tempdir()?;
1405        let wf_dir = tmp.path().join(".github/workflows");
1406        fs::create_dir_all(&wf_dir)?;
1407        fs::write(wf_dir.join("bad.yml"), "name: Broken\n")?;
1408
1409        let pattern = wf_dir.join("*.yml").to_string_lossy().to_string();
1410        let client = mock(&[
1411            (
1412                "https://www.schemastore.org/api/json/catalog.json",
1413                &gh_catalog_json(),
1414            ),
1415            (
1416                "https://www.schemastore.org/github-workflow.json",
1417                GH_WORKFLOW_SCHEMA,
1418            ),
1419        ]);
1420        let c = ValidateArgs {
1421            globs: vec![pattern],
1422            exclude: vec![],
1423            cache_dir: None,
1424            force_schema_fetch: true,
1425            force_validation: true,
1426            no_catalog: false,
1427            config_dir: None,
1428            schema_cache_ttl: None,
1429        };
1430        let result = run(&c, client).await?;
1431        assert!(result.has_errors());
1432        Ok(())
1433    }
1434
1435    #[tokio::test]
1436    async fn auto_discover_finds_github_workflows() -> anyhow::Result<()> {
1437        let tmp = tempfile::tempdir()?;
1438        let wf_dir = tmp.path().join(".github/workflows");
1439        fs::create_dir_all(&wf_dir)?;
1440        fs::write(
1441            wf_dir.join("ci.yml"),
1442            "name: CI\non: push\njobs:\n  build:\n    runs-on: ubuntu-latest\n    steps: []\n",
1443        )?;
1444
1445        let client = mock(&[
1446            (
1447                "https://www.schemastore.org/api/json/catalog.json",
1448                &gh_catalog_json(),
1449            ),
1450            (
1451                "https://www.schemastore.org/github-workflow.json",
1452                GH_WORKFLOW_SCHEMA,
1453            ),
1454        ]);
1455        let c = ValidateArgs {
1456            globs: vec![],
1457            exclude: vec![],
1458            cache_dir: None,
1459            force_schema_fetch: true,
1460            force_validation: true,
1461            no_catalog: false,
1462            config_dir: None,
1463            schema_cache_ttl: None,
1464        };
1465
1466        let orig_dir = std::env::current_dir()?;
1467        std::env::set_current_dir(tmp.path())?;
1468        let result = run(&c, client).await?;
1469        std::env::set_current_dir(orig_dir)?;
1470
1471        assert!(!result.has_errors());
1472        Ok(())
1473    }
1474
1475    // --- TOML tests ---
1476
1477    #[tokio::test]
1478    async fn toml_valid_with_schema() -> anyhow::Result<()> {
1479        let tmp = tempfile::tempdir()?;
1480        let schema_path = tmp.path().join("schema.json");
1481        fs::write(&schema_path, SCHEMA)?;
1482
1483        let f = tmp.path().join("config.toml");
1484        fs::write(
1485            &f,
1486            format!(
1487                "# :schema {}\nname = \"hello\"\n",
1488                schema_path.to_string_lossy()
1489            ),
1490        )?;
1491
1492        let pattern = tmp.path().join("*.toml").to_string_lossy().to_string();
1493        let c = ValidateArgs {
1494            globs: vec![pattern],
1495            exclude: vec![],
1496            cache_dir: None,
1497            force_schema_fetch: true,
1498            force_validation: true,
1499            no_catalog: true,
1500            config_dir: None,
1501            schema_cache_ttl: None,
1502        };
1503        let result = run(&c, mock(&[])).await?;
1504        assert!(!result.has_errors());
1505        Ok(())
1506    }
1507
1508    // --- Rewrite rules + // resolution ---
1509
1510    #[tokio::test]
1511    async fn rewrite_rule_with_double_slash_resolves_schema() -> anyhow::Result<()> {
1512        let tmp = tempfile::tempdir()?;
1513
1514        let schemas_dir = tmp.path().join("schemas");
1515        fs::create_dir_all(&schemas_dir)?;
1516        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1517
1518        fs::write(
1519            tmp.path().join("lintel.toml"),
1520            r#"
1521[rewrite]
1522"http://localhost:9000/" = "//schemas/"
1523"#,
1524        )?;
1525
1526        let f = tmp.path().join("config.json");
1527        fs::write(
1528            &f,
1529            r#"{"$schema":"http://localhost:9000/test.json","name":"hello"}"#,
1530        )?;
1531
1532        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1533        let c = ValidateArgs {
1534            globs: vec![pattern],
1535            exclude: vec![],
1536            cache_dir: None,
1537            force_schema_fetch: true,
1538            force_validation: true,
1539            no_catalog: true,
1540            config_dir: Some(tmp.path().to_path_buf()),
1541            schema_cache_ttl: None,
1542        };
1543
1544        let result = run(&c, mock(&[])).await?;
1545        assert!(!result.has_errors());
1546        assert_eq!(result.files_checked(), 2); // lintel.toml + config.json
1547        Ok(())
1548    }
1549
1550    #[tokio::test]
1551    async fn double_slash_schema_resolves_relative_to_config() -> anyhow::Result<()> {
1552        let tmp = tempfile::tempdir()?;
1553
1554        let schemas_dir = tmp.path().join("schemas");
1555        fs::create_dir_all(&schemas_dir)?;
1556        fs::write(schemas_dir.join("test.json"), SCHEMA)?;
1557
1558        fs::write(tmp.path().join("lintel.toml"), "")?;
1559
1560        let sub = tmp.path().join("deeply/nested");
1561        fs::create_dir_all(&sub)?;
1562        let f = sub.join("config.json");
1563        fs::write(&f, r#"{"$schema":"//schemas/test.json","name":"hello"}"#)?;
1564
1565        let pattern = sub.join("*.json").to_string_lossy().to_string();
1566        let c = ValidateArgs {
1567            globs: vec![pattern],
1568            exclude: vec![],
1569            cache_dir: None,
1570            force_schema_fetch: true,
1571            force_validation: true,
1572            no_catalog: true,
1573            config_dir: Some(tmp.path().to_path_buf()),
1574            schema_cache_ttl: None,
1575        };
1576
1577        let result = run(&c, mock(&[])).await?;
1578        assert!(!result.has_errors());
1579        Ok(())
1580    }
1581
1582    // --- Format validation override ---
1583
    /// Schema for the format-validation tests: an object whose optional `link`
    /// property is a string with `format: uri-reference`.
    const FORMAT_SCHEMA: &str = r#"{
        "type": "object",
        "properties": {
            "link": { "type": "string", "format": "uri-reference" }
        }
    }"#;
1590
1591    #[tokio::test]
1592    async fn format_errors_reported_without_override() -> anyhow::Result<()> {
1593        let tmp = tempfile::tempdir()?;
1594        let schema_path = tmp.path().join("schema.json");
1595        fs::write(&schema_path, FORMAT_SCHEMA)?;
1596
1597        let f = tmp.path().join("data.json");
1598        fs::write(
1599            &f,
1600            format!(
1601                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1602                schema_path.to_string_lossy()
1603            ),
1604        )?;
1605
1606        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1607        let c = ValidateArgs {
1608            globs: vec![pattern],
1609            exclude: vec![],
1610            cache_dir: None,
1611            force_schema_fetch: true,
1612            force_validation: true,
1613            no_catalog: true,
1614            config_dir: Some(tmp.path().to_path_buf()),
1615            schema_cache_ttl: None,
1616        };
1617        let result = run(&c, mock(&[])).await?;
1618        assert!(
1619            result.has_errors(),
1620            "expected format error without override"
1621        );
1622        Ok(())
1623    }
1624
1625    #[tokio::test]
1626    async fn format_errors_suppressed_with_override() -> anyhow::Result<()> {
1627        let tmp = tempfile::tempdir()?;
1628        let schema_path = tmp.path().join("schema.json");
1629        fs::write(&schema_path, FORMAT_SCHEMA)?;
1630
1631        let f = tmp.path().join("data.json");
1632        fs::write(
1633            &f,
1634            format!(
1635                r#"{{"$schema":"{}","link":"not a valid {{uri}}"}}"#,
1636                schema_path.to_string_lossy()
1637            ),
1638        )?;
1639
1640        // Use **/data.json to match the absolute path from the tempdir.
1641        fs::write(
1642            tmp.path().join("lintel.toml"),
1643            r#"
1644[[override]]
1645files = ["**/data.json"]
1646validate_formats = false
1647"#,
1648        )?;
1649
1650        let pattern = tmp.path().join("data.json").to_string_lossy().to_string();
1651        let c = ValidateArgs {
1652            globs: vec![pattern],
1653            exclude: vec![],
1654            cache_dir: None,
1655            force_schema_fetch: true,
1656            force_validation: true,
1657            no_catalog: true,
1658            config_dir: Some(tmp.path().to_path_buf()),
1659            schema_cache_ttl: None,
1660        };
1661        let result = run(&c, mock(&[])).await?;
1662        assert!(
1663            !result.has_errors(),
1664            "expected no errors with validate_formats = false override"
1665        );
1666        Ok(())
1667    }
1668
1669    // --- Unrecognized extension handling ---
1670
1671    #[tokio::test]
1672    async fn unrecognized_extension_skipped_without_catalog() -> anyhow::Result<()> {
1673        let tmp = tempfile::tempdir()?;
1674        fs::write(tmp.path().join("config.nix"), r#"{"name":"hello"}"#)?;
1675
1676        let pattern = tmp.path().join("config.nix").to_string_lossy().to_string();
1677        let c = ValidateArgs {
1678            globs: vec![pattern],
1679            exclude: vec![],
1680            cache_dir: None,
1681            force_schema_fetch: true,
1682            force_validation: true,
1683            no_catalog: true,
1684            config_dir: Some(tmp.path().to_path_buf()),
1685            schema_cache_ttl: None,
1686        };
1687        let result = run(&c, mock(&[])).await?;
1688        assert!(!result.has_errors());
1689        assert_eq!(result.files_checked(), 0);
1690        Ok(())
1691    }
1692
1693    #[tokio::test]
1694    async fn unrecognized_extension_parsed_when_catalog_matches() -> anyhow::Result<()> {
1695        let tmp = tempfile::tempdir()?;
1696        // File has .cfg extension (unrecognized) but content is valid JSON
1697        fs::write(
1698            tmp.path().join("myapp.cfg"),
1699            r#"{"name":"hello","on":"push","jobs":{"build":{}}}"#,
1700        )?;
1701
1702        let catalog_json = r#"{"schemas":[{
1703            "name": "MyApp Config",
1704            "url": "https://example.com/myapp.schema.json",
1705            "fileMatch": ["*.cfg"]
1706        }]}"#;
1707        let schema =
1708            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1709
1710        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1711        let client = mock(&[
1712            (
1713                "https://www.schemastore.org/api/json/catalog.json",
1714                catalog_json,
1715            ),
1716            ("https://example.com/myapp.schema.json", schema),
1717        ]);
1718        let c = ValidateArgs {
1719            globs: vec![pattern],
1720            exclude: vec![],
1721            cache_dir: None,
1722            force_schema_fetch: true,
1723            force_validation: true,
1724            no_catalog: false,
1725            config_dir: Some(tmp.path().to_path_buf()),
1726            schema_cache_ttl: None,
1727        };
1728        let result = run(&c, client).await?;
1729        assert!(!result.has_errors());
1730        assert_eq!(result.files_checked(), 1);
1731        Ok(())
1732    }
1733
1734    #[tokio::test]
1735    async fn unrecognized_extension_unparseable_skipped() -> anyhow::Result<()> {
1736        let tmp = tempfile::tempdir()?;
1737        // File matches catalog but content isn't parseable by any format
1738        fs::write(
1739            tmp.path().join("myapp.cfg"),
1740            "{ pkgs, ... }: { packages = [ pkgs.git ]; }",
1741        )?;
1742
1743        let catalog_json = r#"{"schemas":[{
1744            "name": "MyApp Config",
1745            "url": "https://example.com/myapp.schema.json",
1746            "fileMatch": ["*.cfg"]
1747        }]}"#;
1748
1749        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1750        let client = mock(&[(
1751            "https://www.schemastore.org/api/json/catalog.json",
1752            catalog_json,
1753        )]);
1754        let c = ValidateArgs {
1755            globs: vec![pattern],
1756            exclude: vec![],
1757            cache_dir: None,
1758            force_schema_fetch: true,
1759            force_validation: true,
1760            no_catalog: false,
1761            config_dir: Some(tmp.path().to_path_buf()),
1762            schema_cache_ttl: None,
1763        };
1764        let result = run(&c, client).await?;
1765        assert!(!result.has_errors());
1766        assert_eq!(result.files_checked(), 0);
1767        Ok(())
1768    }
1769
1770    #[tokio::test]
1771    async fn unrecognized_extension_invalid_against_schema() -> anyhow::Result<()> {
1772        let tmp = tempfile::tempdir()?;
1773        // File has .cfg extension, content is valid JSON but fails schema validation
1774        fs::write(tmp.path().join("myapp.cfg"), r#"{"wrong":"field"}"#)?;
1775
1776        let catalog_json = r#"{"schemas":[{
1777            "name": "MyApp Config",
1778            "url": "https://example.com/myapp.schema.json",
1779            "fileMatch": ["*.cfg"]
1780        }]}"#;
1781        let schema =
1782            r#"{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}"#;
1783
1784        let pattern = tmp.path().join("myapp.cfg").to_string_lossy().to_string();
1785        let client = mock(&[
1786            (
1787                "https://www.schemastore.org/api/json/catalog.json",
1788                catalog_json,
1789            ),
1790            ("https://example.com/myapp.schema.json", schema),
1791        ]);
1792        let c = ValidateArgs {
1793            globs: vec![pattern],
1794            exclude: vec![],
1795            cache_dir: None,
1796            force_schema_fetch: true,
1797            force_validation: true,
1798            no_catalog: false,
1799            config_dir: Some(tmp.path().to_path_buf()),
1800            schema_cache_ttl: None,
1801        };
1802        let result = run(&c, client).await?;
1803        assert!(result.has_errors());
1804        assert_eq!(result.files_checked(), 1);
1805        Ok(())
1806    }
1807
1808    // --- Validation cache ---
1809
1810    #[tokio::test]
1811    async fn validation_cache_hit_skips_revalidation() -> anyhow::Result<()> {
1812        let tmp = tempfile::tempdir()?;
1813        let schema_path = tmp.path().join("schema.json");
1814        fs::write(&schema_path, SCHEMA)?;
1815
1816        let f = tmp.path().join("valid.json");
1817        fs::write(
1818            &f,
1819            format!(
1820                r#"{{"$schema":"{}","name":"hello"}}"#,
1821                schema_path.to_string_lossy()
1822            ),
1823        )?;
1824
1825        let pattern = tmp.path().join("*.json").to_string_lossy().to_string();
1826
1827        // First run: force_validation = false so results get cached
1828        let c = ValidateArgs {
1829            globs: vec![pattern.clone()],
1830            exclude: vec![],
1831            cache_dir: None,
1832            force_schema_fetch: true,
1833            force_validation: false,
1834            no_catalog: true,
1835            config_dir: None,
1836            schema_cache_ttl: None,
1837        };
1838        let mut first_statuses = Vec::new();
1839        let result = run_with(&c, mock(&[]), |cf| {
1840            first_statuses.push(cf.validation_cache_status);
1841        })
1842        .await?;
1843        assert!(!result.has_errors());
1844        assert!(result.files_checked() > 0);
1845
1846        // Verify the first run recorded a validation cache miss
1847        assert!(
1848            first_statuses.contains(&Some(ValidationCacheStatus::Miss)),
1849            "expected at least one validation cache miss on first run"
1850        );
1851
1852        // Second run: same file, same schema — should hit validation cache
1853        let mut second_statuses = Vec::new();
1854        let result = run_with(&c, mock(&[]), |cf| {
1855            second_statuses.push(cf.validation_cache_status);
1856        })
1857        .await?;
1858        assert!(!result.has_errors());
1859
1860        // Verify the second run got a validation cache hit
1861        assert!(
1862            second_statuses.contains(&Some(ValidationCacheStatus::Hit)),
1863            "expected at least one validation cache hit on second run"
1864        );
1865        Ok(())
1866    }
1867}